
Merge branch 'master' of ../mesa into vulkan
author    Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
          Wed, 30 Sep 2015 00:10:50 +0000 (17:10 -0700)
committer Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
          Thu, 1 Oct 2015 21:24:29 +0000 (14:24 -0700)
648 files changed:
Android.common.mk
configure.ac
docs/GL3.txt
docs/autoconf.html
docs/envvars.html
docs/index.html
docs/relnotes.html
docs/relnotes/10.6.6.html [new file with mode: 0644]
docs/relnotes/10.6.7.html [new file with mode: 0644]
docs/relnotes/10.6.8.html [new file with mode: 0644]
docs/relnotes/11.0.0.html
docs/relnotes/11.0.1.html [new file with mode: 0644]
docs/relnotes/11.0.2.html [new file with mode: 0644]
docs/relnotes/11.1.0.html
docs/shading.html
docs/vmware-guest.html
include/c11/threads_posix.h
src/Makefile.am
src/egl/SConscript
src/egl/drivers/dri2/egl_dri2.c
src/egl/drivers/dri2/platform_drm.c
src/egl/drivers/dri2/platform_wayland.c
src/egl/drivers/dri2/platform_x11.c
src/egl/wayland/wayland-drm/wayland-drm.c
src/egl/wayland/wayland-egl/wayland-egl.c
src/gallium/auxiliary/Makefile.am
src/gallium/auxiliary/Makefile.sources
src/gallium/auxiliary/draw/draw_pipe_aapoint.c
src/gallium/auxiliary/draw/draw_pipe_pstipple.c
src/gallium/auxiliary/gallivm/lp_bld_const.c
src/gallium/auxiliary/nir/tgsi_to_nir.c
src/gallium/auxiliary/nir/tgsi_to_nir.h
src/gallium/auxiliary/os/os_misc.c
src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
src/gallium/auxiliary/tgsi/tgsi_aa_point.c [new file with mode: 0644]
src/gallium/auxiliary/tgsi/tgsi_aa_point.h [new file with mode: 0644]
src/gallium/auxiliary/tgsi/tgsi_exec.c
src/gallium/auxiliary/tgsi/tgsi_exec.h
src/gallium/auxiliary/tgsi/tgsi_info.c
src/gallium/auxiliary/tgsi/tgsi_point_sprite.c [new file with mode: 0644]
src/gallium/auxiliary/tgsi/tgsi_point_sprite.h [new file with mode: 0644]
src/gallium/auxiliary/tgsi/tgsi_scan.c
src/gallium/auxiliary/tgsi/tgsi_scan.h
src/gallium/auxiliary/tgsi/tgsi_transform.h
src/gallium/auxiliary/tgsi/tgsi_two_side.c [new file with mode: 0644]
src/gallium/auxiliary/tgsi/tgsi_two_side.h [new file with mode: 0644]
src/gallium/auxiliary/tgsi/tgsi_util.c
src/gallium/auxiliary/tgsi/tgsi_util.h
src/gallium/auxiliary/util/u_blitter.c
src/gallium/auxiliary/util/u_helpers.c
src/gallium/auxiliary/util/u_helpers.h
src/gallium/auxiliary/util/u_math.h
src/gallium/auxiliary/util/u_prim_restart.c [new file with mode: 0644]
src/gallium/auxiliary/util/u_prim_restart.h [new file with mode: 0644]
src/gallium/auxiliary/util/u_pstipple.c
src/gallium/auxiliary/util/u_rect.h
src/gallium/auxiliary/util/u_simple_shaders.c
src/gallium/auxiliary/util/u_simple_shaders.h
src/gallium/auxiliary/util/u_string.h
src/gallium/auxiliary/util/u_upload_mgr.c
src/gallium/auxiliary/util/u_upload_mgr.h
src/gallium/auxiliary/util/u_vbuf.c
src/gallium/docs/source/screen.rst
src/gallium/docs/source/tgsi.rst
src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
src/gallium/drivers/freedreno/a3xx/fd3_blend.c
src/gallium/drivers/freedreno/a3xx/fd3_blend.h
src/gallium/drivers/freedreno/a3xx/fd3_context.h
src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a3xx/fd3_format.c
src/gallium/drivers/freedreno/a3xx/fd3_program.c
src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
src/gallium/drivers/freedreno/a4xx/fd4_context.c
src/gallium/drivers/freedreno/a4xx/fd4_context.h
src/gallium/drivers/freedreno/a4xx/fd4_draw.c
src/gallium/drivers/freedreno/a4xx/fd4_emit.c
src/gallium/drivers/freedreno/a4xx/fd4_format.c
src/gallium/drivers/freedreno/a4xx/fd4_program.c
src/gallium/drivers/freedreno/a4xx/fd4_texture.c
src/gallium/drivers/freedreno/a4xx/fd4_texture.h
src/gallium/drivers/freedreno/adreno_common.xml.h
src/gallium/drivers/freedreno/adreno_pm4.xml.h
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_screen.c
src/gallium/drivers/freedreno/freedreno_screen.h
src/gallium/drivers/freedreno/freedreno_state.c
src/gallium/drivers/freedreno/freedreno_texture.c
src/gallium/drivers/freedreno/freedreno_texture.h
src/gallium/drivers/freedreno/freedreno_util.c
src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
src/gallium/drivers/freedreno/ir3/ir3_shader.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h
src/gallium/drivers/i915/i915_fpc_optimize.c
src/gallium/drivers/i915/i915_screen.c
src/gallium/drivers/ilo/ilo_screen.c
src/gallium/drivers/llvmpipe/lp_screen.c
src/gallium/drivers/nouveau/codegen/nv50_ir.h
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
src/gallium/drivers/nouveau/nouveau_buffer.c
src/gallium/drivers/nouveau/nouveau_buffer.h
src/gallium/drivers/nouveau/nouveau_context.h
src/gallium/drivers/nouveau/nouveau_fence.c
src/gallium/drivers/nouveau/nouveau_fence.h
src/gallium/drivers/nouveau/nv30/nv30_miptree.c
src/gallium/drivers/nouveau/nv30/nv30_resource.h
src/gallium/drivers/nouveau/nv30/nv30_screen.c
src/gallium/drivers/nouveau/nv30/nv30_screen.h
src/gallium/drivers/nouveau/nv30/nv30_transfer.c
src/gallium/drivers/nouveau/nv50/nv50_context.c
src/gallium/drivers/nouveau/nv50/nv50_context.h
src/gallium/drivers/nouveau/nv50/nv50_formats.c
src/gallium/drivers/nouveau/nv50/nv50_miptree.c
src/gallium/drivers/nouveau/nv50/nv50_program.c
src/gallium/drivers/nouveau/nv50/nv50_program.h
src/gallium/drivers/nouveau/nv50/nv50_query.c
src/gallium/drivers/nouveau/nv50/nv50_screen.c
src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
src/gallium/drivers/nouveau/nv50/nv50_tex.c
src/gallium/drivers/nouveau/nv50/nv50_transfer.c
src/gallium/drivers/nouveau/nv50/nv50_vbo.c
src/gallium/drivers/nouveau/nvc0/nvc0_context.h
src/gallium/drivers/nouveau/nvc0/nvc0_program.c
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
src/gallium/drivers/nouveau/nvc0/nvc0_state.c
src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
src/gallium/drivers/r300/r300_screen.c
src/gallium/drivers/r600/eg_asm.c
src/gallium/drivers/r600/evergreen_compute.c
src/gallium/drivers/r600/evergreen_hw_context.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/evergreend.h
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_blit.c
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_isa.h
src/gallium/drivers/r600/r600_llvm.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_shader.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c
src/gallium/drivers/r600/r600d.h
src/gallium/drivers/r600/sb/sb_bc_dump.cpp
src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
src/gallium/drivers/r600/sb/sb_bc_parser.cpp
src/gallium/drivers/radeon/cayman_msaa.c
src/gallium/drivers/radeon/r600_buffer_common.c
src/gallium/drivers/radeon/r600_cs.h
src/gallium/drivers/radeon/r600_pipe_common.c
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeon/r600_streamout.c
src/gallium/drivers/radeon/r600_texture.c
src/gallium/drivers/radeon/radeon_llvm_emit.c
src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
src/gallium/drivers/radeon/radeon_vce_40_2_2.c
src/gallium/drivers/radeon/radeon_winsys.h
src/gallium/drivers/radeonsi/Android.mk
src/gallium/drivers/radeonsi/Makefile.am
src/gallium/drivers/radeonsi/Makefile.sources
src/gallium/drivers/radeonsi/cik_sdma.c
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_commands.c [deleted file]
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_cp_dma.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_dma.c
src/gallium/drivers/radeonsi/si_hw_context.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_pm4.c
src/gallium/drivers/radeonsi/si_pm4.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_state_shaders.c
src/gallium/drivers/radeonsi/sid.h
src/gallium/drivers/softpipe/sp_screen.c
src/gallium/drivers/softpipe/sp_tex_sample.c
src/gallium/drivers/softpipe/sp_tex_sample.h
src/gallium/drivers/svga/Makefile.sources
src/gallium/drivers/svga/include/VGPU10ShaderTokens.h [new file with mode: 0644]
src/gallium/drivers/svga/include/svga3d_caps.h
src/gallium/drivers/svga/include/svga3d_cmd.h
src/gallium/drivers/svga/include/svga3d_devcaps.h
src/gallium/drivers/svga/include/svga3d_dx.h [new file with mode: 0644]
src/gallium/drivers/svga/include/svga3d_limits.h
src/gallium/drivers/svga/include/svga3d_reg.h
src/gallium/drivers/svga/include/svga3d_surfacedefs.h
src/gallium/drivers/svga/include/svga3d_types.h
src/gallium/drivers/svga/include/svga_escape.h
src/gallium/drivers/svga/include/svga_overlay.h
src/gallium/drivers/svga/include/svga_reg.h
src/gallium/drivers/svga/svga_cmd.c
src/gallium/drivers/svga/svga_cmd.h
src/gallium/drivers/svga/svga_cmd_vgpu10.c [new file with mode: 0644]
src/gallium/drivers/svga/svga_context.c
src/gallium/drivers/svga/svga_context.h
src/gallium/drivers/svga/svga_debug.h
src/gallium/drivers/svga/svga_draw.c
src/gallium/drivers/svga/svga_draw.h
src/gallium/drivers/svga/svga_draw_arrays.c
src/gallium/drivers/svga/svga_draw_elements.c
src/gallium/drivers/svga/svga_draw_private.h
src/gallium/drivers/svga/svga_format.c
src/gallium/drivers/svga/svga_format.h
src/gallium/drivers/svga/svga_link.c [new file with mode: 0644]
src/gallium/drivers/svga/svga_link.h [new file with mode: 0644]
src/gallium/drivers/svga/svga_pipe_blend.c
src/gallium/drivers/svga/svga_pipe_blit.c
src/gallium/drivers/svga/svga_pipe_clear.c
src/gallium/drivers/svga/svga_pipe_constants.c
src/gallium/drivers/svga/svga_pipe_depthstencil.c
src/gallium/drivers/svga/svga_pipe_draw.c
src/gallium/drivers/svga/svga_pipe_fs.c
src/gallium/drivers/svga/svga_pipe_gs.c [new file with mode: 0644]
src/gallium/drivers/svga/svga_pipe_misc.c
src/gallium/drivers/svga/svga_pipe_query.c
src/gallium/drivers/svga/svga_pipe_rasterizer.c
src/gallium/drivers/svga/svga_pipe_sampler.c
src/gallium/drivers/svga/svga_pipe_streamout.c [new file with mode: 0644]
src/gallium/drivers/svga/svga_pipe_vertex.c
src/gallium/drivers/svga/svga_pipe_vs.c
src/gallium/drivers/svga/svga_resource.c
src/gallium/drivers/svga/svga_resource_buffer.c
src/gallium/drivers/svga/svga_resource_buffer.h
src/gallium/drivers/svga/svga_resource_buffer_upload.c
src/gallium/drivers/svga/svga_resource_texture.c
src/gallium/drivers/svga/svga_resource_texture.h
src/gallium/drivers/svga/svga_sampler_view.c
src/gallium/drivers/svga/svga_sampler_view.h
src/gallium/drivers/svga/svga_screen.c
src/gallium/drivers/svga/svga_screen.h
src/gallium/drivers/svga/svga_screen_cache.c
src/gallium/drivers/svga/svga_screen_cache.h
src/gallium/drivers/svga/svga_shader.c
src/gallium/drivers/svga/svga_shader.h
src/gallium/drivers/svga/svga_state.c
src/gallium/drivers/svga/svga_state.h
src/gallium/drivers/svga/svga_state_constants.c
src/gallium/drivers/svga/svga_state_framebuffer.c
src/gallium/drivers/svga/svga_state_fs.c
src/gallium/drivers/svga/svga_state_gs.c [new file with mode: 0644]
src/gallium/drivers/svga/svga_state_need_swtnl.c
src/gallium/drivers/svga/svga_state_rss.c
src/gallium/drivers/svga/svga_state_sampler.c [new file with mode: 0644]
src/gallium/drivers/svga/svga_state_tgsi_transform.c [new file with mode: 0644]
src/gallium/drivers/svga/svga_state_tss.c
src/gallium/drivers/svga/svga_state_vdecl.c
src/gallium/drivers/svga/svga_state_vs.c
src/gallium/drivers/svga/svga_streamout.h [new file with mode: 0644]
src/gallium/drivers/svga/svga_surface.c
src/gallium/drivers/svga/svga_surface.h
src/gallium/drivers/svga/svga_swtnl_backend.c
src/gallium/drivers/svga/svga_swtnl_draw.c
src/gallium/drivers/svga/svga_swtnl_private.h
src/gallium/drivers/svga/svga_swtnl_state.c
src/gallium/drivers/svga/svga_tgsi.c
src/gallium/drivers/svga/svga_tgsi.h
src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
src/gallium/drivers/svga/svga_tgsi_emit.h
src/gallium/drivers/svga/svga_tgsi_insn.c
src/gallium/drivers/svga/svga_tgsi_vgpu10.c [new file with mode: 0644]
src/gallium/drivers/svga/svga_winsys.h
src/gallium/drivers/svga/svgadump/svga_dump.c
src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_nir_lower_blend.c
src/gallium/drivers/vc4/vc4_nir_lower_io.c
src/gallium/drivers/vc4/vc4_opt_dead_code.c
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu.c
src/gallium/drivers/vc4/vc4_screen.c
src/gallium/include/pipe/p_context.h
src/gallium/include/pipe/p_defines.h
src/gallium/include/pipe/p_screen.h
src/gallium/include/pipe/p_shader_tokens.h
src/gallium/state_trackers/clover/api/memory.cpp
src/gallium/state_trackers/clover/llvm/invocation.cpp
src/gallium/state_trackers/hgl/SConscript
src/gallium/state_trackers/xa/xa_composite.c
src/gallium/state_trackers/xa/xa_renderer.c
src/gallium/state_trackers/xa/xa_tracker.c
src/gallium/state_trackers/xa/xa_tracker.h
src/gallium/targets/dri/dri.sym
src/gallium/targets/xa/xa.sym
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c
src/gallium/winsys/radeon/drm/radeon_drm_bo.c
src/gallium/winsys/radeon/drm/radeon_drm_cs.c
src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
src/gallium/winsys/svga/drm/Makefile.sources
src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
src/gallium/winsys/svga/drm/svga_drm_public.h
src/gallium/winsys/svga/drm/vmw_buffer.c
src/gallium/winsys/svga/drm/vmw_buffer.h
src/gallium/winsys/svga/drm/vmw_context.c
src/gallium/winsys/svga/drm/vmw_context.h
src/gallium/winsys/svga/drm/vmw_fence.c
src/gallium/winsys/svga/drm/vmw_fence.h
src/gallium/winsys/svga/drm/vmw_query.c [new file with mode: 0644]
src/gallium/winsys/svga/drm/vmw_query.h [new file with mode: 0644]
src/gallium/winsys/svga/drm/vmw_screen.c
src/gallium/winsys/svga/drm/vmw_screen.h
src/gallium/winsys/svga/drm/vmw_screen_dri.c
src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
src/gallium/winsys/svga/drm/vmw_screen_pools.c
src/gallium/winsys/svga/drm/vmw_screen_svga.c
src/gallium/winsys/svga/drm/vmw_shader.c
src/gallium/winsys/svga/drm/vmw_shader.h
src/gallium/winsys/svga/drm/vmw_surface.c
src/gallium/winsys/svga/drm/vmw_surface.h
src/gallium/winsys/svga/drm/vmwgfx_drm.h
src/gbm/backends/dri/gbm_dri.c
src/glsl/Android.gen.mk
src/glsl/Makefile.am
src/glsl/Makefile.sources
src/glsl/ast.h
src/glsl/ast_array_index.cpp
src/glsl/ast_function.cpp
src/glsl/ast_to_hir.cpp
src/glsl/ast_type.cpp
src/glsl/builtin_functions.cpp
src/glsl/builtin_types.cpp
src/glsl/builtin_variables.cpp
src/glsl/glcpp/glcpp-parse.y
src/glsl/glsl_lexer.ll
src/glsl/glsl_parser.yy
src/glsl/glsl_parser_extras.cpp
src/glsl/glsl_parser_extras.h
src/glsl/glsl_types.cpp
src/glsl/glsl_types.h
src/glsl/ir.cpp
src/glsl/ir.h
src/glsl/ir_clone.cpp
src/glsl/ir_equals.cpp
src/glsl/ir_hv_accept.cpp
src/glsl/ir_print_visitor.cpp
src/glsl/ir_reader.cpp
src/glsl/ir_rvalue_visitor.cpp
src/glsl/ir_uniform.h
src/glsl/ir_validate.cpp
src/glsl/link_uniform_block_active_visitor.cpp
src/glsl/link_uniform_blocks.cpp
src/glsl/link_uniform_initializers.cpp
src/glsl/link_uniforms.cpp
src/glsl/link_varyings.cpp
src/glsl/linker.cpp
src/glsl/linker.h
src/glsl/lower_packed_varyings.cpp
src/glsl/lower_ubo_reference.cpp
src/glsl/lower_vertex_id.cpp
src/glsl/nir/glsl_to_nir.cpp
src/glsl/nir/nir.c
src/glsl/nir/nir.h
src/glsl/nir/nir_builder.h
src/glsl/nir/nir_control_flow.c
src/glsl/nir/nir_from_ssa.c
src/glsl/nir/nir_gs_count_vertices.c [new file with mode: 0644]
src/glsl/nir/nir_intrinsics.h
src/glsl/nir/nir_lower_alu_to_scalar.c
src/glsl/nir/nir_lower_atomics.c
src/glsl/nir/nir_lower_clip.c [new file with mode: 0644]
src/glsl/nir/nir_lower_global_vars_to_local.c
src/glsl/nir/nir_lower_gs_intrinsics.c [new file with mode: 0644]
src/glsl/nir/nir_lower_idiv.c
src/glsl/nir/nir_lower_io.c
src/glsl/nir/nir_lower_load_const_to_scalar.c
src/glsl/nir/nir_lower_locals_to_regs.c
src/glsl/nir/nir_lower_outputs_to_temporaries.c
src/glsl/nir/nir_lower_phis_to_scalar.c
src/glsl/nir/nir_lower_samplers.c [new file with mode: 0644]
src/glsl/nir/nir_lower_system_values.c
src/glsl/nir/nir_lower_tex.c [new file with mode: 0644]
src/glsl/nir/nir_lower_tex_projector.c [deleted file]
src/glsl/nir/nir_lower_two_sided_color.c [new file with mode: 0644]
src/glsl/nir/nir_lower_vars_to_ssa.c
src/glsl/nir/nir_lower_vec_to_movs.c
src/glsl/nir/nir_move_vec_src_uses_to_dest.c [new file with mode: 0644]
src/glsl/nir/nir_normalize_cubemap_coords.c
src/glsl/nir/nir_opcodes.py
src/glsl/nir/nir_opt_algebraic.py
src/glsl/nir/nir_opt_constant_folding.c
src/glsl/nir/nir_opt_cse.c
src/glsl/nir/nir_opt_dead_cf.c [new file with mode: 0644]
src/glsl/nir/nir_opt_peephole_ffma.c
src/glsl/nir/nir_opt_peephole_select.c
src/glsl/nir/nir_opt_remove_phis.c
src/glsl/nir/nir_print.c
src/glsl/nir/nir_remove_dead_variables.c
src/glsl/nir/nir_search.c
src/glsl/nir/nir_split_var_copies.c
src/glsl/nir/nir_types.cpp
src/glsl/nir/nir_types.h
src/glsl/nir/nir_validate.c
src/glsl/nir/tests/control_flow_tests.cpp [new file with mode: 0644]
src/glsl/opt_constant_propagation.cpp
src/glsl/opt_constant_variable.cpp
src/glsl/opt_dead_builtin_variables.cpp
src/glsl/opt_dead_code.cpp
src/glsl/opt_tree_grafting.cpp
src/glsl/program.h
src/glsl/shader_enums.c [new file with mode: 0644]
src/glsl/shader_enums.h
src/glsl/standalone_scaffolding.cpp
src/glx/tests/Makefile.am
src/mapi/Makefile.am
src/mapi/entry_x86-64_tls.h
src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml
src/mapi/glapi/gen/ARB_shader_storage_buffer_object.xml [new file with mode: 0644]
src/mapi/glapi/gen/GL4x.xml
src/mapi/glapi/gen/Makefile.am
src/mapi/glapi/gen/apiexec.py
src/mapi/glapi/gen/gl_API.xml
src/mapi/glapi/gen/gl_genexec.py
src/mesa/Makefile.am
src/mesa/Makefile.sources
src/mesa/drivers/common/meta.c
src/mesa/drivers/common/meta.h
src/mesa/drivers/common/meta_blit.c
src/mesa/drivers/common/meta_copy_image.c
src/mesa/drivers/common/meta_generate_mipmap.c
src/mesa/drivers/common/meta_tex_subimage.c
src/mesa/drivers/dri/common/Makefile.am
src/mesa/drivers/dri/common/utils.c
src/mesa/drivers/dri/common/xmlconfig.c
src/mesa/drivers/dri/common/xmlpool/Makefile.am
src/mesa/drivers/dri/i915/intel_batchbuffer.h
src/mesa/drivers/dri/i915/intel_mipmap_tree.c
src/mesa/drivers/dri/i915/intel_mipmap_tree.h
src/mesa/drivers/dri/i915/intel_render.c
src/mesa/drivers/dri/i915/intel_tex_image.c
src/mesa/drivers/dri/i915/intel_tex_subimage.c
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_binding_tables.c
src/mesa/drivers/dri/i965/brw_blorp.cpp
src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
src/mesa/drivers/dri/i965/brw_clear.c
src/mesa/drivers/dri/i965/brw_compute.c
src/mesa/drivers/dri/i965/brw_context.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_cs.c [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_cs.h
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_device_info.c
src/mesa/drivers/dri/i965/brw_disasm.c
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_draw.h
src/mesa/drivers/dri/i965/brw_draw_upload.c
src/mesa/drivers/dri/i965/brw_eu_compact.c
src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
src/mesa/drivers/dri/i965/brw_fs_validate.cpp [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/brw_gs.c
src/mesa/drivers/dri/i965/brw_inst.h
src/mesa/drivers/dri/i965/brw_ir_vec4.h
src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
src/mesa/drivers/dri/i965/brw_misc_state.c
src/mesa/drivers/dri/i965/brw_nir.c
src/mesa/drivers/dri/i965/brw_pipe_control.c
src/mesa/drivers/dri/i965/brw_program.c
src/mesa/drivers/dri/i965/brw_program.h
src/mesa/drivers/dri/i965/brw_reg.h
src/mesa/drivers/dri/i965/brw_sampler_state.c
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_dump.c
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_builder.h [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
src/mesa/drivers/dri/i965/brw_vs.c
src/mesa/drivers/dri/i965/brw_vs.h
src/mesa/drivers/dri/i965/brw_vs_surface_state.c
src/mesa/drivers/dri/i965/brw_vue_map.c
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/drivers/dri/i965/brw_wm_surface_state.c
src/mesa/drivers/dri/i965/gen6_blorp.cpp
src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
src/mesa/drivers/dri/i965/gen7_cs_state.c [new file with mode: 0644]
src/mesa/drivers/dri/i965/gen8_gs_state.c
src/mesa/drivers/dri/i965/gen8_misc_state.c
src/mesa/drivers/dri/i965/gen8_ps_state.c
src/mesa/drivers/dri/i965/gen8_surface_state.c
src/mesa/drivers/dri/i965/intel_batchbuffer.c
src/mesa/drivers/dri/i965/intel_batchbuffer.h
src/mesa/drivers/dri/i965/intel_blit.c
src/mesa/drivers/dri/i965/intel_blit.h
src/mesa/drivers/dri/i965/intel_buffer_objects.c
src/mesa/drivers/dri/i965/intel_buffer_objects.h
src/mesa/drivers/dri/i965/intel_buffers.c
src/mesa/drivers/dri/i965/intel_buffers.h
src/mesa/drivers/dri/i965/intel_copy_image.c
src/mesa/drivers/dri/i965/intel_debug.c
src/mesa/drivers/dri/i965/intel_debug.h
src/mesa/drivers/dri/i965/intel_extensions.c
src/mesa/drivers/dri/i965/intel_fbo.c
src/mesa/drivers/dri/i965/intel_fbo.h
src/mesa/drivers/dri/i965/intel_image.h
src/mesa/drivers/dri/i965/intel_mipmap_tree.c
src/mesa/drivers/dri/i965/intel_mipmap_tree.h
src/mesa/drivers/dri/i965/intel_pixel.c
src/mesa/drivers/dri/i965/intel_pixel.h
src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
src/mesa/drivers/dri/i965/intel_pixel_copy.c
src/mesa/drivers/dri/i965/intel_pixel_draw.c
src/mesa/drivers/dri/i965/intel_pixel_read.c
src/mesa/drivers/dri/i965/intel_reg.h
src/mesa/drivers/dri/i965/intel_screen.c
src/mesa/drivers/dri/i965/intel_screen.h
src/mesa/drivers/dri/i965/intel_state.c
src/mesa/drivers/dri/i965/intel_tex.c
src/mesa/drivers/dri/i965/intel_tex.h
src/mesa/drivers/dri/i965/intel_tex_copy.c
src/mesa/drivers/dri/i965/intel_tex_image.c
src/mesa/drivers/dri/i965/intel_tex_obj.h
src/mesa/drivers/dri/i965/intel_tex_subimage.c
src/mesa/drivers/dri/i965/intel_tex_validate.c
src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
src/mesa/drivers/dri/r200/r200_swtcl.c
src/mesa/drivers/dri/r200/r200_tcl.c
src/mesa/drivers/dri/radeon/radeon_swtcl.c
src/mesa/drivers/dri/radeon/radeon_tcl.c
src/mesa/main/api_validate.c
src/mesa/main/api_validate.h
src/mesa/main/arrayobj.c
src/mesa/main/bufferobj.c
src/mesa/main/bufferobj.h
src/mesa/main/compute.c
src/mesa/main/config.h
src/mesa/main/copyimage.c
src/mesa/main/dd.h
src/mesa/main/extensions.c
src/mesa/main/fbobject.c
src/mesa/main/format_info.py
src/mesa/main/formats.c
src/mesa/main/formats.csv
src/mesa/main/formats.h
src/mesa/main/get.c
src/mesa/main/get_hash_params.py
src/mesa/main/glformats.c
src/mesa/main/glformats.h
src/mesa/main/image.c
src/mesa/main/image.h
src/mesa/main/mipmap.c
src/mesa/main/mtypes.h
src/mesa/main/objectpurge.c [new file with mode: 0644]
src/mesa/main/objectpurge.h [new file with mode: 0644]
src/mesa/main/pbo.c
src/mesa/main/pipelineobj.c
src/mesa/main/program_resource.c
src/mesa/main/readpix.c
src/mesa/main/renderbuffer.c
src/mesa/main/samplerobj.c
src/mesa/main/shader_query.cpp
src/mesa/main/shaderapi.c
src/mesa/main/shaderapi.h
src/mesa/main/shaderobj.c
src/mesa/main/tests/Makefile.am
src/mesa/main/tests/dispatch_sanity.cpp
src/mesa/main/tests/enum_strings.cpp
src/mesa/main/texcompress.c
src/mesa/main/texcompress_bptc.c
src/mesa/main/texcompress_fxt1.c
src/mesa/main/texcompress_rgtc.c
src/mesa/main/texcompress_s3tc.c
src/mesa/main/texgetimage.c
src/mesa/main/teximage.c
src/mesa/main/teximage.h
src/mesa/main/texstorage.c
src/mesa/main/texstorage.h
src/mesa/main/texstore.c
src/mesa/main/uniform_query.cpp
src/mesa/main/uniforms.c
src/mesa/main/uniforms.h
src/mesa/main/varray.c
src/mesa/main/version.c
src/mesa/program/ir_to_mesa.cpp
src/mesa/program/prog_to_nir.c
src/mesa/program/program.c
src/mesa/state_tracker/st_atom_array.c
src/mesa/state_tracker/st_atom_texture.c
src/mesa/state_tracker/st_cb_bitmap.c
src/mesa/state_tracker/st_cb_blit.c
src/mesa/state_tracker/st_cb_clear.c
src/mesa/state_tracker/st_cb_drawpixels.c
src/mesa/state_tracker/st_cb_drawtex.c
src/mesa/state_tracker/st_cb_fbo.c
src/mesa/state_tracker/st_cb_queryobj.c
src/mesa/state_tracker/st_cb_readpixels.c
src/mesa/state_tracker/st_cb_texture.c
src/mesa/state_tracker/st_draw.c
src/mesa/state_tracker/st_extensions.c
src/mesa/state_tracker/st_format.c
src/mesa/state_tracker/st_glsl_to_tgsi.cpp
src/mesa/swrast/s_drawpix.c
src/mesa/swrast/s_texfetch.c
src/mesa/tnl_dd/t_dd_dmatmp.h
src/util/Makefile.am
src/util/Makefile.sources
src/util/mesa-sha1.c
src/util/strndup.c [new file with mode: 0644]
src/util/strndup.h [new file with mode: 0644]
src/vulkan/anv_compiler.cpp
src/vulkan/anv_nir_apply_dynamic_offsets.c

diff --git a/Android.common.mk b/Android.common.mk
index d662d60..948561c 100644
@@ -42,6 +42,7 @@ LOCAL_CFLAGS += \
        -DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)
 
 LOCAL_CFLAGS += \
+       -D__STDC_LIMIT_MACROS \
        -DHAVE___BUILTIN_EXPECT \
        -DHAVE___BUILTIN_FFS \
        -DHAVE___BUILTIN_FFSLL \
@@ -70,7 +71,7 @@ endif
 
 ifeq ($(MESA_ENABLE_LLVM),true)
 LOCAL_CFLAGS += \
-       -DHAVE_LLVM=0x0305 -DLLVM_VERSION_PATCH=2 \
+       -DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2 \
        -D__STDC_CONSTANT_MACROS \
        -D__STDC_FORMAT_MACROS \
        -D__STDC_LIMIT_MACROS
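
The -D__STDC_LIMIT_MACROS flag added above matters because C99's <stdint.h> allows C++ implementations to hide the integer limit macros unless that symbol is defined before the header is first included. A minimal illustrative sketch, not part of the tree, assuming a pre-C++11 toolchain that honors that C99 footnote:

    // limits_demo.cpp -- illustrative only.
    // Without the define, a pre-C++11 <stdint.h> may leave INT64_MAX
    // and friends undeclared when included from C++.
    #define __STDC_LIMIT_MACROS
    #include <stdint.h>
    #include <cstdio>

    int main()
    {
        std::printf("INT64_MAX = %lld\n", (long long) INT64_MAX);
        return 0;
    }
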
diff --git a/configure.ac b/configure.ac
index 4669a67..fa64dab 100644
@@ -74,7 +74,7 @@ LIBDRM_AMDGPU_REQUIRED=2.4.63
 LIBDRM_INTEL_REQUIRED=2.4.61
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED=2.4.62
-LIBDRM_FREEDRENO_REQUIRED=2.4.64
+LIBDRM_FREEDRENO_REQUIRED=2.4.65
 DRI2PROTO_REQUIRED=2.6
 DRI3PROTO_REQUIRED=1.0
 PRESENTPROTO_REQUIRED=1.0
@@ -534,15 +534,32 @@ AM_CONDITIONAL(HAVE_COMPAT_SYMLINKS, test "x$HAVE_COMPAT_SYMLINKS" = xyes)
 dnl
 dnl library names
 dnl
+dnl Unfortunately we need to do a few things that libtool can't help us with,
+dnl so we need some knowledge of shared library filenames:
+dnl
+dnl LIB_EXT is the extension used when creating symlinks for alternate
+dnl filenames for a shared library which will be dynamically loaded
+dnl
+dnl IMP_LIB_EXT is the extension used when checking for the presence of
+dnl the file for a shared library we wish to link with
+dnl
 case "$host_os" in
 darwin* )
-    LIB_EXT='dylib' ;;
+    LIB_EXT='dylib'
+    IMP_LIB_EXT=$LIB_EXT
+    ;;
 cygwin* )
-    LIB_EXT='dll' ;;
+    LIB_EXT='dll'
+    IMP_LIB_EXT='dll.a'
+    ;;
 aix* )
-    LIB_EXT='a' ;;
+    LIB_EXT='a'
+    IMP_LIB_EXT=$LIB_EXT
+    ;;
 * )
-    LIB_EXT='so' ;;
+    LIB_EXT='so'
+    IMP_LIB_EXT=$LIB_EXT
+    ;;
 esac
 
 AC_SUBST([LIB_EXT])
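
The case statement above distinguishes LIB_EXT, the suffix of the shared library that gets dynamically loaded, from IMP_LIB_EXT, the suffix of the file checked for when linking; the two differ only on Cygwin, where linking goes through a separate .dll.a import library rather than the .dll itself. A hedged C++ sketch of the same mapping, with an invented function name:

    // import_lib_ext.cpp -- illustrative translation of the case statement above.
    #include <string>

    std::string import_lib_name(const std::string &host_os,
                                const std::string &stem)
    {
        std::string ext = "so";                        // default: ELF platforms
        if (host_os.compare(0, 6, "darwin") == 0)
            ext = "dylib";                             // IMP_LIB_EXT == LIB_EXT
        else if (host_os.compare(0, 6, "cygwin") == 0)
            ext = "dll.a";                             // link via the import library
        else if (host_os.compare(0, 3, "aix") == 0)
            ext = "a";
        return "lib" + stem + "." + ext;
    }
    // import_lib_name("cygwin", "LLVMTarget") yields "libLLVMTarget.dll.a"
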
@@ -1111,6 +1128,11 @@ AC_MSG_RESULT([$with_sha1])
 AC_SUBST(SHA1_LIBS)
 AC_SUBST(SHA1_CFLAGS)
 
+# Enable a define for SHA1
+if test "x$with_sha1" != "x"; then
+       DEFINES="$DEFINES -DHAVE_SHA1"
+fi
+
 # Allow user to configure out the shader-cache feature
 AC_ARG_ENABLE([shader-cache],
     AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
@@ -1290,6 +1312,16 @@ AC_SUBST(GLX_TLS, ${GLX_USE_TLS})
 AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
       [DEFINES="${DEFINES} -DGLX_USE_TLS"])
 
+dnl Read-only text section on x86 hardened platforms
+AC_ARG_ENABLE([glx-read-only-text],
+    [AS_HELP_STRING([--enable-glx-read-only-text],
+        [Disable writable .text section on x86 (decreases performance) @<:@default=disabled@:>@])],
+    [enable_glx_read_only_text="$enableval"],
+    [enable_glx_read_only_text=no])
+if test "x$enable_glx_read_only_text" = xyes; then
+    DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
+fi
+
 dnl
 dnl More DRI setup
 dnl
@@ -2058,7 +2090,7 @@ radeon_llvm_check() {
     if test "x$enable_gallium_llvm" != "xyes"; then
         AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
     fi
-    llvm_check_version_for "3" "4" "2" $1 
+    llvm_check_version_for "3" "5" "0" $1
     if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then
         AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.])
     fi
@@ -2146,11 +2178,8 @@ if test -n "$with_gallium_drivers"; then
             gallium_require_drm "vc4"
             gallium_require_drm_loader
 
-            case "$host_cpu" in
-                i?86 | x86_64 | amd64)
-                USE_VC4_SIMULATOR=yes
-                ;;
-            esac
+            PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
+                              [USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
             ;;
         *)
             AC_MSG_ERROR([Unknown Gallium driver: $driver])
@@ -2170,10 +2199,14 @@ if test "x$MESA_LLVM" != x0; then
 
     LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
 
+    dnl llvm-config may not give the right answer when llvm is built as a
+    dnl single shared library, so we must work the library name out for
+    dnl ourselves.
+    dnl (See https://llvm.org/bugs/show_bug.cgi?id=6823)
     if test "x$enable_llvm_shared_libs" = xyes; then
         dnl We can't use $LLVM_VERSION because it has 'svn' stripped out,
         LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
-        AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.so"], [llvm_have_one_so=yes])
+        AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.$IMP_LIB_EXT"], [llvm_have_one_so=yes])
 
         if test "x$llvm_have_one_so" = xyes; then
             dnl LLVM was built using auto*, so there is only one shared object.
@@ -2181,7 +2214,7 @@ if test "x$MESA_LLVM" != x0; then
         else
             dnl If LLVM was built with CMake, there will be one shared object per
             dnl component.
-            AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.so"],
+            AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.$IMP_LIB_EXT"],
                     [AC_MSG_ERROR([Could not find llvm shared libraries:
        Please make sure you have built llvm with the --enable-shared option
        and that your llvm libraries are installed in $LLVM_LIBDIR
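
The hunks above act on the comment's point: rather than trusting llvm-config, configure probes the filesystem for lib$LLVM_SO_NAME.$IMP_LIB_EXT to decide whether LLVM was built as a single shared object (autotools) or as one object per component (CMake). A sketch of that probe with a hypothetical function name, reusing the IMP_LIB_EXT value worked out earlier:

    // llvm_probe.cpp -- hedged sketch of the configure test, not Mesa code.
    #include <filesystem>
    #include <string>

    // True when $LLVM_LIBDIR holds a single libLLVM-<version> object,
    // i.e. LLVM was built by autotools as one shared library.
    bool llvm_is_monolithic(const std::filesystem::path &libdir,
                            const std::string &version,  // `llvm-config --version`
                            const std::string &imp_ext)  // IMP_LIB_EXT
    {
        return std::filesystem::exists(
            libdir / ("libLLVM-" + version + "." + imp_ext));
    }
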
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 561f204..e020deb 100644
@@ -109,14 +109,14 @@ GL 4.0, GLSL 4.00 --- all DONE: nvc0, radeonsi
   - Enhanced per-sample shading                        DONE (r600)
   - Interpolation functions                            DONE (r600)
   - New overload resolution rules                      DONE
-  GL_ARB_gpu_shader_fp64                               DONE (llvmpipe, softpipe)
+  GL_ARB_gpu_shader_fp64                               DONE (r600, llvmpipe, softpipe)
   GL_ARB_sample_shading                                DONE (i965, nv50, r600)
   GL_ARB_shader_subroutine                             DONE (i965, nv50, r600, llvmpipe, softpipe)
   GL_ARB_tessellation_shader                           DONE ()
   GL_ARB_texture_buffer_object_rgb32                   DONE (i965, r600, llvmpipe, softpipe)
   GL_ARB_texture_cube_map_array                        DONE (i965, nv50, r600, llvmpipe, softpipe)
   GL_ARB_texture_gather                                DONE (i965, nv50, r600, llvmpipe, softpipe)
-  GL_ARB_texture_query_lod                             DONE (i965, nv50, r600)
+  GL_ARB_texture_query_lod                             DONE (i965, nv50, r600, softpipe)
   GL_ARB_transform_feedback2                           DONE (i965, nv50, r600, llvmpipe, softpipe)
   GL_ARB_transform_feedback3                           DONE (i965, nv50, r600, llvmpipe, softpipe)
 
@@ -127,7 +127,7 @@ GL 4.1, GLSL 4.10 --- all DONE: nvc0, radeonsi
   GL_ARB_get_program_binary                            DONE (0 binary formats)
   GL_ARB_separate_shader_objects                       DONE (all drivers)
   GL_ARB_shader_precision                              DONE (all drivers that support GLSL 4.10)
-  GL_ARB_vertex_attrib_64bit                           DONE (llvmpipe, softpipe)
+  GL_ARB_vertex_attrib_64bit                           DONE (r600, llvmpipe, softpipe)
   GL_ARB_viewport_array                                DONE (i965, nv50, r600, llvmpipe)
 
 
@@ -164,7 +164,7 @@ GL 4.3, GLSL 4.30:
   GL_ARB_program_interface_query                       DONE (all drivers)
   GL_ARB_robust_buffer_access_behavior                 not started
   GL_ARB_shader_image_size                             DONE (i965)
-  GL_ARB_shader_storage_buffer_object                  in progress (Iago Toral, Samuel Iglesias)
+  GL_ARB_shader_storage_buffer_object                  DONE (i965)
   GL_ARB_stencil_texturing                             DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_texture_buffer_range                          DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
   GL_ARB_texture_query_levels                          DONE (all drivers that support GLSL 1.30)
@@ -194,8 +194,8 @@ GL 4.5, GLSL 4.50:
   GL_ARB_derivative_control                            DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_direct_state_access                           DONE (all drivers)
   GL_ARB_get_texture_sub_image                         DONE (all drivers)
-  GL_ARB_shader_texture_image_samples                  not started
-  GL_ARB_texture_barrier                               DONE (nv50, nvc0, r600, radeonsi)
+  GL_ARB_shader_texture_image_samples                  DONE (i965, nv50, nvc0, r600, radeonsi)
+  GL_ARB_texture_barrier                               DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_KHR_context_flush_control                         DONE (all - but needs GLX/EGL extension to be useful)
   GL_KHR_robust_buffer_access_behavior                 not started
   GL_KHR_robustness                                    90% done (the ARB variant)
@@ -212,7 +212,7 @@ GLES3.1, GLSL ES 3.1
   GL_ARB_shader_atomic_counters                        DONE (i965)
   GL_ARB_shader_image_load_store                       DONE (i965)
   GL_ARB_shader_image_size                             DONE (i965)
-  GL_ARB_shader_storage_buffer_object                  in progress (Iago Toral, Samuel Iglesias)
+  GL_ARB_shader_storage_buffer_object                  DONE (i965)
   GL_ARB_shading_language_packing                      DONE (all drivers)
   GL_ARB_separate_shader_objects                       DONE (all drivers)
   GL_ARB_stencil_texturing                             DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -223,10 +223,35 @@ GLES3.1, GLSL ES 3.1
   GS5 Packing/bitfield/conversion functions            DONE (i965, nvc0, r600, radeonsi)
   GL_EXT_shader_integer_mix                            DONE (all drivers that support GLSL)
 
-  Additional functions not covered above:
-      glMemoryBarrierByRegion
-      glGetTexLevelParameter[fi]v - needs updates to restrict to GLES enums
-      glGetBooleani_v - needs updates to restrict to GLES enums
+  Additional functionality not covered above:
+      glMemoryBarrierByRegion                          DONE
+      glGetTexLevelParameter[fi]v - needs updates      DONE
+      glGetBooleani_v - restrict to GLES enums
+      gl_HelperInvocation support
+
+GLES3.2, GLSL ES 3.2
+  GL_EXT_color_buffer_float                            DONE (all drivers)
+  GL_KHR_blend_equation_advanced                       not started
+  GL_KHR_debug                                         DONE (all drivers)
+  GL_KHR_robustness                                    90% done (the ARB variant)
+  GL_KHR_texture_compression_astc_ldr                  DONE (i965/gen9+)
+  GL_OES_copy_image                                    not started (based on GL_ARB_copy_image, which is done for some drivers)
+  GL_OES_draw_buffers_indexed                          not started
+  GL_OES_draw_elements_base_vertex                     not started (based on GL_ARB_draw_elements_base_vertex, which is done for all drivers)
+  GL_OES_geometry_shader                               not started (based on GL_ARB_geometry_shader4, which is done for all drivers)
+  GL_OES_gpu_shader5                                   not started (based on parts of GL_ARB_gpu_shader5, which is done for some drivers)
+  GL_OES_primitive_bounding_box                        not started
+  GL_OES_sample_shading                                not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
+  GL_OES_sample_variables                              not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
+  GL_OES_shader_image_atomic                           not started (based on parts of GL_ARB_shader_image_load_store, which is done for some drivers)
+  GL_OES_shader_io_blocks                              not started (based on parts of GLSL 1.50, which is done)
+  GL_OES_shader_multisample_interpolation              not started (based on parts of GL_ARB_gpu_shader5, which is done)
+  GL_OES_tessellation_shader                           not started (based on GL_ARB_tessellation_shader, which is done for some drivers)
+  GL_OES_texture_border_clamp                          not started (based on GL_ARB_texture_border_clamp, which is done)
+  GL_OES_texture_buffer                                not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done)
+  GL_OES_texture_cube_map_array                        not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers)
+  GL_OES_texture_stencil8                              not started (based on GL_ARB_texture_stencil8, which is done for some drivers)
+  GL_OES_texture_storage_multisample_2d_array          DONE (all drivers that support GL_ARB_texture_multisample)
 
 More info about these features and the work involved can be found at
 http://dri.freedesktop.org/wiki/MissingFunctionality
diff --git a/docs/autoconf.html b/docs/autoconf.html
index 2ef8c63..5c29e5e 100644
@@ -87,6 +87,13 @@ created in a <code>lib64</code> directory at the top of the Mesa source
 tree.</p>
 </dd>
 
+<dt><code>--sysconfdir=DIR</code></dt>
+<dd><p>This option specifies the directory where the configuration
+files will be installed. The default is <code>${prefix}/etc</code>.
+Currently there's only one config file provided when dri drivers are
+enabled - it's <code>drirc</code>.</p>
+</dd>
+
 <dt><code>--enable-static, --disable-shared</code></dt>
 <dd><p>By default, Mesa
 will build shared libraries. Either of these options will force static
@@ -217,7 +224,7 @@ GLX.
 <dt><code>--with-expat=DIR</code>
 <dd><p><strong>DEPRECATED</strong>, use <code>PKG_CONFIG_PATH</code> instead.</p>
 <p>The DRI-enabled libGL uses expat to
-parse the DRI configuration files in <code>/etc/drirc</code> and
+parse the DRI configuration files in <code>${sysconfdir}/drirc</code> and
 <code>~/.drirc</code>. This option allows a specific expat installation
 to be used. For example, <code>--with-expat=/usr/local</code> will
 search for expat headers and libraries in <code>/usr/local/include</code>
diff --git a/docs/envvars.html b/docs/envvars.html
index c0d5a51..bdfe999 100644
@@ -153,6 +153,7 @@ See the <a href="xlibdriver.html">Xlib software driver page</a> for details.
    <li>no16 - suppress generation of 16-wide fragment shaders. useful for debugging broken shaders</li>
    <li>blorp - emit messages about the blorp operations (blits &amp; clears)</li>
    <li>nodualobj - suppress generation of dual-object geometry shader code</li>
+   <li>optimizer - dump shader assembly to files at each optimization pass and iteration that make progress</li>
 </ul>
 </ul>
 
diff --git a/docs/index.html b/docs/index.html
index b067256..a56c848 100644
 
 <h1>News</h1>
 
-<h2>August 22 2015</h2>
+<h2>September 28, 2015</h2>
+<p>
+<a href="relnotes/11.0.2.html">Mesa 11.0.2</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 26, 2015</h2>
+<p>
+<a href="relnotes/11.0.1.html">Mesa 11.0.1</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 20, 2015</h2>
+<p>
+<a href="relnotes/10.6.8.html">Mesa 10.6.8</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 12, 2015</h2>
+<p>
+<a href="relnotes/11.0.0.html">Mesa 11.0.0</a> is released.  This is a new
+development release.  See the release notes for more information about
+the release.
+</p>
+
+<h2>September 10, 2015</h2>
+<p>
+<a href="relnotes/10.6.7.html">Mesa 10.6.7</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 4, 2015</h2>
+<p>
+<a href="relnotes/10.6.6.html">Mesa 10.6.6</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>August 22, 2015</h2>
 <p>
 <a href="relnotes/10.6.5.html">Mesa 10.6.5</a> is released.
 This is a bug-fix release.
 </p>
 
-<h2>August 11 2015</h2>
+<h2>August 11, 2015</h2>
 <p>
 <a href="relnotes/10.6.4.html">Mesa 10.6.4</a> is released.
 This is a bug-fix release.
 </p>
 
-<h2>July 26 2015</h2>
+<h2>July 26, 2015</h2>
 <p>
 <a href="relnotes/10.6.3.html">Mesa 10.6.3</a> is released.
 This is a bug-fix release.
 </p>
 
-<h2>July 11 2015</h2>
+<h2>July 11, 2015</h2>
 <p>
 <a href="relnotes/10.6.2.html">Mesa 10.6.2</a> is released.
 This is a bug-fix release.
diff --git a/docs/relnotes.html b/docs/relnotes.html
index 2cc4701..948d3f7 100644
@@ -21,6 +21,12 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>
 
 <ul>
+<li><a href="relnotes/11.0.2.html">11.0.2 release notes</a>
+<li><a href="relnotes/11.0.1.html">11.0.1 release notes</a>
+<li><a href="relnotes/10.6.8.html">10.6.8 release notes</a>
+<li><a href="relnotes/11.0.0.html">11.0.0 release notes</a>
+<li><a href="relnotes/10.6.7.html">10.6.7 release notes</a>
+<li><a href="relnotes/10.6.6.html">10.6.6 release notes</a>
 <li><a href="relnotes/10.6.5.html">10.6.5 release notes</a>
 <li><a href="relnotes/10.6.4.html">10.6.4 release notes</a>
 <li><a href="relnotes/10.6.3.html">10.6.3 release notes</a>
diff --git a/docs/relnotes/10.6.6.html b/docs/relnotes/10.6.6.html
new file mode 100644
index 0000000..eaf54c4
--- /dev/null
@@ -0,0 +1,164 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.6 Release Notes / September 04, 2015</h1>
+
+<p>
+Mesa 10.6.6 is a bug fix release which fixes bugs found since the 10.6.5 release.
+</p>
+<p>
+Mesa 10.6.6 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+416517aa9df4791f97d34451a9e4da33c966afcd18c115c5769b92b15b018ef5  mesa-10.6.6.tar.gz
+570f2154b7340ff5db61ff103bc6e85165b8958798b78a50fa2df488e98e5778  mesa-10.6.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is &gt; 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: &quot;Found invalid pixel values&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - &quot;high fidelity&quot;: Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chris Wilson (2):</p>
+<ul>
+  <li>i965: Prevent coordinate overflow in intel_emit_linear_blit</li>
+  <li>i965: Always re-emit the pipeline select during invariant state emission</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+  <li>mesa: add missing queries for ARB_direct_state_access</li>
+</ul>
+
+<p>Dave Airlie (8):</p>
+<ul>
+  <li>mesa/arb_gpu_shader_fp64: add support for glGetUniformdv</li>
+  <li>mesa/texgetimage: fix missing stencil check</li>
+  <li>st/readpixels: fix accel path for skipimages.</li>
+  <li>texcompress_s3tc/fxt1: fix stride checks (v1.1)</li>
+  <li>mesa/readpixels: check strides are equal before skipping conversion</li>
+  <li>mesa: enable texture stencil8 for multisample</li>
+  <li>r600/sb: update last_cf for finalize if.</li>
+  <li>r600g: fix calculation for gpr allocation</li>
+</ul>
+
+<p>David Heidelberg (1):</p>
+<ul>
+  <li>st/nine: Require gcc &gt;= 4.6</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.5</li>
+  <li>get-pick-list.sh: Require explicit "10.6" for nominating stable patches</li>
+</ul>
+
+<p>Glenn Kennard (4):</p>
+<ul>
+  <li>r600g: Fix assert in tgsi_cmp</li>
+  <li>r600g/sb: Handle undef in read port tracker</li>
+  <li>r600g/sb: Don't read junk after EOP</li>
+  <li>r600g/sb: Don't crash on empty if jump target</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+  <li>st/mesa: fix assignments with 4-operand arguments (i.e. BFI)</li>
+  <li>st/mesa: pass through 4th opcode argument in bitmap/pixel visitors</li>
+  <li>nv50,nvc0: disable depth bounds test on blit</li>
+  <li>nv50: fix 2d engine blits for 64- and 128-bit formats</li>
+  <li>mesa: only copy the requested teximage faces</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/fs: Split VGRFs after lowering pull constants</li>
+</ul>
+
+<p>Kenneth Graunke (3):</p>
+<ul>
+  <li>i965: Fix copy propagation type changes.</li>
+  <li>Revert "i965: Advertise a line width of 40.0 on Cherryview and Skylake."</li>
+  <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>gallium/radeon: fix the ADDRESS_HI mask for EVENT_WRITE CIK packets</li>
+  <li>mesa: create multisample fallback textures like normal textures</li>
+  <li>radeonsi: fix a Unigine Heaven hang when drirc is missing</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>i965/fs: Handle MRF destinations in lower_integer_multiplication().</li>
+</ul>
+
+<p>Neil Roberts (2):</p>
+<ul>
+  <li>i965: Swap the order of the vertex ID and edge flag attributes</li>
+  <li>i965/bdw: Fix 3DSTATE_VF_INSTANCING when the edge flag is used</li>
+</ul>
+
+<p>Tapani Pälli (5):</p>
+<ul>
+  <li>mesa: update fbo state in glTexStorage</li>
+  <li>glsl: build stageref mask using IR, not symbol table</li>
+  <li>glsl: expose build_program_resource_list function</li>
+  <li>glsl: create program resource list after LinkShader</li>
+  <li>mesa: add GL_RED, GL_RG support for floating point textures</li>
+</ul>
+
+
+</div>
+</body>
+</html>
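
The boilerplate paragraph in these release notes points out that the version reported at run time depends on the driver in use, not on the Mesa release number. A minimal sketch of the queries it names, assuming a current GL 3.x context and headers exposing the GL 3.0 enums:

    // version_query.cpp -- illustrative; requires a bound GL >= 3.0 context.
    #include <GL/gl.h>
    #include <GL/glext.h>   // for GL_MAJOR_VERSION / GL_MINOR_VERSION
    #include <cstdio>

    void print_gl_version(void)
    {
        GLint major = 0, minor = 0;
        glGetIntegerv(GL_MAJOR_VERSION, &major);
        glGetIntegerv(GL_MINOR_VERSION, &minor);
        std::printf("GL %d.%d (%s)\n", major, minor,
                    (const char *) glGetString(GL_VERSION));
    }
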
diff --git a/docs/relnotes/10.6.7.html b/docs/relnotes/10.6.7.html
new file mode 100644
index 0000000..3810edc
--- /dev/null
@@ -0,0 +1,75 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.7 Release Notes / September 10, 2015</h1>
+
+<p>
+Mesa 10.6.7 is a bug fix release which fixes bugs found since the 10.6.6 release.
+</p>
+<p>
+Mesa 10.6.7 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+4ba10c59abee30d72476543a57afd2f33803dabf4620dc333b335d47966ff842  mesa-10.6.7.tar.gz
+feb1f640b915dada88a7c793dfaff0ae23580f8903f87a6b76469253de0d28d8  mesa-10.6.7.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>mesa/teximage: use correct extension for accept stencil texture.</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.6</li>
+  <li>Revert "i965: Momentarily pretend to support ARB_texture_stencil8 for blits."</li>
+  <li>Update version to 10.6.7</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>glsl: Handle attribute aliasing in attribute storage limit check.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/docs/relnotes/10.6.8.html b/docs/relnotes/10.6.8.html
new file mode 100644
index 0000000..a5abd44
--- /dev/null
@@ -0,0 +1,136 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.8 Release Notes / September 20, 2015</h1>
+
+<p>
+Mesa 10.6.8 is a bug fix release which fixes bugs found since the 10.6.7 release.
+</p>
+<p>
+Mesa 10.6.8 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+1f34dba2a8059782e3e4e0f18b9628004e253b2c69085f735b846d2e63c9e250  mesa-10.6.8.tar.gz
+e36ee5ceeadb3966fb5ce5b4cf18322dbb76a4f075558ae49c3bba94f57d58fd  mesa-10.6.8.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alejandro Piñeiro (1):</p>
+<ul>
+  <li>i965/vec4: fill src_reg type using the constructor type parameter</li>
+</ul>
+
+<p>Antia Puentes (1):</p>
+<ul>
+  <li>i965/vec4: Fix saturation errors when coalescing registers</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.7</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+</ul>
+
+<p>Hans de Goede (4):</p>
+<ul>
+  <li>nv30: Fix creation of scanout buffers</li>
+  <li>nv30: Implement color resolve for msaa</li>
+  <li>nv30: Fix max width / height checks in nv30 sifm code</li>
+  <li>nv30: Disable msaa unless requested from the env by NV30_MAX_MSAA</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>mesa: Pass the type to _mesa_uniform_matrix as a glsl_base_type</li>
+  <li>mesa: Don't allow wrong type setters for matrix uniforms</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+  <li>st/mesa: don't fall back to 16F when 32F is requested</li>
+  <li>nvc0: always emit a full shader colormask</li>
+  <li>nvc0: remove BGRA4 format support</li>
+  <li>st/mesa: avoid integer overflows with buffers &gt;= 512MB</li>
+  <li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/vec4: Don't reswizzle hardware registers</li>
+</ul>
+
+<p>Jose Fonseca (1):</p>
+<ul>
+  <li>gallivm: Workaround LLVM PR23628.</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Oded Gabbay (1):</p>
+<ul>
+  <li>llvmpipe: convert double to long long instead of unsigned long long</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+  <li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
+</ul>
+
+<p>Ulrich Weigand (1):</p>
+<ul>
+  <li>mesa: Fix texture compression on big-endian systems</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>gallivm: Do not use NoFramePointerElim with LLVM 3.7.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/docs/relnotes/11.0.0.html b/docs/relnotes/11.0.0.html
index 537b883..cfce907 100644
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">
 
-<h1>Mesa 11.0.0 Release Notes / TBD</h1>
+<h1>Mesa 11.0.0 Release Notes / September 12, 2015</h1>
 
 <p>
 Mesa 11.0.0 is a new development release.
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.
 
 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+7d7e4ddffa3b162506efa01e2cc41e329caa4995336b92e5cc21f2e1fb36c1b3  mesa-11.0.0.tar.gz
+e095a3eb2eca9dfde7efca8946527c8ae20a0cc938a8c78debc7f158ad44af32  mesa-11.0.0.tar.xz
 </pre>
 
 
@@ -83,13 +84,175 @@ Note: some of the new features are only available with certain drivers.
 <li>EGL 1.5 on r600, radeonsi, nv50, nvc0</li>
 </ul>
 
+
 <h2>Bug fixes</h2>
 
-TBD.
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=51658">Bug 51658</a> - r200 (&amp; possibly radeon) DRI fixes for gnome shell on Mesa 8.0.3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65525">Bug 65525</a> - [llvmpipe] lp_scene.h:210:lp_scene_alloc: Assertion `size &lt;= (64 * 1024)' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66346">Bug 66346</a> - shader_query.cpp:49: error: invalid conversion from 'void*' to 'GLuint'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73512">Bug 73512</a> - [clover] mesa.icd. should contain full path</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73528">Bug 73528</a> - Deferred lighting in Second Life causes system hiccups and screen flickering</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74329">Bug 74329</a> - Please expose OES_texture_float and OES_texture_half_float on the ES3 context</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80500">Bug 80500</a> - Flickering shadows in unreleased title trace</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82186">Bug 82186</a> - [r600g] BARTS GPU lockup with minecraft shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84225">Bug 84225</a> - Allow constant-index-expression sampler array indexing with GLSL-ES &lt; 300</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85252">Bug 85252</a> - Segfault in compiler while processing ternary operator with void arguments</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89131">Bug 89131</a> - [Bisected] Graphical corruption in Weston,  shows old framebuffer pieces</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90000">Bug 90000</a> - [i965 Bisected NIR] Piglit/gglean_fragprog1-z-write_test fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90073">Bug 90073</a> - Leaks in xcb_dri3_open_reply_fds() and get_render_node_from_id_path_tag</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90249">Bug 90249</a> - Fails to build egl_dri2 on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90310">Bug 90310</a> - Fails to build gallium_dri.so at linking stage with clang because of multiple redefinitions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90347">Bug 90347</a> - [NVE0+] Failure to insert texbar under some circumstances (causing bad colors in Terasology)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90466">Bug 90466</a> - arm: linker error ndefined reference to `nir_metadata_preserve'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90520">Bug 90520</a> - Register spilling clobbers registers used elsewhere in the shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90537">Bug 90537</a> - radeonsi bo/va conflict on RADEON_GEM_VA (rscreen-&gt;ws-&gt;buffer_from_handle returns NULL)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90547">Bug 90547</a> - [BDW/BSW/SKL Bisected]Piglit/glean&#64;vertprog1-rsq_test_2_(reciprocal_square_root_of_negative_value) fais</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90580">Bug 90580</a> - [HSW bisected] integer multiplication bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90600">Bug 90600</a> - IOError: [Errno 2] No such file or directory: 'gl_API.xml'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90629">Bug 90629</a> - [i965] SIMD16 dual_source_blend assertion `src[i].file != GRF || src[i].width == dst.width' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90691">Bug 90691</a> - [BSW]Piglit/spec/nv_conditional_render/dlist fails intermittently</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90728">Bug 90728</a> - dvd playback with vlc and vdpau causes segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is &gt; 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90749">Bug 90749</a> - [BDW Bisected]dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_max.primitives.lines_wide fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90797">Bug 90797</a> - [ALL bisected] Mesa change cause performance case manhattan fail.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90817">Bug 90817</a> - swrast fails to load with certain remote X servers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90830">Bug 90830</a> - [bsw bisected regression] GPU hang for spec.arb_gpu_shader5.execution.sampler_array_indexing.vs-nonzero-base</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90839">Bug 90839</a> - [10.5.5/10.6 regression, bisected] PBO glDrawPixels no longer using blit fastpath</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90873">Bug 90873</a> - Kernel hang, TearFree On, Mate desktop environment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90887">Bug 90887</a> - PhiMovesPass in register allocator broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90895">Bug 90895</a> - [IVB/HSW/BDW/BSW Bisected] GLB2.7 Egypt, GfxBench3.0 T-Rex &amp; ALU and many SynMark cases performance reduced by 10-23%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: &quot;Found invalid pixel values&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90903">Bug 90903</a> - egl_dri2.c:dri2_load fails to load libglapi on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90904">Bug 90904</a> - OSX: EXC_BAD_ACCESS when using translate_sse + gallium + softpipe/llvmpipe</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90905">Bug 90905</a> - mesa: Finish subdir-objects transition</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - &quot;high fidelity&quot;: Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91022">Bug 91022</a> - [g45 g965 bisected] assertions generated from textureGrad cube samplers fix</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91047">Bug 91047</a> - [SNB Bisected] Messed up Fog in Super Smash Bros. Melee in Dolphin</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91056">Bug 91056</a> - The Bard's Tale (2005, native)  has rendering issues</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91077">Bug 91077</a> - dri2_glx.c:1186: undefined reference to `loader_open_device'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91099">Bug 91099</a> - [llvmpipe] piglit glsl-max-varyings &gt;max_varying_components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91101">Bug 91101</a> - [softpipe] piglit glsl-1.50&#64;execution&#64;geometry&#64;max-input-components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91117">Bug 91117</a> - Nimbus (running in wine) has rendering issues, objects are semi-transparent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91124">Bug 91124</a> - Civilization V (in Wine) has rendering issues: text missing, menu bar corrupted</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91173">Bug 91173</a> - Oddworld: Stranger's Wrath HD: disfigured models in wrong colors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91193">Bug 91193</a> - [290x] Dota2 reborn ingame rendering breaks with git-af4b9c7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91222">Bug 91222</a> - lp_test_format regression on CentOS 7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91226">Bug 91226</a> - Crash in glLinkProgram (NEW)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91231">Bug 91231</a> - [NV92] Psychonauts (native) segfaults on start when DRI3 enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91290">Bug 91290</a> - SIGSEGV glcpp/glcpp-parse.y:1077</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91337">Bug 91337</a> - OSMesaGetProcAdress(&quot;OSMesaPixelStore&quot;) returns nil</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91418">Bug 91418</a> - Visual Studio 2015 vsnprintf build error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91425">Bug 91425</a> - [regression, bisected] Piglit spec/ext_packed_float/ getteximage-invalid-format-for-packed-type fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91441">Bug 91441</a> - make check DispatchSanity_test.GL30 regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91444">Bug 91444</a> - regression bisected radeonsi: don't change pipe_resource in resource_copy_region</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91461">Bug 91461</a> - gl_TessLevel* writes have no effect for all but the last TCS invocation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91513">Bug 91513</a> - [IVB/HSW/BDW/SKL Bisected] Lightsmark performance reduced by 7%-10%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91544">Bug 91544</a> - [i965, regression, bisected] regression of several tests in 93977d3a151675946c03e</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91551">Bug 91551</a> - DXTn compressed normal maps produce severe artifacts on all NV5x and NVDx chipsets</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91570">Bug 91570</a> - Upgrading mesa to 10.6 causes segfault in OpenGL applications with GeForce4 MX 440 / AGP 8X</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91591">Bug 91591</a> - rounding.h:102:2: error: #error &quot;Unsupported or undefined LONG_BIT&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91610">Bug 91610</a> - [BSW] GPU hang for spec.shaders.point-vertex-id gl_instanceid divisor</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91847">Bug 91847</a> - glGenerateTextureMipmap not working (no errors) unless glActiveTexture(GL_TEXTURE1) is called before</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91857">Bug 91857</a> - Mesa 10.6.3 linker is slow</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91881">Bug 91881</a> - regression: GPU lockups since mesa-11.0.0_rc1 on RV620 (r600) driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91890">Bug 91890</a> - [nve7] witcher2: blurry image &amp; DATA_ERRORs (class 0xa097 mthd 0x2380/0x238c)</li>
+
+</ul>
+
 
 <h2>Changes</h2>
 
-TBD.
+<ul>
+<li>Removed the EGL loader from the Linux SCons build.</li>
+</ul>
 
 </div>
 </body>
diff --git a/docs/relnotes/11.0.1.html b/docs/relnotes/11.0.1.html
new file mode 100644 (file)
index 0000000..9051e7c
--- /dev/null
@@ -0,0 +1,134 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.1 Release Notes / September 26, 2015</h1>
+
+<p>
+Mesa 11.0.1 is a bug fix release which fixes bugs found since the 11.0.0 release.
+</p>
+<p>
+Mesa 11.0.1 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+6dab262877e12c0546a0e2970c6835a0f217e6d4026ccecb3cd5dd733d1ce867  mesa-11.0.1.tar.gz
+43d0dfcd1f1e36f07f8228cd76d90175d3fc74c1ed25d7071794a100a98ef2a6  mesa-11.0.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91114">Bug 91114</a> - ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91716">Bug 91716</a> - [bisected] piglit.shaders.glsl-vs-int-attrib regresses on 32 bit BYT, HSW, IVB, SNB</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92009">Bug 92009</a> - ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels fails</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Antia Puentes (2):</p>
+<ul>
+  <li>i965/vec4: Fix saturation errors when coalescing registers</li>
+  <li>i965/vec4_nir: Load constants as integers</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>meta: Abort meta pbo path if TexSubImage need signed unsigned conversion</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.0</li>
+  <li>Update version to 11.0.1</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>mesa: Fix GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for default framebuffer.</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+  <li>t_dd_dmatmp: Make "count" actually be the count</li>
+  <li>t_dd_dmatmp: Clean up improper code formatting from previous patch</li>
+  <li>t_dd_dmatmp: Use '&amp; 3' instead of '% 4' everywhere</li>
+  <li>t_dd_dmatmp: Pull out common 'count -= count &amp; 3' code</li>
+  <li>t_dd_dmatmp: Use addition instead of subtraction in loop bounds</li>
+</ul>
+
+<p>Ilia Mirkin (6):</p>
+<ul>
+  <li>st/mesa: avoid integer overflows with buffers &gt;= 512MB</li>
+  <li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
+  <li>freedreno/a3xx: fix blending of L8 format</li>
+  <li>nv50,nvc0: detect underlying resource changes and update tic</li>
+  <li>nv50,nvc0: flush texture cache in presence of coherent bufs</li>
+  <li>radeonsi: load fmask ptr relative to the resources array</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>nir: Fix a bunch of ralloc parenting errors</li>
+  <li>i965/vec4: Don't reswizzle hardware registers</li>
+</ul>
+
+<p>Jeremy Huddleston (1):</p>
+<ul>
+  <li>configure.ac: Add support to enable read-only text segment on x86.</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+  <li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
+</ul>
+
+<p>Tapani Pälli (2):</p>
+<ul>
+  <li>mesa: fix errors when reading depth with glReadPixels</li>
+  <li>i965: fix textureGrad for cubemaps</li>
+</ul>
+
+<p>Ulrich Weigand (1):</p>
+<ul>
+  <li>mesa: Fix texture compression on big-endian systems</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/docs/relnotes/11.0.2.html b/docs/relnotes/11.0.2.html
new file mode 100644 (file)
index 0000000..651e5e7
--- /dev/null
@@ -0,0 +1,85 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.2 Release Notes / September 28, 2015</h1>
+
+<p>
+Mesa 11.0.2 is a bug fix release which fixes bugs found since the 11.0.1 release.
+</p>
+<p>
+Mesa 11.0.2 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+45170773500d6ae2f9eb93fc85efee69f7c97084411ada4eddf92f78bca56d20  mesa-11.0.2.tar.gz
+fce11fb27eb87adf1e620a76455d635c6136dfa49ae58c53b34ef8d0c7b7eae4  mesa-11.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91582">Bug 91582</a> - [bisected] Regression in DEQP gles2.functional.negative_api.texture.texsubimage2d_neg_offset</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91970">Bug 91970</a> - [BSW regression] dEQP-GLES3.functional.shaders.precision.int.highp_mul_vertex</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92095">Bug 92095</a> - [Regression, bisected] arb_shader_atomic_counters.compiler.builtins.frag</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Eduardo Lima Mitev (3):</p>
+<ul>
+  <li>mesa: Fix order of format+type and internal format checks for glTexImageXD ops</li>
+  <li>mesa: Move _mesa_base_tex_format() from teximage to glformats files</li>
+  <li>mesa: Use the effective internal format instead for validation</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.1</li>
+  <li>Update version to 11.0.2</li>
+</ul>
+
+<p>Kristian Høgsberg Kristensen (1):</p>
+<ul>
+  <li>i965: Respect stride and subreg_offset for ATTR registers</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>glsl: Expose gl_MaxTess{Control,Evaluation}AtomicCounters.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
index 7f80206..c755c98 100644 (file)
@@ -44,7 +44,12 @@ Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
-TBD.
+<li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
+<li>GL_ARB_shader_storage_buffer_object on i965</li>
+<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
+<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
+<li>GL_ARB_texture_query_lod on softpipe</li>
+<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
 </ul>
 
 <h2>Bug fixes</h2>
index 77a0ee4..e9fe3dd 100644 (file)
@@ -63,6 +63,20 @@ execution.  These are generally used for debugging.
 Example:  export MESA_GLSL=dump,nopt
 </p>
 
+<p>
+Shaders can be dumped and replaced at runtime for debugging purposes. Mesa
+needs to be configured with '--with-sha1' to enable this functionality. This
+feature is not currently supported by the SCons build.
+
+This is controlled via the following environment variables:
+<ul>
+<li><b>MESA_SHADER_DUMP_PATH</b> - path where shader sources are dumped
+<li><b>MESA_SHADER_READ_PATH</b> - path where replacement shaders are read
+</ul>
+Note that the paths must exist before running for dumping or replacing to
+work. When both are set, the paths should be different so the dumped shaders
+do not clobber the replacement shaders.
+</p>
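+
+<p>
+For example, a debugging session might look like this (the directory names
+are only placeholders):
+</p>
+<pre>
+mkdir /tmp/dump /tmp/read
+export MESA_SHADER_DUMP_PATH=/tmp/dump
+export MESA_SHADER_READ_PATH=/tmp/read
+</pre>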
 
 <h2 id="support">GLSL Version</h2>
 
index b5ea4e0..284c6c2 100644 (file)
@@ -27,6 +27,31 @@ MacOS are all supported.
 </p>
 
 <p>
+With the August 2015 Workstation 12 / Fusion 8 releases, OpenGL 3.3
+is supported in the guest.
+This requires:
+<ul>
+<li>The VM is configured for virtual hardware version 12.
+<li>The host OS, GPU and graphics driver support DX11 (Windows) or
+    OpenGL 4.0 (Linux, Mac)
+<li>On Linux, the vmwgfx kernel module must be version 2.9.0 or later.
+<li>A recent version of Mesa with the updated svga gallium driver.
+</ul>
+</p>
+
+<p>
+Otherwise, OpenGL 2.1 is supported.
+</p>
+
+<p>
+OpenGL 3.3 support can be disabled by setting the environment variable
+SVGA_VGPU10=0.
+You will then have OpenGL 2.1 support.
+This may be useful to work around application bugs (such as incorrect use
+of the OpenGL 3.x core profile).
+</p>
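+
+<p>
+For example, to see which version you get with the 3.3 path disabled
+(glxinfo here is just a stand-in for any GL client):
+</p>
+<pre>
+SVGA_VGPU10=0 glxinfo | grep "OpenGL version"
+</pre>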
+
+<p>
 Most modern Linux distros include the SVGA3D driver so end users shouldn't
 be concerned with this information.
 But if your distro lacks the driver or you want to update to the latest code
@@ -227,6 +252,16 @@ If you don't see this, try setting this environment variable:
 then rerun glxinfo and examine the output for error messages.
 </p>
 
+<p>
+If OpenGL 3.3 is not working (you only get OpenGL 2.1):
+</p>
+<ul>
+<li>Make sure the VM uses hardware version 12.
+<li>Make sure the vmwgfx kernel module is version 2.9.0 or later.
+<li>Check the vmware.log file for errors.
+<li>Run 'dmesg | grep vmwgfx' and look for "DX: yes".
+</ul>
+
 </div>
 </body>
 </html>
index 2182c28..3def6c4 100644 (file)
@@ -102,9 +102,8 @@ call_once(once_flag *flag, void (*func)(void))
 static inline int
 cnd_broadcast(cnd_t *cond)
 {
-    if (!cond) return thrd_error;
-    pthread_cond_broadcast(cond);
-    return thrd_success;
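+    /* A NULL cond is a caller bug; catch it with an assert in debug builds
+     * rather than masking it, and report the real pthread result instead
+     * of assuming success.
+     */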
+    assert(cond != NULL);
+    return (pthread_cond_broadcast(cond) == 0) ? thrd_success : thrd_error;
 }
 
 // 7.25.3.2
@@ -119,18 +118,16 @@ cnd_destroy(cnd_t *cond)
 static inline int
 cnd_init(cnd_t *cond)
 {
-    if (!cond) return thrd_error;
-    pthread_cond_init(cond, NULL);
-    return thrd_success;
+    assert(cond != NULL);
+    return (pthread_cond_init(cond, NULL) == 0) ? thrd_success : thrd_error;
 }
 
 // 7.25.3.4
 static inline int
 cnd_signal(cnd_t *cond)
 {
-    if (!cond) return thrd_error;
-    pthread_cond_signal(cond);
-    return thrd_success;
+    assert(cond != NULL);
+    return (pthread_cond_signal(cond) == 0) ? thrd_success : thrd_error;
 }
 
 // 7.25.3.5
@@ -139,7 +136,8 @@ cnd_timedwait(cnd_t *cond, mtx_t *mtx, const xtime *xt)
 {
     struct timespec abs_time;
     int rt;
-    if (!cond || !mtx || !xt) return thrd_error;
+    assert(mtx != NULL);
+    assert(cond != NULL);
     rt = pthread_cond_timedwait(cond, mtx, &abs_time);
     if (rt == ETIMEDOUT)
         return thrd_busy;
@@ -150,9 +148,9 @@ cnd_timedwait(cnd_t *cond, mtx_t *mtx, const xtime *xt)
 static inline int
 cnd_wait(cnd_t *cond, mtx_t *mtx)
 {
-    if (!cond || !mtx) return thrd_error;
-    pthread_cond_wait(cond, mtx);
-    return thrd_success;
+    assert(mtx != NULL);
+    assert(cond != NULL);
+    return (pthread_cond_wait(cond, mtx) == 0) ? thrd_success : thrd_error;
 }
 
 
@@ -161,7 +159,7 @@ cnd_wait(cnd_t *cond, mtx_t *mtx)
 static inline void
 mtx_destroy(mtx_t *mtx)
 {
-    assert(mtx);
+    assert(mtx != NULL);
     pthread_mutex_destroy(mtx);
 }
 
@@ -170,7 +168,7 @@ static inline int
 mtx_init(mtx_t *mtx, int type)
 {
     pthread_mutexattr_t attr;
-    if (!mtx) return thrd_error;
+    assert(mtx != NULL);
     if (type != mtx_plain && type != mtx_timed && type != mtx_try
       && type != (mtx_plain|mtx_recursive)
       && type != (mtx_timed|mtx_recursive)
@@ -188,9 +186,8 @@ mtx_init(mtx_t *mtx, int type)
 static inline int
 mtx_lock(mtx_t *mtx)
 {
-    if (!mtx) return thrd_error;
-    pthread_mutex_lock(mtx);
-    return thrd_success;
+    assert(mtx != NULL);
+    return (pthread_mutex_lock(mtx) == 0) ? thrd_success : thrd_error;
 }
 
 static inline int
@@ -203,7 +200,9 @@ thrd_yield(void);
 static inline int
 mtx_timedlock(mtx_t *mtx, const xtime *xt)
 {
-    if (!mtx || !xt) return thrd_error;
+    assert(mtx != NULL);
+    assert(xt != NULL);
+
     {
 #ifdef EMULATED_THREADS_USE_NATIVE_TIMEDLOCK
     struct timespec ts;
@@ -233,7 +232,7 @@ mtx_timedlock(mtx_t *mtx, const xtime *xt)
 static inline int
 mtx_trylock(mtx_t *mtx)
 {
-    if (!mtx) return thrd_error;
+    assert(mtx != NULL);
     return (pthread_mutex_trylock(mtx) == 0) ? thrd_success : thrd_busy;
 }
 
@@ -241,9 +240,8 @@ mtx_trylock(mtx_t *mtx)
 static inline int
 mtx_unlock(mtx_t *mtx)
 {
-    if (!mtx) return thrd_error;
-    pthread_mutex_unlock(mtx);
-    return thrd_success;
+    assert(mtx != NULL);
+    return (pthread_mutex_unlock(mtx) == 0) ? thrd_success : thrd_error;
 }
 
 
@@ -253,7 +251,7 @@ static inline int
 thrd_create(thrd_t *thr, thrd_start_t func, void *arg)
 {
     struct impl_thrd_param *pack;
-    if (!thr) return thrd_error;
+    assert(thr != NULL);
     pack = (struct impl_thrd_param *)malloc(sizeof(struct impl_thrd_param));
     if (!pack) return thrd_nomem;
     pack->func = func;
@@ -329,7 +327,7 @@ thrd_yield(void)
 static inline int
 tss_create(tss_t *key, tss_dtor_t dtor)
 {
-    if (!key) return thrd_error;
+    assert(key != NULL);
     return (pthread_key_create(key, dtor) == 0) ? thrd_success : thrd_error;
 }
 
index da638a8..13cfaa5 100644 (file)
@@ -66,6 +66,7 @@ AM_CPPFLAGS = \
 noinst_LTLIBRARIES = libglsl_util.la
 
 libglsl_util_la_SOURCES = \
+       glsl/shader_enums.c \
        mesa/main/imports.c \
        mesa/program/prog_hash_table.c \
        mesa/program/symbol_table.c \
index f8102db..8f8b11a 100644 (file)
@@ -8,6 +8,7 @@ env = env.Clone()
 
 env.Append(CPPPATH = [
     '#/include',
+    '#/include/HaikuGL',
     '#/src/egl/main',
     '#/src',
 ])
index 461735f..1740ee3 100644 (file)
@@ -27,6 +27,7 @@
 
 #define WL_HIDE_DEPRECATED
 
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <stdlib.h>
@@ -588,7 +589,8 @@ dri2_setup_screen(_EGLDisplay *disp)
                                    __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
       disp->Extensions.KHR_gl_colorspace = EGL_TRUE;
 
-   if (dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) {
+   if ((dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) ||
+       (dri2_dpy->swrast && dri2_dpy->swrast->base.version >= 3)) {
       disp->Extensions.KHR_create_context = EGL_TRUE;
 
       if (dri2_dpy->robustness)
@@ -784,7 +786,7 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
 
    if (dri2_dpy->own_dri_screen)
       dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
-   if (dri2_dpy->fd)
+   if (dri2_dpy->fd >= 0)
       close(dri2_dpy->fd);
    if (dri2_dpy->driver)
       dlclose(dri2_dpy->driver);
@@ -902,6 +904,55 @@ dri2_create_context_attribs_error(int dri_error)
    _eglError(egl_error, "dri2_create_context");
 }
 
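+/* Build the __DRI_CTX_ATTRIB list for createContextAttribs() from the EGL
+ * context state.  On entry *num_attribs is the capacity of ctx_attribs (at
+ * least 8); on success it is updated to the number of entries written.
+ */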
+static bool
+dri2_fill_context_attribs(struct dri2_egl_context *dri2_ctx,
+                          struct dri2_egl_display *dri2_dpy,
+                          uint32_t *ctx_attribs,
+                          unsigned *num_attribs)
+{
+   int pos = 0;
+
+   assert(*num_attribs >= 8);
+
+   ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
+   ctx_attribs[pos++] = dri2_ctx->base.ClientMajorVersion;
+   ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MINOR_VERSION;
+   ctx_attribs[pos++] = dri2_ctx->base.ClientMinorVersion;
+
+   if (dri2_ctx->base.Flags != 0) {
+      /* If the implementation doesn't support the __DRI2_ROBUSTNESS
+       * extension, don't even try to send it the robust-access flag.
+       * It may explode.  Instead, generate the required EGL error here.
+       */
+      if ((dri2_ctx->base.Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) != 0
+            && !dri2_dpy->robustness) {
+         _eglError(EGL_BAD_MATCH, "eglCreateContext");
+         return false;
+      }
+
+      ctx_attribs[pos++] = __DRI_CTX_ATTRIB_FLAGS;
+      ctx_attribs[pos++] = dri2_ctx->base.Flags;
+   }
+
+   if (dri2_ctx->base.ResetNotificationStrategy != EGL_NO_RESET_NOTIFICATION_KHR) {
+      /* If the implementation doesn't support the __DRI2_ROBUSTNESS
+       * extension, don't even try to send it a reset strategy.  It may
+       * explode.  Instead, generate the required EGL error here.
+       */
+      if (!dri2_dpy->robustness) {
+         _eglError(EGL_BAD_CONFIG, "eglCreateContext");
+         return false;
+      }
+
+      ctx_attribs[pos++] = __DRI_CTX_ATTRIB_RESET_STRATEGY;
+      ctx_attribs[pos++] = __DRI_CTX_RESET_LOSE_CONTEXT;
+   }
+
+   *num_attribs = pos;
+
+   return true;
+}
+
 /**
  * Called via eglCreateContext(), drv->API.CreateContext().
  */
@@ -987,44 +1038,12 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
    if (dri2_dpy->dri2) {
       if (dri2_dpy->dri2->base.version >= 3) {
          unsigned error;
-         unsigned num_attribs = 0;
+         unsigned num_attribs = 8;
          uint32_t ctx_attribs[8];
 
-         ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
-         ctx_attribs[num_attribs++] = dri2_ctx->base.ClientMajorVersion;
-         ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_MINOR_VERSION;
-         ctx_attribs[num_attribs++] = dri2_ctx->base.ClientMinorVersion;
-
-         if (dri2_ctx->base.Flags != 0) {
-            /* If the implementation doesn't support the __DRI2_ROBUSTNESS
-             * extension, don't even try to send it the robust-access flag.
-             * It may explode.  Instead, generate the required EGL error here.
-             */
-            if ((dri2_ctx->base.Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) != 0
-                && !dri2_dpy->robustness) {
-               _eglError(EGL_BAD_MATCH, "eglCreateContext");
-               goto cleanup;
-            }
-
-            ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_FLAGS;
-            ctx_attribs[num_attribs++] = dri2_ctx->base.Flags;
-         }
-
-         if (dri2_ctx->base.ResetNotificationStrategy != EGL_NO_RESET_NOTIFICATION_KHR) {
-            /* If the implementation doesn't support the __DRI2_ROBUSTNESS
-             * extension, don't even try to send it a reset strategy.  It may
-             * explode.  Instead, generate the required EGL error here.
-             */
-            if (!dri2_dpy->robustness) {
-               _eglError(EGL_BAD_CONFIG, "eglCreateContext");
-               goto cleanup;
-            }
-
-            ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_RESET_STRATEGY;
-            ctx_attribs[num_attribs++] = __DRI_CTX_RESET_LOSE_CONTEXT;
-         }
-
-         assert(num_attribs <= ARRAY_SIZE(ctx_attribs));
+         if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+                                        &num_attribs))
+            goto cleanup;
 
         dri2_ctx->dri_context =
            dri2_dpy->dri2->createContextAttribs(dri2_dpy->dri_screen,
@@ -1046,12 +1065,33 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
       }
    } else {
       assert(dri2_dpy->swrast);
-      dri2_ctx->dri_context =
-         dri2_dpy->swrast->createNewContextForAPI(dri2_dpy->dri_screen,
-                                                  api,
-                                                  dri_config,
-                                                  shared,
-                                                  dri2_ctx);
+      if (dri2_dpy->swrast->base.version >= 3) {
+         unsigned error;
+         unsigned num_attribs = 8;
+         uint32_t ctx_attribs[8];
+
+         if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+                                        &num_attribs))
+            goto cleanup;
+
+         dri2_ctx->dri_context =
+            dri2_dpy->swrast->createContextAttribs(dri2_dpy->dri_screen,
+                                                   api,
+                                                   dri_config,
+                                                   shared,
+                                                   num_attribs / 2,
+                                                   ctx_attribs,
+                                                   & error,
+                                                   dri2_ctx);
+         dri2_create_context_attribs_error(error);
+      } else {
+         dri2_ctx->dri_context =
+            dri2_dpy->swrast->createNewContextForAPI(dri2_dpy->dri_screen,
+                                                     api,
+                                                     dri_config,
+                                                     shared,
+                                                     dri2_ctx);
+      }
    }
 
    if (!dri2_ctx->dri_context)
index eda5087..050c309 100644 (file)
@@ -623,27 +623,19 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
       dri2_dpy->own_device = 1;
       gbm = gbm_create_device(fd);
       if (gbm == NULL)
-         return EGL_FALSE;
+         goto cleanup;
+   } else {
+      fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
+      if (fd < 0)
+         goto cleanup;
    }
 
-   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) {
-      free(dri2_dpy);
-      return EGL_FALSE;
-   }
+   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0)
+      goto cleanup;
 
    dri2_dpy->gbm_dri = gbm_dri_device(gbm);
-   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) {
-      free(dri2_dpy);
-      return EGL_FALSE;
-   }
-
-   if (fd < 0) {
-      fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
-      if (fd < 0) {
-         free(dri2_dpy);
-         return EGL_FALSE;
-      }
-   }
+   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI)
+      goto cleanup;
 
    dri2_dpy->fd = fd;
    dri2_dpy->device_name = loader_get_device_name_for_fd(dri2_dpy->fd);
@@ -727,4 +719,11 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
    dri2_dpy->vtbl = &dri2_drm_display_vtbl;
 
    return EGL_TRUE;
+
+cleanup:
+   if (fd >= 0)
+      close(fd);
+
+   free(dri2_dpy);
+   return EGL_FALSE;
 }
index dbc64ba..6cf5461 100644 (file)
@@ -1804,6 +1804,7 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp)
    if (roundtrip(dri2_dpy) < 0 || dri2_dpy->formats == 0)
       goto cleanup_shm;
 
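+   /* swrast needs no DRM device; mark the fd invalid so dri2_terminate()
+    * knows there is nothing to close.
+    */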
+   dri2_dpy->fd = -1;
    dri2_dpy->driver_name = strdup("swrast");
    if (!dri2_load_driver_swrast(disp))
       goto cleanup_shm;
index bf7d2be..7991fc2 100644 (file)
@@ -1161,6 +1161,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
     * Every hardware driver_name is set using strdup. Doing the same in
     * here will allow is to simply free the memory at dri2_terminate().
     */
+   dri2_dpy->fd = -1;
    dri2_dpy->driver_name = strdup("swrast");
    if (!dri2_load_driver_swrast(disp))
       goto cleanup_conn;
index e9c6e0a..e00d9be 100644 (file)
@@ -197,7 +197,7 @@ drm_authenticate(struct wl_client *client,
                wl_resource_post_event(resource, WL_DRM_AUTHENTICATED);
 }
 
-const static struct wl_drm_interface drm_interface = {
+static const struct wl_drm_interface drm_interface = {
        drm_authenticate,
        drm_create_buffer,
         drm_create_planar_buffer,
index ae78595..80a5be5 100644 (file)
@@ -1,3 +1,32 @@
+/*
+ * Copyright © 2011 Kristian Høgsberg
+ * Copyright © 2011 Benjamin Franzke
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Kristian Høgsberg <krh@bitplanet.net>
+ *    Benjamin Franzke <benjaminfranzke@googlemail.com>
+ */
+
 #include <stdlib.h>
 
 #include <wayland-client.h>
index 04f77d0..a728162 100644 (file)
@@ -38,18 +38,23 @@ libgallium_la_SOURCES += \
 
 endif
 
-indices/u_indices_gen.c: $(srcdir)/indices/u_indices_gen.py
-       $(AM_V_at)$(MKDIR_P) indices
-       $(AM_V_GEN) $(PYTHON2) $< > $@
-
-indices/u_unfilled_gen.c: $(srcdir)/indices/u_unfilled_gen.py
-       $(AM_V_at)$(MKDIR_P) indices
-       $(AM_V_GEN) $(PYTHON2) $< > $@
-
-util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv
-       $(AM_V_at)$(MKDIR_P) util
-       $(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
-
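+# Helpers for the generated-source rules below: $(@D) expands to the
+# directory part of the target, so MKDIR_GEN creates each output directory
+# before the Python generator writes into it.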
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN =  $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
+indices/u_indices_gen.c: indices/u_indices_gen.py
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/indices/u_indices_gen.py > $@
+
+indices/u_unfilled_gen.c: indices/u_unfilled_gen.py
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/indices/u_unfilled_gen.py > $@
+
+util/u_format_table.c: util/u_format_table.py \
+                       util/u_format_pack.py \
+                       util/u_format_parse.py \
+                       util/u_format.csv
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
 
 noinst_LTLIBRARIES += libgalliumvl_stub.la
 libgalliumvl_stub_la_SOURCES = \
index 3616d88..1fa3641 100644 (file)
@@ -129,6 +129,8 @@ C_SOURCES := \
        rtasm/rtasm_execmem.h \
        rtasm/rtasm_x86sse.c \
        rtasm/rtasm_x86sse.h \
+       tgsi/tgsi_aa_point.c \
+       tgsi/tgsi_aa_point.h \
        tgsi/tgsi_build.c \
        tgsi/tgsi_build.h \
        tgsi/tgsi_dump.c \
@@ -144,6 +146,8 @@ C_SOURCES := \
        tgsi/tgsi_opcode_tmp.h \
        tgsi/tgsi_parse.c \
        tgsi/tgsi_parse.h \
+       tgsi/tgsi_point_sprite.c \
+       tgsi/tgsi_point_sprite.h \
        tgsi/tgsi_sanity.c \
        tgsi/tgsi_sanity.h \
        tgsi/tgsi_scan.c \
@@ -154,6 +158,8 @@ C_SOURCES := \
        tgsi/tgsi_text.h \
        tgsi/tgsi_transform.c \
        tgsi/tgsi_transform.h \
+       tgsi/tgsi_two_side.c \
+       tgsi/tgsi_two_side.h \
        tgsi/tgsi_ureg.c \
        tgsi/tgsi_ureg.h \
        tgsi/tgsi_util.c \
@@ -260,6 +266,8 @@ C_SOURCES := \
        util/u_pack_color.h \
        util/u_pointer.h \
        util/u_prim.h \
+       util/u_prim_restart.c \
+       util/u_prim_restart.h \
        util/u_pstipple.c \
        util/u_pstipple.h \
        util/u_range.h \
index 3918923..063e368 100644 (file)
@@ -240,7 +240,8 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W);
 
    /* KILL_IF -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
-   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y);
+   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
+                            TGSI_SWIZZLE_Y, TRUE);
 
    /* compute coverage factor = (1-d)/(1-k) */
 
index 186b4cb..a51e91f 100644 (file)
@@ -280,7 +280,8 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
 
    /* KILL_IF -texTemp.wwww;   # if -texTemp < 0, KILL fragment */
    tgsi_transform_kill_inst(ctx,
-                            TGSI_FILE_TEMPORARY, pctx->texTemp, TGSI_SWIZZLE_W);
+                            TGSI_FILE_TEMPORARY, pctx->texTemp,
+                            TGSI_SWIZZLE_W, TRUE);
 }
 
 
index 0f5a8f8..9cd7c55 100644 (file)
@@ -311,7 +311,7 @@ lp_build_const_elem(struct gallivm_state *gallivm,
    else {
       double dscale = lp_const_scale(type);
 
-      elem = LLVMConstInt(elem_type, round(val*dscale), 0);
+      elem = LLVMConstInt(elem_type, (long long) round(val*dscale), 0);
    }
 
    return elem;
index db50734..cf43ef2 100644 (file)
@@ -94,6 +94,128 @@ struct ttn_compile {
 #define ttn_channel(b, src, swiz) \
    nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false)
 
+static gl_varying_slot
+tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
+{
+   switch (semantic) {
+   case TGSI_SEMANTIC_POSITION:
+      return VARYING_SLOT_POS;
+   case TGSI_SEMANTIC_COLOR:
+      if (index == 0)
+         return VARYING_SLOT_COL0;
+      else
+         return VARYING_SLOT_COL1;
+   case TGSI_SEMANTIC_BCOLOR:
+      if (index == 0)
+         return VARYING_SLOT_BFC0;
+      else
+         return VARYING_SLOT_BFC1;
+   case TGSI_SEMANTIC_FOG:
+      return VARYING_SLOT_FOGC;
+   case TGSI_SEMANTIC_PSIZE:
+      return VARYING_SLOT_PSIZ;
+   case TGSI_SEMANTIC_GENERIC:
+      return VARYING_SLOT_VAR0 + index;
+   case TGSI_SEMANTIC_FACE:
+      return VARYING_SLOT_FACE;
+   case TGSI_SEMANTIC_EDGEFLAG:
+      return VARYING_SLOT_EDGE;
+   case TGSI_SEMANTIC_PRIMID:
+      return VARYING_SLOT_PRIMITIVE_ID;
+   case TGSI_SEMANTIC_CLIPDIST:
+      if (index == 0)
+         return VARYING_SLOT_CLIP_DIST0;
+      else
+         return VARYING_SLOT_CLIP_DIST1;
+   case TGSI_SEMANTIC_CLIPVERTEX:
+      return VARYING_SLOT_CLIP_VERTEX;
+   case TGSI_SEMANTIC_TEXCOORD:
+      return VARYING_SLOT_TEX0 + index;
+   case TGSI_SEMANTIC_PCOORD:
+      return VARYING_SLOT_PNTC;
+   case TGSI_SEMANTIC_VIEWPORT_INDEX:
+      return VARYING_SLOT_VIEWPORT;
+   case TGSI_SEMANTIC_LAYER:
+      return VARYING_SLOT_LAYER;
+   default:
+      fprintf(stderr, "Bad TGSI semantic: %d/%d\n", semantic, index);
+      abort();
+   }
+}
+
+/* Temporary helper to remap back to TGSI style semantic name/index
+ * values, for use in drivers that haven't been converted to using
+ * VARYING_SLOT_
+ */
+void
+varying_slot_to_tgsi_semantic(gl_varying_slot slot,
+                              unsigned *semantic_name, unsigned *semantic_index)
+{
+   static const unsigned map[][2] = {
+      [VARYING_SLOT_POS] = { TGSI_SEMANTIC_POSITION, 0 },
+      [VARYING_SLOT_COL0] = { TGSI_SEMANTIC_COLOR, 0 },
+      [VARYING_SLOT_COL1] = { TGSI_SEMANTIC_COLOR, 1 },
+      [VARYING_SLOT_BFC0] = { TGSI_SEMANTIC_BCOLOR, 0 },
+      [VARYING_SLOT_BFC1] = { TGSI_SEMANTIC_BCOLOR, 1 },
+      [VARYING_SLOT_FOGC] = { TGSI_SEMANTIC_FOG, 0 },
+      [VARYING_SLOT_PSIZ] = { TGSI_SEMANTIC_PSIZE, 0 },
+      [VARYING_SLOT_FACE] = { TGSI_SEMANTIC_FACE, 0 },
+      [VARYING_SLOT_EDGE] = { TGSI_SEMANTIC_EDGEFLAG, 0 },
+      [VARYING_SLOT_PRIMITIVE_ID] = { TGSI_SEMANTIC_PRIMID, 0 },
+      [VARYING_SLOT_CLIP_DIST0] = { TGSI_SEMANTIC_CLIPDIST, 0 },
+      [VARYING_SLOT_CLIP_DIST1] = { TGSI_SEMANTIC_CLIPDIST, 1 },
+      [VARYING_SLOT_CLIP_VERTEX] = { TGSI_SEMANTIC_CLIPVERTEX, 0 },
+      [VARYING_SLOT_PNTC] = { TGSI_SEMANTIC_PCOORD, 0 },
+      [VARYING_SLOT_VIEWPORT] = { TGSI_SEMANTIC_VIEWPORT_INDEX, 0 },
+      [VARYING_SLOT_LAYER] = { TGSI_SEMANTIC_LAYER, 0 },
+   };
+
+   if (slot >= VARYING_SLOT_VAR0) {
+      *semantic_name = TGSI_SEMANTIC_GENERIC;
+      *semantic_index = slot - VARYING_SLOT_VAR0;
+      return;
+   }
+
+   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
+      *semantic_name = TGSI_SEMANTIC_TEXCOORD;
+      *semantic_index = slot - VARYING_SLOT_TEX0;
+      return;
+   }
+
+   if (slot >= ARRAY_SIZE(map)) {
+      fprintf(stderr, "Unknown varying slot %d\n", slot);
+      abort();
+   }
+
+   *semantic_name = map[slot][0];
+   *semantic_index = map[slot][1];
+}
+
+/* Temporary helper to remap back to TGSI style semantic name/index
+ * values, for use in drivers that haven't been converted to using
+ * FRAG_RESULT_
+ */
+void
+frag_result_to_tgsi_semantic(gl_frag_result slot,
+                             unsigned *semantic_name, unsigned *semantic_index)
+{
+   static const unsigned map[][2] = {
+      [FRAG_RESULT_DEPTH] = { TGSI_SEMANTIC_POSITION, 0 },
+      [FRAG_RESULT_COLOR] = { TGSI_SEMANTIC_COLOR, -1 },
+      [FRAG_RESULT_DATA0 + 0] = { TGSI_SEMANTIC_COLOR, 0 },
+      [FRAG_RESULT_DATA0 + 1] = { TGSI_SEMANTIC_COLOR, 1 },
+      [FRAG_RESULT_DATA0 + 2] = { TGSI_SEMANTIC_COLOR, 2 },
+      [FRAG_RESULT_DATA0 + 3] = { TGSI_SEMANTIC_COLOR, 3 },
+      [FRAG_RESULT_DATA0 + 4] = { TGSI_SEMANTIC_COLOR, 4 },
+      [FRAG_RESULT_DATA0 + 5] = { TGSI_SEMANTIC_COLOR, 5 },
+      [FRAG_RESULT_DATA0 + 6] = { TGSI_SEMANTIC_COLOR, 6 },
+      [FRAG_RESULT_DATA0 + 7] = { TGSI_SEMANTIC_COLOR, 7 },
+   };
+
+   *semantic_name = map[slot][0];
+   *semantic_index = map[slot][1];
+}
+
 static nir_ssa_def *
 ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest)
 {
@@ -216,12 +338,15 @@ ttn_emit_declaration(struct ttn_compile *c)
             var->data.mode = nir_var_shader_in;
             var->name = ralloc_asprintf(var, "in_%d", idx);
 
-            /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
-             * instead, but nothing in NIR core is looking at the value
-             * currently, and this is less change to drivers.
-             */
-            var->data.location = decl->Semantic.Name;
-            var->data.index = decl->Semantic.Index;
+            if (c->scan->processor == TGSI_PROCESSOR_FRAGMENT) {
+               var->data.location =
+                  tgsi_varying_semantic_to_slot(decl->Semantic.Name,
+                                                decl->Semantic.Index);
+            } else {
+               assert(!decl->Declaration.Semantic);
+               var->data.location = VERT_ATTRIB_GENERIC0 + idx;
+            }
+            var->data.index = 0;
 
             /* We definitely need to translate the interpolation field, because
              * nir_print will decode it.
@@ -241,6 +366,8 @@ ttn_emit_declaration(struct ttn_compile *c)
             exec_list_push_tail(&b->shader->inputs, &var->node);
             break;
          case TGSI_FILE_OUTPUT: {
+            int semantic_name = decl->Semantic.Name;
+            int semantic_index = decl->Semantic.Index;
             /* Since we can't load from outputs in the IR, we make temporaries
              * for the outputs and emit stores to the real outputs at the end of
              * the shader.
@@ -252,14 +379,40 @@ ttn_emit_declaration(struct ttn_compile *c)
 
             var->data.mode = nir_var_shader_out;
             var->name = ralloc_asprintf(var, "out_%d", idx);
-
-            var->data.location = decl->Semantic.Name;
-            if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
-                decl->Semantic.Index == 0 &&
-                c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
-               var->data.index = -1;
-            else
-               var->data.index = decl->Semantic.Index;
+            var->data.index = 0;
+
+            if (c->scan->processor == TGSI_PROCESSOR_FRAGMENT) {
+               switch (semantic_name) {
+               case TGSI_SEMANTIC_COLOR: {
+                  /* TODO tgsi loses some information, so we cannot
+                   * actually differentiate here between DSB and MRT
+                   * at this point.  But so far no drivers using tgsi-
+                   * to-nir support dual source blend:
+                   */
+                  bool dual_src_blend = false;
+                  if (dual_src_blend && (semantic_index == 1)) {
+                     var->data.location = FRAG_RESULT_DATA0;
+                     var->data.index = 1;
+                  } else {
+                     if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
+                        var->data.location = FRAG_RESULT_COLOR;
+                     else
+                        var->data.location = FRAG_RESULT_DATA0 + semantic_index;
+                  }
+                  break;
+               }
+               case TGSI_SEMANTIC_POSITION:
+                  var->data.location = FRAG_RESULT_DEPTH;
+                  break;
+               default:
+                  fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
+                          decl->Semantic.Name, decl->Semantic.Index);
+                  abort();
+               }
+            } else {
+               var->data.location =
+                  tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
+            }
 
             if (is_array) {
                unsigned j;
@@ -921,10 +1074,6 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
 {
    nir_builder *b = &c->build;
 
-   /* Save the outside-of-the-if-statement node list. */
-   c->if_stack[c->if_stack_pos] = b->cursor;
-   c->if_stack_pos++;
-
    src = ttn_channel(b, src, X);
 
    nir_if *if_stmt = nir_if_create(b->shader);
@@ -935,6 +1084,9 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
    }
    nir_builder_cf_insert(b, &if_stmt->cf_node);
 
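+   /* Save the point just after the if-statement, now that the node has
+    * been inserted at the current cursor position; the endif handler
+    * resumes building there.
+    */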
+   c->if_stack[c->if_stack_pos] = nir_after_cf_node(&if_stmt->cf_node);
+   c->if_stack_pos++;
+
    b->cursor = nir_after_cf_list(&if_stmt->then_list);
 
    c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
@@ -963,13 +1115,12 @@ ttn_bgnloop(struct ttn_compile *c)
 {
    nir_builder *b = &c->build;
 
-   /* Save the outside-of-the-loop node list. */
-   c->loop_stack[c->loop_stack_pos] = b->cursor;
-   c->loop_stack_pos++;
-
    nir_loop *loop = nir_loop_create(b->shader);
    nir_builder_cf_insert(b, &loop->cf_node);
 
+   c->loop_stack[c->loop_stack_pos] = nir_after_cf_node(&loop->cf_node);
+   c->loop_stack_pos++;
+
    b->cursor = nir_after_cf_list(&loop->body);
 }
 
index 687348a..1a185a8 100644 (file)
@@ -28,3 +28,9 @@ struct nir_shader_compiler_options *options;
 struct nir_shader *
 tgsi_to_nir(const void *tgsi_tokens,
             const struct nir_shader_compiler_options *options);
+void
+varying_slot_to_tgsi_semantic(gl_varying_slot slot,
+                              unsigned *semantic_name, unsigned *semantic_index);
+void
+frag_result_to_tgsi_semantic(gl_frag_result slot,
+                             unsigned *semantic_name, unsigned *semantic_index);
index c46078b..d6b83e9 100644 (file)
@@ -96,11 +96,13 @@ os_log_message(const char *message)
 }
 
 
+#if !defined(PIPE_SUBSYSTEM_EMBEDDED)
 const char *
 os_get_option(const char *name)
 {
    return getenv(name);
 }
+#endif /* !PIPE_SUBSYSTEM_EMBEDDED */
 
 
 /**
index 147ce39..1638d96 100644 (file)
@@ -166,6 +166,11 @@ pb_cache_manager_create(struct pb_manager *provider,
                         unsigned bypass_usage,
                         uint64_t maximum_cache_size);
 
+/**
+ * Remove a buffer from the cache, but keep it alive.  Once removed, the
+ * cache no longer tracks the buffer, so destroying it frees it immediately
+ * instead of returning it to the cache.
+ */
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *buf);
 
 struct pb_fence_ops;
 
index 3b35049..cc8ae84 100644 (file)
@@ -104,18 +104,42 @@ pb_cache_manager(struct pb_manager *mgr)
 }
 
 
+static void
+_pb_cache_manager_remove_buffer_locked(struct pb_cache_buffer *buf)
+{
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (buf->head.next) {
+      LIST_DEL(&buf->head);
+      assert(mgr->numDelayed);
+      --mgr->numDelayed;
+      mgr->cache_size -= buf->base.size;
+   }
+   buf->mgr = NULL;
+}
+
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *pb_buf)
+{
+   struct pb_cache_buffer *buf = (struct pb_cache_buffer*)pb_buf;
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (!mgr)
+      return;
+
+   pipe_mutex_lock(mgr->mutex);
+   _pb_cache_manager_remove_buffer_locked(buf);
+   pipe_mutex_unlock(mgr->mutex);
+}
+
 /**
  * Actually destroy the buffer.
  */
 static inline void
 _pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
 {
-   struct pb_cache_manager *mgr = buf->mgr;
-
-   LIST_DEL(&buf->head);
-   assert(mgr->numDelayed);
-   --mgr->numDelayed;
-   mgr->cache_size -= buf->base.size;
+   if (buf->mgr)
+      _pb_cache_manager_remove_buffer_locked(buf);
    assert(!pipe_is_referenced(&buf->base.reference));
    pb_reference(&buf->buffer, NULL);
    FREE(buf);
@@ -156,6 +180,12 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf)
    struct pb_cache_buffer *buf = pb_cache_buffer(_buf);   
    struct pb_cache_manager *mgr = buf->mgr;
 
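+   /* A NULL manager means the buffer was detached via
+    * pb_cache_manager_remove_buffer(); free it directly instead of
+    * recycling it into the cache.
+    */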
+   if (!mgr) {
+      pb_reference(&buf->buffer, NULL);
+      FREE(buf);
+      return;
+   }
+
    pipe_mutex_lock(mgr->mutex);
    assert(!pipe_is_referenced(&buf->base.reference));
    
diff --git a/src/gallium/auxiliary/tgsi/tgsi_aa_point.c b/src/gallium/auxiliary/tgsi/tgsi_aa_point.c
new file mode 100644 (file)
index 0000000..9016eff
--- /dev/null
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms the fragment shader to support anti-aliasing points.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_aa_point.h"
+#include "tgsi_transform.h"
+
+#define INVALID_INDEX 9999
+
+struct aa_transform_context
+{
+   struct tgsi_transform_context base;
+
+   unsigned tmp;           // temp register
+   unsigned color_out;     // frag color out register
+   unsigned color_tmp;     // frag color temp register
+   unsigned num_tmp;       // number of temp registers
+   unsigned num_imm;       // number of immediates
+   unsigned num_input;     // number of inputs
+   unsigned aa_point_coord_index;
+};
+
+static inline struct aa_transform_context *
+aa_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct aa_transform_context *) ctx;
+}
+
+/**
+ * TGSI declaration transform callback.
+ */
+static void
+aa_decl(struct tgsi_transform_context *ctx,
+              struct tgsi_full_declaration *decl)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
+       decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+       decl->Semantic.Index == 0) {
+         ts->color_out = decl->Range.First;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      ts->num_input++;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+/**
+ * TGSI immediate declaration transform callback.
+ */
+static void
+aa_immediate(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_immediate *imm)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   ctx->emit_immediate(ctx, imm);
+   ts->num_imm++;
+}
+
+/**
+ * TGSI transform prolog callback.
+ */
+static void
+aa_prolog(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+   unsigned tmp0;
+   unsigned texIn;
+   unsigned imm;
+
+   /* Declare two temporary registers, one for scratch values and
+    * one for the color.
+    */
+   ts->tmp = ts->num_tmp++;
+   ts->color_tmp = ts->num_tmp++;
+
+   tgsi_transform_temps_decl(ctx, ts->tmp, ts->color_tmp);
+
+   /* Declare new generic input/texcoord */
+   texIn = ts->num_input++;
+   tgsi_transform_input_decl(ctx, texIn, TGSI_SEMANTIC_GENERIC,
+                             ts->aa_point_coord_index, TGSI_INTERPOLATE_LINEAR);
+
+   /* Declare extra immediates */
+   imm = ts->num_imm++;
+   tgsi_transform_immediate_decl(ctx, 0.5, 0.5, 0.45, 1.0);
+
+   /*
+    * Emit code to compute fragment coverage.
+    * The point always has radius 0.5.  The threshold value will be a
+    * value less than, but close to 0.5, such as 0.45.
+    * We compute a coverage factor from the distance and threshold.
+    * If the coverage is negative, the fragment is outside the circle and
+    * it's discarded.
+    * If the coverage is >= 1, the fragment is fully inside the threshold
+    * distance.  We limit/clamp the coverage to 1.
+    * Otherwise, the fragment is between the threshold value and 0.5 and we
+    * compute a coverage value in [0,1].
+    *
+    * Input reg (texIn) usage:
+    *  texIn.x = x point coord in [0,1]
+    *  texIn.y = y point coord in [0,1]
+    *  texIn.z = "k" the smoothing threshold distance
+    *  texIn.w = unused
+    *
+    * Temp reg (t0) usage:
+    *  t0.x = distance of fragment from center point
+    *  t0.y = misc temp; holds (0.5 - d) while computing coverage
+    *  t0.z = unused
+    *  t0.w = final coverage value
+    */
+
+   tmp0 = ts->tmp;
+
+   /* SUB t0.xy, texIn, (0.5, 0.5) */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_SUB,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY,
+                           TGSI_FILE_INPUT, texIn,
+                           TGSI_FILE_IMMEDIATE, imm);
+
+   /* DP2 t0.x, t0.xy, t0.xy;  # t0.x = x^2 + y^2 */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_DP2,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
+                           TGSI_FILE_TEMPORARY, tmp0,
+                           TGSI_FILE_TEMPORARY, tmp0);
+
+   /* SQRT t0.x, t0.x */
+   tgsi_transform_op1_inst(ctx, TGSI_OPCODE_SQRT,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
+                           TGSI_FILE_TEMPORARY, tmp0);
+
+   /* compute coverage factor = (0.5-d)/(0.5-k) */
+
+   /* SUB t0.w, 0.5, texIn.z;  # t0.w = 0.5-k */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X,
+                               TGSI_FILE_INPUT, texIn, TGSI_SWIZZLE_Z);
+
+   /* SUB t0.y, 0.5, t0.x;  # t0.y = 0.5-d */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X);
+
+   /* DIV t0.w, t0.y, t0.w;  # coverage = (0.5-d)/(0.5-k) */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W);
+
+   /* If the coverage value is negative, it means the fragment is outside
+    * the point's circular boundary.  Kill it.
+    */
+   /* KILL_IF tmp0.w;  # if tmp0.w < 0 KILL */
+   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
+                            TGSI_SWIZZLE_W, FALSE);
+
+   /* If the distance is less than the threshold, the coverage/alpha value
+    * will be greater than one.  Clamp to one here.
+    */
+   /* MIN tmp0.w, tmp0.w, 1.0 */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_W);
+}
+
+/**
+ * TGSI instruction transform callback.
+ */
+static void
+aa_inst(struct tgsi_transform_context *ctx,
+        struct tgsi_full_instruction *inst)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+   unsigned i;
+
+   /* Look for writes to the color output reg and redirect them to
+    * the color temp reg.
+    */
+   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+      struct tgsi_full_dst_register *dst = &inst->Dst[i];
+      if (dst->Register.File == TGSI_FILE_OUTPUT &&
+          dst->Register.Index == ts->color_out) {
+         dst->Register.File = TGSI_FILE_TEMPORARY;
+         dst->Register.Index = ts->color_tmp;
+      }
+   }
+
+   ctx->emit_instruction(ctx, inst);
+}
+
+/**
+ * TGSI transform epilog callback.
+ */
+static void
+aa_epilog(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   /* add alpha modulation code at tail of program */
+   assert(ts->color_out != INVALID_INDEX);
+   assert(ts->color_tmp != INVALID_INDEX);
+
+   /* MOV output.color.xyz colorTmp */
+   tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                           TGSI_FILE_OUTPUT, ts->color_out,
+                           TGSI_WRITEMASK_XYZ,
+                           TGSI_FILE_TEMPORARY, ts->color_tmp);
+
+   /* MUL output.color.w colorTmp.w tmp0.w */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
+                           TGSI_FILE_OUTPUT, ts->color_out,
+                           TGSI_WRITEMASK_W,
+                           TGSI_FILE_TEMPORARY, ts->color_tmp,
+                           TGSI_FILE_TEMPORARY, ts->tmp);
+}
+
+/**
+ * TGSI utility to transform a fragment shader to support anti-aliased points.
+ *
+ * This utility takes two parameters:
+ * \param tokens_in  -- the original token string of the shader
+ * \param aa_point_coord_index -- the semantic index of the generic register
+ *                            that contains the point sprite texture coord
+ *
+ * For each fragment in the point, we compute the distance of the fragment
+ * from the point center using the point sprite texture coordinates.
+ * If the distance is greater than 0.5, we discard the fragment.
+ * Otherwise, we compute a coverage value which approximates how much of
+ * the fragment is inside the bounding circle of the point.  If the distance
+ * is less than the threshold 'k', the coverage is 1; between 'k' and 0.5 it
+ * falls between 0 and 1.  The final fragment color's alpha channel is then
+ * modulated by the coverage value.
+ */
+struct tgsi_token *
+tgsi_add_aa_point(const struct tgsi_token *tokens_in,
+                  const int aa_point_coord_index)
+{
+   struct aa_transform_context transform;
+   const uint num_new_tokens = 200; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   /* setup transformation context */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.transform_declaration = aa_decl;
+   transform.base.transform_instruction = aa_inst;
+   transform.base.transform_immediate = aa_immediate;
+   transform.base.prolog = aa_prolog;
+   transform.base.epilog = aa_epilog;
+
+   transform.tmp = INVALID_INDEX;
+   transform.color_out = INVALID_INDEX;
+   transform.color_tmp = INVALID_INDEX;
+
+   assert(aa_point_coord_index != -1);
+   transform.aa_point_coord_index = (unsigned)aa_point_coord_index;
+
+   transform.num_tmp = 0;
+   transform.num_imm = 0;
+   transform.num_input = 0;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   return new_tokens;
+}
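As a usage illustration (not part of the patch itself), a driver's shader-variant path might call the new transform as sketched below; the wrapper name and fallback policy are assumptions.

#include "tgsi/tgsi_aa_point.h"

/* Hypothetical wrapper: build an AA-point variant of a fragment shader.
 * aa_coord_index is the GENERIC semantic index carrying the point coord,
 * as reported by the companion point-sprite GS transform later in this
 * commit.  Returns NULL on allocation failure; the caller frees the
 * result with tgsi_free_tokens() when the variant is destroyed.
 */
static struct tgsi_token *
build_aa_point_fs(const struct tgsi_token *fs_tokens, int aa_coord_index)
{
   return tgsi_add_aa_point(fs_tokens, aa_coord_index);
}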
diff --git a/src/gallium/auxiliary/tgsi/tgsi_aa_point.h b/src/gallium/auxiliary/tgsi/tgsi_aa_point.h
new file mode 100644 (file)
index 0000000..d89f40c
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_AA_POINT_H
+#define TGSI_AA_POINT_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_aa_point(const struct tgsi_token *tokens_in,
+                  const int aa_point_coord_index);
+
+#endif /* TGSI_AA_POINT_H */
index 75cd0d5..f67c162 100644 (file)
@@ -2021,7 +2021,7 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach,
 /*
  * execute a texture instruction.
  *
- * modifier is used to control the channel routing for the\
+ * modifier is used to control the channel routing for the
  * instruction variants like proj, lod, and texture with lod bias.
  * sampler indicates which src register the sampler is contained in.
  */
@@ -2032,7 +2032,7 @@ exec_tex(struct tgsi_exec_machine *mach,
 {
    const union tgsi_exec_channel *args[5], *proj = NULL;
    union tgsi_exec_channel r[5];
-   enum tgsi_sampler_control control =  tgsi_sampler_lod_none;
+   enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
    uint chan;
    uint unit;
    int8_t offsets[3];
@@ -2078,11 +2078,11 @@ exec_tex(struct tgsi_exec_machine *mach,
          args[i] = &ZeroVec;
 
       if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
-         control = tgsi_sampler_lod_explicit;
+         control = TGSI_SAMPLER_LOD_EXPLICIT;
       else if (modifier == TEX_MODIFIER_LOD_BIAS)
-         control = tgsi_sampler_lod_bias;
+         control = TGSI_SAMPLER_LOD_BIAS;
       else if (modifier == TEX_MODIFIER_GATHER)
-         control = tgsi_sampler_gather;
+         control = TGSI_SAMPLER_GATHER;
    }
    else {
       for (i = dim; i < Elements(args); i++)
@@ -2132,6 +2132,46 @@ exec_tex(struct tgsi_exec_machine *mach,
    }
 }
 
+static void
+exec_lodq(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   uint unit;
+   int dim;
+   int i;
+   union tgsi_exec_channel coords[4];
+   const union tgsi_exec_channel *args[Elements(coords)];
+   union tgsi_exec_channel r[2];
+
+   unit = fetch_sampler_unit(mach, inst, 1);
+   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL);
+   assert(dim <= Elements(coords));
+   /* fetch coordinates */
+   for (i = 0; i < dim; i++) {
+      FETCH(&coords[i], 0, TGSI_CHAN_X + i);
+      args[i] = &coords[i];
+   }
+   for (i = dim; i < Elements(coords); i++) {
+      args[i] = &ZeroVec;
+   }
+   mach->Sampler->query_lod(mach->Sampler, unit, unit,
+                            args[0]->f,
+                            args[1]->f,
+                            args[2]->f,
+                            args[3]->f,
+                            TGSI_SAMPLER_LOD_NONE,
+                            r[0].f,
+                            r[1].f);
+
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+      store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+}
 
 static void
 exec_txd(struct tgsi_exec_machine *mach,
@@ -2155,7 +2195,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, unit, unit,
                   &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
       break;
 
@@ -2171,7 +2211,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, unit, unit,
                   &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
       break;
 
@@ -2185,7 +2225,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, unit, unit,
                   &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
       break;
 
@@ -2205,7 +2245,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, unit, unit,
                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
       break;
 
@@ -2225,7 +2265,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, unit, unit,
                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
       break;
 
@@ -2364,7 +2404,7 @@ exec_sample(struct tgsi_exec_machine *mach,
    const uint sampler_unit = inst->Src[2].Register.Index;
    union tgsi_exec_channel r[5], c1;
    const union tgsi_exec_channel *lod = &ZeroVec;
-   enum tgsi_sampler_control control = tgsi_sampler_lod_none;
+   enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
    uint chan;
    unsigned char swizzles[4];
    int8_t offsets[3];
@@ -2378,16 +2418,16 @@ exec_sample(struct tgsi_exec_machine *mach,
       if (modifier == TEX_MODIFIER_LOD_BIAS) {
          FETCH(&c1, 3, TGSI_CHAN_X);
          lod = &c1;
-         control = tgsi_sampler_lod_bias;
+         control = TGSI_SAMPLER_LOD_BIAS;
       }
       else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
          FETCH(&c1, 3, TGSI_CHAN_X);
          lod = &c1;
-         control = tgsi_sampler_lod_explicit;
+         control = TGSI_SAMPLER_LOD_EXPLICIT;
       }
       else {
          assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
-         control = tgsi_sampler_lod_zero;
+         control = TGSI_SAMPLER_LOD_ZERO;
       }
    }
 
@@ -2513,7 +2553,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                   &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
       break;
 
@@ -2529,7 +2569,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                   &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
       break;
 
@@ -2547,7 +2587,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                   &r[0], &r[1], &r[2], &r[3]);
       break;
 
@@ -4378,6 +4418,12 @@ exec_instruction(
       exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
       break;
 
+   case TGSI_OPCODE_LODQ:
+      /* src[0] = texcoord */
+      /* src[1] = sampler unit */
+      exec_lodq(mach, inst);
+      break;
+
    case TGSI_OPCODE_UP2H:
       assert (0);
       break;
index 5d56aab..a371aa9 100644 (file)
@@ -88,13 +88,14 @@ struct tgsi_interp_coef
    float dady[TGSI_NUM_CHANNELS];
 };
 
-enum tgsi_sampler_control {
-   tgsi_sampler_lod_none,
-   tgsi_sampler_lod_bias,
-   tgsi_sampler_lod_explicit,
-   tgsi_sampler_lod_zero,
-   tgsi_sampler_derivs_explicit,
-   tgsi_sampler_gather,
+enum tgsi_sampler_control
+{
+   TGSI_SAMPLER_LOD_NONE,
+   TGSI_SAMPLER_LOD_BIAS,
+   TGSI_SAMPLER_LOD_EXPLICIT,
+   TGSI_SAMPLER_LOD_ZERO,
+   TGSI_SAMPLER_DERIVS_EXPLICIT,
+   TGSI_SAMPLER_GATHER,
 };
 
 /**
@@ -138,6 +139,16 @@ struct tgsi_sampler
                      const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
                      const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
                      float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+   void (*query_lod)(const struct tgsi_sampler *tgsi_sampler,
+                     const unsigned sview_index,
+                     const unsigned sampler_index,
+                     const float s[TGSI_QUAD_SIZE],
+                     const float t[TGSI_QUAD_SIZE],
+                     const float p[TGSI_QUAD_SIZE],
+                     const float c0[TGSI_QUAD_SIZE],
+                     const enum tgsi_sampler_control control,
+                     float mipmap[TGSI_QUAD_SIZE],
+                     float lod[TGSI_QUAD_SIZE]);
 };
 
 #define TGSI_EXEC_NUM_TEMPS       4096
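exec_lodq() above fetches up to four coordinate channels and hands them to this new hook, then routes the two result vectors to dst.x and dst.y. Below is a skeletal, purely illustrative implementation; a real backend such as softpipe would derive the LOD from texcoord derivatives and the bound sampler state rather than returning zeros.

#include "tgsi/tgsi_exec.h"

/* Illustrative stub of the query_lod hook (assumed implementation, not
 * from this commit).  Fills the two per-quad result vectors that
 * exec_lodq() stores to dst.x and dst.y.
 */
static void
stub_query_lod(const struct tgsi_sampler *tgsi_sampler,
               const unsigned sview_index,
               const unsigned sampler_index,
               const float s[TGSI_QUAD_SIZE],
               const float t[TGSI_QUAD_SIZE],
               const float p[TGSI_QUAD_SIZE],
               const float c0[TGSI_QUAD_SIZE],
               const enum tgsi_sampler_control control,
               float mipmap[TGSI_QUAD_SIZE],
               float lod[TGSI_QUAD_SIZE])
{
   unsigned i;
   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
      mipmap[i] = 0.0f;   /* mipmap level that would be accessed */
      lod[i] = 0.0f;      /* relative (unclamped) level of detail */
   }
}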
index fb29ea0..3b40c3d 100644 (file)
@@ -141,7 +141,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
    { 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
    { 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
-   { 0, 0, 0, 0, 0, 0, NONE, "", 104 },     /* removed */
+   { 1, 1, 1, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
    { 0, 0, 0, 0, 0, 0, NONE, "", 105 },     /* removed */
    { 0, 0, 0, 0, 0, 0, NONE, "", 106 },     /* removed */
    { 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
@@ -331,6 +331,7 @@ tgsi_opcode_infer_type( uint opcode )
    case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */
    case TGSI_OPCODE_TXQ:
    case TGSI_OPCODE_TXQ_LZ:
+   case TGSI_OPCODE_TXQS:
    case TGSI_OPCODE_F2U:
    case TGSI_OPCODE_UDIV:
    case TGSI_OPCODE_UMAD:
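As an aside (not part of the patch), the recycled opcode slot can be sanity-checked through the existing info API; per the table row above, TXQS is a one-destination, one-source query, and the infer-type change makes its result unsigned. A small sketch, assuming the usual tgsi_info accessors:

#include <assert.h>
#include <string.h>
#include "tgsi/tgsi_info.h"

/* Sketch: TXQS now occupies the previously removed opcode slot 104. */
static void
check_txqs(void)
{
   const struct tgsi_opcode_info *info =
      tgsi_get_opcode_info(TGSI_OPCODE_TXQS);

   assert(info->num_dst == 1);
   assert(info->num_src == 1);
   assert(strcmp(info->mnemonic, "TXQS") == 0);
   assert(tgsi_opcode_infer_type(TGSI_OPCODE_TXQS) == TGSI_TYPE_UNSIGNED);
}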
diff --git a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
new file mode 100644 (file)
index 0000000..cb8dbcb
--- /dev/null
@@ -0,0 +1,582 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms a geometry shader to emulate point sprites by
+ * drawing a quad per point. It also adds an extra output for the original
+ * point position if that position is to be written to a stream output buffer.
+ * Note: It assumes the driver will add a constant for the inverse viewport
+ *       after the user-defined constants.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_point_sprite.h"
+#include "tgsi_transform.h"
+#include "pipe/p_state.h"
+
+#define INVALID_INDEX 9999
+
+/* Set swizzle to select 0, 1 or -1 from the immediates (0, 1, 0.5, -1) */
+static inline unsigned
+set_swizzle(int x, int y, int z, int w)
+{
+   static const unsigned map[3] = {TGSI_SWIZZLE_W, TGSI_SWIZZLE_X,
+                                   TGSI_SWIZZLE_Y};
+   assert(x >= -1);
+   assert(x <= 1);
+   assert(y >= -1);
+   assert(y <= 1);
+   assert(z >= -1);
+   assert(z <= 1);
+   assert(w >= -1);
+   assert(w <= 1);
+
+   return map[x+1] | (map[y+1] << 2) | (map[z+1] << 4) | (map[w+1] << 6);
+}
+
+static inline unsigned
+get_swizzle(unsigned swizzle, unsigned component)
+{
+   assert(component < 4);
+   return (swizzle >> (component * 2)) & 0x3;
+}
+
+struct psprite_transform_context
+{
+   struct tgsi_transform_context base;
+   unsigned num_tmp;
+   unsigned num_out;
+   unsigned num_orig_out;
+   unsigned num_const;
+   unsigned num_imm;
+   unsigned point_size_in;          // point size input
+   unsigned point_size_out;         // point size output
+   unsigned point_size_tmp;         // point size temp
+   unsigned point_pos_in;           // point pos input
+   unsigned point_pos_out;          // point pos output
+   unsigned point_pos_sout;         // original point pos for streamout
+   unsigned point_pos_tmp;          // point pos temp
+   unsigned point_scale_tmp;        // point scale temp
+   unsigned point_color_out;        // point color output
+   unsigned point_color_tmp;        // point color temp
+   unsigned point_imm;              // point immediates
+   unsigned point_ivp;              // point inverseViewport constant
+   unsigned point_dir_swz[4];       // point direction swizzle
+   unsigned point_coord_swz[4];     // point coord swizzle
+   unsigned point_coord_enable;     // point coord enable mask
+   unsigned point_coord_decl;       // point coord output declared mask
+   unsigned point_coord_out;        // point coord output starting index
+   unsigned point_coord_aa;         // aa point coord semantic index
+   unsigned point_coord_k;          // aa point coord threshold distance
+   unsigned stream_out_point_pos:1; // set if the original point pos is streamed out
+   unsigned aa_point:1;             // set if doing aa point
+   unsigned out_tmp_index[PIPE_MAX_SHADER_OUTPUTS];
+   int max_generic;
+};
+
+static inline struct psprite_transform_context *
+psprite_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct psprite_transform_context *) ctx;
+}
+
+
+/**
+ * TGSI declaration transform callback.
+ */
+static void
+psprite_decl(struct tgsi_transform_context *ctx,
+             struct tgsi_full_declaration *decl)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) {
+         ts->point_size_in = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+         ts->point_pos_in = decl->Range.First;
+      }
+   }
+   else if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) {
+         ts->point_size_out = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+         ts->point_pos_out = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
+               decl->Semantic.Index < 32) {
+         ts->point_coord_decl |= 1 << decl->Semantic.Index;
+         ts->max_generic = MAX2(ts->max_generic, decl->Semantic.Index);
+      }
+      ts->num_out = MAX2(ts->num_out, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_CONSTANT) {
+      ts->num_const = MAX2(ts->num_const, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+/**
+ * TGSI immediate declaration transform callback.
+ */
+static void
+psprite_immediate(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_immediate *imm)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+
+   ctx->emit_immediate(ctx, imm);
+   ts->num_imm++;
+}
+
+
+/**
+ * TGSI transform prolog callback.
+ */
+static void
+psprite_prolog(struct tgsi_transform_context *ctx)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   unsigned point_coord_enable, en;
+   int i;
+
+   /* Replace output registers with temporary registers */
+   for (i = 0; i < ts->num_out; i++) {
+      ts->out_tmp_index[i] = ts->num_tmp++;
+   }
+   ts->num_orig_out = ts->num_out;
+
+   /* Declare a tmp register for point scale */
+   ts->point_scale_tmp = ts->num_tmp++;
+
+   if (ts->point_size_out != INVALID_INDEX)
+      ts->point_size_tmp = ts->out_tmp_index[ts->point_size_out];
+   else
+      ts->point_size_tmp = ts->num_tmp++;
+
+   assert(ts->point_pos_out != INVALID_INDEX);
+   ts->point_pos_tmp = ts->out_tmp_index[ts->point_pos_out];
+   ts->out_tmp_index[ts->point_pos_out] = INVALID_INDEX;
+
+   /* Declare one more tmp register for the point coord threshold distance
+    * if we are generating anti-aliased points.
+    */
+   if (ts->aa_point)
+      ts->point_coord_k = ts->num_tmp++;
+
+   tgsi_transform_temps_decl(ctx, ts->point_size_tmp, ts->num_tmp-1);
+
+   /* Declare an extra output for the original point position for stream out */
+   if (ts->stream_out_point_pos) {
+      ts->point_pos_sout = ts->num_out++;
+      tgsi_transform_output_decl(ctx, ts->point_pos_sout,
+                                 TGSI_SEMANTIC_GENERIC, 0, 0);
+   }
+
+   /* point coord outputs to be declared */
+   point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
+
+   /* Declare outputs for those point coords that are enabled but not
+    * already declared in this shader.
+    */
+   ts->point_coord_out = ts->num_out;
+   if (point_coord_enable) {
+      for (i = 0, en = point_coord_enable; en; en>>=1, i++) {
+         if (en & 0x1) {
+            tgsi_transform_output_decl(ctx, ts->num_out++,
+                                       TGSI_SEMANTIC_GENERIC, i, 0);
+            ts->max_generic = MAX2(ts->max_generic, i);
+         }
+      }
+   }
+
+   /* add an extra generic output for aa point texcoord */
+   if (ts->aa_point) {
+      ts->point_coord_aa = ts->max_generic + 1;
+      assert((ts->point_coord_enable & (1 << ts->point_coord_aa)) == 0);
+      ts->point_coord_enable |= 1 << (ts->point_coord_aa);
+      tgsi_transform_output_decl(ctx, ts->num_out++, TGSI_SEMANTIC_GENERIC,
+                                 ts->point_coord_aa, 0);
+   }
+
+   /* Declare extra immediates */
+   ts->point_imm = ts->num_imm;
+   tgsi_transform_immediate_decl(ctx, 0, 1, 0.5, -1);
+
+   /* Declare point constant -
+    * constant.xy -- inverseViewport
+    * constant.z -- current point size
+    * constant.w -- max point size
+    * The driver needs to add this constant to the constant buffer
+    */
+   ts->point_ivp = ts->num_const++;
+   tgsi_transform_const_decl(ctx, ts->point_ivp, ts->point_ivp);
+
+   /* If this geometry shader does not specify point size,
+    * get the current point size from the point constant.
+    */
+   if (ts->point_size_out == INVALID_INDEX) {
+      struct tgsi_full_instruction inst;
+
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+      inst.Instruction.NumDstRegs = 1;
+      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+                             ts->point_size_tmp, TGSI_WRITEMASK_XYZW);
+      inst.Instruction.NumSrcRegs = 1;
+      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_CONSTANT,
+                             ts->point_ivp, TGSI_SWIZZLE_Z,
+                             TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+      ctx->emit_instruction(ctx, &inst);
+   }
+}
+
+
+/**
+ * Add the point sprite emulation instructions at the emit vertex instruction
+ */
+static void
+psprite_emit_vertex_inst(struct tgsi_transform_context *ctx,
+                         struct tgsi_full_instruction *vert_inst)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   struct tgsi_full_instruction inst;
+   unsigned point_coord_enable, en;
+   unsigned i, j, s;
+
+   /* new point coord outputs */
+   point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
+
+   /* OUTPUT[pos_sout] = TEMP[pos] */
+   if (ts->point_pos_sout != INVALID_INDEX) {
+      tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                              TGSI_FILE_OUTPUT, ts->point_pos_sout,
+                              TGSI_WRITEMASK_XYZW,
+                              TGSI_FILE_TEMPORARY, ts->point_pos_tmp);
+   }
+
+   /**
+    * Set up the point scale vector
+    * scale = pointSize * pos.w * inverseViewport
+    */
+
+   /* MUL point_scale.x, point_size.x, point_pos.w */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
+                  TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_X,
+                  TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                  TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W);
+
+   /* MUL point_scale.xy, point_scale.xx, inverseViewport.xy */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+   inst.Instruction.NumDstRegs = 1;
+   tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+                          ts->point_scale_tmp, TGSI_WRITEMASK_XY);
+   inst.Instruction.NumSrcRegs = 2;
+   tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
+                          ts->point_scale_tmp, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+   tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_CONSTANT,
+                          ts->point_ivp, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+   ctx->emit_instruction(ctx, &inst);
+
+   /**
+    * Set up the point coord threshold distance
+    * k = 0.5 - 1 / pointsize
+    */
+   if (ts->aa_point) {
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_WRITEMASK_X,
+                                  TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                  TGSI_SWIZZLE_Y,
+                                  TGSI_FILE_TEMPORARY, ts->point_size_tmp,
+                                  TGSI_SWIZZLE_X);
+
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_WRITEMASK_X,
+                                  TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                  TGSI_SWIZZLE_Z,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_SWIZZLE_X);
+   }
+
+
+   for (i = 0; i < 4; i++) {
+      unsigned point_dir_swz = ts->point_dir_swz[i];
+      unsigned point_coord_swz = ts->point_coord_swz[i];
+
+      /* All outputs need to be emitted for each vertex */
+      for (j = 0; j < ts->num_orig_out; j++) {
+         if (ts->out_tmp_index[j] != INVALID_INDEX) {
+            tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                                    TGSI_FILE_OUTPUT, j,
+                                    TGSI_WRITEMASK_XYZW,
+                                    TGSI_FILE_TEMPORARY, ts->out_tmp_index[j]);
+         }
+      }
+
+      /* pos = point_scale * point_dir + point_pos */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MAD;
+      inst.Instruction.NumDstRegs = 1;
+      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT, ts->point_pos_out,
+                             TGSI_WRITEMASK_XYZW);
+      inst.Instruction.NumSrcRegs = 3;
+      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY, ts->point_scale_tmp,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X,
+                             TGSI_SWIZZLE_X);
+      tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE, ts->point_imm,
+                             get_swizzle(point_dir_swz, 0),
+                             get_swizzle(point_dir_swz, 1),
+                             get_swizzle(point_dir_swz, 2),
+                             get_swizzle(point_dir_swz, 3));
+      tgsi_transform_src_reg(&inst.Src[2], TGSI_FILE_TEMPORARY, ts->point_pos_tmp,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                             TGSI_SWIZZLE_W);
+      ctx->emit_instruction(ctx, &inst);
+
+      /* point coord */
+      for (j = 0, s = 0, en = point_coord_enable; en; en>>=1, s++) {
+         unsigned dstReg;
+
+         if (en & 0x1) {
+            dstReg = ts->point_coord_out + j;
+
+            inst = tgsi_default_full_instruction();
+            inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+            inst.Instruction.NumDstRegs = 1;
+            tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT,
+                                   dstReg, TGSI_WRITEMASK_XYZW);
+            inst.Instruction.NumSrcRegs = 1;
+            tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                   get_swizzle(point_coord_swz, 0),
+                                   get_swizzle(point_coord_swz, 1),
+                                   get_swizzle(point_coord_swz, 2),
+                                   get_swizzle(point_coord_swz, 3));
+            ctx->emit_instruction(ctx, &inst);
+
+            /* MOV point_coord.z  point_coord_k.x */
+            if (s == ts->point_coord_aa) {
+               tgsi_transform_op1_swz_inst(ctx, TGSI_OPCODE_MOV,
+                                           TGSI_FILE_OUTPUT, dstReg, TGSI_WRITEMASK_Z,
+                                           TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                           TGSI_SWIZZLE_X);
+            }
+            j++;  /* the next point coord output offset */
+         }
+      }
+
+      /* Emit the EMIT instruction for each vertex of the quad */
+      ctx->emit_instruction(ctx, vert_inst);
+   }
+
+   /* Emit the ENDPRIM instruction for the quad */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_ENDPRIM;
+   inst.Instruction.NumDstRegs = 0;
+   inst.Instruction.NumSrcRegs = 1;
+   inst.Src[0] = vert_inst->Src[0];
+   ctx->emit_instruction(ctx, &inst);
+}
+
+
+/**
+ * TGSI instruction transform callback.
+ */
+static void
+psprite_inst(struct tgsi_transform_context *ctx,
+             struct tgsi_full_instruction *inst)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+
+   if (inst->Instruction.Opcode == TGSI_OPCODE_EMIT) {
+      psprite_emit_vertex_inst(ctx, inst);
+   }
+   else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
+            inst->Dst[0].Register.Index == ts->point_size_out) {
+      /**
+       * Replace point size output reg with tmp reg.
+       * The tmp reg will later be used as a src reg for computing
+       * the point scale factor.
+       */
+      inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      inst->Dst[0].Register.Index = ts->point_size_tmp;
+      ctx->emit_instruction(ctx, inst);
+
+      /* Clamp the point size */
+      /* MAX point_size_tmp.x, point_size_tmp.x, point_imm.y */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MAX,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                 TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y);
+
+      /* MIN point_size_tmp.x, point_size_tmp.x, point_ivp.w */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                 TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W);
+   }
+   else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
+            inst->Dst[0].Register.Index == ts->point_pos_out) {
+      /**
+       * Replace point pos output reg with tmp reg.
+       */
+      inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      inst->Dst[0].Register.Index = ts->point_pos_tmp;
+      ctx->emit_instruction(ctx, inst);
+   }
+   else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+      /**
+       * Replace output reg with tmp reg.
+       */
+      inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      inst->Dst[0].Register.Index = ts->out_tmp_index[inst->Dst[0].Register.Index];
+      ctx->emit_instruction(ctx, inst);
+   }
+   else {
+      ctx->emit_instruction(ctx, inst);
+   }
+}
+
+
+/**
+ * TGSI property instruction transform callback.
+ * Changes the GS output so each point is emitted as a 4-vertex triangle strip.
+ */
+static void
+psprite_property(struct tgsi_transform_context *ctx,
+                 struct tgsi_full_property *prop)
+{
+   switch (prop->Property.PropertyName) {
+   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+      prop->u[0].Data = PIPE_PRIM_TRIANGLE_STRIP;
+      break;
+   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+      prop->u[0].Data *= 4;
+      break;
+   default:
+      break;
+   }
+   ctx->emit_property(ctx, prop);
+}
+
+/**
+ * TGSI utility to transform a geometry shader to support point sprite.
+ */
+struct tgsi_token *
+tgsi_add_point_sprite(const struct tgsi_token *tokens_in,
+                      const unsigned point_coord_enable,
+                      const bool sprite_origin_lower_left,
+                      const bool stream_out_point_pos,
+                      int *aa_point_coord_index)
+{
+   struct psprite_transform_context transform;
+   const uint num_new_tokens = 200; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+
+   /* setup transformation context */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.transform_declaration = psprite_decl;
+   transform.base.transform_instruction = psprite_inst;
+   transform.base.transform_property = psprite_property;
+   transform.base.transform_immediate = psprite_immediate;
+   transform.base.prolog = psprite_prolog;
+
+   transform.point_size_in = INVALID_INDEX;
+   transform.point_size_out = INVALID_INDEX;
+   transform.point_size_tmp = INVALID_INDEX;
+   transform.point_pos_in = INVALID_INDEX;
+   transform.point_pos_out = INVALID_INDEX;
+   transform.point_pos_sout = INVALID_INDEX;
+   transform.point_pos_tmp = INVALID_INDEX;
+   transform.point_scale_tmp = INVALID_INDEX;
+   transform.point_imm = INVALID_INDEX;
+   transform.point_coord_aa = INVALID_INDEX;
+   transform.point_coord_k = INVALID_INDEX;
+
+   transform.stream_out_point_pos = stream_out_point_pos;
+   transform.point_coord_enable = point_coord_enable;
+   transform.aa_point = aa_point_coord_index != NULL;
+   transform.max_generic = -1;
+
+   /* point sprite directions based on the immediates (0, 1, 0.5, -1) */
+   /* (-1, -1, 0, 0) */
+   transform.point_dir_swz[0] = set_swizzle(-1, -1, 0, 0);
+   /* (-1, 1, 0, 0) */
+   transform.point_dir_swz[1] = set_swizzle(-1, 1, 0, 0);
+   /* (1, -1, 0, 0) */
+   transform.point_dir_swz[2] = set_swizzle(1, -1, 0, 0);
+   /* (1, 1, 0, 0) */
+   transform.point_dir_swz[3] = set_swizzle(1, 1, 0, 0);
+
+   /* point coord based on the immediates (0, 1, 0.5, -1) */
+   if (sprite_origin_lower_left) {
+      /* (0, 0, 0, 1) */
+      transform.point_coord_swz[0] = set_swizzle(0, 0, 0, 1);
+      /* (0, 1, 0, 1) */
+      transform.point_coord_swz[1] = set_swizzle(0, 1, 0, 1);
+      /* (1, 0, 0, 1) */
+      transform.point_coord_swz[2] = set_swizzle(1, 0, 0, 1);
+      /* (1, 1, 0, 1) */
+      transform.point_coord_swz[3] = set_swizzle(1, 1, 0, 1);
+   }
+   else {
+      /* (0, 1, 0, 1) */
+      transform.point_coord_swz[0] = set_swizzle(0, 1, 0, 1);
+      /* (0, 0, 0, 1) */
+      transform.point_coord_swz[1] = set_swizzle(0, 0, 0, 1);
+      /* (1, 1, 0, 1) */
+      transform.point_coord_swz[2] = set_swizzle(1, 1, 0, 1);
+      /* (1, 0, 0, 1) */
+      transform.point_coord_swz[3] = set_swizzle(1, 0, 0, 1);
+   }
+
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   if (aa_point_coord_index)
+      *aa_point_coord_index = transform.point_coord_aa;
+
+   return new_tokens;
+}
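For illustration only, the sketch below chains the two new transforms the way a driver might: the GS transform reports which GENERIC index carries the point coord, and that index feeds the AA-point FS transform. The wrapper and its parameters are assumptions, not part of the commit; remember the note in the file header that the driver must append the inverse-viewport constant after the user-defined constants.

#include <stdbool.h>
#include "tgsi/tgsi_point_sprite.h"
#include "tgsi/tgsi_aa_point.h"

/* Hypothetical wrapper: wrap a GS for point-sprite emulation and chain
 * the reported coord index into the AA-point FS transform.
 */
static bool
emulate_point_sprites(const struct tgsi_token *gs_tokens,
                      const struct tgsi_token *fs_tokens,
                      unsigned sprite_coord_enable,
                      bool origin_lower_left,
                      bool stream_out_point_pos,
                      struct tgsi_token **gs_out,
                      struct tgsi_token **fs_out)
{
   int aa_coord_index = -1;

   /* Passing a non-NULL aa_point_coord_index requests AA-point support;
    * the transform returns the GENERIC index it chose for the coord.
    */
   *gs_out = tgsi_add_point_sprite(gs_tokens, sprite_coord_enable,
                                   origin_lower_left, stream_out_point_pos,
                                   &aa_coord_index);
   if (!*gs_out)
      return false;

   *fs_out = tgsi_add_aa_point(fs_tokens, aa_coord_index);
   return *fs_out != NULL;
}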
diff --git a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.h b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.h
new file mode 100644 (file)
index 0000000..d195891
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_POINT_SPRITE_H
+#define TGSI_POINT_SPRITE_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_point_sprite(const struct tgsi_token *tokens_in,
+                      const unsigned point_coord_enable,
+                      const bool sprite_origin_lower_left,
+                      const bool stream_out_point_pos,
+                      int *aa_point_coord_index);
+
+#endif /* TGSI_POINT_SPRITE_H */
index 7523baf..66306d7 100644 (file)
@@ -56,6 +56,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
 {
    uint procType, i;
    struct tgsi_parse_context parse;
+   unsigned current_depth = 0;
 
    memset(info, 0, sizeof(*info));
    for (i = 0; i < TGSI_FILE_COUNT; i++)
@@ -100,6 +101,25 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
             assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
             info->opcode_count[fullinst->Instruction.Opcode]++;
 
+            switch (fullinst->Instruction.Opcode) {
+            case TGSI_OPCODE_IF:
+            case TGSI_OPCODE_UIF:
+            case TGSI_OPCODE_BGNLOOP:
+               current_depth++;
+               info->max_depth = MAX2(info->max_depth, current_depth);
+               break;
+            case TGSI_OPCODE_ENDIF:
+            case TGSI_OPCODE_ENDLOOP:
+               current_depth--;
+               break;
+            default:
+               break;
+            }
+
+            if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
+                fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
+               info->uses_doubles = true;
+
             for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
                const struct tgsi_full_src_register *src =
                   &fullinst->Src[i];
index b81bdd7..42539ee 100644 (file)
@@ -95,7 +95,7 @@ struct tgsi_shader_info
    boolean writes_viewport_index;
    boolean writes_layer;
    boolean is_msaa_sampler[PIPE_MAX_SAMPLERS];
-
+   boolean uses_doubles; /**< uses any of the double instructions */
    unsigned clipdist_writemask;
    unsigned culldist_writemask;
    unsigned num_written_culldistance;
@@ -113,6 +113,11 @@ struct tgsi_shader_info
    unsigned indirect_files_written;
 
    unsigned properties[TGSI_PROPERTY_COUNT]; /* index with TGSI_PROPERTY_ */
+
+   /**
+    * Maximum nesting depth of loop/if constructs
+    */
+   unsigned max_depth;
 };
 
 extern void
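Taken together, the two new fields let a driver reject or re-route a shader up front. A hypothetical capability check (the hardware-limit parameters are stand-ins, not a real API):

#include <stdbool.h>
#include "tgsi/tgsi_scan.h"

/* Hypothetical: decide whether a shader fits the hardware, using the
 * new uses_doubles and max_depth scan results.
 */
static bool
shader_fits_hw(const struct tgsi_token *tokens,
               bool hw_has_doubles, unsigned hw_max_cf_depth)
{
   struct tgsi_shader_info info;

   tgsi_scan_shader(tokens, &info);

   if (info.uses_doubles && !hw_has_doubles)
      return false;   /* e.g. fall back to a draw/LLVM path */

   /* max_depth counts the deepest nesting of IF/UIF/BGNLOOP blocks */
   return info.max_depth <= hw_max_cf_depth;
}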
index ceb7c2e..3bd512b 100644 (file)
@@ -95,20 +95,39 @@ struct tgsi_transform_context
  * Helper for emitting temporary register declarations.
  */
 static inline void
-tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
-                         unsigned index)
+tgsi_transform_temps_decl(struct tgsi_transform_context *ctx,
+                          unsigned firstIdx, unsigned lastIdx)
 {
    struct tgsi_full_declaration decl;
 
    decl = tgsi_default_full_declaration();
    decl.Declaration.File = TGSI_FILE_TEMPORARY;
-   decl.Range.First =
-   decl.Range.Last = index;
+   decl.Range.First = firstIdx;
+   decl.Range.Last = lastIdx;
    ctx->emit_declaration(ctx, &decl);
 }
 
+static inline void
+tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
+                         unsigned index)
+{
+   tgsi_transform_temps_decl(ctx, index, index);
+}
 
 static inline void
+tgsi_transform_const_decl(struct tgsi_transform_context *ctx,
+                          unsigned firstIdx, unsigned lastIdx)
+{
+   struct tgsi_full_declaration decl;
+
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_CONSTANT;
+   decl.Range.First = firstIdx;
+   decl.Range.Last = lastIdx;
+   ctx->emit_declaration(ctx, &decl);
+}
+static inline void
 tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
                           unsigned index,
                           unsigned sem_name, unsigned sem_index,
@@ -129,6 +148,26 @@ tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
    ctx->emit_declaration(ctx, &decl);
 }
 
+static inline void
+tgsi_transform_output_decl(struct tgsi_transform_context *ctx,
+                          unsigned index,
+                          unsigned sem_name, unsigned sem_index,
+                          unsigned interp)
+{
+   struct tgsi_full_declaration decl;
+
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_OUTPUT;
+   decl.Declaration.Interpolate = 1;
+   decl.Declaration.Semantic = 1;
+   decl.Semantic.Name = sem_name;
+   decl.Semantic.Index = sem_index;
+   decl.Range.First =
+   decl.Range.Last = index;
+   decl.Interp.Interpolate = interp;
+
+   ctx->emit_declaration(ctx, &decl);
+}
 
 static inline void
 tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx,
@@ -182,6 +221,28 @@ tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx,
    ctx->emit_immediate(ctx, &immed);
 }
 
+static inline void
+tgsi_transform_dst_reg(struct tgsi_full_dst_register *reg,
+                       unsigned file, unsigned index, unsigned writemask)
+{
+   reg->Register.File = file;
+   reg->Register.Index = index;
+   reg->Register.WriteMask = writemask;
+}
+
+static inline void
+tgsi_transform_src_reg(struct tgsi_full_src_register *reg,
+                       unsigned file, unsigned index,
+                       unsigned swizzleX, unsigned swizzleY,
+                       unsigned swizzleZ, unsigned swizzleW)
+{
+   reg->Register.File = file;
+   reg->Register.Index = index;
+   reg->Register.SwizzleX = swizzleX;
+   reg->Register.SwizzleY = swizzleY;
+   reg->Register.SwizzleZ = swizzleZ;
+   reg->Register.SwizzleW = swizzleW;
+}
 
 /**
  * Helper for emitting 1-operand instructions.
@@ -399,7 +460,8 @@ static inline void
 tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
                          unsigned src_file,
                          unsigned src_index,
-                         unsigned src_swizzle)
+                         unsigned src_swizzle,
+                         boolean negate)
 {
    struct tgsi_full_instruction inst;
 
@@ -413,7 +475,7 @@ tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
    inst.Src[0].Register.SwizzleY =
    inst.Src[0].Register.SwizzleZ =
    inst.Src[0].Register.SwizzleW = src_swizzle;
-   inst.Src[0].Register.Negate = 1;
+   inst.Src[0].Register.Negate = negate;
 
    ctx->emit_instruction(ctx, &inst);
 }
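The new tgsi_transform_dst_reg()/tgsi_transform_src_reg() helpers collapse the repetitive register setup seen throughout the new transforms into one call per operand. A small sketch (the function name is invented) emitting MOV TEMP[dst].xyzw, INPUT[src].yxzw from inside a transform callback. Separately, note that tgsi_transform_kill_inst() now takes an explicit negate flag: tgsi_aa_point.c passes FALSE to kill on a negative coverage value, while pre-existing callers presumably pass TRUE to keep the old hard-wired negation.

#include "tgsi/tgsi_transform.h"

/* Sketch: build and emit a swizzled MOV using the new register helpers. */
static void
emit_swapped_mov(struct tgsi_transform_context *ctx,
                 unsigned dst_temp, unsigned src_input)
{
   struct tgsi_full_instruction inst;

   inst = tgsi_default_full_instruction();
   inst.Instruction.Opcode = TGSI_OPCODE_MOV;
   inst.Instruction.NumDstRegs = 1;
   tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
                          dst_temp, TGSI_WRITEMASK_XYZW);
   inst.Instruction.NumSrcRegs = 1;
   tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_INPUT, src_input,
                          TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X,
                          TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W);
   ctx->emit_instruction(ctx, &inst);
}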
diff --git a/src/gallium/auxiliary/tgsi/tgsi_two_side.c b/src/gallium/auxiliary/tgsi/tgsi_two_side.c
new file mode 100644 (file)
index 0000000..2406e28
--- /dev/null
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2013 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms fragment shaders to facilitate two-sided lighting.
+ *
+ * Basically, if the FS has any color inputs (TGSI_SEMANTIC_COLOR) we'll:
+ * 1. create corresponding back-color inputs (TGSI_SEMANTIC_BCOLOR)
+ * 2. use the FACE register to choose between front/back colors and put the
+ *    selected color in new temp regs.
+ * 3. replace reads of the original color inputs with the new temp regs.
+ *
+ * Then, the driver just needs to link the VS front/back output colors to
+ * the FS front/back input colors.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_two_side.h"
+#include "tgsi_transform.h"
+
+
+#define INVALID_INDEX 9999
+
+
+struct two_side_transform_context
+{
+   struct tgsi_transform_context base;
+   uint num_temps;
+   uint num_inputs;
+   uint face_input;           /**< index of the FACE input */
+   uint front_color_input[2]; /**< INPUT regs */
+   uint front_color_interp[2];/**< TGSI_INTERPOLATE_x */
+   uint back_color_input[2];  /**< INPUT regs */
+   uint new_colors[2];        /**< TEMP regs */
+};
+
+
+static inline struct two_side_transform_context *
+two_side_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct two_side_transform_context *) ctx;
+}
+
+
+static void
+xform_decl(struct tgsi_transform_context *ctx,
+           struct tgsi_full_declaration *decl)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
+         /* found a front color */
+         assert(decl->Semantic.Index < 2);
+         ts->front_color_input[decl->Semantic.Index] = decl->Range.First;
+         ts->front_color_interp[decl->Semantic.Index] = decl->Interp.Interpolate;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
+         ts->face_input = decl->Range.First;
+      }
+      ts->num_inputs = MAX2(ts->num_inputs, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_temps = MAX2(ts->num_temps, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+
+static void
+emit_prolog(struct tgsi_transform_context *ctx)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+   uint num_colors = 0;
+   uint i;
+
+   /* Declare 0, 1 or 2 new BCOLOR inputs */
+   for (i = 0; i < 2; i++) {
+      if (ts->front_color_input[i] != INVALID_INDEX) {
+         decl = tgsi_default_full_declaration();
+         decl.Declaration.File = TGSI_FILE_INPUT;
+         decl.Declaration.Interpolate = 1;
+         decl.Declaration.Semantic = 1;
+         decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
+         decl.Semantic.Index = i;
+         decl.Range.First = decl.Range.Last = ts->num_inputs++;
+         decl.Interp.Interpolate = ts->front_color_interp[i];
+         ctx->emit_declaration(ctx, &decl);
+         ts->back_color_input[i] = decl.Range.First;
+         num_colors++;
+      }
+   }
+
+   if (num_colors > 0) {
+      /* Declare 1 or 2 temp registers */
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_TEMPORARY;
+      decl.Range.First = ts->num_temps;
+      decl.Range.Last = ts->num_temps + num_colors - 1;
+      ctx->emit_declaration(ctx, &decl);
+      ts->new_colors[0] = ts->num_temps;
+      ts->new_colors[1] = ts->num_temps + 1;
+
+      if (ts->face_input == INVALID_INDEX) {
+         /* declare FACE INPUT register */
+         decl = tgsi_default_full_declaration();
+         decl.Declaration.File = TGSI_FILE_INPUT;
+         decl.Declaration.Semantic = 1;
+         decl.Semantic.Name = TGSI_SEMANTIC_FACE;
+         decl.Semantic.Index = 0;
+         decl.Range.First = decl.Range.Last = ts->num_inputs++;
+         ctx->emit_declaration(ctx, &decl);
+         ts->face_input = decl.Range.First;
+      }
+
+      /* CMP temp[c0], face, bcolor[c0], fcolor[c0]
+       * temp[c0] = face < 0.0 ? bcolor[c0] : fcolor[c0]
+       */
+      for (i = 0; i < 2; i++) {
+         if (ts->front_color_input[i] != INVALID_INDEX) {
+            inst = tgsi_default_full_instruction();
+            inst.Instruction.Opcode = TGSI_OPCODE_CMP;
+            inst.Instruction.NumDstRegs = 1;
+            inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+            inst.Dst[0].Register.Index = ts->new_colors[i];
+            inst.Instruction.NumSrcRegs = 3;
+            inst.Src[0].Register.File = TGSI_FILE_INPUT;
+            inst.Src[0].Register.Index = ts->face_input;
+            inst.Src[1].Register.File = TGSI_FILE_INPUT;
+            inst.Src[1].Register.Index = ts->back_color_input[i];
+            inst.Src[2].Register.File = TGSI_FILE_INPUT;
+            inst.Src[2].Register.Index = ts->front_color_input[i];
+
+            ctx->emit_instruction(ctx, &inst);
+         }
+      }
+   }
+}
+
+
+static void
+xform_inst(struct tgsi_transform_context *ctx,
+           struct tgsi_full_instruction *inst)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+   const struct tgsi_opcode_info *info =
+      tgsi_get_opcode_info(inst->Instruction.Opcode);
+   uint i, j;
+
+   /* Look for src regs which reference the input color and replace
+    * them with the temp color.
+    */
+   for (i = 0; i < info->num_src; i++) {
+      if (inst->Src[i].Register.File == TGSI_FILE_INPUT) {
+         for (j = 0; j < 2; j++) {
+            if (inst->Src[i].Register.Index == ts->front_color_input[j]) {
+               /* replace color input with temp reg */
+               inst->Src[i].Register.File = TGSI_FILE_TEMPORARY;
+               inst->Src[i].Register.Index = ts->new_colors[j];
+               break;
+            }
+         }
+      }
+   }
+
+   ctx->emit_instruction(ctx, inst);
+}
+
+
+struct tgsi_token *
+tgsi_add_two_side(const struct tgsi_token *tokens_in)
+{
+   struct two_side_transform_context transform;
+   const uint num_new_tokens = 100; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+
+   /* setup transformation context */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.transform_declaration = xform_decl;
+   transform.base.transform_instruction = xform_inst;
+   transform.base.prolog = emit_prolog;
+   transform.face_input = INVALID_INDEX;
+   transform.front_color_input[0] = INVALID_INDEX;
+   transform.front_color_input[1] = INVALID_INDEX;
+   transform.front_color_interp[0] = TGSI_INTERPOLATE_COLOR;
+   transform.front_color_interp[1] = TGSI_INTERPOLATE_COLOR;
+   transform.back_color_input[0] = INVALID_INDEX;
+   transform.back_color_input[1] = INVALID_INDEX;
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   return new_tokens;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_two_side.h b/src/gallium/auxiliary/tgsi/tgsi_two_side.h
new file mode 100644
index 0000000..bac239e
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_two_side.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2013 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_TWO_SIDE_H
+#define TGSI_TWO_SIDE_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_two_side(const struct tgsi_token *tokens_in);
+
+#endif /* TGSI_TWO_SIDE_H */
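
[Editor's note] A minimal usage sketch for the new transform (not part of the patch): run tgsi_add_two_side() over fragment shader tokens before compiling them. compile_fs() is a hypothetical driver hook.

   static void *
   compile_with_two_side(struct pipe_context *pipe,
                         const struct tgsi_token *fs_tokens)
   {
      /* returns a grown copy with BCOLOR inputs, a FACE input and
       * per-color CMP selects prepended, or NULL on OOM */
      struct tgsi_token *new_tokens = tgsi_add_two_side(fs_tokens);
      void *shader;

      if (!new_tokens)
         return NULL;
      shader = compile_fs(pipe, new_tokens);   /* hypothetical */
      FREE(new_tokens);
      return shader;
   }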
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index e5b8427..653e650 100644
@@ -462,3 +462,21 @@ tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)
 
    return dim;
 }
+
+
+boolean
+tgsi_is_shadow_target(unsigned target)
+{
+   switch (target) {
+   case TGSI_TEXTURE_SHADOW1D:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_SHADOW1D_ARRAY:
+   case TGSI_TEXTURE_SHADOW2D_ARRAY:
+   case TGSI_TEXTURE_SHADOWCUBE:
+   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+      return TRUE;
+   default:
+      return FALSE;
+   }
+}
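
[Editor's note] For context, a hedged example of how a driver might use the new helper while scanning texture instructions; `inst` is assumed to be a struct tgsi_full_instruction carrying a texture target.

   if (tgsi_is_shadow_target(inst->Texture.Texture)) {
      /* select the depth-compare variant of the sampler */
   }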
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h
index deb1ecc..6175d95 100644
@@ -82,6 +82,9 @@ tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg);
 int
 tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample);
 
+boolean
+tgsi_is_shadow_target(unsigned target);
+
 #if defined __cplusplus
 }
 #endif
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 9bba07a..b7b1ece 100644
@@ -1190,6 +1190,8 @@ static void blitter_draw(struct blitter_context_priv *ctx,
 
    u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), ctx->vertices,
                  &vb.buffer_offset, &vb.buffer);
+   if (!vb.buffer)
+      return;
    u_upload_unmap(ctx->upload);
 
    pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
@@ -2063,7 +2065,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = ctx->base.pipe;
    struct pipe_vertex_buffer vb = {0};
-   struct pipe_stream_output_target *so_target;
+   struct pipe_stream_output_target *so_target = NULL;
    unsigned offsets[PIPE_MAX_SO_BUFFERS] = {0};
 
    assert(num_channels >= 1);
@@ -2089,6 +2091,9 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
 
    u_upload_data(ctx->upload, 0, num_channels*4, clear_value,
                  &vb.buffer_offset, &vb.buffer);
+   if (!vb.buffer)
+      goto out;
+
    vb.stride = 0;
 
    blitter_set_running_flag(ctx);
@@ -2112,6 +2117,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
 
    util_draw_arrays(pipe, PIPE_PRIM_POINTS, 0, size / 4);
 
+out:
    blitter_restore_vertex_states(ctx);
    blitter_restore_render_cond(ctx);
    blitter_unset_running_flag(ctx);
diff --git a/src/gallium/auxiliary/util/u_helpers.c b/src/gallium/auxiliary/util/u_helpers.c
index ac1edcd..09619c1 100644
@@ -88,3 +88,18 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
 
    *dst_count = util_last_bit(enabled_buffers);
 }
+
+
+void
+util_set_index_buffer(struct pipe_index_buffer *dst,
+                      const struct pipe_index_buffer *src)
+{
+   if (src) {
+      pipe_resource_reference(&dst->buffer, src->buffer);
+      memcpy(dst, src, sizeof(*dst));
+   }
+   else {
+      pipe_resource_reference(&dst->buffer, NULL);
+      memset(dst, 0, sizeof(*dst));
+   }
+}
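
[Editor's note] Sketch of the intended call pattern in a context's set_index_buffer() hook; my_context() and the index_buffer field are hypothetical driver names.

   static void
   my_set_index_buffer(struct pipe_context *pipe,
                       const struct pipe_index_buffer *ib)
   {
      struct my_context *ctx = my_context(pipe);

      /* takes a reference on ib->buffer, or drops the old one when
       * ib is NULL, then copies/clears the remaining fields */
      util_set_index_buffer(&ctx->index_buffer, ib);
   }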
diff --git a/src/gallium/auxiliary/util/u_helpers.h b/src/gallium/auxiliary/util/u_helpers.h
index 09c7116..f25f280 100644
@@ -44,6 +44,9 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                    const struct pipe_vertex_buffer *src,
                                    unsigned start_slot, unsigned count);
 
+void util_set_index_buffer(struct pipe_index_buffer *dst,
+                           const struct pipe_index_buffer *src);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 56bd185..e92f83a 100644
@@ -389,6 +389,26 @@ unsigned ffs( unsigned u )
 #define ffs __builtin_ffs
 #endif
 
+#ifdef HAVE___BUILTIN_FFSLL
+#define ffsll __builtin_ffsll
+#else
+static inline int
+ffsll(long long int val)
+{
+   int bit;
+
+   bit = ffs((unsigned) (val & 0xffffffff));
+   if (bit != 0)
+      return bit;
+
+   bit = ffs((unsigned) (val >> 32));
+   if (bit != 0)
+      return 32 + bit;
+
+   return 0;
+}
+#endif
+
 #endif /* FFS_DEFINED */
 
 /**
@@ -483,6 +503,26 @@ u_bit_scan64(uint64_t *mask)
 }
 #endif
 
+/* For looping over a bitmask when you want to loop over consecutive bits
+ * manually, for example:
+ *
+ * while (mask) {
+ *    int start, count, i;
+ *
+ *    u_bit_scan_consecutive_range(&mask, &start, &count);
+ *
+ *    for (i = 0; i < count; i++)
+ *       ... process element (start+i)
+ * }
+ */
+static inline void
+u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count)
+{
+   *start = ffs(*mask) - 1;
+   *count = ffs(~(*mask >> *start)) - 1;
+   *mask &= ~(((1 << *count) - 1) << *start);
+}
+
 /**
  * Return float bits.
  */
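
[Editor's note] A worked example of the new u_bit_scan_consecutive_range() helper above, following its stated semantics:

   unsigned mask = 0x38;   /* binary 0111000 */
   int start, count;

   u_bit_scan_consecutive_range(&mask, &start, &count);
   /* start == 3, count == 3, mask == 0 */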
diff --git a/src/gallium/auxiliary/util/u_prim_restart.c b/src/gallium/auxiliary/util/u_prim_restart.c
new file mode 100644
index 0000000..a4d7c14
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_prim_restart.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+#include "u_inlines.h"
+#include "u_memory.h"
+#include "u_prim_restart.h"
+
+
+/**
+ * Translate an index buffer for primitive restart.
+ * Create a new index buffer which is a copy of the original index buffer
+ * except that instances of 'restart_index' are converted to 0xffff or
+ * 0xffffffff.
+ * Also, index buffers using 1-byte indexes are converted to 2-byte indexes.
+ */
+enum pipe_error
+util_translate_prim_restart_ib(struct pipe_context *context,
+                               struct pipe_index_buffer *src_buffer,
+                               struct pipe_resource **dst_buffer,
+                               unsigned num_indexes,
+                               unsigned restart_index)
+{
+   struct pipe_screen *screen = context->screen;
+   struct pipe_transfer *src_transfer = NULL, *dst_transfer = NULL;
+   void *src_map = NULL, *dst_map = NULL;
+   const unsigned src_index_size = src_buffer->index_size;
+   unsigned dst_index_size;
+
+   /* 1-byte indexes are converted to 2-byte indexes, 4-byte stays 4-byte */
+   dst_index_size = MAX2(2, src_buffer->index_size);
+   assert(dst_index_size == 2 || dst_index_size == 4);
+
+   /* no user buffers for now */
+   assert(src_buffer->user_buffer == NULL);
+
+   /* Create new index buffer */
+   *dst_buffer = pipe_buffer_create(screen, PIPE_BIND_INDEX_BUFFER,
+                                    PIPE_USAGE_STREAM,
+                                    num_indexes * dst_index_size);
+   if (!*dst_buffer)
+      goto error;
+
+   /* Map new / dest index buffer */
+   dst_map = pipe_buffer_map(context, *dst_buffer,
+                             PIPE_TRANSFER_WRITE, &dst_transfer);
+   if (!dst_map)
+      goto error;
+
+   /* Map original / src index buffer */
+   src_map = pipe_buffer_map_range(context, src_buffer->buffer,
+                                   src_buffer->offset,
+                                   num_indexes * src_index_size,
+                                   PIPE_TRANSFER_READ,
+                                   &src_transfer);
+   if (!src_map)
+      goto error;
+
+   if (src_index_size == 1 && dst_index_size == 2) {
+      uint8_t *src = (uint8_t *) src_map;
+      uint16_t *dst = (uint16_t *) dst_map;
+      unsigned i;
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffff : src[i];
+      }
+   }
+   else if (src_index_size == 2 && dst_index_size == 2) {
+      uint16_t *src = (uint16_t *) src_map;
+      uint16_t *dst = (uint16_t *) dst_map;
+      unsigned i;
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffff : src[i];
+      }
+   }
+   else {
+      uint32_t *src = (uint32_t *) src_map;
+      uint32_t *dst = (uint32_t *) dst_map;
+      unsigned i;
+      assert(src_index_size == 4);
+      assert(dst_index_size == 4);
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffffffff : src[i];
+      }
+   }
+
+   pipe_buffer_unmap(context, src_transfer);
+   pipe_buffer_unmap(context, dst_transfer);
+
+   return PIPE_OK;
+
+error:
+   if (src_transfer)
+      pipe_buffer_unmap(context, src_transfer);
+   if (dst_transfer)
+      pipe_buffer_unmap(context, dst_transfer);
+   if (*dst_buffer)
+      screen->resource_destroy(screen, *dst_buffer);
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+/** Helper structs for util_draw_vbo_without_prim_restart() */
+
+struct range {
+   unsigned start, count;
+};
+
+struct range_info {
+   struct range *ranges;
+   unsigned count, max;
+};
+
+
+/**
+ * Helper function for util_draw_vbo_without_prim_restart()
+ * \return true for success, false if out of memory
+ */
+static boolean
+add_range(struct range_info *info, unsigned start, unsigned count)
+{
+   if (info->max == 0) {
+      info->max = 10;
+      info->ranges = MALLOC(info->max * sizeof(struct range));
+      if (!info->ranges) {
+         return FALSE;
+      }
+   }
+   else if (info->count == info->max) {
+      /* grow the ranges[] array */
+      info->ranges = REALLOC(info->ranges,
+                             info->max * sizeof(struct range),
+                             2 * info->max * sizeof(struct range));
+      if (!info->ranges) {
+         return FALSE;
+      }
+
+      info->max *= 2;
+   }
+
+   /* save the range */
+   info->ranges[info->count].start = start;
+   info->ranges[info->count].count = count;
+   info->count++;
+
+   return TRUE;
+}
+
+
+/**
+ * Implement primitive restart by breaking an indexed primitive into
+ * pieces which do not contain restart indexes.  Each piece is then
+ * drawn by calling pipe_context::draw_vbo().
+ * \return PIPE_OK if no error, an error code otherwise.
+ */
+enum pipe_error
+util_draw_vbo_without_prim_restart(struct pipe_context *context,
+                                   const struct pipe_index_buffer *ib,
+                                   const struct pipe_draw_info *info)
+{
+   const void *src_map;
+   struct range_info ranges = {0};
+   struct pipe_draw_info new_info;
+   struct pipe_transfer *src_transfer = NULL;
+   unsigned i, start, count;
+
+   assert(info->indexed);
+   assert(info->primitive_restart);
+
+   /* Get pointer to the index data */
+   if (ib->buffer) {
+      /* map the index buffer (only the range we need to scan) */
+      src_map = pipe_buffer_map_range(context, ib->buffer,
+                                      ib->offset + info->start * ib->index_size,
+                                      info->count * ib->index_size,
+                                      PIPE_TRANSFER_READ,
+                                      &src_transfer);
+      if (!src_map) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+   }
+   else {
+      if (!ib->user_buffer) {
+         debug_printf("User-space index buffer is null!\n");
+         return PIPE_ERROR_BAD_INPUT;
+      }
+      src_map = (const uint8_t *) ib->user_buffer
+         + ib->offset
+         + info->start * ib->index_size;
+   }
+
+#define SCAN_INDEXES(TYPE) \
+   for (i = 0; i <= info->count; i++) { \
+      if (i == info->count || \
+          ((const TYPE *) src_map)[i] == info->restart_index) { \
+         /* cut / restart */ \
+         if (count > 0) { \
+            if (!add_range(&ranges, info->start + start, count)) { \
+               if (src_transfer) \
+                  pipe_buffer_unmap(context, src_transfer); \
+               return PIPE_ERROR_OUT_OF_MEMORY; \
+            } \
+         } \
+         start = i + 1; \
+         count = 0; \
+      } \
+      else { \
+         count++; \
+      } \
+   }
+
+   start = 0;   /* range starts here are relative; add_range() adds info->start */
+   count = 0;
+   switch (ib->index_size) {
+   case 1:
+      SCAN_INDEXES(uint8_t);
+      break;
+   case 2:
+      SCAN_INDEXES(uint16_t);
+      break;
+   case 4:
+      SCAN_INDEXES(uint32_t);
+      break;
+   default:
+      assert(!"Bad index size");
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   /* unmap index buffer */
+   if (src_transfer)
+      pipe_buffer_unmap(context, src_transfer);
+
+   /* draw ranges between the restart indexes */
+   new_info = *info;
+   new_info.primitive_restart = FALSE;
+   for (i = 0; i < ranges.count; i++) {
+      new_info.start = ranges.ranges[i].start;
+      new_info.count = ranges.ranges[i].count;
+      context->draw_vbo(context, &new_info);
+   }
+
+   FREE(ranges.ranges);
+
+   return PIPE_OK;
+}
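
[Editor's note] A worked example of the range splitting:

   /* With info->start = 0, info->count = 8, restart_index = 0xffff and
    * indices { 0, 1, 2, 0xffff, 3, 4, 5, 6 }, the scan produces
    *    ranges[0] = { start = 0, count = 3 }
    *    ranges[1] = { start = 4, count = 4 }
    * and draw_vbo() is called once per restart-free run. */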
diff --git a/src/gallium/auxiliary/util/u_prim_restart.h b/src/gallium/auxiliary/util/u_prim_restart.h
new file mode 100644
index 0000000..1e98e0e
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_prim_restart.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef U_PRIM_RESTART_H
+#define U_PRIM_RESTART_H
+
+
+#include "pipe/p_defines.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct pipe_context;
+struct pipe_draw_info;
+struct pipe_index_buffer;
+struct pipe_resource;
+
+
+enum pipe_error
+util_translate_prim_restart_ib(struct pipe_context *context,
+                               struct pipe_index_buffer *src_buffer,
+                               struct pipe_resource **dst_buffer,
+                               unsigned num_indexes,
+                               unsigned restart_index);
+
+enum pipe_error
+util_draw_vbo_without_prim_restart(struct pipe_context *context,
+                                   const struct pipe_index_buffer *ib,
+                                   const struct pipe_draw_info *info);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c
index 1f65672..0bb46ff 100644
@@ -339,7 +339,7 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
    /* KILL_IF -texTemp;   # if -texTemp < 0, kill fragment */
    tgsi_transform_kill_inst(ctx,
                             TGSI_FILE_TEMPORARY, texTemp,
-                            TGSI_SWIZZLE_W);
+                            TGSI_SWIZZLE_W, TRUE);
 }
 
 
diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h
index b26f671..221d918 100644
@@ -42,6 +42,7 @@ struct u_rect {
 };
 
 /* Do two rectangles intersect?
+ * Note: empty rectangles are valid as inputs (and never intersect).
  */
 static inline boolean
 u_rect_test_intersection(const struct u_rect *a,
@@ -50,7 +51,11 @@ u_rect_test_intersection(const struct u_rect *a,
    return (!(a->x1 < b->x0 ||
              b->x1 < a->x0 ||
              a->y1 < b->y0 ||
-             b->y1 < a->y0));
+             b->y1 < a->y0 ||
+             a->x1 < a->x0 ||
+             a->y1 < a->y0 ||
+             b->x1 < b->x0 ||
+             b->y1 < b->y0));
 }
 
 /* Find the intersection of two rectangles known to intersect.
@@ -82,7 +87,12 @@ u_rect_possible_intersection(const struct u_rect *a,
       u_rect_find_intersection(a,b);
    }
    else {
-      b->x0 = b->x1 = b->y0 = b->y1 = 0;
+      /*
+       * Note: the u_rect_xx tests use inclusive coordinates, so an
+       * all-zero box would not be empty.
+       */
+      b->x0 = b->y0 = 0;
+      b->x1 = b->y1 = -1;
    }
 }
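
[Editor's note] An illustration of the empty-box convention (coordinates are inclusive, so x1 < x0 marks an empty rectangle):

   struct u_rect empty = { 0, -1, 0, -1 };   /* x0, x1, y0, y1 */
   struct u_rect unit  = { 0,  0, 0,  0 };   /* the 1x1 box at the origin */

   /* u_rect_test_intersection(&empty, &unit) now returns FALSE */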
 
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index 6d29cab..6eed337 100644
@@ -831,3 +831,54 @@ util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
 
    return ureg_create_shader_and_destroy(ureg, pipe);
 }
+
+void *
+util_make_geometry_passthrough_shader(struct pipe_context *pipe,
+                                      uint num_attribs,
+                                      const ubyte *semantic_names,
+                                      const ubyte *semantic_indexes)
+{
+   static const unsigned zero[4] = {0, 0, 0, 0};
+
+   struct ureg_program *ureg;
+   struct ureg_dst dst[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_src src[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_src imm;
+
+   unsigned i;
+
+   ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
+   if (ureg == NULL)
+      return NULL;
+
+   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_POINTS);
+   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_POINTS);
+   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 1);
+   ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, 1);
+   imm = ureg_DECL_immediate_uint(ureg, zero, 4);
+
+   /* Loop over all the attribs and emit the corresponding input
+    * and output declarations in the geometry shader.
+    */
+   for (i = 0; i < num_attribs; i++) {
+      src[i] = ureg_DECL_input(ureg, semantic_names[i],
+                               semantic_indexes[i], 0, 1);
+      src[i] = ureg_src_dimension(src[i], 0);
+      dst[i] = ureg_DECL_output(ureg, semantic_names[i], semantic_indexes[i]);
+   }
+
+   /* MOV dst[i] src[i] */
+   for (i = 0; i < num_attribs; i++) {
+      ureg_MOV(ureg, dst[i], src[i]);
+   }
+
+   /* EMIT IMM[0] */
+   ureg_insn(ureg, TGSI_OPCODE_EMIT, NULL, 0, &imm, 1);
+
+   /* END */
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, pipe);
+}
+
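
[Editor's note] Sketch: build a point passthrough GS whose inputs mirror two vertex shader outputs; the semantic arrays here are illustrative, not from the patch.

   static const ubyte names[]   = { TGSI_SEMANTIC_POSITION,
                                    TGSI_SEMANTIC_GENERIC };
   static const ubyte indexes[] = { 0, 0 };

   void *gs = util_make_geometry_passthrough_shader(pipe, 2,
                                                    names, indexes);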
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h
index 08d798e..cda0f2e 100644
@@ -146,6 +146,12 @@ util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
                                    unsigned tgsi_tex, unsigned nr_samples,
                                    enum tgsi_return_type stype);
 
+extern void *
+util_make_geometry_passthrough_shader(struct pipe_context *pipe,
+                                      uint num_attribs,
+                                      const ubyte *semantic_names,
+                                      const ubyte *semantic_indexes);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h
index f7ab09c..adcdf20 100644
@@ -199,6 +199,8 @@ util_memmove(void *dest, const void *src, size_t n)
 }
 
 
+#define util_strcasecmp stricmp
+
 #else
 
 #define util_vsnprintf vsnprintf
@@ -211,6 +213,7 @@ util_memmove(void *dest, const void *src, size_t n)
 #define util_strncat strncat
 #define util_strstr strstr
 #define util_memmove memmove
+#define util_strcasecmp strcasecmp
 
 #endif
 
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c
index 744ea2e..59207a1 100644
@@ -129,9 +129,9 @@ void u_upload_destroy( struct u_upload_mgr *upload )
 }
 
 
-static enum pipe_error 
-u_upload_alloc_buffer( struct u_upload_mgr *upload,
-                       unsigned min_size )
+static void
+u_upload_alloc_buffer(struct u_upload_mgr *upload,
+                      unsigned min_size)
 {
    struct pipe_screen *screen = upload->pipe->screen;
    struct pipe_resource buffer;
@@ -161,9 +161,8 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
    }
 
    upload->buffer = screen->resource_create(screen, &buffer);
-   if (upload->buffer == NULL) {
-      return PIPE_ERROR_OUT_OF_MEMORY;
-   }
+   if (upload->buffer == NULL)
+      return;
 
    /* Map the new buffer. */
    upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
@@ -172,52 +171,54 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
    if (upload->map == NULL) {
       upload->transfer = NULL;
       pipe_resource_reference(&upload->buffer, NULL);
-      return PIPE_ERROR_OUT_OF_MEMORY;
+      return;
    }
 
    upload->offset = 0;
-   return PIPE_OK;
 }
 
-enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
-                                unsigned min_out_offset,
-                                unsigned size,
-                                unsigned *out_offset,
-                                struct pipe_resource **outbuf,
-                                void **ptr )
+void
+u_upload_alloc(struct u_upload_mgr *upload,
+               unsigned min_out_offset,
+               unsigned size,
+               unsigned *out_offset,
+               struct pipe_resource **outbuf,
+               void **ptr)
 {
-   unsigned alloc_size = align( size, upload->alignment );
+   unsigned alloc_size = align(size, upload->alignment);
    unsigned alloc_offset = align(min_out_offset, upload->alignment);
+   unsigned buffer_size = upload->buffer ? upload->buffer->width0 : 0;
    unsigned offset;
 
-   /* Init these return values here in case we fail below to make
-    * sure the caller doesn't get garbage values.
-    */
-   *out_offset = ~0;
-   pipe_resource_reference(outbuf, NULL);
-   *ptr = NULL;
-
    /* Make sure we have enough space in the upload buffer
     * for the sub-allocation. */
-   if (!upload->buffer ||
-       MAX2(upload->offset, alloc_offset) + alloc_size > upload->buffer->width0) {
-      enum pipe_error ret = u_upload_alloc_buffer(upload,
-                                                  alloc_offset + alloc_size);
-      if (ret != PIPE_OK)
-         return ret;
+   if (unlikely(MAX2(upload->offset, alloc_offset) + alloc_size > buffer_size)) {
+      u_upload_alloc_buffer(upload, alloc_offset + alloc_size);
+
+      if (unlikely(!upload->buffer)) {
+         *out_offset = ~0;
+         pipe_resource_reference(outbuf, NULL);
+         *ptr = NULL;
+         return;
+      }
+
+      buffer_size = upload->buffer->width0;
    }
 
    offset = MAX2(upload->offset, alloc_offset);
 
-   if (!upload->map) {
+   if (unlikely(!upload->map)) {
       upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
                                           offset,
-                                          upload->buffer->width0 - offset,
+                                          buffer_size - offset,
                                           upload->map_flags,
                                          &upload->transfer);
-      if (!upload->map) {
+      if (unlikely(!upload->map)) {
          upload->transfer = NULL;
-         return PIPE_ERROR_OUT_OF_MEMORY;
+         *out_offset = ~0;
+         pipe_resource_reference(outbuf, NULL);
+         *ptr = NULL;
+         return;
       }
 
       upload->map -= offset;
@@ -229,46 +230,37 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
 
    /* Emit the return values: */
    *ptr = upload->map + offset;
-   pipe_resource_reference( outbuf, upload->buffer );
+   pipe_resource_reference(outbuf, upload->buffer);
    *out_offset = offset;
 
    upload->offset = offset + alloc_size;
-   return PIPE_OK;
 }
 
-enum pipe_error u_upload_data( struct u_upload_mgr *upload,
-                               unsigned min_out_offset,
-                               unsigned size,
-                               const void *data,
-                               unsigned *out_offset,
-                               struct pipe_resource **outbuf)
+void u_upload_data(struct u_upload_mgr *upload,
+                   unsigned min_out_offset,
+                   unsigned size,
+                   const void *data,
+                   unsigned *out_offset,
+                   struct pipe_resource **outbuf)
 {
    uint8_t *ptr;
-   enum pipe_error ret = u_upload_alloc(upload, min_out_offset, size,
-                                        out_offset, outbuf,
-                                        (void**)&ptr);
-   if (ret != PIPE_OK)
-      return ret;
-
-   memcpy(ptr, data, size);
-   return PIPE_OK;
-}
 
+   u_upload_alloc(upload, min_out_offset, size,
+                  out_offset, outbuf,
+                  (void**)&ptr);
+   if (ptr)
+      memcpy(ptr, data, size);
+}
 
-/* As above, but upload the full contents of a buffer.  Useful for
- * uploading user buffers, avoids generating an explosion of GPU
- * buffers if you have an app that does lots of small vertex buffer
- * renders or DrawElements calls.
- */
-enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
-                                 unsigned min_out_offset,
-                                 unsigned offset,
-                                 unsigned size,
-                                 struct pipe_resource *inbuf,
-                                 unsigned *out_offset,
-                                 struct pipe_resource **outbuf)
+/* XXX: Remove. It's basically a CPU fallback of resource_copy_region. */
+void u_upload_buffer(struct u_upload_mgr *upload,
+                     unsigned min_out_offset,
+                     unsigned offset,
+                     unsigned size,
+                     struct pipe_resource *inbuf,
+                     unsigned *out_offset,
+                     struct pipe_resource **outbuf)
 {
-   enum pipe_error ret = PIPE_OK;
    struct pipe_transfer *transfer = NULL;
    const char *map = NULL;
 
@@ -279,20 +271,13 @@ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
                                              &transfer);
 
    if (map == NULL) {
-      return PIPE_ERROR_OUT_OF_MEMORY;
+      pipe_resource_reference(outbuf, NULL);
+      return;
    }
 
    if (0)
       debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size);
 
-   ret = u_upload_data( upload,
-                        min_out_offset,
-                        size,
-                        map,
-                        out_offset,
-                        outbuf);
-
+   u_upload_data(upload, min_out_offset, size, map, out_offset, outbuf);
    pipe_buffer_unmap( upload->pipe, transfer );
-
-   return ret;
 }
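
[Editor's note] With the void interface, callers test the returned buffer/pointer instead of a pipe_error code. A sketch, assuming upload, size and data exist in the caller:

   unsigned offset;
   struct pipe_resource *buf = NULL;
   void *map;

   u_upload_alloc(upload, 0, size, &offset, &buf, &map);
   if (!buf)
      return;   /* allocation or mapping failed; outputs were cleared */
   memcpy(map, data, size);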
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index 63bf30e..67c6daa 100644
@@ -78,12 +78,12 @@ void u_upload_unmap( struct u_upload_mgr *upload );
  * \param outbuf           Pointer to where the upload buffer will be returned.
  * \param ptr              Pointer to the allocated memory that is returned.
  */
-enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
-                                unsigned min_out_offset,
-                                unsigned size,
-                                unsigned *out_offset,
-                                struct pipe_resource **outbuf,
-                                void **ptr );
+void u_upload_alloc(struct u_upload_mgr *upload,
+                    unsigned min_out_offset,
+                    unsigned size,
+                    unsigned *out_offset,
+                    struct pipe_resource **outbuf,
+                    void **ptr);
 
 
 /**
@@ -92,12 +92,12 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
  * Same as u_upload_alloc, but in addition to that, it copies "data"
  * to the pointer returned from u_upload_alloc.
  */
-enum pipe_error u_upload_data( struct u_upload_mgr *upload,
-                               unsigned min_out_offset,
-                               unsigned size,
-                               const void *data,
-                               unsigned *out_offset,
-                               struct pipe_resource **outbuf);
+void u_upload_data(struct u_upload_mgr *upload,
+                   unsigned min_out_offset,
+                   unsigned size,
+                   const void *data,
+                   unsigned *out_offset,
+                   struct pipe_resource **outbuf);
 
 
 /**
@@ -106,13 +106,13 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload,
  * Same as u_upload_data, except that the input data comes from a buffer
  * instead of a user pointer.
  */
-enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
-                                 unsigned min_out_offset,
-                                 unsigned offset,
-                                 unsigned size,
-                                 struct pipe_resource *inbuf,
-                                 unsigned *out_offset,
-                                 struct pipe_resource **outbuf);
+void u_upload_buffer(struct u_upload_mgr *upload,
+                     unsigned min_out_offset,
+                     unsigned offset,
+                     unsigned size,
+                     struct pipe_resource *inbuf,
+                     unsigned *out_offset,
+                     struct pipe_resource **outbuf);
 
 
 
diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
index 02ae0b8..3d2193c 100644
@@ -406,7 +406,6 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
    struct pipe_resource *out_buffer = NULL;
    uint8_t *out_map;
    unsigned out_offset, mask;
-   enum pipe_error err;
 
    /* Get a translate object. */
    tr = translate_cache_find(mgr->translate_cache, key);
@@ -454,12 +453,12 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
       assert((ib->buffer || ib->user_buffer) && ib->index_size);
 
       /* Create and map the output buffer. */
-      err = u_upload_alloc(mgr->uploader, 0,
-                           key->output_stride * num_indices,
-                           &out_offset, &out_buffer,
-                           (void**)&out_map);
-      if (err != PIPE_OK)
-         return err;
+      u_upload_alloc(mgr->uploader, 0,
+                     key->output_stride * num_indices,
+                     &out_offset, &out_buffer,
+                     (void**)&out_map);
+      if (!out_buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;
 
       if (ib->user_buffer) {
          map = (uint8_t*)ib->user_buffer + offset;
@@ -486,13 +485,13 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
       }
    } else {
       /* Create and map the output buffer. */
-      err = u_upload_alloc(mgr->uploader,
-                           key->output_stride * start_vertex,
-                           key->output_stride * num_vertices,
-                           &out_offset, &out_buffer,
-                           (void**)&out_map);
-      if (err != PIPE_OK)
-         return err;
+      u_upload_alloc(mgr->uploader,
+                     key->output_stride * start_vertex,
+                     key->output_stride * num_vertices,
+                     &out_offset, &out_buffer,
+                     (void**)&out_map);
+      if (!out_buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;
 
       out_offset -= key->output_stride * start_vertex;
 
@@ -977,7 +976,6 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
       unsigned start, end;
       struct pipe_vertex_buffer *real_vb;
       const uint8_t *ptr;
-      enum pipe_error err;
 
       i = u_bit_scan(&buffer_mask);
 
@@ -988,10 +986,10 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
       real_vb = &mgr->real_vertex_buffer[i];
       ptr = mgr->vertex_buffer[i].user_buffer;
 
-      err = u_upload_data(mgr->uploader, start, end - start, ptr + start,
-                          &real_vb->buffer_offset, &real_vb->buffer);
-      if (err != PIPE_OK)
-         return err;
+      u_upload_data(mgr->uploader, start, end - start, ptr + start,
+                    &real_vb->buffer_offset, &real_vb->buffer);
+      if (!real_vb->buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;
 
       real_vb->buffer_offset -= start;
    }
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 2c0da01..e780047 100644
@@ -267,6 +267,7 @@ The integer capabilities:
 * ``PIPE_CAP_DEPTH_BOUNDS_TEST``: Whether bounds_test, bounds_min, and
   bounds_max states of pipe_depth_stencil_alpha_state behave according
   to the GL_EXT_depth_bounds_test specification.
+* ``PIPE_CAP_TGSI_TXQS``: Whether the ``TXQS`` opcode is supported.
 
 
 .. _pipe_capf:
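
[Editor's note] Sketch of a state tracker gating TXQS emission on the new cap via the usual get_param() query:

   if (screen->get_param(screen, PIPE_CAP_TGSI_TXQS)) {
      /* GLSL textureSamples() can be lowered to TXQS */
   }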
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 314c9ca..314fe1b 100644
@@ -960,7 +960,6 @@ XXX doesn't look like most of the opcodes really belong here.
   For components which don't return a resource dimension, their value
   is undefined.
 
-
 .. math::
 
   lod = src0.x
@@ -973,6 +972,17 @@ XXX doesn't look like most of the opcodes really belong here.
 
   dst.w = texture\_levels(unit)
 
+
+.. opcode:: TXQS - Texture Samples Query
+
+  This retrieves the number of samples in the texture, and stores it
+  into the x component. The other components are undefined.
+
+.. math::
+
+  dst.x = texture\_samples(unit)
+
+
 .. opcode:: TG4 - Texture Gather
 
   As per ARB_texture_gather, gathers the four texels to be used in a bi-linear
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index dd48956..2853787 100644
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index a157dc3..4bbcb33 100644
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -280,6 +280,8 @@ enum a3xx_rb_blend_opcode {
 enum a3xx_intp_mode {
        SMOOTH = 0,
        FLAT = 1,
+       ZERO = 2,
+       ONE = 3,
 };
 
 enum a3xx_repl_mode {
@@ -684,6 +686,12 @@ static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460
 #define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD                          0x00800000
 #define A3XX_GRAS_CL_CLIP_CNTL_WCOORD                          0x01000000
 #define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE                   0x02000000
+#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK      0x1c000000
+#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT     26
+static inline uint32_t A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(uint32_t val)
+{
+       return ((val) << A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT) & A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK;
+}
 
 #define REG_A3XX_GRAS_CL_GB_CLIP_ADJ                           0x00002044
 #define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK                    0x000003ff
@@ -774,7 +782,7 @@ static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val)
 #define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT              0
 static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val)
 {
-       return ((((int32_t)(val * 16384.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
+       return ((((int32_t)(val * 1048576.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
 }
 
 #define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET                    0x0000206d
@@ -895,6 +903,9 @@ static inline uint32_t A3XX_RB_MODE_CONTROL_MRT(uint32_t val)
 #define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE               0x00010000
 
 #define REG_A3XX_RB_RENDER_CONTROL                             0x000020c1
+#define A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE            0x00000001
+#define A3XX_RB_RENDER_CONTROL_YUV_IN_ENABLE                   0x00000002
+#define A3XX_RB_RENDER_CONTROL_COV_VALUE_INPUT_ENABLE          0x00000004
 #define A3XX_RB_RENDER_CONTROL_FACENESS                                0x00000008
 #define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK                 0x00000ff0
 #define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT                        4
@@ -908,6 +919,8 @@ static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val)
 #define A3XX_RB_RENDER_CONTROL_YCOORD                          0x00008000
 #define A3XX_RB_RENDER_CONTROL_ZCOORD                          0x00010000
 #define A3XX_RB_RENDER_CONTROL_WCOORD                          0x00020000
+#define A3XX_RB_RENDER_CONTROL_I_CLAMP_ENABLE                  0x00080000
+#define A3XX_RB_RENDER_CONTROL_COV_VALUE_OUTPUT_ENABLE         0x00100000
 #define A3XX_RB_RENDER_CONTROL_ALPHA_TEST                      0x00400000
 #define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK           0x07000000
 #define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT          24
@@ -915,6 +928,8 @@ static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compar
 {
        return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK;
 }
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_COVERAGE               0x40000000
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_ONE                    0x80000000
 
 #define REG_A3XX_RB_MSAA_CONTROL                               0x000020c2
 #define A3XX_RB_MSAA_CONTROL_DISABLE                           0x00000400
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.c b/src/gallium/drivers/freedreno/a3xx/fd3_blend.c
index 6f5de9d..35360f3 100644
@@ -28,6 +28,7 @@
 
 #include "pipe/p_state.h"
 #include "util/u_blend.h"
+#include "util/u_dual_blend.h"
 #include "util/u_string.h"
 #include "util/u_memory.h"
 
@@ -131,5 +132,8 @@ fd3_blend_state_create(struct pipe_context *pctx,
                        so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
        }
 
+       if (cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0))
+               so->rb_render_control = A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE;
+
        return so;
 }
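
[Editor's note] For illustration, a gallium blend state that util_blend_state_is_dual() reports as dual-source, which now also sets A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE above:

   struct pipe_blend_state cso = {0};

   cso.rt[0].blend_enable     = 1;
   cso.rt[0].rgb_func         = PIPE_BLEND_ADD;
   cso.rt[0].rgb_src_factor   = PIPE_BLENDFACTOR_ONE;
   cso.rt[0].rgb_dst_factor   = PIPE_BLENDFACTOR_SRC1_COLOR;
   cso.rt[0].alpha_func       = PIPE_BLEND_ADD;
   cso.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
   cso.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_SRC1_ALPHA;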
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.h b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
index 142df7c..59e0010 100644
@@ -36,6 +36,7 @@
 
 struct fd3_blend_stateobj {
        struct pipe_blend_state base;
+       uint32_t rb_render_control;
        struct {
                /* Blend control bits for color if there is an alpha channel */
                uint32_t blend_control_rgb;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
index 250bcf8..b4c2ebe 100644
@@ -73,22 +73,6 @@ struct fd3_context {
         */
        struct fd_vertex_state blit_vbuf_state;
 
-
-       /*
-        * Border color layout *appears* to be as arrays of 0x40 byte
-        * elements, with frag shader elements starting at (16 x 0x40).
-        * But at some point I should probably experiment more with
-        * samplers in vertex shaders to be sure.  Unclear about why
-        * there is this offset when there are separate VS and FS base
-        * addr regs.
-        *
-        * The first 8 bytes of each entry are the requested border
-        * color in fp16.  Unclear about the rest.. could be used for
-        * other formats, or could simply be for aligning the pitch
-        * to 32 pixels.
-        */
-#define BORDERCOLOR_SIZE 0x40
-
        struct u_upload_mgr *border_color_uploader;
        struct pipe_resource *border_color_buf;
 
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 6f514ed..6153d92 100644
@@ -149,6 +149,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        &fd3_ctx->border_color_buf,
                        &ptr);
 
+       fd_setup_border_colors(tex, ptr, tex_off[sb]);
+
        if (tex->num_samplers > 0) {
                /* output sampler state: */
                OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers));
@@ -163,57 +165,6 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
                                        fd3_sampler_stateobj(tex->samplers[i]) :
                                        &dummy_sampler;
-                       uint16_t *bcolor = (uint16_t *)((uint8_t *)ptr +
-                                       (BORDERCOLOR_SIZE * tex_off[sb]) +
-                                       (BORDERCOLOR_SIZE * i));
-                       uint32_t *bcolor32 = (uint32_t *)&bcolor[16];
-
-                       /*
-                        * XXX HACK ALERT XXX
-                        *
-                        * The border colors need to be swizzled in a particular
-                        * format-dependent order. Even though samplers don't know about
-                        * formats, we can assume that with a GL state tracker, there's a
-                        * 1:1 correspondence between sampler and texture. Take advantage
-                        * of that knowledge.
-                        */
-                       if (i < tex->num_textures && tex->textures[i]) {
-                               const struct util_format_description *desc =
-                                       util_format_description(tex->textures[i]->format);
-                               for (j = 0; j < 4; j++) {
-                                       if (desc->swizzle[j] >= 4)
-                                               continue;
-
-                                       const struct util_format_channel_description *chan =
-                                               &desc->channel[desc->swizzle[j]];
-                                       int size = chan->size;
-
-                                       /* The Z16 texture format we use seems to look in the
-                                        * 32-bit border color slots
-                                        */
-                                       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
-                                               size = 32;
-
-                                       /* Formats like R11G11B10 or RGB9_E5 don't specify
-                                        * per-channel sizes properly.
-                                        */
-                                       if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
-                                               size = 16;
-
-                                       if (chan->pure_integer && size > 16)
-                                               bcolor32[desc->swizzle[j] + 4] =
-                                                       sampler->base.border_color.i[j];
-                                       else if (size > 16)
-                                               bcolor32[desc->swizzle[j]] =
-                                                       fui(sampler->base.border_color.f[j]);
-                                       else if (chan->pure_integer)
-                                               bcolor[desc->swizzle[j] + 8] =
-                                                       sampler->base.border_color.i[j];
-                                       else
-                                               bcolor[desc->swizzle[j]] =
-                                                       util_float_to_half(sampler->base.border_color.f[j]);
-                               }
-                       }
 
                        OUT_RING(ring, sampler->texsamp0);
                        OUT_RING(ring, sampler->texsamp1);
@@ -400,15 +351,27 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
        unsigned vtxcnt_regid = regid(63, 0);
 
        for (i = 0; i < vp->inputs_count; i++) {
-               uint8_t semantic = sem2name(vp->inputs[i].semantic);
-               if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
-                       vertex_regid = vp->inputs[i].regid;
-               else if (semantic == TGSI_SEMANTIC_INSTANCEID)
-                       instance_regid = vp->inputs[i].regid;
-               else if (semantic == IR3_SEMANTIC_VTXCNT)
-                       vtxcnt_regid = vp->inputs[i].regid;
-               else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask)
+               if (vp->inputs[i].sysval) {
+                       switch(vp->inputs[i].slot) {
+                       case SYSTEM_VALUE_BASE_VERTEX:
+                               /* handled elsewhere */
+                               break;
+                       case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+                               vertex_regid = vp->inputs[i].regid;
+                               break;
+                       case SYSTEM_VALUE_INSTANCE_ID:
+                               instance_regid = vp->inputs[i].regid;
+                               break;
+                       case SYSTEM_VALUE_VERTEX_CNT:
+                               vtxcnt_regid = vp->inputs[i].regid;
+                               break;
+                       default:
+                               unreachable("invalid system value");
+                               break;
+                       }
+               } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
                        last = i;
+               }
        }
 
        /* hw doesn't like to be configured for zero vbo's, it seems: */
@@ -419,7 +382,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
                return;
 
        for (i = 0, j = 0; i <= last; i++) {
-               assert(sem2name(vp->inputs[i].semantic) == 0);
+               assert(!vp->inputs[i].sysval);
                if (vp->inputs[i].compmask) {
                        struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
                        const struct pipe_vertex_buffer *vb =
@@ -492,8 +455,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                                A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
        }
 
-       if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
-               uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control;
+       if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG | FD_DIRTY_BLEND_DUAL)) &&
+               !emit->key.binning_pass) {
+               uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control |
+                       fd3_blend_stateobj(ctx->blend)->rb_render_control;
 
                val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
                val |= COND(fp->frag_coord, A3XX_RB_RENDER_CONTROL_XCOORD |
@@ -564,7 +529,8 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
                                A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
                /* TODO only use if prog doesn't use clipvertex/clipdist */
-               val |= MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6) << 26;
+               val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
+                               MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6));
                OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
                OUT_RING(ring, val);
        }
@@ -639,9 +605,13 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
        }
 
-       if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
+       if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) {
                struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-               fd3_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
+               int nr_cbufs = pfb->nr_cbufs;
+               if (fd3_blend_stateobj(ctx->blend)->rb_render_control &
+                       A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE)
+                       nr_cbufs++;
+               fd3_program_emit(ring, emit, nr_cbufs, pfb->cbufs);
        }
 
        /* TODO we should not need this or fd_wfi() before emit_constants():
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
index 04cb9b9..857d156 100644
@@ -355,6 +355,8 @@ fd3_fs_output_format(enum pipe_format format)
        case PIPE_FORMAT_R16G16_FLOAT:
        case PIPE_FORMAT_R11G11B10_FLOAT:
                return RB_R16G16B16A16_FLOAT;
+       case PIPE_FORMAT_L8_UNORM:
+               return RB_R8G8B8A8_UNORM;
        default:
                return fd3_pipe2color(format);
        }
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index b536079..4ed04b3 100644
@@ -194,24 +194,17 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
        /* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
        constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
 
-       pos_regid = ir3_find_output_regid(vp,
-               ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-       posz_regid = ir3_find_output_regid(fp,
-               ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-       psize_regid = ir3_find_output_regid(vp,
-               ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
+       pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
+       posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
+       psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
        if (fp->color0_mrt) {
                color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
-                       ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+                       ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
        } else {
-               for (i = 0; i < fp->outputs_count; i++) {
-                       ir3_semantic sem = fp->outputs[i].semantic;
-                       unsigned idx = sem2idx(sem);
-                       if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
-                               continue;
-                       debug_assert(idx < ARRAY_SIZE(color_regid));
-                       color_regid[idx] = fp->outputs[i].regid;
-               }
+               color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
+               color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
+               color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
+               color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
        }
 
        /* adjust regids for alpha output formats. there is no alpha render
@@ -280,14 +273,14 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 
                j = ir3_next_varying(fp, j);
                if (j < fp->inputs_count) {
-                       k = ir3_find_output(vp, fp->inputs[j].semantic);
+                       k = ir3_find_output(vp, fp->inputs[j].slot);
                        reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid);
                        reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask);
                }
 
                j = ir3_next_varying(fp, j);
                if (j < fp->inputs_count) {
-                       k = ir3_find_output(vp, fp->inputs[j].semantic);
+                       k = ir3_find_output(vp, fp->inputs[j].slot);
                        reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid);
                        reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask);
                }
@@ -394,7 +387,6 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 
                /* figure out VARYING_INTERP / FLAT_SHAD register values: */
                for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
-                       uint32_t interp = fp->inputs[j].interpolate;
 
                        /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
                         * instead.. rather than -8 everywhere else..
@@ -406,8 +398,8 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
                         */
                        debug_assert((inloc % 4) == 0);
 
-                       if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
-                                       ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
+                       if ((fp->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) ||
+                                       (fp->inputs[j].rasterflat && emit->rasterflat)) {
                                uint32_t loc = inloc;
                                for (i = 0; i < 4; i++, loc++) {
                                        vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
@@ -415,14 +407,20 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
                                }
                        }
 
-                       /* Replace the .xy coordinates with S/T from the point sprite. Set
-                        * interpolation bits for .zw such that they become .01
-                        */
-                       if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) {
-                               vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
-                                       << ((inloc % 16) * 2);
-                               vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
-                               vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+                       gl_varying_slot slot = fp->inputs[j].slot;
+
+                       /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
+                       if (slot >= VARYING_SLOT_VAR0) {
+                               unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
+                               /* Replace the .xy coordinates with S/T from the point sprite. Set
+                                * interpolation bits for .zw such that they become .01
+                                */
+                               if (emit->sprite_coord_enable & texmask) {
+                                       vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
+                                                       << ((inloc % 16) * 2);
+                                       vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+                                       vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+                               }
                        }
                }
 
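Both the flat-shading and point-sprite cases above write the same kind of field: vinterp and vpsrepl pack one 2-bit code per scalar varying component, sixteen components to a 32-bit register. A minimal helper capturing that indexing (set_field() is illustrative only, not part of the patch):

    static inline void
    set_field(uint32_t regs[4], unsigned loc, uint32_t code)
    {
            regs[loc / 16] |= code << ((loc % 16) * 2);
    }

    /* e.g. the .zw "become .01" setup above is equivalent to:
     *   set_field(vinterp, inloc + 2, 2);
     *   set_field(vinterp, inloc + 3, 3);
     */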
index 2e1d712..819f5b1 100644
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -249,7 +249,8 @@ enum a4xx_tex_clamp {
        A4XX_TEX_REPEAT = 0,
        A4XX_TEX_CLAMP_TO_EDGE = 1,
        A4XX_TEX_MIRROR_REPEAT = 2,
-       A4XX_TEX_CLAMP_NONE = 3,
+       A4XX_TEX_CLAMP_TO_BORDER = 3,
+       A4XX_TEX_MIRROR_CLAMP = 4,
 };
 
 enum a4xx_tex_aniso {
index 625512c..e53e0c5 100644
@@ -55,6 +55,8 @@ fd4_context_destroy(struct pipe_context *pctx)
        pipe_resource_reference(&fd4_ctx->solid_vbuf, NULL);
        pipe_resource_reference(&fd4_ctx->blit_texcoord_vbuf, NULL);
 
+       u_upload_destroy(fd4_ctx->border_color_uploader);
+
        fd_context_destroy(pctx);
 }
 
@@ -169,5 +171,8 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 
        fd4_query_context_init(pctx);
 
+       fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096,
+                       2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, 0);
+
        return pctx;
 }
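For reference, this follows the u_upload_create(pipe, default_size, alignment, bind) signature of this era: a 4096-byte upload buffer whose allocations are aligned to one full border-color table, matching the per-draw allocation made later in emit_textures().

    /* 2 stages (VS, FS) * PIPE_MAX_SAMPLERS entries * 0x40 bytes each;
     * e.g. 2 * 16 * 0x40 = 2048 bytes if PIPE_MAX_SAMPLERS is 16 (an
     * assumed value), comfortably within the 4096-byte buffer.
     */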
index af94756..074c5a7 100644
@@ -29,6 +29,8 @@
 #ifndef FD4_CONTEXT_H_
 #define FD4_CONTEXT_H_
 
+#include "util/u_upload_mgr.h"
+
 #include "freedreno_drmif.h"
 
 #include "freedreno_context.h"
@@ -70,6 +72,9 @@ struct fd4_context {
         */
        struct fd_vertex_state blit_vbuf_state;
 
+       struct u_upload_mgr *border_color_uploader;
+       struct pipe_resource *border_color_buf;
+
        /* if *any* of bits are set in {v,f}saturate_{s,t,r} */
        bool vsaturate, fsaturate;
 
index 2bd2ca2..025753c 100644
@@ -123,6 +123,7 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
                        // TODO set .half_precision based on render target format,
                        // ie. float16 and smaller use half, float32 use full..
                        .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
+                       .ucp_enables = ctx->rasterizer ? ctx->rasterizer->clip_plane_enable : 0,
                        .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate),
                        .vsaturate_s = fd4_ctx->vsaturate_s,
                        .vsaturate_t = fd4_ctx->vsaturate_t,
index b75be29..c7ed1d2 100644
@@ -124,7 +124,20 @@ static void
 emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
                enum adreno_state_block sb, struct fd_texture_stateobj *tex)
 {
-       unsigned i;
+       static const uint32_t bcolor_reg[] = {
+                       [SB_VERT_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
+                       [SB_FRAG_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
+       };
+       struct fd4_context *fd4_ctx = fd4_context(ctx);
+       unsigned i, off;
+       void *ptr;
+
+       u_upload_alloc(fd4_ctx->border_color_uploader,
+                       0, 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, &off,
+                       &fd4_ctx->border_color_buf,
+                       &ptr);
+
+       fd_setup_border_colors(tex, ptr, 0);
 
        if (tex->num_samplers > 0) {
                int num_samplers;
@@ -190,6 +203,11 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        OUT_RING(ring, 0x00000000);
                }
        }
+
+       OUT_PKT0(ring, bcolor_reg[sb], 1);
+       OUT_RELOC(ring, fd_resource(fd4_ctx->border_color_buf)->bo, off, 0, 0);
+
+       u_upload_unmap(fd4_ctx->border_color_uploader);
 }
 
 /* emit texture state for mem->gmem restore operation.. eventually it would
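The border-color hunk above is the standard u_upload_mgr pattern for small per-draw tables: allocate a CPU-mapped slice, fill it, point the GPU at buffer-plus-offset via a reloc, then unmap. Condensed (fill_cpu_side() is a placeholder for fd_setup_border_colors()):

    unsigned off;
    struct pipe_resource *buf = NULL;
    void *ptr;

    u_upload_alloc(uploader, 0, size, &off, &buf, &ptr); /* mapped slice */
    fill_cpu_side(ptr);                                  /* stage the table */
    OUT_PKT0(ring, bcolor_reg[sb], 1);
    OUT_RELOC(ring, fd_resource(buf)->bo, off, 0, 0);    /* GPU addr = bo + off */
    u_upload_unmap(uploader);                            /* flush CPU writes */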
@@ -315,17 +333,30 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
        unsigned vtxcnt_regid = regid(63, 0);
 
        for (i = 0; i < vp->inputs_count; i++) {
-               uint8_t semantic = sem2name(vp->inputs[i].semantic);
-               if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
-                       vertex_regid = vp->inputs[i].regid;
-               else if (semantic == TGSI_SEMANTIC_INSTANCEID)
-                       instance_regid = vp->inputs[i].regid;
-               else if (semantic == IR3_SEMANTIC_VTXCNT)
-                       vtxcnt_regid = vp->inputs[i].regid;
-               else if ((i < vtx->vtx->num_elements) && vp->inputs[i].compmask)
+               if (vp->inputs[i].sysval) {
+                       switch(vp->inputs[i].slot) {
+                       case SYSTEM_VALUE_BASE_VERTEX:
+                               /* handled elsewhere */
+                               break;
+                       case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+                               vertex_regid = vp->inputs[i].regid;
+                               break;
+                       case SYSTEM_VALUE_INSTANCE_ID:
+                               instance_regid = vp->inputs[i].regid;
+                               break;
+                       case SYSTEM_VALUE_VERTEX_CNT:
+                               vtxcnt_regid = vp->inputs[i].regid;
+                               break;
+                       default:
+                               unreachable("invalid system value");
+                               break;
+                       }
+               } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
                        last = i;
+               }
        }
 
+
        /* hw doesn't like to be configured for zero vbo's, it seems: */
        if ((vtx->vtx->num_elements == 0) &&
                        (vertex_regid == regid(63, 0)) &&
@@ -334,7 +365,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
                return;
 
        for (i = 0, j = 0; i <= last; i++) {
-               assert(sem2name(vp->inputs[i].semantic) == 0);
+               assert(!vp->inputs[i].sysval);
                if (vp->inputs[i].compmask) {
                        struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
                        const struct pipe_vertex_buffer *vb =
index 6c9e217..847d4fb 100644
@@ -89,13 +89,14 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
        _T(L8_UNORM,   8_UNORM, R8_UNORM, WZYX),
        _T(I8_UNORM,   8_UNORM, NONE,     WZYX),
 
-       /* NOTE: should be TFMT_8_UINT (which then gets remapped to
-        * TFMT_8_UNORM for mem2gmem in _gmem_restore_format()), but
-        * we don't know TFMT_8_UINT yet.. so just use TFMT_8_UNORM
-        * for now.. sampling from stencil as a texture might not
-        * work right, but at least should be fine for zsbuf..
-        */
-       _T(S8_UINT,    8_UNORM,  R8_UNORM, WZYX),
+       _T(A8_UINT,    8_UINT,  NONE,     WZYX),
+       _T(A8_SINT,    8_SINT,  NONE,     WZYX),
+       _T(L8_UINT,    8_UINT,  NONE,     WZYX),
+       _T(L8_SINT,    8_SINT,  NONE,     WZYX),
+       _T(I8_UINT,    8_UINT,  NONE,     WZYX),
+       _T(I8_SINT,    8_SINT,  NONE,     WZYX),
+
+       _T(S8_UINT,    8_UINT,  R8_UNORM, WZYX),
 
        /* 16-bit */
        V_(R16_UNORM,   16_UNORM, NONE,     WZYX),
index a3d7123..e3d5dab 100644
@@ -227,27 +227,22 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
        /* blob seems to always use constmode currently: */
        constmode = 1;
 
-       pos_regid = ir3_find_output_regid(s[VS].v,
-               ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-       posz_regid = ir3_find_output_regid(s[FS].v,
-               ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-       psize_regid = ir3_find_output_regid(s[VS].v,
-               ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
+       pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
+       posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH);
+       psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
        if (s[FS].v->color0_mrt) {
                color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
                color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
-                       ir3_find_output_regid(s[FS].v, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+                       ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
        } else {
-               const struct ir3_shader_variant *fp = s[FS].v;
-               memset(color_regid, 0, sizeof(color_regid));
-               for (i = 0; i < fp->outputs_count; i++) {
-                       ir3_semantic sem = fp->outputs[i].semantic;
-                       unsigned idx = sem2idx(sem);
-                       if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
-                               continue;
-                       debug_assert(idx < ARRAY_SIZE(color_regid));
-                       color_regid[idx] = fp->outputs[i].regid;
-               }
+               color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
+               color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
+               color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
+               color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
+               color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
+               color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
+               color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
+               color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
        }
 
        /* adjust regids for alpha output formats. there is no alpha render
@@ -257,7 +252,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
                if (util_format_is_alpha(pipe_surface_format(bufs[i])))
                        color_regid[i] += 3;
 
-
        /* TODO get these dynamically: */
        face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
        coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
@@ -348,14 +342,14 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
 
                j = ir3_next_varying(s[FS].v, j);
                if (j < s[FS].v->inputs_count) {
-                       k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
+                       k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot);
                        reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid);
                        reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask);
                }
 
                j = ir3_next_varying(s[FS].v, j);
                if (j < s[FS].v->inputs_count) {
-                       k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
+                       k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot);
                        reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid);
                        reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask);
                }
@@ -492,7 +486,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
                 */
                /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
                for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
-                       uint32_t interp = s[FS].v->inputs[j].interpolate;
 
                        /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
                         * instead.. rather than -8 everywhere else..
@@ -504,8 +497,8 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
                         */
                        debug_assert((inloc % 4) == 0);
 
-                       if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
-                                       ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
+                       if ((s[FS].v->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) ||
+                                       (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
                                uint32_t loc = inloc;
 
                                for (i = 0; i < 4; i++, loc++) {
@@ -514,14 +507,20 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
                                }
                        }
 
-                       /* Replace the .xy coordinates with S/T from the point sprite. Set
-                        * interpolation bits for .zw such that they become .01
-                        */
-                       if (emit->sprite_coord_enable & (1 << sem2idx(s[FS].v->inputs[j].semantic))) {
-                               vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
-                                       << ((inloc % 16) * 2);
-                               vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
-                               vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+                       gl_varying_slot slot = s[FS].v->inputs[j].slot;
+
+                       /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
+                       if (slot >= VARYING_SLOT_VAR0) {
+                               unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
+                               /* Replace the .xy coordinates with S/T from the point sprite. Set
+                                * interpolation bits for .zw such that they become .01
+                                */
+                               if (emit->sprite_coord_enable & texmask) {
+                                       vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
+                                               << ((inloc % 16) * 2);
+                                       vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+                                       vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+                               }
                        }
                }
 
index 213b29c..dbff5a7 100644
 #include "fd4_texture.h"
 #include "fd4_format.h"
 
-/* TODO do we need to emulate clamp-to-edge like a3xx? */
 static enum a4xx_tex_clamp
-tex_clamp(unsigned wrap)
+tex_clamp(unsigned wrap, bool clamp_to_edge)
 {
-       /* hardware probably supports more, but we can't coax all the
-        * wrap/clamp modes out of the GLESv2 blob driver.
-        *
-        * TODO once we have basics working, go back and just try
-        * different values and see what happens
-        */
+       /* Hardware does not support _CLAMP, but we emulate it: */
+       if (wrap == PIPE_TEX_WRAP_CLAMP) {
+               wrap = (clamp_to_edge) ?
+                       PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+       }
+
        switch (wrap) {
        case PIPE_TEX_WRAP_REPEAT:
                return A4XX_TEX_REPEAT;
-       case PIPE_TEX_WRAP_CLAMP:
        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
                return A4XX_TEX_CLAMP_TO_EDGE;
        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-// TODO
-//             return A4XX_TEX_CLAMP_TO_BORDER;
-       case PIPE_TEX_WRAP_MIRROR_CLAMP:
-       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+               return A4XX_TEX_CLAMP_TO_BORDER;
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-// TODO
-//             return A4XX_TEX_MIRROR_CLAMP;
+               /* only works for PoT.. need to emulate otherwise! */
+               return A4XX_TEX_MIRROR_CLAMP;
        case PIPE_TEX_WRAP_MIRROR_REPEAT:
                return A4XX_TEX_MIRROR_REPEAT;
+       case PIPE_TEX_WRAP_MIRROR_CLAMP:
+       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+               /* these two we could perhaps emulate, but we currently
+                * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
+                */
        default:
                DBG("invalid wrap: %u", wrap);
                return 0;
@@ -88,6 +88,7 @@ fd4_sampler_state_create(struct pipe_context *pctx,
        struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
        unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
        bool miplinear = false;
+       bool clamp_to_edge;
 
        if (!so)
                return NULL;
@@ -97,14 +98,29 @@ fd4_sampler_state_create(struct pipe_context *pctx,
 
        so->base = *cso;
 
+       /*
+        * For nearest filtering, _CLAMP means _CLAMP_TO_EDGE;  for linear
+        * filtering, _CLAMP means _CLAMP_TO_BORDER while additionally
+        * clamping the texture coordinates to [0.0, 1.0].
+        *
+        * The clamping will be taken care of in the shaders.  There are two
+        * filters here, but let the minification one decide.
+        */
+       clamp_to_edge = (cso->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+       if (!clamp_to_edge) {
+               so->saturate_s = (cso->wrap_s == PIPE_TEX_WRAP_CLAMP);
+               so->saturate_t = (cso->wrap_t == PIPE_TEX_WRAP_CLAMP);
+               so->saturate_r = (cso->wrap_r == PIPE_TEX_WRAP_CLAMP);
+       }
+
        so->texsamp0 =
                COND(miplinear, A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
                A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
                A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
                A4XX_TEX_SAMP_0_ANISO(aniso) |
-               A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
-               A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
-               A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
+               A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, clamp_to_edge)) |
+               A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, clamp_to_edge)) |
+               A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, clamp_to_edge));
 
        so->texsamp1 =
 //             COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
@@ -122,6 +138,50 @@ fd4_sampler_state_create(struct pipe_context *pctx,
        return so;
 }
 
+static void
+fd4_sampler_states_bind(struct pipe_context *pctx,
+               unsigned shader, unsigned start,
+               unsigned nr, void **hwcso)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct fd4_context *fd4_ctx = fd4_context(ctx);
+       uint16_t saturate_s = 0, saturate_t = 0, saturate_r = 0;
+       unsigned i;
+
+       for (i = 0; i < nr; i++) {
+               if (hwcso[i]) {
+                       struct fd4_sampler_stateobj *sampler =
+                                       fd4_sampler_stateobj(hwcso[i]);
+                       if (sampler->saturate_s)
+                               saturate_s |= (1 << i);
+                       if (sampler->saturate_t)
+                               saturate_t |= (1 << i);
+                       if (sampler->saturate_r)
+                               saturate_r |= (1 << i);
+               }
+       }
+
+       fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
+
+       if (shader == PIPE_SHADER_FRAGMENT) {
+               fd4_ctx->fsaturate =
+                       (saturate_s != 0) ||
+                       (saturate_t != 0) ||
+                       (saturate_r != 0);
+               fd4_ctx->fsaturate_s = saturate_s;
+               fd4_ctx->fsaturate_t = saturate_t;
+               fd4_ctx->fsaturate_r = saturate_r;
+       } else if (shader == PIPE_SHADER_VERTEX) {
+               fd4_ctx->vsaturate =
+                       (saturate_s != 0) ||
+                       (saturate_t != 0) ||
+                       (saturate_r != 0);
+               fd4_ctx->vsaturate_s = saturate_s;
+               fd4_ctx->vsaturate_t = saturate_t;
+               fd4_ctx->vsaturate_r = saturate_r;
+       }
+}
+
 static enum a4xx_tex_type
 tex_type(unsigned target)
 {
@@ -209,7 +269,7 @@ void
 fd4_texture_init(struct pipe_context *pctx)
 {
        pctx->create_sampler_state = fd4_sampler_state_create;
-       pctx->bind_sampler_states = fd_sampler_states_bind;
+       pctx->bind_sampler_states = fd4_sampler_states_bind;
        pctx->create_sampler_view = fd4_sampler_view_create;
        pctx->set_sampler_views = fd_set_sampler_views;
 }
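Net effect of the sampler changes in this file: legacy PIPE_TEX_WRAP_CLAMP is split between a hardware wrap mode and, for linear filtering, shader-side coordinate saturation that reaches the compiler through the per-sampler masks gathered above. Summarized:

    /* PIPE_TEX_WRAP_CLAMP after this change:
     *   min filter NEAREST -> A4XX_TEX_CLAMP_TO_EDGE, no shader-side clamp
     *   min filter LINEAR  -> A4XX_TEX_CLAMP_TO_BORDER, plus saturate_{s,t,r}
     *     bits collected in fd4_sampler_states_bind() and routed into the
     *     shader key ({f,v}saturate_*) for nir_lower_tex() to consume
     */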
index 84ee7ec..3195577 100644
@@ -40,6 +40,7 @@
 struct fd4_sampler_stateobj {
        struct pipe_sampler_state base;
        uint32_t texsamp0, texsamp1;
+       bool saturate_s, saturate_t, saturate_r;
 };
 
 static inline struct fd4_sampler_stateobj *
index 29944b7..906368c 100644
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -85,6 +85,10 @@ enum adreno_rb_blend_factor {
        FACTOR_CONSTANT_ALPHA = 14,
        FACTOR_ONE_MINUS_CONSTANT_ALPHA = 15,
        FACTOR_SRC_ALPHA_SATURATE = 16,
+       FACTOR_SRC1_COLOR = 20,
+       FACTOR_ONE_MINUS_SRC1_COLOR = 21,
+       FACTOR_SRC1_ALPHA = 22,
+       FACTOR_ONE_MINUS_SRC1_ALPHA = 23,
 };
 
 enum adreno_rb_surface_endian {
index 432dce3..490cf5b 100644
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
index 3486c2f..61c4c6d 100644
@@ -335,6 +335,7 @@ struct fd_context {
                FD_DIRTY_SCISSOR     = (1 << 17),
                FD_DIRTY_STREAMOUT   = (1 << 18),
                FD_DIRTY_UCP         = (1 << 19),
+               FD_DIRTY_BLEND_DUAL  = (1 << 20),
        } dirty;
 
        struct pipe_blend_state *blend;
index 17dd47c..9a684d4 100644
@@ -163,7 +163,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_TEXTURE_MULTISAMPLE:
        case PIPE_CAP_TEXTURE_BARRIER:
        case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
-       case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
        case PIPE_CAP_START_INSTANCE:
        case PIPE_CAP_COMPUTE:
                return 0;
@@ -235,6 +234,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
        case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
        case PIPE_CAP_DEPTH_BOUNDS_TEST:
+       case PIPE_CAP_TGSI_TXQS:
                return 0;
 
        case PIPE_CAP_MAX_VIEWPORTS:
@@ -277,6 +277,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        /* Render targets. */
        case PIPE_CAP_MAX_RENDER_TARGETS:
                return screen->max_rts;
+       case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+               return is_a3xx(screen) ? 1 : 0;
 
        /* Queries. */
        case PIPE_CAP_QUERY_TIME_ELAPSED:
@@ -482,6 +484,7 @@ fd_screen_create(struct fd_device *dev)
        pscreen = &screen->base;
 
        screen->dev = dev;
+       screen->refcnt = 1;
 
        // maybe this should be in context?
        screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D);
index 4e5c3a6..8fb096a 100644
@@ -42,6 +42,16 @@ struct fd_bo;
 struct fd_screen {
        struct pipe_screen base;
 
+       /* it would be tempting to use pipe_reference here, but that
+        * really doesn't work well if it isn't the first member of
+        * the struct, so not quite so awesome to be adding refcnting
+        * further down the inheritance hierarchy:
+        */
+       int refcnt;
+
+       /* place for winsys to stash its own stuff: */
+       void *winsys_priv;
+
        uint32_t gmemsize_bytes;
        uint32_t device_id;
        uint32_t gpu_id;         /* 220, 305, etc */
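The manual refcnt and winsys_priv exist so a winsys can hand the same fd_screen to several consumers (one screen per device fd) rather than creating duplicates; fd_screen_create() starts the count at 1 above. A sketch of the intended sharing, with the helper name hypothetical:

    /* hypothetical winsys-side reuse; not part of this diff */
    static struct pipe_screen *
    winsys_ref_screen(struct fd_screen *screen)
    {
            screen->refcnt++;   /* dropped again on the winsys destroy path */
            return &screen->base;
    }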
index e75865a..685d3a7 100644
@@ -27,6 +27,7 @@
  */
 
 #include "pipe/p_state.h"
+#include "util/u_dual_blend.h"
 #include "util/u_string.h"
 #include "util/u_memory.h"
 #include "util/u_helpers.h"
@@ -225,8 +226,17 @@ static void
 fd_blend_state_bind(struct pipe_context *pctx, void *hwcso)
 {
        struct fd_context *ctx = fd_context(pctx);
+       struct pipe_blend_state *cso = hwcso;
+       bool old_is_dual = ctx->blend ?
+               ctx->blend->rt[0].blend_enable && util_blend_state_is_dual(ctx->blend, 0) :
+               false;
+       bool new_is_dual = cso ?
+               cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0) :
+               false;
        ctx->blend = hwcso;
        ctx->dirty |= FD_DIRTY_BLEND;
+       if (old_is_dual != new_is_dual)
+               ctx->dirty |= FD_DIRTY_BLEND_DUAL;
 }
 
 static void
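fd_blend_state_bind() now tracks transitions into and out of dual-source blending separately from ordinary blend changes, since only that transition needs the extra FD_DIRTY_BLEND_DUAL invalidation. util_blend_state_is_dual() comes from the newly included u_dual_blend.h; roughly, it reports whether any factor on the given RT reads the second source. A sketch of that condition:

    /* sketch; see util/u_dual_blend.h for the real helper */
    static bool
    is_dual_src_factor(unsigned factor)
    {
            return (factor == PIPE_BLENDFACTOR_SRC1_COLOR) ||
                   (factor == PIPE_BLENDFACTOR_SRC1_ALPHA) ||
                   (factor == PIPE_BLENDFACTOR_INV_SRC1_COLOR) ||
                   (factor == PIPE_BLENDFACTOR_INV_SRC1_ALPHA);
    }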
index eaa6629..04e4643 100644
@@ -162,3 +162,69 @@ fd_texture_init(struct pipe_context *pctx)
 
        pctx->sampler_view_destroy = fd_sampler_view_destroy;
 }
+
+/* helper for setting up border-color buffer for a3xx/a4xx: */
+void
+fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
+               unsigned offset)
+{
+       unsigned i, j;
+
+       for (i = 0; i < tex->num_samplers; i++) {
+               struct pipe_sampler_state *sampler = tex->samplers[i];
+               uint16_t *bcolor = (uint16_t *)((uint8_t *)ptr +
+                               (BORDERCOLOR_SIZE * offset) +
+                               (BORDERCOLOR_SIZE * i));
+               uint32_t *bcolor32 = (uint32_t *)&bcolor[16];
+
+               if (!sampler)
+                       continue;
+
+               /*
+                * XXX HACK ALERT XXX
+                *
+                * The border colors need to be swizzled in a particular
+                * format-dependent order. Even though samplers don't know about
+                * formats, we can assume that with a GL state tracker, there's a
+                * 1:1 correspondence between sampler and texture. Take advantage
+                * of that knowledge.
+                */
+               if (i < tex->num_textures && tex->textures[i]) {
+                       const struct util_format_description *desc =
+                                       util_format_description(tex->textures[i]->format);
+                       for (j = 0; j < 4; j++) {
+                               if (desc->swizzle[j] >= 4)
+                                       continue;
+
+                               const struct util_format_channel_description *chan =
+                                               &desc->channel[desc->swizzle[j]];
+                               int size = chan->size;
+
+                               /* The Z16 texture format we use seems to look in the
+                                * 32-bit border color slots
+                                */
+                               if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
+                                       size = 32;
+
+                               /* Formats like R11G11B10 or RGB9_E5 don't specify
+                                * per-channel sizes properly.
+                                */
+                               if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
+                                       size = 16;
+
+                               if (chan->pure_integer && size > 16)
+                                       bcolor32[desc->swizzle[j] + 4] =
+                                                       sampler->border_color.i[j];
+                               else if (size > 16)
+                                       bcolor32[desc->swizzle[j]] =
+                                                       fui(sampler->border_color.f[j]);
+                               else if (chan->pure_integer)
+                                       bcolor[desc->swizzle[j] + 8] =
+                                                       sampler->border_color.i[j];
+                               else
+                                       bcolor[desc->swizzle[j]] =
+                                                       util_float_to_half(sampler->border_color.f[j]);
+                       }
+               }
+       }
+}
index 43571a9..fa27d1c 100644
@@ -41,4 +41,35 @@ void fd_set_sampler_views(struct pipe_context *pctx, unsigned shader,
 
 void fd_texture_init(struct pipe_context *pctx);
 
+struct fd_texture_stateobj;
+
+/* Both a3xx and a4xx share the same layout for the border-color buffer,
+ * which contains the pre-swizzled (based on texture format) border
+ * color value, with the following layout (per sampler):
+ *
+ *  offset | description
+ *  -------+-------------
+ *  0x00:  | fp16[0]   \
+ *         | fp16[1]   |___ swizzled fp16 channel values for "small float"
+ *         | fp16[2]   |    formats (<= 16 bits per component, !integer)
+ *         | fp16[3]   /
+ *  0x08:  | padding
+ *  0x10:  | int16[0]  \
+         | int16[1]  |___ swizzled int16 channel values for "small integer"
+ *         | int16[2]  |    formats (<= 16 bits per component, integer)
+ *         | int16[3]  /
+ *  0x18:  | padding
+ *  0x20:  | fp32[0]   \
+ *         | fp32[1]   |___ swizzled fp32 channel values for "large float"
+ *         | fp32[2]   |    formats (> 16 bits per component, !integer)
+ *         | fp32[3]   /
+ *  0x30:  | int32[0]  \
+ *         | int32[1]  |___ swizzled int32 channel values for "large int"
+ *         | int32[2]  |    formats (> 16 bits per component, integer)
+ *         | int32[3]  /
+ */
+#define BORDERCOLOR_SIZE 0x40
+void fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
+               unsigned offset);
+
 #endif /* FREEDRENO_TEXTURE_H_ */
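A sketch of how fd_setup_border_colors() indexes one 0x40-byte entry of this layout (border_color_ptrs() is illustrative only): bcolor[] covers the two 16-bit blocks and bcolor32 (== &bcolor[16]) the two 32-bit blocks, with the integer halves at +0x10 and +0x30.

    static void
    border_color_ptrs(void *entry,   /* one sampler's 0x40-byte slot */
                    uint16_t **fp16, uint16_t **i16,
                    uint32_t **fp32, uint32_t **i32)
    {
            *fp16 = (uint16_t *)entry;          /* 0x00: small-float channels */
            *i16  = (uint16_t *)entry + 8;      /* 0x10: small-integer channels */
            *fp32 = (uint32_t *)entry + 8;      /* 0x20: large-float channels */
            *i32  = (uint32_t *)entry + 12;     /* 0x30: large-integer channels */
    }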
index 2acce06..c8f2127 100644
@@ -104,10 +104,13 @@ fd_blend_factor(unsigned factor)
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return FACTOR_ONE_MINUS_CONSTANT_ALPHA;
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+               return FACTOR_ONE_MINUS_SRC1_COLOR;
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+               return FACTOR_ONE_MINUS_SRC1_ALPHA;
        case PIPE_BLENDFACTOR_SRC1_COLOR:
+               return FACTOR_SRC1_COLOR;
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
-               /* I don't think these are supported */
+               return FACTOR_SRC1_ALPHA;
        default:
                DBG("invalid blend factor: %x", factor);
                return 0;
index ede29f4..e768e61 100644
@@ -94,6 +94,8 @@ static void print_usage(void)
        printf("    --saturate-t MASK - bitmask of samplers to saturate T coord\n");
        printf("    --saturate-r MASK - bitmask of samplers to saturate R coord\n");
        printf("    --stream-out      - enable stream-out (aka transform feedback)\n");
+       printf("    --ucp MASK        - bitmask of enabled user-clip-planes\n");
+       printf("    --gpu GPU_ID      - specify gpu-id (default 320)\n");
        printf("    --help            - show this message\n");
 }
 
@@ -107,6 +109,7 @@ int main(int argc, char **argv)
        struct ir3_shader_variant v;
        struct ir3_shader s;
        struct ir3_shader_key key = {};
+       unsigned gpu_id = 320;
        const char *info;
        void *ptr;
        size_t size;
@@ -190,6 +193,20 @@ int main(int argc, char **argv)
                        continue;
                }
 
+               if (!strcmp(argv[n], "--ucp")) {
+                       debug_printf(" %s %s", argv[n], argv[n+1]);
+                       key.ucp_enables = strtol(argv[n+1], NULL, 0);
+                       n += 2;
+                       continue;
+               }
+
+               if (!strcmp(argv[n], "--gpu")) {
+                       debug_printf(" %s %s", argv[n], argv[n+1]);
+                       gpu_id = strtol(argv[n+1], NULL, 0);
+                       n += 2;
+                       continue;
+               }
+
                if (!strcmp(argv[n], "--help")) {
                        print_usage();
                        return 0;
@@ -232,7 +249,7 @@ int main(int argc, char **argv)
        }
 
        /* TODO cmdline option to target different gpus: */
-       compiler = ir3_compiler_create(320);
+       compiler = ir3_compiler_create(gpu_id);
 
        info = "NIR compiler";
        ret = ir3_compile_shader_nir(compiler, &v);
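With the two new flags, a standalone compile targeting an a4xx with user clip plane 0 enabled might look like the following (the ir3_compiler binary name and TGSI input file are assumptions, not shown in this hunk):

    ./ir3_compiler --gpu 420 --ucp 0x1 shader.tgsi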
index 071901a..7eddbdd 100644
@@ -127,17 +127,44 @@ struct ir3_compile {
 static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
 static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
 
-static struct nir_shader *to_nir(const struct tgsi_token *tokens)
+static struct nir_shader *to_nir(struct ir3_compile *ctx,
+               const struct tgsi_token *tokens, struct ir3_shader_variant *so)
 {
-       struct nir_shader_compiler_options options = {
+       static const nir_shader_compiler_options options = {
                        .lower_fpow = true,
                        .lower_fsat = true,
                        .lower_scmp = true,
                        .lower_flrp = true,
+                       .lower_ffract = true,
                        .native_integers = true,
        };
+       struct nir_lower_tex_options tex_options = {
+                       .lower_rect = 0,
+       };
        bool progress;
 
+       switch (so->type) {
+       case SHADER_FRAGMENT:
+       case SHADER_COMPUTE:
+               tex_options.saturate_s = so->key.fsaturate_s;
+               tex_options.saturate_t = so->key.fsaturate_t;
+               tex_options.saturate_r = so->key.fsaturate_r;
+               break;
+       case SHADER_VERTEX:
+               tex_options.saturate_s = so->key.vsaturate_s;
+               tex_options.saturate_t = so->key.vsaturate_t;
+               tex_options.saturate_r = so->key.vsaturate_r;
+               break;
+       }
+
+       if (ctx->compiler->gpu_id >= 400) {
+               /* a4xx seems to have *no* sam.p */
+               tex_options.lower_txp = ~0;  /* lower all txp */
+       } else {
+               /* a3xx just needs to avoid sam.p for 3d tex */
+               tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D);
+       }
+
        struct nir_shader *s = tgsi_to_nir(tokens, &options);
 
        if (fd_mesa_debug & FD_DBG_OPTMSGS) {
@@ -148,6 +175,14 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens)
 
        nir_opt_global_to_local(s);
        nir_convert_to_ssa(s);
+       if (s->stage == MESA_SHADER_VERTEX) {
+               nir_lower_clip_vs(s, so->key.ucp_enables);
+       } else if (s->stage == MESA_SHADER_FRAGMENT) {
+               nir_lower_clip_fs(s, so->key.ucp_enables);
+       }
+       nir_lower_tex(s, &tex_options);
+       if (so->key.color_two_side)
+               nir_lower_two_sided_color(s);
        nir_lower_idiv(s);
        nir_lower_load_const_to_scalar(s);
 
@@ -179,49 +214,12 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens)
        return s;
 }
 
-/* TODO nir doesn't lower everything for us yet, but ideally it would: */
-static const struct tgsi_token *
-lower_tgsi(struct ir3_compile *ctx, const struct tgsi_token *tokens,
-               struct ir3_shader_variant *so)
-{
-       struct tgsi_shader_info info;
-       struct tgsi_lowering_config lconfig = {
-                       .color_two_side = so->key.color_two_side,
-                       .lower_FRC = true,
-       };
-
-       switch (so->type) {
-       case SHADER_FRAGMENT:
-       case SHADER_COMPUTE:
-               lconfig.saturate_s = so->key.fsaturate_s;
-               lconfig.saturate_t = so->key.fsaturate_t;
-               lconfig.saturate_r = so->key.fsaturate_r;
-               break;
-       case SHADER_VERTEX:
-               lconfig.saturate_s = so->key.vsaturate_s;
-               lconfig.saturate_t = so->key.vsaturate_t;
-               lconfig.saturate_r = so->key.vsaturate_r;
-               break;
-       }
-
-       if (ctx->compiler->gpu_id >= 400) {
-               /* a4xx seems to have *no* sam.p */
-               lconfig.lower_TXP = ~0;  /* lower all txp */
-       } else {
-               /* a3xx just needs to avoid sam.p for 3d tex */
-               lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
-       }
-
-       return tgsi_transform_lowering(&lconfig, tokens, &info);
-}
-
 static struct ir3_compile *
 compile_init(struct ir3_compiler *compiler,
                struct ir3_shader_variant *so,
                const struct tgsi_token *tokens)
 {
        struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile);
-       const struct tgsi_token *lowered_tokens;
 
        if (compiler->gpu_id >= 400) {
                /* need special handling for "flat" */
@@ -248,13 +246,7 @@ compile_init(struct ir3_compiler *compiler,
        ctx->block_ht = _mesa_hash_table_create(ctx,
                        _mesa_hash_pointer, _mesa_key_pointer_equal);
 
-       lowered_tokens = lower_tgsi(ctx, tokens, so);
-       if (!lowered_tokens)
-               lowered_tokens = tokens;
-       ctx->s = to_nir(lowered_tokens);
-
-       if (lowered_tokens != tokens)
-               free((void *)lowered_tokens);
+       ctx->s = to_nir(ctx, tokens, so);
 
        so->first_driver_param = so->first_immediate = ctx->s->num_uniforms;
 
@@ -263,7 +255,7 @@ compile_init(struct ir3_compiler *compiler,
         *    num_uniform * vec4  -  user consts
         *    4 * vec4            -  UBO addresses
         *    if (vertex shader) {
-        *        1 * vec4        -  driver params (IR3_DP_*)
+        *        N * vec4        -  driver params (IR3_DP_*)
         *        1 * vec4        -  stream-out addresses
         *    }
         *
@@ -275,8 +267,8 @@ compile_init(struct ir3_compiler *compiler,
        so->first_immediate += 4;
 
        if (so->type == SHADER_VERTEX) {
-               /* one (vec4) slot for driver params (see ir3_driver_param): */
-               so->first_immediate++;
+               /* driver params (see ir3_driver_param): */
+               so->first_immediate += IR3_DP_COUNT/4;  /* convert to vec4 */
                /* one (vec4) slot for stream-output base addresses: */
                so->first_immediate++;
        }
@@ -828,7 +820,9 @@ static struct ir3_instruction *
 create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp)
 {
        /* first four vec4 sysval's reserved for UBOs: */
-       unsigned r = regid(ctx->so->first_driver_param + 4, dp);
+       /* NOTE: dp is a scalar index, but there can be >4 dp components: */
+       unsigned n = ctx->so->first_driver_param + IR3_DRIVER_PARAM_OFF;
+       unsigned r = regid(n + dp / 4, dp % 4);
        return create_uniform(ctx, r);
 }
 
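With driver params now spanning several vec4s (IR3_DP_COUNT scalars), create_driver_param() splits the scalar index into a vec4 slot and a component. A worked example, taking IR3_DP_UCP0_X == 4 as an assumed enum value:

    /* clip plane 1, .z component: dp = 4 + 1*4 + 2 = 10 */
    unsigned dp = IR3_DP_UCP0_X + (1 * 4) + 2;
    unsigned n  = ctx->so->first_driver_param + IR3_DRIVER_PARAM_OFF;
    unsigned r  = regid(n + dp / 4, dp % 4);   /* const vec4 n+2, component .z */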
@@ -1199,7 +1193,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
        struct ir3_block *b = ctx->block;
        struct ir3_instruction *addr, *src0, *src1;
        /* UBO addresses are the first driver params: */
-       unsigned ubo = regid(ctx->so->first_driver_param, 0);
+       unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
        unsigned off = intr->const_index[0];
 
        /* First src is ubo index, which could either be an immed or not: */
@@ -1349,17 +1343,18 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
        }
 }
 
-static void add_sysval_input(struct ir3_compile *ctx, unsigned name,
+static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
                struct ir3_instruction *instr)
 {
        struct ir3_shader_variant *so = ctx->so;
        unsigned r = regid(so->inputs_count, 0);
        unsigned n = so->inputs_count++;
 
-       so->inputs[n].semantic = ir3_semantic_name(name, 0);
+       so->inputs[n].sysval = true;
+       so->inputs[n].slot = slot;
        so->inputs[n].compmask = 1;
        so->inputs[n].regid = r;
-       so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
+       so->inputs[n].interpolate = INTERP_QUALIFIER_FLAT;
        so->total_in++;
 
        ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1);
@@ -1437,7 +1432,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
        case nir_intrinsic_load_base_vertex:
                if (!ctx->basevertex) {
                        ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE);
-                       add_sysval_input(ctx, TGSI_SEMANTIC_BASEVERTEX,
+                       add_sysval_input(ctx, SYSTEM_VALUE_BASE_VERTEX,
                                        ctx->basevertex);
                }
                dst[0] = ctx->basevertex;
@@ -1445,7 +1440,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
        case nir_intrinsic_load_vertex_id_zero_base:
                if (!ctx->vertex_id) {
                        ctx->vertex_id = create_input(ctx->block, 0);
-                       add_sysval_input(ctx, TGSI_SEMANTIC_VERTEXID_NOBASE,
+                       add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
                                        ctx->vertex_id);
                }
                dst[0] = ctx->vertex_id;
@@ -1453,11 +1448,17 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
        case nir_intrinsic_load_instance_id:
                if (!ctx->instance_id) {
                        ctx->instance_id = create_input(ctx->block, 0);
-                       add_sysval_input(ctx, TGSI_SEMANTIC_INSTANCEID,
+                       add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID,
                                        ctx->instance_id);
                }
                dst[0] = ctx->instance_id;
                break;
+       case nir_intrinsic_load_user_clip_plane:
+               for (int i = 0; i < intr->num_components; i++) {
+                       unsigned n = idx * 4 + i;
+                       dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
+               }
+               break;
        case nir_intrinsic_discard_if:
        case nir_intrinsic_discard: {
                struct ir3_instruction *cond, *kill;
@@ -1623,6 +1624,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
        case nir_texop_lod:
        case nir_texop_tg4:
        case nir_texop_query_levels:
+       case nir_texop_texture_samples:
                compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op);
                return;
        }
@@ -2020,7 +2022,7 @@ emit_stream_out(struct ir3_compile *ctx)
         * of the shader:
         */
        vtxcnt = create_input(ctx->in_block, 0);
-       add_sysval_input(ctx, IR3_SEMANTIC_VTXCNT, vtxcnt);
+       add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_CNT, vtxcnt);
 
        maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX);
 
@@ -2064,7 +2066,7 @@ emit_stream_out(struct ir3_compile *ctx)
                unsigned stride = strmout->stride[i];
                struct ir3_instruction *base, *off;
 
-               base = create_uniform(ctx, regid(v->first_driver_param + 5, i));
+               base = create_uniform(ctx, regid(v->first_driver_param + IR3_TFBOS_OFF, i));
 
                /* 24-bit should be enough: */
                off = ir3_MUL_U(ctx->block, vtxcnt, 0,
@@ -2132,74 +2134,56 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
        struct ir3_shader_variant *so = ctx->so;
        unsigned array_len = MAX2(glsl_get_length(in->type), 1);
        unsigned ncomp = glsl_get_components(in->type);
-       /* XXX: map loc slots to semantics */
-       unsigned semantic_name = in->data.location;
-       unsigned semantic_index = in->data.index;
        unsigned n = in->data.driver_location;
+       unsigned slot = in->data.location;
 
-       DBG("; in: %u:%u, len=%ux%u, loc=%u",
-                       semantic_name, semantic_index, array_len,
-                       ncomp, n);
+       DBG("; in: slot=%u, len=%ux%u, drvloc=%u",
+                       slot, array_len, ncomp, n);
 
-       so->inputs[n].semantic =
-                       ir3_semantic_name(semantic_name, semantic_index);
+       so->inputs[n].slot = slot;
        so->inputs[n].compmask = (1 << ncomp) - 1;
        so->inputs[n].inloc = ctx->next_inloc;
-       so->inputs[n].interpolate = 0;
+       so->inputs[n].interpolate = INTERP_QUALIFIER_NONE;
        so->inputs_count = MAX2(so->inputs_count, n + 1);
+       so->inputs[n].interpolate = in->data.interpolation;
 
-       /* the fdN_program_emit() code expects tgsi consts here, so map
-        * things back to tgsi for now:
-        */
-       switch (in->data.interpolation) {
-       case INTERP_QUALIFIER_FLAT:
-               so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
-               break;
-       case INTERP_QUALIFIER_NOPERSPECTIVE:
-               so->inputs[n].interpolate = TGSI_INTERPOLATE_LINEAR;
-               break;
-       case INTERP_QUALIFIER_SMOOTH:
-               so->inputs[n].interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
-               break;
-       }
-
-       for (int i = 0; i < ncomp; i++) {
-               struct ir3_instruction *instr = NULL;
-               unsigned idx = (n * 4) + i;
+       if (ctx->so->type == SHADER_FRAGMENT) {
+               for (int i = 0; i < ncomp; i++) {
+                       struct ir3_instruction *instr = NULL;
+                       unsigned idx = (n * 4) + i;
 
-               if (ctx->so->type == SHADER_FRAGMENT) {
-                       if (semantic_name == TGSI_SEMANTIC_POSITION) {
+                       if (slot == VARYING_SLOT_POS) {
                                so->inputs[n].bary = false;
                                so->frag_coord = true;
                                instr = create_frag_coord(ctx, i);
-                       } else if (semantic_name == TGSI_SEMANTIC_FACE) {
+                       } else if (slot == VARYING_SLOT_FACE) {
                                so->inputs[n].bary = false;
                                so->frag_face = true;
                                instr = create_frag_face(ctx, i);
                        } else {
                                bool use_ldlv = false;
 
-                               /* with NIR, we need to infer TGSI_INTERPOLATE_COLOR
-                                * from the semantic name:
+                               /* detect the special case for front/back colors where
+                                * we need to do flat vs smooth shading depending on
+                                * rast state:
                                 */
-                               if ((in->data.interpolation == INTERP_QUALIFIER_NONE) &&
-                                               ((semantic_name == TGSI_SEMANTIC_COLOR) ||
-                                                       (semantic_name == TGSI_SEMANTIC_BCOLOR)))
-                                       so->inputs[n].interpolate = TGSI_INTERPOLATE_COLOR;
+                               if (in->data.interpolation == INTERP_QUALIFIER_NONE) {
+                                       switch (slot) {
+                                       case VARYING_SLOT_COL0:
+                                       case VARYING_SLOT_COL1:
+                                       case VARYING_SLOT_BFC0:
+                                       case VARYING_SLOT_BFC1:
+                                               so->inputs[n].rasterflat = true;
+                                               break;
+                                       default:
+                                               break;
+                                       }
+                               }
 
                                if (ctx->flat_bypass) {
-                                       /* with NIR, we need to infer TGSI_INTERPOLATE_COLOR
-                                        * from the semantic name:
-                                        */
-                                       switch (so->inputs[n].interpolate) {
-                                       case TGSI_INTERPOLATE_COLOR:
-                                               if (!ctx->so->key.rasterflat)
-                                                       break;
-                                               /* fallthrough */
-                                       case TGSI_INTERPOLATE_CONSTANT:
+                                       if ((so->inputs[n].interpolate == INTERP_QUALIFIER_FLAT) ||
+                                                       (so->inputs[n].rasterflat && ctx->so->key.rasterflat))
                                                use_ldlv = true;
-                                               break;
-                                       }
                                }
 
                                so->inputs[n].bary = true;
@@ -2207,11 +2191,16 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
                                instr = create_frag_input(ctx,
                                                so->inputs[n].inloc + i - 8, use_ldlv);
                        }
-               } else {
-                       instr = create_input(ctx->block, idx);
-               }
 
-               ctx->ir->inputs[idx] = instr;
+                       ctx->ir->inputs[idx] = instr;
+               }
+       } else if (ctx->so->type == SHADER_VERTEX) {
+               for (int i = 0; i < ncomp; i++) {
+                       unsigned idx = (n * 4) + i;
+                       ctx->ir->inputs[idx] = create_input(ctx->block, idx);
+               }
+       } else {
+               compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
        }
 
        if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) {
@@ -2226,56 +2215,59 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
        struct ir3_shader_variant *so = ctx->so;
        unsigned array_len = MAX2(glsl_get_length(out->type), 1);
        unsigned ncomp = glsl_get_components(out->type);
-       /* XXX: map loc slots to semantics */
-       unsigned semantic_name = out->data.location;
-       unsigned semantic_index = out->data.index;
        unsigned n = out->data.driver_location;
+       unsigned slot = out->data.location;
        unsigned comp = 0;
 
-       DBG("; out: %u:%u, len=%ux%u, loc=%u",
-                       semantic_name, semantic_index, array_len,
-                       ncomp, n);
+       DBG("; out: slot=%u, len=%ux%u, drvloc=%u",
+                       slot, array_len, ncomp, n);
 
-       if (ctx->so->type == SHADER_VERTEX) {
-               switch (semantic_name) {
-               case TGSI_SEMANTIC_POSITION:
+       if (ctx->so->type == SHADER_FRAGMENT) {
+               switch (slot) {
+               case FRAG_RESULT_DEPTH:
+                       comp = 2;  /* tgsi will write to .z component */
                        so->writes_pos = true;
                        break;
-               case TGSI_SEMANTIC_PSIZE:
-                       so->writes_psize = true;
-                       break;
-               case TGSI_SEMANTIC_COLOR:
-               case TGSI_SEMANTIC_BCOLOR:
-               case TGSI_SEMANTIC_GENERIC:
-               case TGSI_SEMANTIC_FOG:
-               case TGSI_SEMANTIC_TEXCOORD:
+               case FRAG_RESULT_COLOR:
+                       so->color0_mrt = 1;
                        break;
                default:
-                       compile_error(ctx, "unknown VS semantic name: %s\n",
-                                       tgsi_semantic_names[semantic_name]);
+                       if (slot >= FRAG_RESULT_DATA0)
+                               break;
+                       compile_error(ctx, "unknown FS output name: %s\n",
+                                       gl_frag_result_name(slot));
                }
-       } else {
-               switch (semantic_name) {
-               case TGSI_SEMANTIC_POSITION:
-                       comp = 2;  /* tgsi will write to .z component */
+       } else if (ctx->so->type == SHADER_VERTEX) {
+               switch (slot) {
+               case VARYING_SLOT_POS:
                        so->writes_pos = true;
                        break;
-               case TGSI_SEMANTIC_COLOR:
-                       if (semantic_index == -1) {
-                               semantic_index = 0;
-                               so->color0_mrt = 1;
-                       }
+               case VARYING_SLOT_PSIZ:
+                       so->writes_psize = true;
+                       break;
+               case VARYING_SLOT_COL0:
+               case VARYING_SLOT_COL1:
+               case VARYING_SLOT_BFC0:
+               case VARYING_SLOT_BFC1:
+               case VARYING_SLOT_FOGC:
+               case VARYING_SLOT_CLIP_DIST0:
+               case VARYING_SLOT_CLIP_DIST1:
                        break;
                default:
-                       compile_error(ctx, "unknown FS semantic name: %s\n",
-                                       tgsi_semantic_names[semantic_name]);
+                       if (slot >= VARYING_SLOT_VAR0)
+                               break;
+                       if ((VARYING_SLOT_TEX0 <= slot) && (slot <= VARYING_SLOT_TEX7))
+                               break;
+                       compile_error(ctx, "unknown VS output name: %s\n",
+                                       gl_varying_slot_name(slot));
                }
+       } else {
+               compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
        }
 
        compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
 
-       so->outputs[n].semantic =
-                       ir3_semantic_name(semantic_name, semantic_index);
+       so->outputs[n].slot = slot;
        so->outputs[n].regid = regid(n, comp);
        so->outputs_count = MAX2(so->outputs_count, n + 1);
 
@@ -2468,12 +2460,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
        /* at this point, for binning pass, throw away unneeded outputs: */
        if (so->key.binning_pass) {
                for (i = 0, j = 0; i < so->outputs_count; i++) {
-                       unsigned name = sem2name(so->outputs[i].semantic);
-                       unsigned idx = sem2idx(so->outputs[i].semantic);
+                       unsigned slot = so->outputs[i].slot;
 
                        /* throw away everything but first position/psize */
-                       if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) ||
-                                       (name == TGSI_SEMANTIC_PSIZE))) {
+                       if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) {
                                if (i != j) {
                                        so->outputs[j] = so->outputs[i];
                                        ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0];
@@ -2572,7 +2562,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
                 * but what we give the hw is the scalar register:
                 */
                if ((so->type == SHADER_FRAGMENT) &&
-                       (sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION))
+                       (so->outputs[i].slot == FRAG_RESULT_DEPTH))
                        so->outputs[i].regid += 2;
        }
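
The binning-pass trim above is a standard in-place compaction: entries that survive the filter slide down to index j, and everything past j is dropped. A minimal sketch of the idiom with toy stand-in types (the real loop also moves the matching ir->outputs registers in the same step):

   #include <stdio.h>

   /* toy stand-ins; the real slot enums live in glsl/shader_enums.h */
   enum { SLOT_POS = 0, SLOT_PSIZ = 1, SLOT_COL0 = 2, SLOT_VAR0 = 31 };
   struct output { unsigned slot, regid; };

   int main(void)
   {
      struct output outs[4] = {
         { SLOT_COL0, 0 }, { SLOT_POS, 4 }, { SLOT_VAR0, 8 }, { SLOT_PSIZ, 12 }
      };
      unsigned count = 4, j = 0;

      /* keep only position/psize, sliding survivors down, as in the
       * binning-pass loop in ir3_compile_shader_nir */
      for (unsigned i = 0; i < count; i++) {
         if (outs[i].slot == SLOT_POS || outs[i].slot == SLOT_PSIZ) {
            if (i != j)
               outs[j] = outs[i];
            j++;
         }
      }
      count = j;

      for (unsigned i = 0; i < count; i++)
         printf("out[%u]: slot=%u regid=%u\n", i, outs[i].slot, outs[i].regid);
      return 0;
   }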
 
index d57eb2b..4ec0e2b 100644 (file)
@@ -294,8 +294,7 @@ lower_if_else_block(nir_block *block, void *void_state)
                sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
 
                nir_ssa_def_rewrite_uses(&phi->dest.ssa,
-                               nir_src_for_ssa(&sel->dest.dest.ssa),
-                               state->mem_ctx);
+                               nir_src_for_ssa(&sel->dest.dest.ssa));
 
                nir_instr_insert_before(&phi->instr, &sel->instr);
                nir_instr_remove(&phi->instr);
index 312174c..7b56533 100644 (file)
@@ -300,11 +300,11 @@ static void dump_reg(const char *name, uint32_t r)
                debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
 }
 
-static void dump_semantic(struct ir3_shader_variant *so,
-               unsigned sem, const char *name)
+static void dump_output(struct ir3_shader_variant *so,
+               unsigned slot, const char *name)
 {
        uint32_t regid;
-       regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0));
+       regid = ir3_find_output_regid(so, slot);
        dump_reg(name, regid);
 }
 
@@ -355,27 +355,51 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
 
        disasm_a3xx(bin, so->info.sizedwords, 0, so->type);
 
-       debug_printf("; %s: outputs:", type);
-       for (i = 0; i < so->outputs_count; i++) {
-               uint8_t regid = so->outputs[i].regid;
-               ir3_semantic sem = so->outputs[i].semantic;
-               debug_printf(" r%d.%c (%u:%u)",
-                               (regid >> 2), "xyzw"[regid & 0x3],
-                               sem2name(sem), sem2idx(sem));
-       }
-       debug_printf("\n");
-       debug_printf("; %s: inputs:", type);
-       for (i = 0; i < so->inputs_count; i++) {
-               uint8_t regid = so->inputs[i].regid;
-               ir3_semantic sem = so->inputs[i].semantic;
-               debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)",
-                               (regid >> 2), "xyzw"[regid & 0x3],
-                               sem2name(sem), sem2idx(sem),
-                               so->inputs[i].compmask,
-                               so->inputs[i].inloc,
-                               so->inputs[i].bary);
+       switch (so->type) {
+       case SHADER_VERTEX:
+               debug_printf("; %s: outputs:", type);
+               for (i = 0; i < so->outputs_count; i++) {
+                       uint8_t regid = so->outputs[i].regid;
+                       debug_printf(" r%d.%c (%s)",
+                                       (regid >> 2), "xyzw"[regid & 0x3],
+                                       gl_varying_slot_name(so->outputs[i].slot));
+               }
+               debug_printf("\n");
+               debug_printf("; %s: inputs:", type);
+               for (i = 0; i < so->inputs_count; i++) {
+                       uint8_t regid = so->inputs[i].regid;
+                       debug_printf(" r%d.%c (cm=%x,il=%u,b=%u)",
+                                       (regid >> 2), "xyzw"[regid & 0x3],
+                                       so->inputs[i].compmask,
+                                       so->inputs[i].inloc,
+                                       so->inputs[i].bary);
+               }
+               debug_printf("\n");
+               break;
+       case SHADER_FRAGMENT:
+               debug_printf("; %s: outputs:", type);
+               for (i = 0; i < so->outputs_count; i++) {
+                       uint8_t regid = so->outputs[i].regid;
+                       debug_printf(" r%d.%c (%s)",
+                                       (regid >> 2), "xyzw"[regid & 0x3],
+                                       gl_frag_result_name(so->outputs[i].slot));
+               }
+               debug_printf("\n");
+               debug_printf("; %s: inputs:", type);
+               for (i = 0; i < so->inputs_count; i++) {
+                       uint8_t regid = so->inputs[i].regid;
+                       debug_printf(" r%d.%c (%s,cm=%x,il=%u,b=%u)",
+                                       (regid >> 2), "xyzw"[regid & 0x3],
+                                       gl_varying_slot_name(so->inputs[i].slot),
+                                       so->inputs[i].compmask,
+                                       so->inputs[i].inloc,
+                                       so->inputs[i].bary);
+               }
+               debug_printf("\n");
+               break;
+       case SHADER_COMPUTE:
+               break;
        }
-       debug_printf("\n");
 
        /* print generic shader info: */
        debug_printf("; %s prog %d/%d: %u instructions, %d half, %d full\n",
@@ -391,13 +415,24 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
        /* print shader type specific info: */
        switch (so->type) {
        case SHADER_VERTEX:
-               dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos");
-               dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize");
+               dump_output(so, VARYING_SLOT_POS, "pos");
+               dump_output(so, VARYING_SLOT_PSIZ, "psize");
                break;
        case SHADER_FRAGMENT:
                dump_reg("pos (bary)", so->pos_regid);
-               dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz");
-               dump_semantic(so, TGSI_SEMANTIC_COLOR, "color");
+               dump_output(so, FRAG_RESULT_DEPTH, "posz");
+               if (so->color0_mrt) {
+                       dump_output(so, FRAG_RESULT_COLOR, "color");
+               } else {
+                       dump_output(so, FRAG_RESULT_DATA0, "data0");
+                       dump_output(so, FRAG_RESULT_DATA1, "data1");
+                       dump_output(so, FRAG_RESULT_DATA2, "data2");
+                       dump_output(so, FRAG_RESULT_DATA3, "data3");
+                       dump_output(so, FRAG_RESULT_DATA4, "data4");
+                       dump_output(so, FRAG_RESULT_DATA5, "data5");
+                       dump_output(so, FRAG_RESULT_DATA6, "data6");
+                       dump_output(so, FRAG_RESULT_DATA7, "data7");
+               }
                /* these two are hard-coded since we don't know how to
                 * program them to anything but all 0's...
                 */
@@ -466,7 +501,7 @@ static void
 emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
                struct fd_constbuf_stateobj *constbuf)
 {
-       uint32_t offset = v->first_driver_param;  /* UBOs after user consts */
+       uint32_t offset = v->first_driver_param + IR3_UBOS_OFF;
        if (v->constlen > offset) {
                struct fd_context *ctx = fd_context(v->shader->pctx);
                uint32_t params = MIN2(4, v->constlen - offset) * 4;
@@ -519,7 +554,8 @@ emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
 static void
 emit_tfbos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
 {
-       uint32_t offset = v->first_driver_param + 5;  /* streamout addresses after driver-params*/
+       /* streamout addresses after driver-params: */
+       uint32_t offset = v->first_driver_param + IR3_TFBOS_OFF;
        if (v->constlen > offset) {
                struct fd_context *ctx = fd_context(v->shader->pctx);
                struct fd_streamout_stateobj *so = &ctx->streamout;
@@ -622,17 +658,33 @@ ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
        /* emit driver params every time: */
        /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
        if (info && (v->type == SHADER_VERTEX)) {
-               uint32_t offset = v->first_driver_param + 4;  /* driver params after UBOs */
+               uint32_t offset = v->first_driver_param + IR3_DRIVER_PARAM_OFF;
                if (v->constlen >= offset) {
-                       uint32_t vertex_params[4] = {
+                       uint32_t vertex_params[IR3_DP_COUNT] = {
                                [IR3_DP_VTXID_BASE] = info->indexed ?
                                                info->index_bias : info->start,
                                [IR3_DP_VTXCNT_MAX] = max_tf_vtx(v),
                        };
+                       /* if no user-clip-planes, we don't need to emit the
+                        * entire thing:
+                        */
+                       uint32_t vertex_params_size = 4;
+
+                       if (v->key.ucp_enables) {
+                               struct pipe_clip_state *ucp = &ctx->ucp;
+                               unsigned pos = IR3_DP_UCP0_X;
+                               for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
+                                       for (unsigned j = 0; j < 4; j++) {
+                                               vertex_params[pos] = fui(ucp->ucp[i][j]);
+                                               pos++;
+                                       }
+                               }
+                               vertex_params_size = ARRAY_SIZE(vertex_params);
+                       }
 
                        fd_wfi(ctx, ring);
                        ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
-                                       ARRAY_SIZE(vertex_params), vertex_params, NULL);
+                                       vertex_params_size, vertex_params, NULL);
 
                        /* if needed, emit stream-out buffer addresses: */
                        if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
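
The user-clip-plane upload above packs up to eight vec4 planes into the driver-param slots starting at IR3_DP_UCP0_X, reinterpreting each float as its raw bit pattern. A self-contained sketch of that packing, where fui() is a stand-in for the util/u_math.h helper:

   #include <stdint.h>
   #include <stdio.h>
   #include <string.h>

   /* matches the IR3_DP_* layout in the header hunk below: UCPs occupy
    * scalar slots 4..35 of the driver params, i.e. 8 vec4s */
   #define IR3_DP_UCP0_X 4
   #define IR3_DP_UCP7_W 35
   #define IR3_DP_COUNT  36

   /* reinterpret a float's bits as a uint32, like fui() in u_math.h */
   static uint32_t fui(float f) { uint32_t u; memcpy(&u, &f, 4); return u; }

   int main(void)
   {
      float ucp[8][4] = { { 0.0f, 1.0f, 0.0f, -0.5f } };  /* one plane, say */
      uint32_t vertex_params[IR3_DP_COUNT] = { 0 };

      unsigned pos = IR3_DP_UCP0_X;
      for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++)
         for (unsigned j = 0; j < 4; j++)
            vertex_params[pos++] = fui(ucp[i][j]);

      printf("plane0.w bits: 0x%08x\n", vertex_params[7]);  /* 0xbf000000 */
      return 0;
   }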
index 1bbbdbd..6dc0ce1 100644 (file)
@@ -30,6 +30,7 @@
 #define IR3_SHADER_H_
 
 #include "pipe/p_state.h"
+#include "glsl/shader_enums.h"
 
 #include "ir3.h"
 #include "disasm.h"
 enum ir3_driver_param {
        IR3_DP_VTXID_BASE = 0,
        IR3_DP_VTXCNT_MAX = 1,
+       /* user-clip-plane components, up to 8x vec4's: */
+       IR3_DP_UCP0_X     = 4,
+       /* .... */
+       IR3_DP_UCP7_W     = 35,
+       IR3_DP_COUNT      = 36   /* must be aligned to vec4 */
 };
 
-/* internal semantic used for passing vtxcnt to vertex shader to
- * implement transform feedback:
+/* Layout of constant registers:
+ *
+ *    num_uniform * vec4  -  user consts
+ *    4 * vec4            -  UBO addresses
+ *    if (vertex shader) {
+ *        N * vec4        -  driver params (IR3_DP_*)
+ *        1 * vec4        -  stream-out addresses
+ *    }
+ *
+ * TODO this could be made more dynamic, to at least skip sections
+ * that we don't need..
  */
-#define IR3_SEMANTIC_VTXCNT (TGSI_SEMANTIC_COUNT + 0)
-
-typedef uint16_t ir3_semantic;  /* semantic name + index */
-static inline ir3_semantic
-ir3_semantic_name(uint8_t name, uint16_t index)
-{
-       return (name << 8) | (index & 0xff);
-}
-
-static inline uint8_t sem2name(ir3_semantic sem)
-{
-       return sem >> 8;
-}
-
-static inline uint16_t sem2idx(ir3_semantic sem)
-{
-       return sem & 0xff;
-}
+#define IR3_UBOS_OFF         0  /* UBOs after user consts */
+#define IR3_DRIVER_PARAM_OFF 4  /* driver params after UBOs */
+#define IR3_TFBOS_OFF       (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4)
 
 /* Configuration key used to identify a shader variant.. different
  * shader variants can be used to implement features not supported
@@ -69,6 +69,11 @@ static inline uint16_t sem2idx(ir3_semantic sem)
 struct ir3_shader_key {
        union {
                struct {
+                       /*
+                        * Combined Vertex/Fragment shader parameters:
+                        */
+                       unsigned ucp_enables : 8;
+
                        /* do we need to check {v,f}saturate_{s,t,r}? */
                        unsigned has_per_samp : 1;
 
@@ -82,8 +87,8 @@ struct ir3_shader_key {
                         */
                        unsigned color_two_side : 1;
                        unsigned half_precision : 1;
-                       /* used when shader needs to handle flat varyings (a4xx),
-                        * for TGSI_INTERPOLATE_COLOR:
+                       /* used when shader needs to handle flat varyings (a4xx)
+                        * for front/back color inputs to frag shader:
                         */
                        unsigned rasterflat : 1;
                };
@@ -147,10 +152,16 @@ struct ir3_shader_variant {
        uint8_t pos_regid;
        bool frag_coord, frag_face, color0_mrt;
 
+       /* NOTE: for input/outputs, slot is:
+        *   gl_vert_attrib  - for VS inputs
+        *   gl_varying_slot - for VS output / FS input
+        *   gl_frag_result  - for FS output
+        */
+
        /* varyings/outputs: */
        unsigned outputs_count;
        struct {
-               ir3_semantic semantic;
+               uint8_t slot;
                uint8_t regid;
        } outputs[16 + 2];  /* +POSITION +PSIZE */
        bool writes_pos, writes_psize;
@@ -158,7 +169,7 @@ struct ir3_shader_variant {
        /* vertices/inputs: */
        unsigned inputs_count;
        struct {
-               ir3_semantic semantic;
+               uint8_t slot;
                uint8_t regid;
                uint8_t compmask;
                uint8_t ncomp;
@@ -174,8 +185,12 @@ struct ir3_shader_variant {
                 * spots where inloc is used.
                 */
                uint8_t inloc;
-               uint8_t bary;
-               uint8_t interpolate;
+               /* vertex shader specific: */
+               bool    sysval     : 1;   /* slot is a gl_system_value */
+               /* fragment shader specific: */
+               bool    bary       : 1;   /* fetched varying (vs one loaded into reg) */
+               bool    rasterflat : 1;   /* special handling for emit->rasterflat */
+               enum glsl_interp_qualifier interpolate;
        } inputs[16 + 2];  /* +POSITION +FACE */
 
        unsigned total_in;       /* sum of inputs (scalar) */
@@ -254,12 +269,12 @@ ir3_shader_stage(struct ir3_shader *shader)
 #include "pipe/p_shader_tokens.h"
 
 static inline int
-ir3_find_output(const struct ir3_shader_variant *so, ir3_semantic semantic)
+ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot)
 {
        int j;
 
        for (j = 0; j < so->outputs_count; j++)
-               if (so->outputs[j].semantic == semantic)
+               if (so->outputs[j].slot == slot)
                        return j;
 
        /* it seems optional to have an OUT.BCOLOR[n] for each OUT.COLOR[n]
@@ -269,18 +284,20 @@ ir3_find_output(const struct ir3_shader_variant *so, ir3_semantic semantic)
         * OUT.COLOR[n] to IN.BCOLOR[n].  And vice versa if there is only
         * an OUT.BCOLOR[n] but no matching OUT.COLOR[n]
         */
-       if (sem2name(semantic) == TGSI_SEMANTIC_BCOLOR) {
-               unsigned idx = sem2idx(semantic);
-               semantic = ir3_semantic_name(TGSI_SEMANTIC_COLOR, idx);
-       } else if (sem2name(semantic) == TGSI_SEMANTIC_COLOR) {
-               unsigned idx = sem2idx(semantic);
-               semantic = ir3_semantic_name(TGSI_SEMANTIC_BCOLOR, idx);
+       if (slot == VARYING_SLOT_BFC0) {
+               slot = VARYING_SLOT_COL0;
+       } else if (slot == VARYING_SLOT_BFC1) {
+               slot = VARYING_SLOT_COL1;
+       } else if (slot == VARYING_SLOT_COL0) {
+               slot = VARYING_SLOT_BFC0;
+       } else if (slot == VARYING_SLOT_COL1) {
+               slot = VARYING_SLOT_BFC1;
        } else {
                return 0;
        }
 
        for (j = 0; j < so->outputs_count; j++)
-               if (so->outputs[j].semantic == semantic)
+               if (so->outputs[j].slot == slot)
                        return j;
 
        debug_assert(0);
@@ -298,11 +315,11 @@ ir3_next_varying(const struct ir3_shader_variant *so, int i)
 }
 
 static inline uint32_t
-ir3_find_output_regid(const struct ir3_shader_variant *so, ir3_semantic semantic)
+ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot)
 {
        int j;
        for (j = 0; j < so->outputs_count; j++)
-               if (so->outputs[j].semantic == semantic)
+               if (so->outputs[j].slot == slot)
                        return so->outputs[j].regid;
        return regid(63, 0);
 }
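
The IR3_*_OFF defines above turn the layout comment into vec4 offsets relative to first_driver_param: the UBO pointers come first, then IR3_DP_COUNT/4 = 9 vec4s of driver params, then the stream-out addresses at offset 13. A quick check, using an illustrative shader with 8 user-const vec4s:

   #include <stdio.h>

   #define IR3_DP_COUNT         36  /* must stay vec4-aligned */
   #define IR3_UBOS_OFF          0  /* UBO addresses after user consts */
   #define IR3_DRIVER_PARAM_OFF  4  /* driver params after the UBO vec4s */
   #define IR3_TFBOS_OFF        (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT / 4)

   int main(void)
   {
      unsigned first_driver_param = 8;  /* hypothetical: 8 user-const vec4s */
      printf("UBO addrs     at vec4 %u\n", first_driver_param + IR3_UBOS_OFF);
      printf("driver params at vec4 %u\n", first_driver_param + IR3_DRIVER_PARAM_OFF);
      printf("TFB addrs     at vec4 %u\n", first_driver_param + IR3_TFBOS_OFF); /* 8+13 */
      return 0;
   }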
index 83bb649..a2b6d27 100644 (file)
@@ -69,7 +69,7 @@ static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_
            d1->Register.Negate == d2->Register.Negate);
 }
 
-const static struct {
+static const struct {
    boolean is_texture;
    boolean commutes;
    unsigned neutral_element;
index 19a94a8..51c64ed 100644 (file)
@@ -247,6 +247,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
 
    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
index ab4d137..9e37e24 100644 (file)
@@ -469,6 +469,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
index 14eeab0..697e3d9 100644 (file)
@@ -296,6 +296,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
    }
    /* should only get here on unhandled cases */
index ba1b085..f6e9308 100644 (file)
@@ -824,8 +824,8 @@ private:
 
 enum TexQuery
 {
-   TXQ_DIMS,
-   TXQ_TYPE,
+   TXQ_DIMS, /* x, y, z, levels */
+   TXQ_TYPE, /* ?, ?, samples, ? */
    TXQ_SAMPLE_POSITION,
    TXQ_FILTER,
    TXQ_LOD,
index 67ea6df..9014766 100644 (file)
@@ -884,7 +884,7 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
    defId(i->def(0), 2);
    srcAddr8(i->src(0), 16);
 
-   if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
+   if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
       code[0] |= 1 << 8;
    } else {
       if (i->op == OP_PINTERP) {
@@ -896,10 +896,11 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
    }
 
    if (i->encSize == 8) {
-      code[1] =
-         (code[0] & (3 << 24)) >> (24 - 16) |
-         (code[0] & (1 <<  8)) << (18 -  8);
-      code[0] &= ~0x03000100;
+      if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
+         code[1] = 4 << 16;
+      else
+         code[1] = (code[0] & (3 << 24)) >> (24 - 16);
+      code[0] &= ~0x03000000;
       code[0] |= 1;
       emitFlagsRd(i);
    }
index f153674..c8efaf5 100644 (file)
@@ -631,6 +631,7 @@ static nv50_ir::operation translateOpcode(uint opcode)
    NV50_IR_OPCODE_CASE(SAD, SAD);
    NV50_IR_OPCODE_CASE(TXF, TXF);
    NV50_IR_OPCODE_CASE(TXQ, TXQ);
+   NV50_IR_OPCODE_CASE(TXQS, TXQ);
    NV50_IR_OPCODE_CASE(TG4, TXG);
    NV50_IR_OPCODE_CASE(LODQ, TXLQ);
 
@@ -1324,7 +1325,7 @@ private:
    void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
    void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
    void handleTXF(Value *dst0[4], int R, int L_M);
-   void handleTXQ(Value *dst0[4], enum TexQuery);
+   void handleTXQ(Value *dst0[4], enum TexQuery, int R);
    void handleLIT(Value *dst0[4]);
    void handleUserClipPlanes();
 
@@ -1795,7 +1796,7 @@ Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
 }
 
 void
-Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
+Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)
 {
    TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
    tex->tex.query = query;
@@ -1807,9 +1808,12 @@ Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
       tex->tex.mask |= 1 << c;
       tex->setDef(d++, dst0[c]);
    }
-   tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+   if (query == TXQ_DIMS)
+      tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+   else
+      tex->setSrc((c = 0), zero);
 
-   setTexRS(tex, ++c, 1, -1);
+   setTexRS(tex, ++c, R, -1);
 
    bb->insertTail(tex);
 }
@@ -2764,7 +2768,15 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       break;
    case TGSI_OPCODE_TXQ:
    case TGSI_OPCODE_SVIEWINFO:
-      handleTXQ(dst0, TXQ_DIMS);
+      handleTXQ(dst0, TXQ_DIMS, 1);
+      break;
+   case TGSI_OPCODE_TXQS:
+      // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to
+      // be in .x
+      dst0[1] = dst0[2] = dst0[3] = NULL;
+      std::swap(dst0[0], dst0[2]);
+      handleTXQ(dst0, TXQ_TYPE, 0);
+      std::swap(dst0[0], dst0[2]);
       break;
    case TGSI_OPCODE_F2I:
    case TGSI_OPCODE_F2U:
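
The TXQS lowering above is a small routing trick: TXQ_TYPE reports the sample count in component 2, but TXQS is defined to return it in .x, so the destination pointers for lanes 0 and 2 are swapped around the query and swapped back. A toy demonstration of the same pattern:

   #include <stdio.h>

   /* stand-in for a query that, like TXQ_TYPE, writes samples to lane 2 */
   static void query_type(int *dst[4])
   {
      for (int c = 0; c < 4; c++)
         if (dst[c])
            *dst[c] = (c == 2) ? 4 : 0;  /* 4 samples, say */
   }

   int main(void)
   {
      int x = 0;
      int *dst[4] = { &x, NULL, NULL, NULL };  /* TXQS wants samples in .x */

      /* retarget lane 2 at the .x destination, run the query, restore */
      int *tmp = dst[0]; dst[0] = dst[2]; dst[2] = tmp;
      query_type(dst);
      tmp = dst[0]; dst[0] = dst[2]; dst[2] = tmp;

      printf("samples = %d\n", x);  /* 4 */
      return 0;
   }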
index bea293b..eec502b 100644 (file)
@@ -202,7 +202,10 @@ NV50LegalizePostRA::visit(Function *fn)
    Program *prog = fn->getProgram();
 
    r63 = new_LValue(fn, FILE_GPR);
-   r63->reg.data.id = 63;
+   if (prog->maxGPR < 63)
+      r63->reg.data.id = 63;
+   else
+      r63->reg.data.id = 127;
 
    // this is actually per-program, but we can do it all on visiting main()
    std::list<Instruction *> *outWrites =
@@ -614,6 +617,7 @@ private:
    bool handleTXL(TexInstruction *); // hate
    bool handleTXD(TexInstruction *); // these 3
    bool handleTXLQ(TexInstruction *);
+   bool handleTXQ(TexInstruction *);
 
    bool handleCALL(Instruction *);
    bool handlePRECONT(Instruction *);
@@ -972,6 +976,23 @@ NV50LoweringPreSSA::handleTXLQ(TexInstruction *i)
 }
 
 bool
+NV50LoweringPreSSA::handleTXQ(TexInstruction *i)
+{
+   Value *ms, *ms_x, *ms_y;
+   if (i->tex.query == TXQ_DIMS)
+      return true;
+   assert(i->tex.query == TXQ_TYPE);
+   assert(i->tex.mask == 4);
+
+   loadTexMsInfo(i->tex.r * 4 * 2, &ms, &ms_x, &ms_y);
+   bld.mkOp2(OP_SHL, TYPE_U32, i->getDef(0), bld.loadImm(NULL, 1), ms);
+   i->bb->remove(i);
+
+   return true;
+}
+
+bool
 NV50LoweringPreSSA::handleSET(Instruction *i)
 {
    if (i->dType == TYPE_F32) {
@@ -1330,6 +1351,8 @@ NV50LoweringPreSSA::visit(Instruction *i)
       return handleTXD(i->asTex());
    case OP_TXLQ:
       return handleTXLQ(i->asTex());
+   case OP_TXQ:
+      return handleTXQ(i->asTex());
    case OP_EX2:
       bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
       i->setSrc(0, i->getDef(0));
index b1f4065..b3fc73a 100644 (file)
@@ -962,11 +962,14 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd)
 bool
 NVC0LoweringPass::handleTXQ(TexInstruction *txq)
 {
+   const int chipset = prog->getTarget()->getChipset();
+   if (chipset >= NVISA_GK104_CHIPSET && txq->tex.rIndirectSrc < 0)
+      txq->tex.r += prog->driver->io.texBindBase / 4;
+
    if (txq->tex.rIndirectSrc < 0)
       return true;
 
    Value *ticRel = txq->getIndirectR();
-   const int chipset = prog->getTarget()->getChipset();
 
    txq->setIndirectS(NULL);
    txq->tex.sIndirectSrc = -1;
index b01ef41..44f74c6 100644 (file)
@@ -2602,6 +2602,10 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb)
              !isFloatType(i->dType))
             break;
 
+         if (i->getDef(0)->reg.data.id >= 64 ||
+             i->getSrc(0)->reg.data.id >= 64)
+            break;
+
          def = i->getSrc(1)->getInsn();
          if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
             vtmp = i->getSrc(1);
index 9ebdc65..5f30f3d 100644 (file)
@@ -411,7 +411,7 @@ int ImmediateValue::print(char *buf, size_t size, DataType ty) const
    case TYPE_U64:
    case TYPE_S64:
    default:
-      PRINT("0x%016"PRIx64, reg.data.u64);
+      PRINT("0x%016" PRIx64, reg.data.u64);
       break;
    }
    return pos;
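
The one-character fix above matters for C++11: without a space, "0x%016"PRIx64 lexes as a single string literal with a user-defined-literal suffix, so the PRIx64 macro never expands and the build breaks. With the space, the literal and the macro are separate tokens and ordinary string concatenation applies, in both C and C++:

   #include <inttypes.h>
   #include <stdio.h>

   int main(void)
   {
      uint64_t v = 0xdeadbeefcafeULL;
      /* the space keeps "0x%016" and PRIx64 as two preprocessing tokens */
      printf("0x%016" PRIx64 "\n", v);
      return 0;
   }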
index 0cd21cf..400b9f0 100644 (file)
@@ -25,6 +25,7 @@
 
 #include <stack>
 #include <limits>
+#include <tr1/unordered_map>
 
 namespace nv50_ir {
 
@@ -222,6 +223,7 @@ private:
    private:
       virtual bool visit(BasicBlock *);
       inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
+      inline void splitEdges(BasicBlock *b);
    };
 
    class ArgumentMovesPass : public Pass {
@@ -345,28 +347,55 @@ RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
    return (n == 2);
 }
 
-// For each operand of each PHI in b, generate a new value by inserting a MOV
-// at the end of the block it is coming from and replace the operand with its
-// result. This eliminates liveness conflicts and enables us to let values be
-// copied to the right register if such a conflict exists nonetheless.
+struct PhiMapHash {
+   size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const {
+      return std::tr1::hash<Instruction*>()(val.first) * 31 +
+         std::tr1::hash<BasicBlock*>()(val.second);
+   }
+};
+
+typedef std::tr1::unordered_map<
+   std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;
+
+// Critical edges need to be split up so that work can be inserted along
+// specific edge transitions. Unfortunately manipulating incident edges into a
+// BB invalidates all the PHI nodes since their sources are implicitly ordered
+// by incident edge order.
 //
-// These MOVs are also crucial in making sure the live intervals of phi srces
-// are extended until the end of the loop, since they are not included in the
-// live-in sets.
-bool
-RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+// TODO: Make it so that that is not the case, and PHI nodes store pointers to
+// the original BBs.
+void
+RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb)
 {
-   Instruction *phi, *mov;
    BasicBlock *pb, *pn;
-
+   Instruction *phi;
+   Graph::EdgeIterator ei;
    std::stack<BasicBlock *> stack;
+   int j = 0;
 
-   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next()) {
       pb = BasicBlock::get(ei.getNode());
       assert(pb);
       if (needNewElseBlock(bb, pb))
          stack.push(pb);
    }
+
+   // No critical edges were found, no need to perform any work.
+   if (stack.empty())
+      return;
+
+   // We're about to, potentially, reorder the inbound edges. This means that
+   // we need to hold on to the (phi, bb) -> src mapping, and fix up the phi
+   // nodes after the graph has been modified.
+   PhiMap phis;
+
+   j = 0;
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
+      pb = BasicBlock::get(ei.getNode());
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next)
+         phis.insert(std::make_pair(std::make_pair(phi, pb), phi->getSrc(j)));
+   }
+
    while (!stack.empty()) {
       pb = stack.top();
       pn = new BasicBlock(func);
@@ -379,12 +408,47 @@ RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
       assert(pb->getExit()->op != OP_CALL);
       if (pb->getExit()->asFlow()->target.bb == bb)
          pb->getExit()->asFlow()->target.bb = pn;
+
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+         PhiMap::iterator it = phis.find(std::make_pair(phi, pb));
+         assert(it != phis.end());
+         phis.insert(std::make_pair(std::make_pair(phi, pn), it->second));
+         phis.erase(it);
+      }
    }
 
+   // Now go through and fix up all of the phi node sources.
+   j = 0;
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
+      pb = BasicBlock::get(ei.getNode());
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+         PhiMap::const_iterator it = phis.find(std::make_pair(phi, pb));
+         assert(it != phis.end());
+
+         phi->setSrc(j, it->second);
+      }
+   }
+}
+
+// For each operand of each PHI in b, generate a new value by inserting a MOV
+// at the end of the block it is coming from and replace the operand with its
+// result. This eliminates liveness conflicts and enables us to let values be
+// copied to the right register if such a conflict exists nonetheless.
+//
+// These MOVs are also crucial in making sure the live intervals of phi srces
+// are extended until the end of the loop, since they are not included in the
+// live-in sets.
+bool
+RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+{
+   Instruction *phi, *mov;
+
+   splitEdges(bb);
+
    // insert MOVs (phi->src(j) should stem from j-th in-BB)
    int j = 0;
    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
-      pb = BasicBlock::get(ei.getNode());
+      BasicBlock *pb = BasicBlock::get(ei.getNode());
       if (!pb->isTerminated())
          pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
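
The pass above keys a std::tr1::unordered_map on (phi, predecessor-block) pairs so that phi sources survive the edge reordering: entries are recorded before the CFG is edited, re-keyed from the old predecessor to the new split block, then read back in incident-edge order. A C sketch of the same bookkeeping using a linear table and hypothetical toy types:

   #include <stdio.h>

   typedef struct { int id; } Phi;    /* stand-in for Instruction */
   typedef struct { int id; } Block;  /* stand-in for BasicBlock  */

   struct entry { Phi *phi; Block *pred; int src; };

   /* linear scan; the real pass hashes the pair instead */
   static int lookup(const struct entry *tab, int n, Phi *phi, Block *pred)
   {
      for (int i = 0; i < n; i++)
         if (tab[i].phi == phi && tab[i].pred == pred)
            return tab[i].src;
      return -1;
   }

   int main(void)
   {
      Phi phi = { 1 };
      Block pred = { 10 }, split = { 11 };
      struct entry tab[4];
      int n = 0;

      /* 1. record (phi, pred) -> src before touching the CFG */
      tab[n].phi = &phi; tab[n].pred = &pred; tab[n].src = 42; n++;

      /* 2. a split block is inserted on the critical edge: re-key the
       *    entry from the old predecessor to the new block */
      tab[0].pred = &split;

      /* 3. after edges are reordered, restore phi sources by lookup */
      printf("src via split block: %d\n", lookup(tab, n, &phi, &split));
      return 0;
   }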
 
index 67e181e..72e070b 100644 (file)
@@ -80,7 +80,12 @@ release_allocation(struct nouveau_mm_allocation **mm,
 inline void
 nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
 {
-   nouveau_bo_ref(NULL, &buf->bo);
+   if (buf->fence && buf->fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
+      nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
+      buf->bo = NULL;
+   } else {
+      nouveau_bo_ref(NULL, &buf->bo);
+   }
 
    if (buf->mm)
       release_allocation(&buf->mm, buf->fence);
@@ -206,8 +211,8 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
       nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                     tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
    else
-   if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
-      nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
+   if (nv->push_cb && can_cb)
+      nv->push_cb(nv, buf,
                   base, size / 4, (const uint32_t *)data);
    else
       nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);
@@ -281,7 +286,8 @@ nouveau_buffer_transfer_del(struct nouveau_context *nv,
 {
    if (tx->map) {
       if (likely(tx->bo)) {
-         nouveau_bo_ref(NULL, &tx->bo);
+         nouveau_fence_work(nv->screen->fence.current,
+                            nouveau_fence_unref_bo, tx->bo);
          if (tx->mm)
             release_allocation(&tx->mm, nv->screen->fence.current);
       } else {
@@ -532,8 +538,13 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
    struct nv04_resource *buf = nv04_resource(transfer->resource);
 
    if (tx->base.usage & PIPE_TRANSFER_WRITE) {
-      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map)
-         nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+         if (tx->map)
+            nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+
+         util_range_add(&buf->valid_buffer_range,
+                        tx->base.box.x, tx->base.box.x + tx->base.box.width);
+      }
 
       if (likely(buf->domain)) {
          const uint8_t bind = buf->base.bind;
@@ -541,9 +552,6 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
          if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
             nv->vbo_dirty = true;
       }
-
-      util_range_add(&buf->valid_buffer_range,
-                     tx->base.box.x, tx->base.box.x + tx->base.box.width);
    }
 
    if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
@@ -780,7 +788,7 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
       nv->copy_data(nv, buf->bo, buf->offset, new_domain,
                     bo, offset, old_domain, buf->base.width0);
 
-      nouveau_bo_ref(NULL, &bo);
+      nouveau_fence_work(screen->fence.current, nouveau_fence_unref_bo, bo);
       if (mm)
          release_allocation(&mm, screen->fence.current);
    } else
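
Several hunks in this file replace an immediate nouveau_bo_ref(NULL, &bo) with nouveau_fence_work(fence, nouveau_fence_unref_bo, bo), deferring the release until the GPU has signalled the fence that last used the buffer. A self-contained toy model of the deferred-release pattern (the real fence and BO types differ):

   #include <stdio.h>
   #include <stdlib.h>

   enum { FENCE_PENDING, FENCE_FLUSHED, FENCE_SIGNALLED };

   struct bo    { int refcount; };
   struct work  { void (*fn)(void *); void *data; struct work *next; };
   struct fence { int state; struct work *work; };

   static void bo_unref(void *data)
   {
      struct bo *bo = data;
      if (--bo->refcount == 0) {
         free(bo);
         puts("bo freed");
      }
   }

   /* mirrors the patch: if the GPU may still use the BO, park the unref
    * on the fence; otherwise drop the reference immediately */
   static void release_gpu_storage(struct fence *f, struct bo **pbo)
   {
      if (f && f->state < FENCE_FLUSHED) {
         struct work *w = malloc(sizeof(*w));
         w->fn = bo_unref;
         w->data = *pbo;
         w->next = f->work;
         f->work = w;
      } else {
         bo_unref(*pbo);
      }
      *pbo = NULL;
   }

   static void fence_signal(struct fence *f)
   {
      f->state = FENCE_SIGNALLED;
      while (f->work) {
         struct work *w = f->work;
         f->work = w->next;
         w->fn(w->data);
         free(w);
      }
   }

   int main(void)
   {
      struct bo *bo = malloc(sizeof(*bo));
      struct fence f = { FENCE_PENDING, NULL };
      bo->refcount = 1;

      release_gpu_storage(&f, &bo);  /* deferred: fence not flushed yet */
      fence_signal(&f);              /* callback runs, BO freed here    */
      return 0;
   }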
index 7e6a6cc..d45bf7a 100644 (file)
@@ -41,6 +41,8 @@ struct nv04_resource {
    uint8_t status;
    uint8_t domain;
 
+   uint16_t cb_bindings[6]; /* per-shader per-slot bindings */
+
    struct nouveau_fence *fence;
    struct nouveau_fence *fence_wr;
 
index 24deb7e..decb271 100644 (file)
@@ -6,6 +6,8 @@
 
 #define NOUVEAU_MAX_SCRATCH_BUFS 4
 
+struct nv04_resource;
+
 struct nouveau_context {
    struct pipe_context pipe;
    struct nouveau_screen *screen;
@@ -23,8 +25,7 @@ struct nouveau_context {
                      unsigned, const void *);
    /* base, size refer to the whole constant buffer */
    void (*push_cb)(struct nouveau_context *,
-                   struct nouveau_bo *, unsigned domain,
-                   unsigned base, unsigned size,
+                   struct nv04_resource *,
                    unsigned offset, unsigned words, const uint32_t *);
 
    /* @return: @ref reduced by nr of references found in context */
index abcdb47..ee4e08d 100644 (file)
@@ -231,3 +231,11 @@ nouveau_fence_next(struct nouveau_screen *screen)
 
    nouveau_fence_new(screen, &screen->fence.current, false);
 }
+
+void
+nouveau_fence_unref_bo(void *data)
+{
+   struct nouveau_bo *bo = data;
+
+   nouveau_bo_ref(NULL, &bo);
+}
index a158705..2efcab2 100644 (file)
@@ -37,6 +37,9 @@ void nouveau_fence_next(struct nouveau_screen *);
 bool nouveau_fence_wait(struct nouveau_fence *);
 bool nouveau_fence_signalled(struct nouveau_fence *);
 
+void nouveau_fence_unref_bo(void *data); /* generic unref bo callback */
+
 static inline void
 nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
 {
index c75b4b9..c6f6965 100644 (file)
@@ -28,6 +28,7 @@
 #include "util/u_surface.h"
 
 #include "nv_m2mf.xml.h"
+#include "nv_object.xml.h"
 #include "nv30/nv30_screen.h"
 #include "nv30/nv30_context.h"
 #include "nv30/nv30_resource.h"
@@ -144,21 +145,54 @@ nv30_resource_copy_region(struct pipe_context *pipe,
    nv30_transfer_rect(nv30, NEAREST, &src, &dst);
 }
 
-void
-nv30_resource_resolve(struct pipe_context *pipe,
-                      const struct pipe_resolve_info *info)
+static void
+nv30_resource_resolve(struct nv30_context *nv30,
+                      const struct pipe_blit_info *info)
 {
-#if 0
-   struct nv30_context *nv30 = nv30_context(pipe);
+   struct nv30_miptree *src_mt = nv30_miptree(info->src.resource);
    struct nv30_rect src, dst;
-
-   define_rect(info->src.res, 0, 0, info->src.x0, info->src.y0,
-               info->src.x1 - info->src.x0, info->src.y1 - info->src.y0, &src);
-   define_rect(info->dst.res, info->dst.level, 0, info->dst.x0, info->dst.y0,
-               info->dst.x1 - info->dst.x0, info->dst.y1 - info->dst.y0, &dst);
-
-   nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
-#endif
+   unsigned x, x0, x1, y, y1, w, h;
+
+   define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x,
+      info->src.box.y, info->src.box.width, info->src.box.height, &src);
+   define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x,
+      info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst);
+
+   x0 = src.x0;
+   x1 = src.x1;
+   y1 = src.y1;
+
+   /* On nv3x we must use sifm which is restricted to 1024x1024 tiles */
+   for (y = src.y0; y < y1; y += h) {
+      h = y1 - y;
+      if (h > 1024)
+         h = 1024;
+
+      src.y0 = 0;
+      src.y1 = h;
+      src.h = h;
+
+      dst.y1 = dst.y0 + (h >> src_mt->ms_y);
+      dst.h = h >> src_mt->ms_y;
+
+      for (x = x0; x < x1; x += w) {
+         w = x1 - x;
+         if (w > 1024)
+            w = 1024;
+
+         src.offset = y * src.pitch + x * src.cpp;
+         src.x0 = 0;
+         src.x1 = w;
+         src.w = w;
+
+         dst.offset = (y >> src_mt->ms_y) * dst.pitch +
+                      (x >> src_mt->ms_x) * dst.cpp;
+         dst.x1 = dst.x0 + (w >> src_mt->ms_x);
+         dst.w = w >> src_mt->ms_x;
+
+         nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
+      }
+   }
 }
 
 void
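
The resolve above walks the source rectangle in pieces of at most 1024x1024, since sifm transfers are limited to that tile size; width and height are clamped at the right and bottom edges. The traversal alone, as a runnable sketch:

   #include <stdio.h>

   int main(void)
   {
      /* walk a 2500x1500 source rect in sifm-sized tiles */
      unsigned x0 = 0, x1 = 2500, y0 = 0, y1 = 1500;
      unsigned x, y, w, h;

      for (y = y0; y < y1; y += h) {
         h = y1 - y;
         if (h > 1024)
            h = 1024;

         for (x = x0; x < x1; x += w) {
            w = x1 - x;
            if (w > 1024)
               w = 1024;

            printf("tile at (%u,%u), %ux%u\n", x, y, w, h);
         }
      }
      return 0;
   }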
@@ -172,7 +206,7 @@ nv30_blit(struct pipe_context *pipe,
        info.dst.resource->nr_samples <= 1 &&
        !util_format_is_depth_or_stencil(info.src.resource->format) &&
        !util_format_is_pure_integer(info.src.resource->format)) {
-      debug_printf("nv30: color resolve unimplemented\n");
+      nv30_resource_resolve(nv30, blit_info);
       return;
    }
 
@@ -305,10 +339,15 @@ nv30_miptree_transfer_unmap(struct pipe_context *pipe,
    struct nv30_context *nv30 = nv30_context(pipe);
    struct nv30_transfer *tx = nv30_transfer(ptx);
 
-   if (ptx->usage & PIPE_TRANSFER_WRITE)
+   if (ptx->usage & PIPE_TRANSFER_WRITE) {
       nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img);
 
-   nouveau_bo_ref(NULL, &tx->tmp.bo);
+      /* Allow the copies above to finish executing before freeing the source */
+      nouveau_fence_work(nv30->screen->base.fence.current,
+                         nouveau_fence_unref_bo, tx->tmp.bo);
+   } else {
+      nouveau_bo_ref(NULL, &tx->tmp.bo);
+   }
    pipe_resource_reference(&ptx->resource, NULL);
    FREE(tx);
 }
@@ -362,6 +401,7 @@ nv30_miptree_create(struct pipe_screen *pscreen,
    blocksz = util_format_get_blocksize(pt->format);
 
    if ((pt->target == PIPE_TEXTURE_RECT) ||
+       (pt->bind & PIPE_BIND_SCANOUT) ||
        !util_is_power_of_two(pt->width0) ||
        !util_is_power_of_two(pt->height0) ||
        !util_is_power_of_two(pt->depth0) ||
@@ -369,6 +409,14 @@ nv30_miptree_create(struct pipe_screen *pscreen,
        util_format_is_float(pt->format) || mt->ms_mode) {
       mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz;
       mt->uniform_pitch = align(mt->uniform_pitch, 64);
+      if (pt->bind & PIPE_BIND_SCANOUT) {
+         struct nv30_screen *screen = nv30_screen(pscreen);
+         int pitch_align = MAX2(
+               screen->eng3d->oclass >= NV40_3D_CLASS ? 1024 : 256,
+               /* round_down_pow2(mt->uniform_pitch / 4) */
+               1 << (util_last_bit(mt->uniform_pitch / 4) - 1));
+         mt->uniform_pitch = align(mt->uniform_pitch, pitch_align);
+      }
    }
 
    if (!mt->uniform_pitch)
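
The scanout path above aligns the pitch to max(hardware minimum, round_down_pow2(pitch / 4)), where round_down_pow2 is built from the index of the most significant set bit, 1 << (util_last_bit(x) - 1). A sketch with an illustrative 1920-wide XRGB scanout surface (last_bit() is a stand-in for util_last_bit):

   #include <stdio.h>

   static unsigned last_bit(unsigned x)  /* index of MSB, 1-based */
   {
      unsigned n = 0;
      while (x) { n++; x >>= 1; }
      return n;
   }

   static unsigned align_to(unsigned v, unsigned a)  /* a: power of two */
   {
      return (v + a - 1) & ~(a - 1);
   }

   int main(void)
   {
      unsigned pitch = 1920 * 4;   /* 7680 bytes */
      unsigned hw_min = 1024;      /* NV40+ class; 256 on older parts */
      unsigned pot = 1u << (last_bit(pitch / 4) - 1);  /* 1024 */
      unsigned a = pot > hw_min ? pot : hw_min;

      printf("pitch %u -> aligned %u (align %u)\n",
             pitch, align_to(pitch, a), a);
      return 0;
   }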
index 8dac779..20d86b6 100644 (file)
@@ -66,9 +66,6 @@ nv30_resource_copy_region(struct pipe_context *pipe,
                           const struct pipe_box *src_box);
 
 void
-nv30_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *);
-
-void
 nv30_blit(struct pipe_context *pipe,
           const struct pipe_blit_info *blit_info);
 
index 7aad26b..806d4e6 100644 (file)
@@ -169,6 +169,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -319,8 +320,9 @@ nv30_screen_is_format_supported(struct pipe_screen *pscreen,
                                 unsigned sample_count,
                                 unsigned bindings)
 {
-   if (sample_count > 4)
+   if (sample_count > nv30_screen(pscreen)->max_sample_count)
       return false;
+
    if (!(0x00000017 & (1 << sample_count)))
       return false;
 
@@ -450,6 +452,23 @@ nv30_screen_create(struct nouveau_device *dev)
       return NULL;
    }
 
+   /*
+    * Some modern apps try to use msaa without keeping in mind the
+    * video-memory restrictions of older cards, resulting in dmesg errors
+    * like:
+    * [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
+    * [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
+    * [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
+    *
+    * We then run out of video memory, after which the program using the
+    * msaa visual freezes, and eventually the entire system freezes.
+    *
+    * To work around this we do not allow msaa visuals by default and allow
+    * the user to override this via NV30_MAX_MSAA.
+    */
+   screen->max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0);
+   if (screen->max_sample_count > 4)
+      screen->max_sample_count = 4;
+
    pscreen = &screen->base.base;
    pscreen->destroy = nv30_screen_destroy;
    pscreen->get_param = nv30_screen_get_param;
index 7b17b88..df11233 100644 (file)
@@ -38,6 +38,8 @@ struct nv30_screen {
    /*XXX: nvfx state */
    struct nouveau_heap *vp_exec_heap;
    struct nouveau_heap *vp_data_heap;
+
+   unsigned max_sample_count;
 };
 
 static inline struct nv30_screen *
index 214da65..2452071 100644 (file)
@@ -371,7 +371,7 @@ nv30_transfer_rect_blit(XFER_ARGS)
 static bool
 nv30_transfer_sifm(XFER_ARGS)
 {
-   if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
+   if (!src->pitch || src->w > 1024 || src->h > 1024 || src->w < 2 || src->h < 2)
       return false;
 
    if (src->d > 1 || dst->d > 1)
@@ -381,7 +381,7 @@ nv30_transfer_sifm(XFER_ARGS)
       return false;
 
    if (!dst->pitch) {
-      if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
+      if (dst->w > 2048 || dst->h > 2048 || dst->w < 2 || dst->h < 2)
          return false;
    } else {
       if (dst->domain != NOUVEAU_BO_VRAM)
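
The size checks above replace (w | h) > 1024 with two explicit comparisons. OR-folding two range checks is only sound against bounds of the form 2^n - 1 (a bit at or above position n is set in w|h iff it is set in w or in h); against 1024 it misfires at the boundary:

   #include <stdio.h>

   int main(void)
   {
      unsigned w = 1024, h = 16;  /* a perfectly legal sifm source size */

      int old_reject = (w | h) > 1024;        /* 1024|16 = 1040 -> reject */
      int new_reject = w > 1024 || h > 1024;  /* both in range -> accept  */

      printf("old check: %s, new check: %s\n",
             old_reject ? "reject" : "accept",
             new_reject ? "reject" : "accept");
      return 0;
   }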
index 4949459..4108f48 100644 (file)
@@ -199,9 +199,13 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
          }
       }
 
-      if (nv50->idxbuf.buffer == res)
+      if (nv50->idxbuf.buffer == res) {
+         /* Just rebind to the bufctx as there is no separate dirty bit */
+         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
+         BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD);
          if (!--ref)
             return ref;
+      }
 
       for (s = 0; s < 3; ++s) {
       assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
index e7adf47..69c1212 100644 (file)
@@ -197,7 +197,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
 
 /* nv50_query.c */
 void nv50_init_query_functions(struct nv50_context *);
-void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
+void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
                                struct pipe_query *, unsigned result_offset);
 void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
 void nva0_so_target_save_offset(struct pipe_context *,
index 49a93bf..80f92be 100644 (file)
@@ -203,8 +203,10 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
    F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
    C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
    F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
+#if NOUVEAU_DRIVER != 0xc0
    C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
    F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
+#endif
    F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),
 
    C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
index 92d49e4..812d10c 100644 (file)
@@ -163,7 +163,10 @@ nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
 {
    struct nv50_miptree *mt = nv50_miptree(pt);
 
-   nouveau_bo_ref(NULL, &mt->base.bo);
+   if (mt->base.fence && mt->base.fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
+      nouveau_fence_work(mt->base.fence, nouveau_fence_unref_bo, mt->base.bo);
+   else
+      nouveau_bo_ref(NULL, &mt->base.bo);
 
    nouveau_fence_ref(NULL, &mt->base.fence);
    nouveau_fence_ref(NULL, &mt->base.fence_wr);
index 02dc367..eff4477 100644 (file)
@@ -66,6 +66,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
       case TGSI_SEMANTIC_VERTEXID:
          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
+         prog->vp.vertexid = 1;
          continue;
       default:
          break;
index 5d3ff56..f4e8e94 100644 (file)
@@ -76,6 +76,7 @@ struct nv50_program {
       ubyte psiz;        /* output slot of point size */
       ubyte bfc[2];      /* indices into varying for FFC (FP) or BFC (VP) */
       ubyte edgeflag;
+      ubyte vertexid;
       ubyte clpd[2];     /* output slot of clip distance[i]'s 1st component */
       ubyte clpd_nr;
    } vp;
index f4adbf8..5368ee7 100644 (file)
@@ -266,6 +266,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
       nv50_query_get(push, q, 0, 0x1000f010);
       break;
    case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+      q->sequence++;
       nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
@@ -451,18 +452,18 @@ nv50_render_condition(struct pipe_context *pipe,
 }
 
 void
-nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
+nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
                           struct pipe_query *pq, unsigned result_offset)
 {
    struct nv50_query *q = nv50_query(pq);
 
-   /* XXX: does this exist ? */
-#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
+   nv50_query_update(q);
+   if (q->state != NV50_QUERY_STATE_READY)
+      nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client);
+   q->state = NV50_QUERY_STATE_READY;
 
-   PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
-   nouveau_pushbuf_space(push, 0, 0, 1);
-   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
-                        NV50_IB_ENTRY_1_NO_PREFETCH);
+   BEGIN_NV04(push, SUBC_3D(method), 1);
+   PUSH_DATA (push, q->data[result_offset / 4]);
 }
 
 void
index 30e6e04..c3bbc83 100644 (file)
@@ -100,7 +100,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_MAX_TEXEL_OFFSET:
       return 7;
    case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-      return 65536;
+      return 128 * 1024 * 1024;
    case PIPE_CAP_GLSL_FEATURE_LEVEL:
       return 330;
    case PIPE_CAP_MAX_RENDER_TARGETS:
@@ -179,6 +179,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
       return 1; /* class_3d >= NVA0_3D_CLASS; */
index b033ce5..fdde11f 100644 (file)
@@ -641,12 +641,12 @@ nv50_stream_output_validate(struct nv50_context *nv50)
       PUSH_DATA (push, so->num_attribs[i]);
       if (n == 4) {
          PUSH_DATA(push, targ->pipe.buffer_size);
-
-         BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
          if (!targ->clean) {
             assert(targ->pq);
-            nv50_query_pushbuf_submit(push, targ->pq, 0x4);
+            nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
+                                      targ->pq, 0x4);
          } else {
+            BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
             PUSH_DATA(push, 0);
             targ->clean = false;
          }
@@ -655,6 +655,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
             (so->stride[i] * nv50->state.prim_size);
          prims = MIN2(prims, limit);
       }
+      targ->stride = so->stride[i];
       BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
    }
    if (prims != ~0) {
index b304a17..66dcf43 100644 (file)
@@ -503,7 +503,8 @@ static struct state_validate {
     { nv50_validate_samplers,      NV50_NEW_SAMPLERS },
     { nv50_stream_output_validate, NV50_NEW_STRMOUT |
                                    NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
-    { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
+    { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
+                                   NV50_NEW_VERTPROG },
     { nv50_validate_min_samples,   NV50_NEW_MIN_SAMPLES },
 };
 #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
index fc6374d..6083ea9 100644 (file)
@@ -221,6 +221,26 @@ nv50_create_texture_view(struct pipe_context *pipe,
    return &view->pipe;
 }
 
+static void
+nv50_update_tic(struct nv50_context *nv50, struct nv50_tic_entry *tic,
+                struct nv04_resource *res)
+{
+   uint64_t address = res->address;
+   if (res->base.target != PIPE_BUFFER)
+      return;
+   address += tic->pipe.u.buf.first_element *
+      util_format_get_blocksize(tic->pipe.format);
+   if (tic->tic[1] == (uint32_t)address &&
+       (tic->tic[2] & 0xff) == address >> 32)
+      return;
+
+   nv50_screen_tic_unlock(nv50->screen, tic);
+   tic->id = -1;
+   tic->tic[1] = address;
+   tic->tic[2] &= 0xffffff00;
+   tic->tic[2] |= address >> 32;
+}
+
 static bool
 nv50_validate_tic(struct nv50_context *nv50, int s)
 {
@@ -240,6 +260,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
          continue;
       }
       res = &nv50_miptree(tic->pipe.texture)->base;
+      nv50_update_tic(nv50, tic, res);
 
       if (tic->id < 0) {
          tic->id = nv50_screen_tic_alloc(nv50->screen, tic);
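
nv50_update_tic (and its nvc0 twin later in this series) refreshes a buffer texture's 40-bit GPU address inside the TIC words: the low 32 bits live in tic[1] and bits 32..39 in the low byte of tic[2], so a cheap compare of those two fields decides whether the descriptor needs rewriting. A sketch of the split and the compare (the initial tic[2] value here is an arbitrary stand-in for the format bits):

   #include <stdint.h>
   #include <stdio.h>

   int main(void)
   {
      uint64_t address = 0x12abcdef00ull;   /* 40-bit VRAM address */
      uint32_t tic1, tic2 = 0x04000000;

      tic1 = (uint32_t)address;
      tic2 = (tic2 & 0xffffff00) | (uint32_t)(address >> 32);

      /* "did it move?" test before touching the descriptor */
      int unchanged = tic1 == (uint32_t)address &&
                      (tic2 & 0xff) == address >> 32;

      printf("tic[1]=0x%08x tic[2]=0x%08x unchanged=%d\n",
             tic1, tic2, unchanged);
      return 0;
   }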
index fc6b24a..be51407 100644 (file)
@@ -365,9 +365,14 @@ nv50_miptree_transfer_unmap(struct pipe_context *pctx,
             tx->rect[0].base += mt->layer_stride;
          tx->rect[1].base += tx->nblocksy * tx->base.stride;
       }
+
+      /* Allow the copies above to finish executing before freeing the source */
+      nouveau_fence_work(nv50->screen->base.fence.current,
+                         nouveau_fence_unref_bo, tx->rect[1].bo);
+   } else {
+      nouveau_bo_ref(NULL, &tx->rect[1].bo);
    }
 
-   nouveau_bo_ref(NULL, &tx->rect[1].bo);
    pipe_resource_reference(&transfer->resource, NULL);
 
    FREE(tx);
index 6324726..f5f4708 100644 (file)
@@ -293,7 +293,8 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
    uint64_t addrs[PIPE_MAX_ATTRIBS];
    uint32_t limits[PIPE_MAX_ATTRIBS];
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
-   struct nv50_vertex_stateobj *vertex = nv50->vertex;
+   struct nv50_vertex_stateobj dummy = {};
+   struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy;
    struct pipe_vertex_buffer *vb;
    struct nv50_vertex_element *ve;
    uint32_t mask;
@@ -301,6 +302,14 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
    unsigned i;
    const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
 
+   /* A vertexid is not generated for inline data uploads, so we have to
+    * use a VBO. This check must come after the vertprog has been validated,
+    * otherwise vertexid may be unset.
+    */
+   assert(nv50->vertprog->translated);
+   if (nv50->vertprog->vp.vertexid)
+      nv50->vbo_push_hint = 0;
+
    if (unlikely(vertex->need_conversion))
       nv50->vbo_fifo = ~0;
    else
@@ -317,7 +326,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
          if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
             buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
             nv50->base.vbo_dirty = true;
-            break;
          }
       }
    }
@@ -736,9 +744,8 @@ nva0_draw_stream_output(struct nv50_context *nv50,
       BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
       PUSH_DATA (push, 0);
       BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
-      PUSH_DATA (push, 0);
-      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
-      nv50_query_pushbuf_submit(push, so->pq, 0x4);
+      PUSH_DATA (push, so->stride);
+      nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4);
       BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
       PUSH_DATA (push, 0);
 
@@ -761,6 +768,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 {
    struct nv50_context *nv50 = nv50_context(pipe);
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   bool tex_dirty = false;
    int i, s;
 
    /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
@@ -790,6 +798,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 
    push->kick_notify = nv50_draw_vbo_kick_notify;
 
+   /* TODO: Instead of iterating over all the buffer resources looking for
+    * coherent buffers, keep track of a context-wide count.
+    */
    for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
       uint32_t valid = nv50->constbuf_valid[s];
 
@@ -817,6 +828,21 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       nv50->cb_dirty = false;
    }
 
+   for (s = 0; s < 3 && !tex_dirty; ++s) {
+      for (i = 0; i < nv50->num_textures[s] && !tex_dirty; ++i) {
+         if (!nv50->textures[s][i] ||
+             nv50->textures[s][i]->texture->target != PIPE_BUFFER)
+            continue;
+         if (nv50->textures[s][i]->texture->flags &
+             PIPE_RESOURCE_FLAG_MAP_COHERENT)
+            tex_dirty = true;
+      }
+   }
+   if (tex_dirty) {
+      BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
+      PUSH_DATA (push, 0x20);
+   }
+
    if (nv50->vbo_fifo) {
       nv50_push_vbo(nv50, info);
       push->kick_notify = nv50_default_kick_notify;
@@ -838,10 +864,6 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
          nv50->base.vbo_dirty = true;
    }
 
-   if (!nv50->base.vbo_dirty && nv50->idxbuf.buffer &&
-       nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-      nv50->base.vbo_dirty = true;
-
    if (nv50->base.vbo_dirty) {
       BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
       PUSH_DATA (push, 0);
index 6ed79cf..30bee3a 100644 (file)
@@ -299,10 +299,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
                       struct nouveau_bo *dst, unsigned offset, unsigned domain,
                       unsigned size, const void *data);
 void
-nvc0_cb_push(struct nouveau_context *,
-             struct nouveau_bo *bo, unsigned domain,
-             unsigned base, unsigned size,
-             unsigned offset, unsigned words, const uint32_t *data);
+nvc0_cb_bo_push(struct nouveau_context *,
+                struct nouveau_bo *bo, unsigned domain,
+                unsigned base, unsigned size,
+                unsigned offset, unsigned words, const uint32_t *data);
 
 /* nvc0_vbo.c */
 void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
index 12f1bb7..a168dd6 100644 (file)
@@ -449,7 +449,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
 
    for (i = 0; i < info->numOutputs; ++i) {
       if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
-         fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
+         fp->hdr[18] |= 0xf << info->out[i].slot[0];
    }
 
    fp->fp.early_z = info->prop.fp.earlyFragTests;
index ab19b26..1909b91 100644
@@ -87,7 +87,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
       return 31;
    case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-      return 65536;
+      return 128 * 1024 * 1024;
    case PIPE_CAP_GLSL_FEATURE_LEVEL:
       return 410;
    case PIPE_CAP_MAX_RENDER_TARGETS:
@@ -178,6 +178,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
       return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
index ee29912..c5bfd03 100644
@@ -831,6 +831,8 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
    }
    nvc0->constbuf_dirty[s] |= 1 << i;
 
+   if (nvc0->constbuf[s][i].u.buf)
+      nv04_resource(nvc0->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i);
    pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);
 
    nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false;
index 47bd66d..aec0609 100644
@@ -440,7 +440,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
                BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
                PUSH_DATA (push, (0 << 4) | 1);
             }
-            nvc0_cb_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
+            nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                          base, nvc0->state.uniform_buffer_bound[s],
                          0, (size + 3) / 4,
                          nvc0->constbuf[s][0].u.data);
@@ -458,6 +458,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
                BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
 
                nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
+               res->cb_bindings[s] |= 1 << i;
             } else {
                BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
                PUSH_DATA (push, (i << 4) | 0);
index d19082e..2dd100f 100644
@@ -226,6 +226,26 @@ nvc0_create_texture_view(struct pipe_context *pipe,
    return &view->pipe;
 }
 
+static void
+nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
+                struct nv04_resource *res)
+{
+   uint64_t address = res->address;
+   if (res->base.target != PIPE_BUFFER)
+      return;
+   address += tic->pipe.u.buf.first_element *
+      util_format_get_blocksize(tic->pipe.format);
+   if (tic->tic[1] == (uint32_t)address &&
+       (tic->tic[2] & 0xff) == address >> 32)
+      return;
+
+   nvc0_screen_tic_unlock(nvc0->screen, tic);
+   tic->id = -1;
+   tic->tic[1] = address;
+   tic->tic[2] &= 0xffffff00;
+   tic->tic[2] |= address >> 32;
+}
+
 static bool
 nvc0_validate_tic(struct nvc0_context *nvc0, int s)
 {
@@ -247,6 +267,7 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
          continue;
       }
       res = nv04_resource(tic->pipe.texture);
+      nvc0_update_tic(nvc0, tic, res);
 
       if (tic->id < 0) {
          tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
@@ -313,6 +334,7 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
          continue;
       }
       res = nv04_resource(tic->pipe.texture);
+      nvc0_update_tic(nvc0, tic, res);
 
       if (tic->id < 0) {
          tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
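
nvc0_update_tic, added above, refreshes a buffer texture's TIC entry in place when only its GPU address changed: the low 32 bits of the address go in word 1 and bits 32..39 in the low byte of word 2, whose upper bits hold unrelated format state. A standalone illustration of that packing (the address value is made up):

#include <assert.h>
#include <stdint.h>

int main(void)
{
   uint64_t address = 0x125678abcdull;  /* 40-bit GPU virtual address */
   uint32_t tic1;
   uint32_t tic2 = 0xdeadbe00;          /* upper 24 bits: unrelated state */

   tic1 = (uint32_t)address;                                /* low 32 bits */
   tic2 = (tic2 & 0xffffff00) | (uint32_t)(address >> 32);  /* bits 32..39 */

   assert(tic1 == 0x5678abcd);
   assert(tic2 == 0xdeadbe12);
   return 0;
}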
index 7cc5b4b..aaec60a 100644
@@ -495,23 +495,65 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
          tx->rect[1].base += tx->nblocksy * tx->base.stride;
       }
       NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_wr, 1);
+
+      /* Allow the copies above to finish executing before freeing the source */
+      nouveau_fence_work(nvc0->screen->base.fence.current,
+                         nouveau_fence_unref_bo, tx->rect[1].bo);
+   } else {
+      nouveau_bo_ref(NULL, &tx->rect[1].bo);
    }
    if (tx->base.usage & PIPE_TRANSFER_READ)
       NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_rd, 1);
 
-   nouveau_bo_ref(NULL, &tx->rect[1].bo);
    pipe_resource_reference(&transfer->resource, NULL);
 
    FREE(tx);
 }
 
 /* This happens rather often with D3D9/st. */
-void
+static void
 nvc0_cb_push(struct nouveau_context *nv,
-             struct nouveau_bo *bo, unsigned domain,
-             unsigned base, unsigned size,
+             struct nv04_resource *res,
              unsigned offset, unsigned words, const uint32_t *data)
 {
+   struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
+   struct nvc0_constbuf *cb = NULL;
+   int s;
+
+   /* Go through all the constbuf binding points of this buffer and try to
+    * find one which contains the region to be updated.
+    */
+   for (s = 0; s < 6 && !cb; s++) {
+      uint16_t bindings = res->cb_bindings[s];
+      while (bindings) {
+         int i = ffs(bindings) - 1;
+         uint32_t cb_offset = nvc0->constbuf[s][i].offset;
+
+         bindings &= ~(1 << i);
+         if (cb_offset <= offset &&
+             cb_offset + nvc0->constbuf[s][i].size >= offset + words * 4) {
+            cb = &nvc0->constbuf[s][i];
+            break;
+         }
+      }
+   }
+
+   if (cb) {
+      nvc0_cb_bo_push(nv, res->bo, res->domain,
+                      res->offset + cb->offset, cb->size,
+                      offset - cb->offset, words, data);
+   } else {
+      nv->push_data(nv, res->bo, res->offset + offset, res->domain,
+                    words * 4, data);
+   }
+}
+
+void
+nvc0_cb_bo_push(struct nouveau_context *nv,
+                struct nouveau_bo *bo, unsigned domain,
+                unsigned base, unsigned size,
+                unsigned offset, unsigned words, const uint32_t *data)
+{
    struct nouveau_pushbuf *push = nv->pushbuf;
 
    NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_count, 1);
@@ -520,6 +562,9 @@ nvc0_cb_push(struct nouveau_context *nv,
    assert(!(offset & 3));
    size = align(size, 0x100);
 
+   assert(offset < size);
+   assert(offset + words * 4 <= size);
+
    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
    PUSH_DATA (push, size);
    PUSH_DATAh(push, bo->offset + base);
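
The binding search in the reworked nvc0_cb_push accepts a binding point only when the update lies entirely inside it, i.e. cb_offset <= offset and cb_offset + size >= offset + words * 4; anything else falls back to push_data. A standalone check of that predicate with made-up numbers:

#include <stdio.h>

static int binding_covers(unsigned cb_offset, unsigned cb_size,
                          unsigned offset, unsigned words)
{
   /* Same containment test as in nvc0_cb_push above. */
   return cb_offset <= offset &&
          cb_offset + cb_size >= offset + words * 4;
}

int main(void)
{
   printf("%d\n", binding_covers(256, 512, 400, 16)); /* 1: [400,464) inside [256,768) */
   printf("%d\n", binding_covers(256, 512, 760, 16)); /* 0: [760,824) overruns 768 */
   return 0;
}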
index 6f9e790..188c7d7 100644
@@ -899,6 +899,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 
    push->kick_notify = nvc0_draw_vbo_kick_notify;
 
+   /* TODO: Instead of iterating over all the buffer resources looking for
+    * coherent buffers, keep track of a context-wide count.
+    */
    for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
       uint32_t valid = nvc0->constbuf_valid[s];
 
@@ -924,6 +927,23 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       nvc0->cb_dirty = false;
    }
 
+   for (s = 0; s < 5; ++s) {
+      for (int i = 0; i < nvc0->num_textures[s]; ++i) {
+         struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+         struct pipe_resource *res;
+         if (!tic)
+            continue;
+         res = nvc0->textures[s][i]->texture;
+         if (res->target != PIPE_BUFFER ||
+             !(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
+            continue;
+
+         BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
+         PUSH_DATA (push, (tic->id << 4) | 1);
+         NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
+      }
+   }
+
    if (nvc0->state.vbo_mode) {
       nvc0_push_vbo(nvc0, info);
       push->kick_notify = nvc0_default_kick_notify;
index 4ca0b26..e669ba2 100644
@@ -195,6 +195,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
         case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
         case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
         case PIPE_CAP_DEPTH_BOUNDS_TEST:
+        case PIPE_CAP_TGSI_TXQS:
             return 0;
 
         /* SWTCL-only features. */
index 42e8b0b..c32d317 100644
@@ -115,6 +115,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
                                        S_SQ_CF_WORD1_BARRIER(1) |
                                        S_SQ_CF_WORD1_COND(cf->cond) |
                                        S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+                                       S_SQ_CF_WORD1_COUNT(cf->count) |
                                        S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
                }
        }
index c52e43e..33009c1 100644
@@ -379,17 +379,17 @@ static void evergreen_emit_direct_dispatch(
                                "allocating %u dwords lds.\n",
                                num_pipes, num_waves, lds_size);
 
-       r600_write_config_reg(cs, R_008970_VGT_NUM_INDICES, group_size);
+       radeon_set_config_reg(cs, R_008970_VGT_NUM_INDICES, group_size);
 
-       r600_write_config_reg_seq(cs, R_00899C_VGT_COMPUTE_START_X, 3);
+       radeon_set_config_reg_seq(cs, R_00899C_VGT_COMPUTE_START_X, 3);
        radeon_emit(cs, 0); /* R_00899C_VGT_COMPUTE_START_X */
        radeon_emit(cs, 0); /* R_0089A0_VGT_COMPUTE_START_Y */
        radeon_emit(cs, 0); /* R_0089A4_VGT_COMPUTE_START_Z */
 
-       r600_write_config_reg(cs, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE,
+       radeon_set_config_reg(cs, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE,
                                                                group_size);
 
-       r600_write_compute_context_reg_seq(cs, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, 3);
+       radeon_compute_set_context_reg_seq(cs, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, 3);
        radeon_emit(cs, block_layout[0]); /* R_0286EC_SPI_COMPUTE_NUM_THREAD_X */
        radeon_emit(cs, block_layout[1]); /* R_0286F0_SPI_COMPUTE_NUM_THREAD_Y */
        radeon_emit(cs, block_layout[2]); /* R_0286F4_SPI_COMPUTE_NUM_THREAD_Z */
@@ -402,7 +402,7 @@ static void evergreen_emit_direct_dispatch(
                assert(lds_size <= 8160);
        }
 
-       r600_write_compute_context_reg(cs, CM_R_0288E8_SQ_LDS_ALLOC,
+       radeon_compute_set_context_reg(cs, CM_R_0288E8_SQ_LDS_ALLOC,
                                        lds_size | (num_waves << 14));
 
        /* Dispatch packet */
@@ -439,12 +439,12 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
        /* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */
        for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
                struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
-               unsigned reloc = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx,
+               unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx,
                                                       (struct r600_resource*)cb->base.texture,
                                                       RADEON_USAGE_READWRITE,
                                                       RADEON_PRIO_SHADER_RESOURCE_RW);
 
-               r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
+               radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
                radeon_emit(cs, cb->cb_color_base);     /* R_028C60_CB_COLOR0_BASE */
                radeon_emit(cs, cb->cb_color_pitch);    /* R_028C64_CB_COLOR0_PITCH */
                radeon_emit(cs, cb->cb_color_slice);    /* R_028C68_CB_COLOR0_SLICE */
@@ -466,17 +466,17 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
        }
        if (ctx->keep_tiling_flags) {
                for (; i < 8 ; i++) {
-                       r600_write_compute_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
+                       radeon_compute_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
                                                       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
                }
                for (; i < 12; i++) {
-                       r600_write_compute_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C,
+                       radeon_compute_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C,
                                                       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
                }
        }
 
        /* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
-       r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
+       radeon_compute_set_context_reg(cs, R_028238_CB_TARGET_MASK,
                                        ctx->compute_cb_target_mask);
 
 
@@ -556,7 +556,7 @@ void evergreen_emit_cs_shader(
        nstack = shader->bc.nstack;
 #endif
 
-       r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
+       radeon_compute_set_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
        radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
        radeon_emit(cs,           /* R_0288D4_SQ_PGM_RESOURCES_LS */
                        S_0288D4_NUM_GPRS(ngpr)
@@ -564,7 +564,7 @@ void evergreen_emit_cs_shader(
        radeon_emit(cs, 0);     /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
 
        radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
-       radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+       radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                              code_bo, RADEON_USAGE_READ,
                                              RADEON_PRIO_SHADER_DATA));
 }
index e272856..29bdd9d 100644
@@ -64,9 +64,9 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
        for (i = 0; i < ncopy; i++) {
                csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
                /* emit reloc before writing cs so that cs is always in consistent state */
-               r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+               radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
                                      RADEON_PRIO_MIN);
-               r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+               radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
                                      RADEON_PRIO_MIN);
                cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize);
                cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
@@ -129,7 +129,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
                }
 
                /* This must be done after r600_need_cs_space. */
-               reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+               reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                              (struct r600_resource*)dst, RADEON_USAGE_WRITE,
                                              RADEON_PRIO_MIN);
 
index 7c82390..52f4dc8 100644
@@ -857,7 +857,7 @@ static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_ato
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
        struct pipe_clip_state *state = &rctx->clip_state.state;
 
-       r600_write_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
+       radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
        radeon_emit_array(cs, (unsigned*)state, 6*4);
 }
 
@@ -892,27 +892,38 @@ static void evergreen_set_scissor_states(struct pipe_context *ctx,
                                        const struct pipe_scissor_state *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
+       struct r600_scissor_state *rstate = &rctx->scissor;
        int i;
 
-       for (i = start_slot; i < start_slot + num_scissors; i++) {
-               rctx->scissor[i].scissor = state[i - start_slot];
-               r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
-       }
+       for (i = start_slot; i < start_slot + num_scissors; i++)
+               rstate->scissor[i] = state[i - start_slot];
+       rstate->dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+       rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 4;
+       r600_mark_atom_dirty(rctx, &rstate->atom);
 }
 
 static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
-       struct r600_scissor_state *rstate = (struct r600_scissor_state *)atom;
-       struct pipe_scissor_state *state = &rstate->scissor;
-       unsigned offset = rstate->idx * 4 * 2;
+       struct r600_scissor_state *rstate = &rctx->scissor;
+       struct pipe_scissor_state *state;
+       uint32_t dirty_mask;
+       unsigned i, offset;
        uint32_t tl, br;
 
-       evergreen_get_scissor_rect(rctx, state->minx, state->miny, state->maxx, state->maxy, &tl, &br);
+       dirty_mask = rstate->dirty_mask;
+       while (dirty_mask != 0) {
+               i = u_bit_scan(&dirty_mask);
+               state = &rstate->scissor[i];
+               evergreen_get_scissor_rect(rctx, state->minx, state->miny, state->maxx, state->maxy, &tl, &br);
 
-       r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
-       radeon_emit(cs, tl);
-       radeon_emit(cs, br);
+               offset = i * 4 * 2;
+               radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
+               radeon_emit(cs, tl);
+               radeon_emit(cs, br);
+       }
+       rstate->dirty_mask = 0;
+       rstate->atom.num_dw = 0;
 }
 
 /**
@@ -1505,34 +1516,34 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples,
                nr_samples = 0;
                break;
        case 2:
-               r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(eg_sample_locs_2x));
+               radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(eg_sample_locs_2x));
                radeon_emit_array(cs, eg_sample_locs_2x, Elements(eg_sample_locs_2x));
                max_dist = eg_max_dist_2x;
                break;
        case 4:
-               r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(eg_sample_locs_4x));
+               radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(eg_sample_locs_4x));
                radeon_emit_array(cs, eg_sample_locs_4x, Elements(eg_sample_locs_4x));
                max_dist = eg_max_dist_4x;
                break;
        case 8:
-               r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(sample_locs_8x));
+               radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(sample_locs_8x));
                radeon_emit_array(cs, sample_locs_8x, Elements(sample_locs_8x));
                max_dist = max_dist_8x;
                break;
        }
 
        if (nr_samples > 1) {
-               r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
+               radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
                radeon_emit(cs, S_028C00_LAST_PIXEL(1) |
                                     S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
                radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
                                     S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
-               r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
+               radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
        } else {
-               r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
+               radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
                radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
                radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
-               r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
+               radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
        }
 }
 
@@ -1556,13 +1567,13 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 
                cb = (struct r600_surface*)state->cbufs[i];
                if (!cb) {
-                       r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
+                       radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
                                               S_028C70_FORMAT(V_028C70_COLOR_INVALID));
                        continue;
                }
 
                tex = (struct r600_texture *)cb->base.texture;
-               reloc = r600_context_bo_reloc(&rctx->b,
+               reloc = radeon_add_to_buffer_list(&rctx->b,
                                              &rctx->b.rings.gfx,
                                              (struct r600_resource*)cb->base.texture,
                                              RADEON_USAGE_READWRITE,
@@ -1571,14 +1582,14 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
                                                      RADEON_PRIO_COLOR_BUFFER);
 
                if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
-                       cmask_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+                       cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                tex->cmask_buffer, RADEON_USAGE_READWRITE,
                                RADEON_PRIO_COLOR_META);
                } else {
                        cmask_reloc = reloc;
                }
 
-               r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
+               radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
                radeon_emit(cs, cb->cb_color_base);     /* R_028C60_CB_COLOR0_BASE */
                radeon_emit(cs, cb->cb_color_pitch);    /* R_028C64_CB_COLOR0_PITCH */
                radeon_emit(cs, cb->cb_color_slice);    /* R_028C68_CB_COLOR0_SLICE */
@@ -1612,11 +1623,11 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
        }
        /* set CB_COLOR1_INFO for possible dual-src blending */
        if (i == 1 && state->cbufs[0]) {
-               r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
+               radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
                                       cb->cb_color_info | tex->cb_color_info);
 
                if (!rctx->keep_tiling_flags) {
-                       unsigned reloc = r600_context_bo_reloc(&rctx->b,
+                       unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
                                                               &rctx->b.rings.gfx,
                                                               (struct r600_resource*)state->cbufs[0]->texture,
                                                               RADEON_USAGE_READWRITE,
@@ -1629,17 +1640,17 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
        }
        if (rctx->keep_tiling_flags) {
                for (; i < 8 ; i++) {
-                       r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
+                       radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
                }
                for (; i < 12; i++) {
-                       r600_write_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, 0);
+                       radeon_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, 0);
                }
        }
 
        /* ZS buffer. */
        if (state->zsbuf) {
                struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
-               unsigned reloc = r600_context_bo_reloc(&rctx->b,
+               unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
                                                       &rctx->b.rings.gfx,
                                                       (struct r600_resource*)state->zsbuf->texture,
                                                       RADEON_USAGE_READWRITE,
@@ -1647,11 +1658,11 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
                                                               RADEON_PRIO_DEPTH_BUFFER_MSAA :
                                                               RADEON_PRIO_DEPTH_BUFFER);
 
-               r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+               radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
                                       zb->pa_su_poly_offset_db_fmt_cntl);
-               r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
+               radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
 
-               r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 8);
+               radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 8);
                radeon_emit(cs, zb->db_z_info);         /* R_028040_DB_Z_INFO */
                radeon_emit(cs, zb->db_stencil_info);   /* R_028044_DB_STENCIL_INFO */
                radeon_emit(cs, zb->db_depth_base);     /* R_028048_DB_Z_READ_BASE */
@@ -1680,7 +1691,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
        } else if (rctx->screen->b.info.drm_minor >= 18) {
                /* DRM 2.6.18 allows the INVALID format to disable depth/stencil.
                 * Older kernels are out of luck. */
-               r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
+               radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
                radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
                radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
        }
@@ -1688,7 +1699,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
        /* Framebuffer dimensions. */
        evergreen_get_scissor_rect(rctx, 0, 0, state->width, state->height, &tl, &br);
 
-       r600_write_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
+       radeon_set_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
        radeon_emit(cs, tl); /* R_028204_PA_SC_WINDOW_SCISSOR_TL */
        radeon_emit(cs, br); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */
 
@@ -1720,7 +1731,7 @@ static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600
        default:;
        }
 
-       r600_write_context_reg_seq(cs, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
+       radeon_set_context_reg_seq(cs, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
        radeon_emit(cs, fui(offset_scale));
        radeon_emit(cs, fui(offset_units));
        radeon_emit(cs, fui(offset_scale));
@@ -1734,7 +1745,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
        unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
        unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
 
-       r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
+       radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
        radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */
        /* This must match the used export instructions exactly.
         * Other values may lead to undefined behavior and hangs.
@@ -1751,17 +1762,17 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
                struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
                unsigned reloc_idx;
 
-               r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
-               r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
-               r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
-               r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
-               reloc_idx = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
+               radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
+               radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
+               radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
+               radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
+               reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
                                                  RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
                cs->buf[cs->cdw++] = reloc_idx;
        } else {
-               r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
-               r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
+               radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
+               radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
        }
 }
 
@@ -1822,11 +1833,11 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
                db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(1);
        }
 
-       r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
+       radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
        radeon_emit(cs, db_render_control); /* R_028000_DB_RENDER_CONTROL */
        radeon_emit(cs, db_count_control); /* R_028004_DB_COUNT_CONTROL */
-       r600_write_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
-       r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
+       radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
+       radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
 }
 
 static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
@@ -1853,7 +1864,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
                radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
                radeon_emit(cs, (resource_offset + buffer_index) * 8);
                radeon_emit(cs, va); /* RESOURCEi_WORD0 */
-               radeon_emit(cs, rbuffer->buf->size - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+               radeon_emit(cs, rbuffer->b.b.width0 - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
                radeon_emit(cs, /* RESOURCEi_WORD2 */
                                 S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
                                 S_030008_STRIDE(vb->stride) |
@@ -1869,7 +1880,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
                radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
-               radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+               radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
        }
        state->dirty_mask = 0;
@@ -1877,12 +1888,12 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 
 static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
 {
-       evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, 992, 0);
+       evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_FS, 0);
 }
 
 static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
 {
-       evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, 816,
+       evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_CS,
                                      RADEON_CP_PACKET3_COMPUTE_MODE);
 }
 
@@ -1910,20 +1921,20 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
                va = rbuffer->gpu_address + cb->buffer_offset;
 
                if (!gs_ring_buffer) {
-                       r600_write_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
+                       radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
                                                    ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags);
-                       r600_write_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
+                       radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
                                                    pkt_flags);
                }
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
-               radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+               radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
 
                radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
                radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
                radeon_emit(cs, va); /* RESOURCEi_WORD0 */
-               radeon_emit(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+               radeon_emit(cs, rbuffer->b.b.width0 - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
                radeon_emit(cs, /* RESOURCEi_WORD2 */
                            S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
                            S_030008_STRIDE(gs_ring_buffer ? 4 : 16) |
@@ -1942,7 +1953,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
                            S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
-               radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+               radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
 
                dirty_mask &= ~(1 << buffer_index);
@@ -1952,7 +1963,8 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 
 static void evergreen_emit_vs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-       evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX], 176,
+       evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX],
+                                       EG_FETCH_CONSTANTS_OFFSET_VS,
                                        R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
                                        R_028980_ALU_CONST_CACHE_VS_0,
                                        0 /* PKT3 flags */);
@@ -1960,7 +1972,8 @@ static void evergreen_emit_vs_constant_buffers(struct r600_context *rctx, struct
 
 static void evergreen_emit_gs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-       evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY], 336,
+       evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY],
+                                       EG_FETCH_CONSTANTS_OFFSET_GS,
                                        R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0,
                                        R_0289C0_ALU_CONST_CACHE_GS_0,
                                        0 /* PKT3 flags */);
@@ -1968,15 +1981,17 @@ static void evergreen_emit_gs_constant_buffers(struct r600_context *rctx, struct
 
 static void evergreen_emit_ps_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-       evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT], 0,
-                                      R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
-                                      R_028940_ALU_CONST_CACHE_PS_0,
-                                      0 /* PKT3 flags */);
+       evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT],
+                                       EG_FETCH_CONSTANTS_OFFSET_PS,
+                                       R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
+                                       R_028940_ALU_CONST_CACHE_PS_0,
+                                       0 /* PKT3 flags */);
 }
 
 static void evergreen_emit_cs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-       evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE], 816,
+       evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE],
+                                       EG_FETCH_CONSTANTS_OFFSET_CS,
                                        R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0,
                                        R_028F40_ALU_CONST_CACHE_LS_0,
                                        RADEON_CP_PACKET3_COMPUTE_MODE);
@@ -2001,7 +2016,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
                radeon_emit(cs, (resource_id_base + resource_index) * 8);
                radeon_emit_array(cs, rview->tex_resource_words, 8);
 
-               reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
+               reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
                                              RADEON_USAGE_READ,
                                              rview->tex_resource->b.b.nr_samples > 1 ?
                                                      RADEON_PRIO_SHADER_TEXTURE_MSAA :
@@ -2020,25 +2035,25 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
 static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
        evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views,
-                                    176 + R600_MAX_CONST_BUFFERS, 0);
+                                    EG_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS, 0);
 }
 
 static void evergreen_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
        evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views,
-                                    336 + R600_MAX_CONST_BUFFERS, 0);
+                                    EG_FETCH_CONSTANTS_OFFSET_GS + R600_MAX_CONST_BUFFERS, 0);
 }
 
 static void evergreen_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
        evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views,
-                                    R600_MAX_CONST_BUFFERS, 0);
+                                    EG_FETCH_CONSTANTS_OFFSET_PS + R600_MAX_CONST_BUFFERS, 0);
 }
 
 static void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
        evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views,
-                                    816 + 2, RADEON_CP_PACKET3_COMPUTE_MODE);
+                                    EG_FETCH_CONSTANTS_OFFSET_CS + 2, RADEON_CP_PACKET3_COMPUTE_MODE);
 }
 
 static void evergreen_emit_sampler_states(struct r600_context *rctx,
@@ -2062,7 +2077,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
                radeon_emit_array(cs, rstate->tex_sampler_words, 3);
 
                if (rstate->border_color_use) {
-                       r600_write_config_reg_seq(cs, border_index_reg, 5);
+                       radeon_set_config_reg_seq(cs, border_index_reg, 5);
                        radeon_emit(cs, i);
                        radeon_emit_array(cs, rstate->border_color.ui, 4);
                }
@@ -2100,7 +2115,7 @@ static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_at
        struct r600_sample_mask *s = (struct r600_sample_mask*)a;
        uint8_t mask = s->sample_mask;
 
-       r600_write_context_reg(rctx->b.rings.gfx.cs, R_028C3C_PA_SC_AA_MASK,
+       radeon_set_context_reg(rctx->b.rings.gfx.cs, R_028C3C_PA_SC_AA_MASK,
                               mask | (mask << 8) | (mask << 16) | (mask << 24));
 }
 
@@ -2110,7 +2125,7 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
        uint16_t mask = s->sample_mask;
 
-       r600_write_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
+       radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
        radeon_emit(cs, mask | (mask << 16)); /* X0Y0_X1Y0 */
        radeon_emit(cs, mask | (mask << 16)); /* X0Y1_X1Y1 */
 }
@@ -2121,10 +2136,10 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
        struct r600_cso_state *state = (struct r600_cso_state*)a;
        struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
-       r600_write_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
+       radeon_set_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
                               (shader->buffer->gpu_address + shader->offset) >> 8);
        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-       radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
+       radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
                                              RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
 }
 
@@ -2162,9 +2177,9 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_
                        primid = 1;
        }
 
-       r600_write_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v);
-       r600_write_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
-       r600_write_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
+       radeon_set_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v);
+       radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
+       radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
 }
 
 static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
@@ -2173,36 +2188,36 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom
        struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
        struct r600_resource *rbuffer;
 
-       r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+       radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
        radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
 
        if (state->enable) {
                rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
-               r600_write_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
+               radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
                                rbuffer->gpu_address >> 8);
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-               radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+               radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READWRITE,
                                                      RADEON_PRIO_SHADER_RESOURCE_RW));
-               r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
+               radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
                                state->esgs_ring.buffer_size >> 8);
 
                rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
-               r600_write_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
+               radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
                                rbuffer->gpu_address >> 8);
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-               radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+               radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READWRITE,
                                                      RADEON_PRIO_SHADER_RESOURCE_RW));
-               r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
+               radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
                                state->gsvs_ring.buffer_size >> 8);
        } else {
-               r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
-               r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
+               radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
+               radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
        }
 
-       r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+       radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
        radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
 }
@@ -3012,8 +3027,12 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
        struct r600_command_buffer *cb = &shader->command_buffer;
        struct r600_shader *rshader = &shader->shader;
        struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
-       unsigned gsvs_itemsize =
-                       (cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2;
+       unsigned gsvs_itemsizes[4] = {
+                       (cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2,
+                       (cp_shader->ring_item_sizes[1] * shader->selector->gs_max_out_vertices) >> 2,
+                       (cp_shader->ring_item_sizes[2] * shader->selector->gs_max_out_vertices) >> 2,
+                       (cp_shader->ring_item_sizes[3] * shader->selector->gs_max_out_vertices) >> 2
+       };
 
        r600_init_command_buffer(cb, 64);
 
@@ -3032,21 +3051,24 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
                                S_028B90_ENABLE(shader->selector->gs_num_invocations > 0));
        }
        r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
-       r600_store_value(cb, cp_shader->ring_item_size >> 2);
-       r600_store_value(cb, 0);
-       r600_store_value(cb, 0);
-       r600_store_value(cb, 0);
+       r600_store_value(cb, cp_shader->ring_item_sizes[0] >> 2);
+       r600_store_value(cb, cp_shader->ring_item_sizes[1] >> 2);
+       r600_store_value(cb, cp_shader->ring_item_sizes[2] >> 2);
+       r600_store_value(cb, cp_shader->ring_item_sizes[3] >> 2);
 
        r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE,
-                              (rshader->ring_item_size) >> 2);
+                              (rshader->ring_item_sizes[0]) >> 2);
 
        r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE,
-                              gsvs_itemsize);
+                              gsvs_itemsizes[0] +
+                              gsvs_itemsizes[1] +
+                              gsvs_itemsizes[2] +
+                              gsvs_itemsizes[3]);
 
        r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3);
-       r600_store_value(cb, gsvs_itemsize);
-       r600_store_value(cb, gsvs_itemsize);
-       r600_store_value(cb, gsvs_itemsize);
+       r600_store_value(cb, gsvs_itemsizes[0]);
+       r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1]);
+       r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1] + gsvs_itemsizes[2]);
 
        /* FIXME calculate these values somehow ??? */
        r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3);
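
With per-stream item sizes, the GSVS ring offsets emitted above become running sums: OFFSET_1 = size0, OFFSET_2 = size0 + size1, OFFSET_3 = size0 + size1 + size2, and SQ_GSVS_RING_ITEMSIZE is the total of all four. A standalone check of that arithmetic (the item sizes are illustrative):

#include <assert.h>

int main(void)
{
   unsigned s[4] = { 16, 8, 8, 4 };      /* per-stream itemsizes, >>2 units */

   unsigned off1  = s[0];                /* SQ_GSVS_RING_OFFSET_1 */
   unsigned off2  = s[0] + s[1];         /* SQ_GSVS_RING_OFFSET_2 */
   unsigned off3  = s[0] + s[1] + s[2];  /* SQ_GSVS_RING_OFFSET_3 */
   unsigned total = off3 + s[3];         /* SQ_GSVS_RING_ITEMSIZE */

   assert(off1 == 16 && off2 == 24 && off3 == 32 && total == 36);
   return 0;
}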
@@ -3307,9 +3329,9 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
                }
                size = (cheight * pitch) / 4;
                /* emit reloc before writing cs so that cs is always in consistent state */
-               r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rsrc->resource,
+               radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource,
                                      RADEON_USAGE_READ, RADEON_PRIO_MIN);
-               r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rdst->resource,
+               radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource,
                                      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
                cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size);
                cs->buf[cs->cdw++] = base >> 8;
@@ -3431,8 +3453,8 @@ fallback:
 
 void evergreen_init_state_functions(struct r600_context *rctx)
 {
-       unsigned id = 4;
-       int i;
+       unsigned id = 1;
+
        /* !!!
         *  To avoid GPU lockup, registers must be emitted in a specific order
         * (no kidding ...). The order below is important and has been
@@ -3484,12 +3506,8 @@ void evergreen_init_state_functions(struct r600_context *rctx)
        r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
        r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, evergreen_emit_polygon_offset, 6);
        r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
-       for (i = 0; i < R600_MAX_VIEWPORTS; i++) {
-               r600_init_atom(rctx, &rctx->viewport[i].atom, id++, r600_emit_viewport_state, 8);
-               r600_init_atom(rctx, &rctx->scissor[i].atom, id++, evergreen_emit_scissor_state, 4);
-               rctx->viewport[i].idx = i;
-               rctx->scissor[i].idx = i;
-       }
+       r600_init_atom(rctx, &rctx->scissor.atom, id++, evergreen_emit_scissor_state, 0);
+       r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 0);
        r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
        r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
        r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
index ad6ad43..937ffcb 100644
 #define    DMA_PACKET_CONSTANT_FILL             0xd
 #define    DMA_PACKET_NOP                       0xf
 
+#define EG_FETCH_CONSTANTS_OFFSET_PS 0
+#define EG_FETCH_CONSTANTS_OFFSET_VS 176
+#define EG_FETCH_CONSTANTS_OFFSET_GS 336
+#define EG_FETCH_CONSTANTS_OFFSET_HS 496
+#define EG_FETCH_CONSTANTS_OFFSET_LS 656
+#define EG_FETCH_CONSTANTS_OFFSET_CS 816
+#define EG_FETCH_CONSTANTS_OFFSET_FS 992
+
 #endif
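
These defines name the fetch-constant base slots that the evergreen_state.c hunks above substitute for the literals 0, 176, 336, 816 and 992. Within each stage's range the constant buffers come first and the sampler views follow, which is why the sampler-view emitters add R600_MAX_CONST_BUFFERS to the base. A sketch of the resulting slot math; the constant-buffer count used here is an assumption for illustration, not taken from this commit:

#include <stdio.h>

#define EG_FETCH_CONSTANTS_OFFSET_VS 176
#define R600_MAX_CONST_BUFFERS 16        /* assumed value */

static unsigned vs_sampler_view_slot(unsigned view_index)
{
   /* Mirrors evergreen_emit_vs_sampler_views above. */
   return EG_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS + view_index;
}

int main(void)
{
   printf("%u\n", vs_sampler_view_slot(0)); /* 192 with the assumed count */
   return 0;
}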
index b514c58..bc69806 100644
@@ -252,6 +252,12 @@ static int alu_uses_rel(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
        return 0;
 }
 
+static int is_alu_64bit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+{
+       const struct alu_op_info *op = r600_isa_alu(alu->op);
+       return (op->flags & AF_64);
+}
+
 static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
        unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
@@ -576,6 +582,12 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
 
        for (i = 0; i < max_slots; ++i) {
                if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && !prev[i]->dst.rel) {
+
+                       if (is_alu_64bit_inst(bc, prev[i])) {
+                               gpr[i] = -1;
+                               continue;
+                       }
+
                        gpr[i] = prev[i]->dst.sel;
                        /* cube writes more than PV.X */
                        if (is_alu_reduction_inst(bc, prev[i]))
@@ -591,6 +603,8 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
                if(!alu)
                        continue;
 
+               if (is_alu_64bit_inst(bc, alu))
+                       continue;
                num_src = r600_bytecode_get_num_operands(bc, alu);
                for (src = 0; src < num_src; ++src) {
                        if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
@@ -2029,6 +2043,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
                                        fprintf(stderr, "CND:%X ", cf->cond);
                                if (cf->pop_count)
                                        fprintf(stderr, "POP:%X ", cf->pop_count);
+                               if (cf->count && (cfop->flags & CF_EMIT))
+                                       fprintf(stderr, "STREAM%d ", cf->count);
                                if (cf->end_of_program)
                                        fprintf(stderr, "EOP ");
                                fprintf(stderr, "\n");
index e37d926..7cf3a09 100644
@@ -149,6 +149,7 @@ struct r600_bytecode_cf {
        unsigned                        id;
        unsigned                        cond;
        unsigned                        pop_count;
+       unsigned                        count;
        unsigned                        cf_addr; /* control flow addr */
        struct r600_bytecode_kcache             kcache[4];
        unsigned                        r6xx_uses_waterfall;
@@ -279,4 +280,19 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
 
 void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
                           unsigned *num_format, unsigned *format_comp, unsigned *endian);
+
+static inline int fp64_switch(int i)
+{
+       switch (i) {
+       case 0:
+               return 1;
+       case 1:
+               return 0;
+       case 2:
+               return 3;
+       case 3:
+               return 2;
+       }
+       return 0;
+}
 #endif
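
fp64_switch, added above, pairs the two 32-bit halves of a double spread across adjacent channels: 0 with 1, and 2 with 3. Applying it twice gets back the original channel, which the sketch below verifies (the function body is copied from the hunk above):

#include <assert.h>

static int fp64_switch(int i)
{
   switch (i) {
   case 0: return 1;
   case 1: return 0;
   case 2: return 3;
   case 3: return 2;
   }
   return 0;
}

int main(void)
{
   for (int i = 0; i < 4; i++)
      assert(fp64_switch(fp64_switch(i)) == i); /* involution */
   return 0;
}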
index 22a0950..d1370cd 100644
@@ -65,8 +65,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
        util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso);
 
        if (op & R600_SAVE_FRAGMENT_STATE) {
-               util_blitter_save_viewport(rctx->blitter, &rctx->viewport[0].state);
-               util_blitter_save_scissor(rctx->blitter, &rctx->scissor[0].scissor);
+               util_blitter_save_viewport(rctx->blitter, &rctx->viewport.state[0]);
+               util_blitter_save_scissor(rctx->blitter, &rctx->scissor.scissor[0]);
                util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
                util_blitter_save_blend(rctx->blitter, rctx->blend_state.cso);
                util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa_state.cso);
@@ -395,7 +395,7 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
 
        if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) {
                evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom,
-                                             &buffers, color);
+                                             &buffers, NULL, color);
                if (!buffers)
                        return; /* all buffers have been fast cleared */
        }
index 6445151..cf71597 100644
@@ -48,16 +48,15 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
        num_dw += ctx->b.rings.gfx.cs->cdw;
 
        if (count_draw_in) {
-               unsigned i;
+               uint64_t mask;
 
                /* The number of dwords all the dirty states would take. */
-               i = r600_next_dirty_atom(ctx, 0);
-               while (i < R600_NUM_ATOMS) {
-                       num_dw += ctx->atoms[i]->num_dw;
+               mask = ctx->dirty_atoms;
+               while (mask != 0) {
+                       num_dw += ctx->atoms[u_bit_scan64(&mask)]->num_dw;
                        if (ctx->screen->b.trace_bo) {
                                num_dw += R600_TRACE_CS_DWORDS;
                        }
-                       i = r600_next_dirty_atom(ctx, i + 1);
                }
 
                /* The upper-bound of how much space a draw command would take. */
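
The loop above folds the dirty-atom walk into one 64-bit mask consumed with u_bit_scan64, which returns the index of the lowest set bit and clears it from the mask. A standalone version of the same pattern, using the GCC/Clang ctz builtin in place of Mesa's helper:

#include <stdint.h>
#include <stdio.h>

static int bit_scan64(uint64_t *mask)
{
   int i = __builtin_ctzll(*mask);  /* GCC/Clang builtin; mask must be nonzero */
   *mask &= *mask - 1;              /* clear the bit just found */
   return i;
}

int main(void)
{
   static const unsigned atom_dw[64] = { [3] = 8, [17] = 4, [40] = 2 };
   uint64_t dirty = (1ull << 3) | (1ull << 17) | (1ull << 40);
   unsigned num_dw = 0;

   while (dirty != 0)
      num_dw += atom_dw[bit_scan64(&dirty)];

   printf("%u dwords\n", num_dw); /* 14 dwords */
   return 0;
}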
@@ -235,7 +234,7 @@ void r600_flush_emit(struct r600_context *rctx)
                /* Use of WAIT_UNTIL is deprecated on Cayman+ */
                if (rctx->b.family < CHIP_CAYMAN) {
                        /* wait for things to settle */
-                       r600_write_config_reg(cs, R_008040_WAIT_UNTIL, wait_until);
+                       radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, wait_until);
                }
        }
 
@@ -269,7 +268,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
 
        /* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
        if (ctx->b.chip_class == R600) {
-               r600_write_context_reg(cs, R_028350_SX_MISC, 0);
+               radeon_set_context_reg(cs, R_028350_SX_MISC, 0);
        }
 
        /* force to keep tiling flags */
@@ -287,7 +286,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
 void r600_begin_new_cs(struct r600_context *ctx)
 {
        unsigned shader;
-       int i;
+
        ctx->b.flags = 0;
        ctx->b.gtt = 0;
        ctx->b.vram = 0;
@@ -308,10 +307,12 @@ void r600_begin_new_cs(struct r600_context *ctx)
        r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
        r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
        r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
-       for (i = 0; i < R600_MAX_VIEWPORTS; i++) {
-               r600_mark_atom_dirty(ctx, &ctx->scissor[i].atom);
-               r600_mark_atom_dirty(ctx, &ctx->viewport[i].atom);
-       }
+       ctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+       ctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4;
+       r600_mark_atom_dirty(ctx, &ctx->scissor.atom);
+       ctx->viewport.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+       ctx->viewport.atom.num_dw = R600_MAX_VIEWPORTS * 8;
+       r600_mark_atom_dirty(ctx, &ctx->viewport.atom);
        if (ctx->b.chip_class < EVERGREEN) {
                r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
        }
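
r600_begin_new_cs() now marks the consolidated scissor and viewport atoms dirty for all viewports at once: the per-viewport atoms are gone, a dirty_mask covers all R600_MAX_VIEWPORTS slots, and num_dw is sized for the worst case of 4 dwords per scissor and 8 per viewport. The arithmetic, checked standalone:

#include <assert.h>

#define R600_MAX_VIEWPORTS 16

int main(void)
{
	/* all viewports dirty -> low 16 bits set */
	assert(((1u << R600_MAX_VIEWPORTS) - 1) == 0xffff);
	/* worst-case atom sizes used above, in dwords */
	assert(R600_MAX_VIEWPORTS * 4 == 64);	/* scissor atom */
	assert(R600_MAX_VIEWPORTS * 8 == 128);	/* viewport atom */
	return 0;
}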
@@ -417,9 +418,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
                }
 
                /* This must be done after r600_need_cs_space. */
-               src_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src,
+               src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src,
                                                  RADEON_USAGE_READ, RADEON_PRIO_MIN);
-               dst_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst,
+               dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst,
                                                  RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 
                radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
@@ -470,9 +471,9 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
        for (i = 0; i < ncopy; i++) {
                csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
                /* emit reloc before writing cs so that cs is always in consistent state */
-               r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+               radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
                                      RADEON_PRIO_MIN);
-               r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+               radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
                                      RADEON_PRIO_MIN);
                cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize);
                cs->buf[cs->cdw++] = dst_offset & 0xfffffffc;
index fdbe1c0..27fc1e8 100644 (file)
@@ -339,11 +339,11 @@ static const struct alu_op_info alu_op_table[] = {
                {"PRED_SETGT_64",             2, { 0x7C, 0xC7 },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_PRED | AF_CC_GT | AF_64 },
                {"PRED_SETE_64",              2, { 0x7D, 0xC8 },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_PRED | AF_CC_E | AF_64 },
                {"PRED_SETGE_64",             2, { 0x7E, 0xC9 },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_PRED | AF_CC_GE | AF_64 },
-               {"MUL_64",                    2, { 0x1B, 0xCA },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_64 },
+               {"MUL_64",                    2, { 0x1B, 0xCA },{   AF_V,  AF_V,  AF_V,  AF_4V}, AF_64 },
                {"ADD_64",                    2, { 0x17, 0xCB },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_64 },
                {"MOVA_INT",                  1, { 0x18, 0xCC },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_MOVA },
-               {"FLT64_TO_FLT32",            1, { 0x1C, 0xCD },{   AF_V,  AF_V,  AF_V,  AF_V},  0 },
-               {"FLT32_TO_FLT64",            1, { 0x1D, 0xCE },{   AF_V,  AF_V,  AF_V,  AF_V},  0 },
+               {"FLT64_TO_FLT32",            1, { 0x1C, 0xCD },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_64 },
+               {"FLT32_TO_FLT64",            1, { 0x1D, 0xCE },{   AF_V,  AF_V,  AF_V,  AF_V},  AF_64 },
                {"SAD_ACCUM_PREV_UINT",       2, {   -1, 0xCF },{      0,     0,  AF_V,  AF_V},  AF_UINT_DST | AF_PREV_NEXT },
                {"DOT",                       2, {   -1, 0xD0 },{      0,     0,  AF_V,  AF_V},  AF_PREV_NEXT },
                {"MUL_PREV",                  1, {   -1, 0xD1 },{      0,     0,  AF_V,  AF_V},  AF_PREV_INTERLEAVE },
@@ -369,7 +369,7 @@ static const struct alu_op_info alu_op_table[] = {
                {"FMA",                       3, {   -1, 0x07 },{      0,     0,  AF_V,  AF_V},  0 },
                {"MULADD_INT24",              3, {   -1, 0x08 },{      0,     0,     0,  AF_V},  AF_INT_DST | AF_24 },
                {"CNDNE_64",                  3, {   -1, 0x09 },{      0,     0,  AF_V,  AF_V},  AF_CMOV | AF_64 },
-               {"FMA_64",                    3, {   -1, 0x0A },{      0,     0,  AF_V,  AF_V},  AF_64 },
+               {"FMA_64",                    3, {   -1, 0x0A },{      0,     0,  AF_V,  AF_4V}, AF_64 },
                {"LERP_UINT",                 3, {   -1, 0x0B },{      0,     0,  AF_V,  AF_V},  AF_UINT_DST },
                {"BIT_ALIGN_INT",             3, {   -1, 0x0C },{      0,     0,  AF_V,  AF_V},  AF_INT_DST },
                {"BYTE_ALIGN_INT",            3, {   -1, 0x0D },{      0,     0,  AF_V,  AF_V},  AF_INT_DST },
index faf538c..372cd41 100644 (file)
@@ -22,7 +22,7 @@
 #if defined R600_USE_LLVM || defined HAVE_OPENCL
 
 #define CONSTANT_BUFFER_0_ADDR_SPACE 8
-#define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_UCP_CONST_BUFFER)
+#define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
 #define LLVM_R600_BUFFER_INFO_CONST_BUFFER \
        (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
 
@@ -77,22 +77,11 @@ static void llvm_load_system_value(
        default: assert(!"unknown system value");
        }
 
-#if HAVE_LLVM >= 0x0304
        ctx->system_values[index] = LLVMBuildExtractElement(ctx->gallivm.builder,
                LLVMGetParam(ctx->main_fn, 0), lp_build_const_int32(&(ctx->gallivm), chan),
                "");
-#else
-       LLVMValueRef reg = lp_build_const_int32(
-                       ctx->soa.bld_base.base.gallivm, chan);
-       ctx->system_values[index] = lp_build_intrinsic(
-                       ctx->soa.bld_base.base.gallivm->builder,
-                       "llvm.R600.load.input",
-                       ctx->soa.bld_base.base.elem_type, &reg, 1,
-                       LLVMReadNoneAttribute);
-#endif
 }
 
-#if HAVE_LLVM >= 0x0304
 static LLVMValueRef
 llvm_load_input_vector(
        struct radeon_llvm_context * ctx, unsigned location, unsigned ijregs,
@@ -131,34 +120,7 @@ llvm_load_input_vector(
                                VecType, Args, ArgCount, LLVMReadNoneAttribute);
                }
 }
-#else
-static LLVMValueRef
-llvm_load_input_helper(
-       struct radeon_llvm_context * ctx,
-       unsigned idx, int interp, int ij_index)
-{
-       const struct lp_build_context * bb = &ctx->soa.bld_base.base;
-       LLVMValueRef arg[2];
-       int arg_count;
-       const char * intrinsic;
-
-       arg[0] = lp_build_const_int32(bb->gallivm, idx);
-
-       if (interp) {
-               intrinsic = "llvm.R600.interp.input";
-               arg[1] = lp_build_const_int32(bb->gallivm, ij_index);
-               arg_count = 2;
-       } else {
-               intrinsic = "llvm.R600.load.input";
-               arg_count = 1;
-       }
-
-       return lp_build_intrinsic(bb->gallivm->builder, intrinsic,
-               bb->elem_type, &arg[0], arg_count, LLVMReadNoneAttribute);
-}
-#endif
 
-#if HAVE_LLVM >= 0x0304
 static LLVMValueRef
 llvm_face_select_helper(
        struct radeon_llvm_context * ctx,
@@ -171,21 +133,6 @@ llvm_face_select_helper(
        return LLVMBuildSelect(bb->gallivm->builder, is_front,
                front_color, back_color, "");
 }
-#else
-static LLVMValueRef
-llvm_face_select_helper(
-       struct radeon_llvm_context * ctx,
-       unsigned face_loc, LLVMValueRef front_color, LLVMValueRef back_color)
-{
-       const struct lp_build_context * bb = &ctx->soa.bld_base.base;
-       LLVMValueRef face = llvm_load_input_helper(ctx, face_loc, 0, 0);
-       LLVMValueRef is_front = LLVMBuildFCmp(
-               bb->gallivm->builder, LLVMRealUGT, face,
-               lp_build_const_float(bb->gallivm, 0.0f),        "");
-       return LLVMBuildSelect(bb->gallivm->builder, is_front,
-               front_color, back_color, "");
-}
-#endif
 
 static void llvm_load_input(
        struct radeon_llvm_context * ctx,
@@ -194,18 +141,11 @@ static void llvm_load_input(
 {
        const struct r600_shader_io * input = &ctx->r600_inputs[input_index];
        unsigned chan;
-#if HAVE_LLVM < 0x0304
-       unsigned interp = 0;
-       int ij_index;
-#endif
        int two_side = (ctx->two_side && input->name == TGSI_SEMANTIC_COLOR);
        LLVMValueRef v;
-#if HAVE_LLVM >= 0x0304
        boolean require_interp_intrinsic = ctx->chip_class >= EVERGREEN &&
                ctx->type == TGSI_PROCESSOR_FRAGMENT;
-#endif
 
-#if HAVE_LLVM >= 0x0304
        if (require_interp_intrinsic && input->spi_sid) {
                v = llvm_load_input_vector(ctx, input->lds_pos, input->ij_index,
                        (input->interpolate > 0));
@@ -241,49 +181,7 @@ static void llvm_load_input(
                                lp_build_const_float(&(ctx->gallivm), 1.0f),
                                ctx->inputs[soa_index], "");
        }
-}
-#else
-       if (ctx->chip_class >= EVERGREEN && ctx->type == TGSI_PROCESSOR_FRAGMENT &&
-                       input->spi_sid) {
-               interp = 1;
-               ij_index = (input->interpolate > 0) ? input->ij_index : -1;
        }
-
-       for (chan = 0; chan < 4; chan++) {
-               unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
-               int loc;
-
-               if (interp) {
-                       loc = 4 * input->lds_pos + chan;
-               } else {
-                       if (input->name == TGSI_SEMANTIC_FACE)
-                               loc = 4 * ctx->face_gpr;
-                       else
-                               loc = 4 * input->gpr + chan;
-               }
-
-               v = llvm_load_input_helper(ctx, loc, interp, ij_index);
-
-               if (two_side) {
-                       struct r600_shader_io * back_input =
-                                       &ctx->r600_inputs[input->back_color_input];
-                       int back_loc = interp ? back_input->lds_pos : back_input->gpr;
-                       LLVMValueRef v2;
-
-                       back_loc = 4 * back_loc + chan;
-                       v2 = llvm_load_input_helper(ctx, back_loc, interp, ij_index);
-                       v = llvm_face_select_helper(ctx, 4 * ctx->face_gpr, v, v2);
-               } else if (input->name == TGSI_SEMANTIC_POSITION &&
-                               ctx->type == TGSI_PROCESSOR_FRAGMENT && chan == 3) {
-                       /* RCP for fragcoord.w */
-                       v = LLVMBuildFDiv(ctx->gallivm.builder,
-                                       lp_build_const_float(&(ctx->gallivm), 1.0f),
-                                       v, "");
-               }
-
-               ctx->inputs[soa_index] = v;
-       }
-#endif
 }
 
 static void llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
@@ -887,7 +785,6 @@ LLVMModuleRef r600_tgsi_llvm(
        struct tgsi_shader_info shader_info;
        struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
        radeon_llvm_context_init(ctx);
-#if HAVE_LLVM >= 0x0304
        LLVMTypeRef Arguments[32];
        unsigned ArgumentsCount = 0;
        for (unsigned i = 0; i < ctx->inputs_count; i++)
@@ -897,9 +794,6 @@ LLVMModuleRef r600_tgsi_llvm(
                LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
                LLVMAddAttribute(P, LLVMInRegAttribute);
        }
-#else
-       radeon_llvm_create_func(ctx, NULL, 0);
-#endif
        tgsi_scan_shader(tokens, &shader_info);
 
        bld_base->info = &shader_info;
index f6efaa3..36d7e68 100644 (file)
@@ -273,6 +273,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
        case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
        case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+       case PIPE_CAP_TGSI_TXQS:
                return 1;
 
        case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
@@ -358,7 +359,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
                return 16384;
        case PIPE_CAP_MAX_VERTEX_STREAMS:
-               return 1;
+               return family >= CHIP_CEDAR ? 4 : 1;
 
        case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
                return 2047;
@@ -500,6 +501,9 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
                        return PIPE_SHADER_IR_TGSI;
                }
        case PIPE_SHADER_CAP_DOUBLES:
+               if (rscreen->b.family == CHIP_CYPRESS ||
+                       rscreen->b.family == CHIP_CAYMAN || rscreen->b.family == CHIP_ARUBA)
+                       return 1;
                return 0;
        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
index ee3e928..d0774de 100644 (file)
@@ -38,7 +38,7 @@
 
 #include "tgsi/tgsi_scan.h"
 
-#define R600_NUM_ATOMS 75
+#define R600_NUM_ATOMS 42
 
 #define R600_MAX_VIEWPORTS 16
 
 #define R600_TRACE_CS_DWORDS           7
 
 #define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 3
+#define R600_MAX_DRIVER_CONST_BUFFERS 2
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
 
 /* start driver buffers after user buffers */
-#define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
-#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
+#define R600_UCP_SIZE (4*4*8)
+#define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
+
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
 /* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
  * of 16 const buffers.
  * UCP/SAMPLE_POSITIONS are never accessed by the same shader stage, so they can use the same id.
@@ -77,8 +79,6 @@
  * In order to support the d3d 11 mandated minimum of 15 user const buffers
  * we'd have to squash all use cases into one driver buffer.
  */
-#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-
 #define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4]))
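
The layout change above folds the old UCP and sample-positions buffers into the buffer-info constant buffer: the first R600_UCP_SIZE bytes hold eight vec4 clip planes (4 floats x 4 bytes x 8 = 128 bytes), and the texture/buffer constants start at R600_BUFFER_INFO_OFFSET right after them. Constant selects address the buffer in 16-byte vec4 slots from base 512 (cf. the alu.src[].sel = 512 + i uses below), so R600_SHADER_BUFFER_INFO_SEL in r600_shader.c works out to 512 + 128/16 = 520. A standalone check of the arithmetic:

#include <assert.h>

#define R600_UCP_SIZE (4 * 4 * 8)	/* 8 vec4 clip planes, 16 bytes each */
#define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)

int main(void)
{
	assert(R600_UCP_SIZE == 128);
	/* selects count vec4 slots: byte offset 128 is slot 8, sel 520 */
	assert(512 + R600_BUFFER_INFO_OFFSET / 16 == 520);
	return 0;
}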
 
 #ifdef PIPE_ARCH_BIG_ENDIAN
@@ -87,9 +87,6 @@
 #define R600_BIG_ENDIAN 0
 #endif
 
-#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8)
-#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS)
-
 struct r600_context;
 struct r600_bytecode;
 union  r600_shader_key;
@@ -208,8 +205,8 @@ struct r600_stencil_ref_state {
 
 struct r600_viewport_state {
        struct r600_atom atom;
-       struct pipe_viewport_state state;
-       int idx;
+       struct pipe_viewport_state state[R600_MAX_VIEWPORTS];
+       uint32_t dirty_mask;
 };
 
 struct r600_shader_stages_state {
@@ -359,11 +356,15 @@ struct r600_textures_info {
        struct r600_samplerview_state   views;
        struct r600_sampler_states      states;
        bool                            is_array_sampler[NUM_TEX_UNITS];
+};
 
-       /* cube array txq workaround */
-       uint32_t                        *txq_constants;
-       /* buffer related workarounds */
-       uint32_t                        *buffer_constants;
+struct r600_shader_driver_constants_info {
+       /* currently 128 bytes for UCP/samplepos + sampler buffer constants */
+       uint32_t                        *constants;
+       uint32_t                        alloc_size;
+       bool                            vs_ucp_dirty;
+       bool                            texture_const_dirty;
+       bool                            ps_sample_pos_dirty;
 };
 
 struct r600_constbuf_state
@@ -393,9 +394,9 @@ struct r600_cso_state
 struct r600_scissor_state
 {
        struct r600_atom                atom;
-       struct pipe_scissor_state       scissor;
+       struct pipe_scissor_state       scissor[R600_MAX_VIEWPORTS];
+       uint32_t                        dirty_mask;
        bool                            enable; /* r6xx only */
-       int idx;
 };
 
 struct r600_fetch_shader {
@@ -438,7 +439,7 @@ struct r600_context {
        /* State binding slots are here. */
        struct r600_atom                *atoms[R600_NUM_ATOMS];
        /* Dirty atom bitmask for fast tests */
-       unsigned long                   dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN];
+       uint64_t                        dirty_atoms;
        /* States for CS initialization. */
        struct r600_command_buffer      start_cs_cmd; /* invariant state mostly */
        /** Compute specific registers initializations.  The start_cs_cmd atom
@@ -458,12 +459,12 @@ struct r600_context {
        struct r600_poly_offset_state   poly_offset_state;
        struct r600_cso_state           rasterizer_state;
        struct r600_sample_mask         sample_mask;
-       struct r600_scissor_state       scissor[R600_MAX_VIEWPORTS];
+       struct r600_scissor_state       scissor;
        struct r600_seamless_cube_map   seamless_cube_map;
        struct r600_config_state        config_state;
        struct r600_stencil_ref_state   stencil_ref;
        struct r600_vgt_state           vgt_state;
-       struct r600_viewport_state      viewport[R600_MAX_VIEWPORTS];
+       struct r600_viewport_state      viewport;
        /* Shaders and shader resources. */
        struct r600_cso_state           vertex_fetch_shader;
        struct r600_shader_state        vertex_shader;
@@ -475,6 +476,9 @@ struct r600_context {
        struct r600_gs_rings_state      gs_rings;
        struct r600_constbuf_state      constbuf_state[PIPE_SHADER_TYPES];
        struct r600_textures_info       samplers[PIPE_SHADER_TYPES];
+
+       struct r600_shader_driver_constants_info driver_consts[PIPE_SHADER_TYPES];
+
        /** Vertex buffers for fetch shaders */
        struct r600_vertexbuf_state     vertex_buffer_state;
        /** Vertex buffers for compute shaders */
@@ -501,6 +505,7 @@ struct r600_context {
 
        void                            *sb_context;
        struct r600_isa         *isa;
+       float sample_positions[4 * 16];
 };
 
 static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
@@ -515,18 +520,15 @@ static inline void r600_set_atom_dirty(struct r600_context *rctx,
                                       struct r600_atom *atom,
                                       bool dirty)
 {
-       unsigned long mask;
-       unsigned int w;
-
-       atom->dirty = dirty;
+       uint64_t mask;
 
        assert(atom->id != 0);
-       w = atom->id / R600_DIRTY_ATOM_WORD_BITS;
-       mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS);
+       assert(atom->id < sizeof(mask) * 8);
+       mask = 1ull << atom->id;
        if (dirty)
-               rctx->dirty_atoms[w] |= mask;
+               rctx->dirty_atoms |= mask;
        else
-               rctx->dirty_atoms[w] &= ~mask;
+               rctx->dirty_atoms &= ~mask;
 }
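
With R600_NUM_ATOMS down to 42, the whole dirty set fits in one uint64_t, so the word/bit indexing of the old array collapses to a single shift, as above. The set/clear idiom in isolation, with the same bound check:

#include <assert.h>
#include <stdint.h>

static void set_dirty(uint64_t *dirty_atoms, unsigned id, int dirty)
{
	assert(id < 64);	/* atom ids must fit in the single word */
	uint64_t mask = 1ull << id;
	if (dirty)
		*dirty_atoms |= mask;
	else
		*dirty_atoms &= ~mask;
}

int main(void)
{
	uint64_t dirty = 0;
	set_dirty(&dirty, 41, 1);
	assert(dirty == 1ull << 41);
	set_dirty(&dirty, 41, 0);
	assert(dirty == 0);
	return 0;
}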
 
 static inline void r600_mark_atom_dirty(struct r600_context *rctx,
@@ -535,35 +537,6 @@ static inline void r600_mark_atom_dirty(struct r600_context *rctx,
        r600_set_atom_dirty(rctx, atom, true);
 }
 
-static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx,
-                                               unsigned int id)
-{
-#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ)
-       unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS;
-       unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS;
-       unsigned long bits, mask = (1ul << bit) - 1;
-
-       for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) {
-               bits = rctx->dirty_atoms[w] & ~mask;
-               if (bits == 0)
-                       continue;
-               return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits);
-       }
-
-       return R600_NUM_ATOMS;
-#else
-       for (; id < R600_NUM_ATOMS; id++) {
-               bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] &
-                       (1ul << (id % R600_DIRTY_ATOM_WORD_BITS)));
-               assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty));
-               if (dirty)
-                       break;
-       }
-
-       return id;
-#endif
-}
-
 void r600_trace_emit(struct r600_context *rctx);
 
 static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
@@ -880,14 +853,14 @@ static inline void eg_store_loop_const(struct r600_command_buffer *cb, unsigned
 void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw);
 void r600_release_command_buffer(struct r600_command_buffer *cb);
 
-static inline void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_compute_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
-       r600_write_context_reg_seq(cs, reg, num);
+       radeon_set_context_reg_seq(cs, reg, num);
        /* Set the compute bit on the packet header */
        cs->buf[cs->cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE;
 }
 
-static inline void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
        assert(reg >= R600_CTL_CONST_OFFSET);
        assert(cs->cdw+2+num <= cs->max_dw);
@@ -895,24 +868,24 @@ static inline void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigne
        cs->buf[cs->cdw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
 }
 
-static inline void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_compute_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-       r600_write_compute_context_reg_seq(cs, reg, 1);
+       radeon_compute_set_context_reg_seq(cs, reg, 1);
        radeon_emit(cs, value);
 }
 
-static inline void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
+static inline void radeon_set_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
 {
        if (flag & RADEON_CP_PACKET3_COMPUTE_MODE) {
-               r600_write_compute_context_reg(cs, reg, value);
+               radeon_compute_set_context_reg(cs, reg, value);
        } else {
-               r600_write_context_reg(cs, reg, value);
+               radeon_set_context_reg(cs, reg, value);
        }
 }
 
-static inline void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-       r600_write_ctl_const_seq(cs, reg, 1);
+       radeon_set_ctl_const_seq(cs, reg, 1);
        radeon_emit(cs, value);
 }
 
index b7d7828..1d90582 100644 (file)
@@ -47,7 +47,7 @@ MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
 These 8xx t-slot only opcodes become vector ops, with all four 
 slots expecting the arguments on sources a and b. Result is 
 broadcast to all channels.
-MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
+MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64
 These 8xx t-slot only opcodes become vector ops in the z, y, and 
 x slots.
 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
@@ -60,6 +60,7 @@ issued in the w slot as well.
 The compiler must issue the source argument to slots z, y, and x
 */
 
+#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
 static int r600_shader_from_tgsi(struct r600_context *rctx,
                                 struct r600_pipe_shader *pipeshader,
                                 union r600_shader_key key);
@@ -93,8 +94,10 @@ static void r600_dump_streamout(struct pipe_stream_output_info *so)
        for (i = 0; i < so->num_outputs; i++) {
                unsigned mask = ((1 << so->output[i].num_components) - 1) <<
                                so->output[i].start_component;
-               fprintf(stderr, "  %i: MEM_STREAM0_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
-                       i, so->output[i].output_buffer,
+               fprintf(stderr, "  %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
+                       i,
+                       so->output[i].stream,
+                       so->output[i].output_buffer,
                        so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
                        so->output[i].register_index,
                        mask & 1 ? "x" : "",
@@ -141,7 +144,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
        bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
        unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
        unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
-       unsigned export_shader = key.vs.as_es;
+       unsigned export_shader;
 
        shader->shader.bc.isa = rctx->isa;
 
@@ -165,6 +168,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
     }
        /* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */
        use_sb &= !shader->shader.uses_index_registers;
+       /* disable SB for shaders using doubles */
+       use_sb &= !shader->shader.uses_doubles;
 
        /* Check if the bytecode has already been built.  When using the llvm
         * backend, r600_shader_from_tgsi() will take care of building the
@@ -220,6 +225,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
                }
                break;
        case TGSI_PROCESSOR_VERTEX:
+               export_shader = key.vs.as_es;
                if (rctx->b.chip_class >= EVERGREEN) {
                        if (export_shader)
                                evergreen_update_es_state(ctx, shader);
@@ -309,7 +315,8 @@ struct r600_shader_ctx {
        int                                     gs_out_ring_offset;
        int                                     gs_next_vertex;
        struct r600_shader      *gs_for_vs;
-       int                                     gs_export_gpr_treg;
+       int                                     gs_export_gpr_tregs[4];
+       const struct pipe_stream_output_info    *gs_stream_output_info;
        unsigned                                enabled_stream_buffers_mask;
 };
 
@@ -318,7 +325,7 @@ struct r600_shader_tgsi_instruction {
        int (*process)(struct r600_shader_ctx *ctx);
 };
 
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind);
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
 static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
 static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
@@ -340,7 +347,7 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
        struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
        int j;
 
-       if (i->Instruction.NumDstRegs > 1) {
+       if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
                R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
                return -EINVAL;
        }
@@ -941,7 +948,7 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
 
        memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
        vtx.op = FETCH_OP_VFETCH;
-       vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER;
+       vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
        vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
        if (sample_id == NULL) {
                vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
@@ -1335,9 +1342,11 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
        return 0;
 }
 
-static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so)
+static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so,
+                                                 int stream, unsigned *stream_item_size)
 {
        unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
+       unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
        int i, j, r;
 
        /* Sanity checking. */
@@ -1357,8 +1366,9 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
 
        /* Initialize locations where the outputs are stored. */
        for (i = 0; i < so->num_outputs; i++) {
-               so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
 
+               so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
+               start_comp[i] = so->output[i].start_component;
                /* Lower outputs with dst_offset < start_component.
                 *
                 * We can only output 4D vectors with a write mask, e.g. we can
@@ -1384,7 +1394,7 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
                                if (r)
                                        return r;
                        }
-                       so->output[i].start_component = 0;
+                       start_comp[i] = 0;
                        so_gpr[i] = tmp;
                }
        }
@@ -1393,18 +1403,21 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
        for (i = 0; i < so->num_outputs; i++) {
                struct r600_bytecode_output output;
 
+               if (stream != -1 && stream != so->output[i].stream)
+                       continue;
+
                memset(&output, 0, sizeof(struct r600_bytecode_output));
                output.gpr = so_gpr[i];
-               output.elem_size = so->output[i].num_components;
-               output.array_base = so->output[i].dst_offset - so->output[i].start_component;
+               output.elem_size = so->output[i].num_components - 1;
+               if (output.elem_size == 2)
+                       output.elem_size = 3; // 3 not supported, write 4 with junk at end
+               output.array_base = so->output[i].dst_offset - start_comp[i];
                output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
                output.burst_count = 1;
                /* array_size is an upper limit for the burst_count
                 * with MEM_STREAM instructions */
                output.array_size = 0xFFF;
-               output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component;
-
-               ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer);
+               output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i];
 
                if (ctx->bc->chip_class >= EVERGREEN) {
                        switch (so->output[i].output_buffer) {
@@ -1421,6 +1434,9 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
                                output.op = CF_OP_MEM_STREAM0_BUF3;
                                break;
                        }
+                       output.op += so->output[i].stream * 4;
+                       assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
+                       ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4;
                } else {
                        switch (so->output[i].output_buffer) {
                        case 0:
@@ -1436,6 +1452,7 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
                                output.op = CF_OP_MEM_STREAM3;
                                        break;
                        }
+                       ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer;
                }
                r = r600_bytecode_add_output(ctx->bc, &output);
                if (r)
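
Two details of the reworked emit_streamout() above are easy to miss: start_component is now copied into a local start_comp[] so the lowering step no longer rewrites the caller's pipe_stream_output_info in place, and elem_size now encodes the dword count minus one, where the unsupported 3-dword case rounds up to 4. A sketch of that encoding, standalone:

/* elem_size encoding used above: (number of dwords - 1), with the
 * unsupported 3-dword case rounded up to 4 (junk in the last slot). */
static unsigned streamout_elem_size(unsigned num_components)
{
	unsigned elem_size = num_components - 1;	/* 1..4 comps -> 0..3 */
	if (elem_size == 2)
		elem_size = 3;
	return elem_size;
}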
@@ -1488,7 +1505,8 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
        struct r600_bytecode_output output;
        struct r600_bytecode_cf *cf_jump, *cf_pop,
                *last_exp_pos = NULL, *last_exp_param = NULL;
-       int i, next_clip_pos = 61, next_param = 0;
+       int i, j, next_clip_pos = 61, next_param = 0;
+       int ring;
 
        cshader = calloc(1, sizeof(struct r600_pipe_shader));
        if (!cshader)
@@ -1508,6 +1526,9 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
 
        ctx.bc->isa = rctx->isa;
 
+       cf_jump = NULL;
+       memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes));
+
        /* R0.x = R0.x & 0x3fffffff */
        memset(&alu, 0, sizeof(alu));
        alu.op = ALU_OP2_AND_INT;
@@ -1526,22 +1547,10 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
        alu.last = 1;
        r600_bytecode_add_alu(ctx.bc, &alu);
 
-       /* PRED_SETE_INT __, R0.y, 0 */
-       memset(&alu, 0, sizeof(alu));
-       alu.op = ALU_OP2_PRED_SETE_INT;
-       alu.src[0].chan = 1;
-       alu.src[1].sel = V_SQ_ALU_SRC_0;
-       alu.execute_mask = 1;
-       alu.update_pred = 1;
-       alu.last = 1;
-       r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
-
-       r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
-       cf_jump = ctx.bc->cf_last;
-
        /* fetch vertex data from GSVS ring */
        for (i = 0; i < ocnt; ++i) {
                struct r600_shader_io *out = &ctx.shader->output[i];
+
                out->gpr = i + 1;
                out->ring_offset = i * 16;
 
@@ -1551,6 +1560,7 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
                vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
                vtx.offset = out->ring_offset;
                vtx.dst_gpr = out->gpr;
+               vtx.src_gpr = 0;
                vtx.dst_sel_x = 0;
                vtx.dst_sel_y = 1;
                vtx.dst_sel_z = 2;
@@ -1563,18 +1573,68 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
 
                r600_bytecode_add_vtx(ctx.bc, &vtx);
        }
+       ctx.temp_reg = i + 1;
+       for (ring = 3; ring >= 0; --ring) {
+               bool enabled = false;
+               for (i = 0; i < so->num_outputs; i++) {
+                       if (so->output[i].stream == ring) {
+                               enabled = true;
+                               break;
+                       }
+               }
+               if (ring != 0 && !enabled) {
+                       cshader->shader.ring_item_sizes[ring] = 0;
+                       continue;
+               }
+
+               if (cf_jump) {
+                       // Patch up jump label
+                       r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
+                       cf_pop = ctx.bc->cf_last;
+
+                       cf_jump->cf_addr = cf_pop->id + 2;
+                       cf_jump->pop_count = 1;
+                       cf_pop->cf_addr = cf_pop->id + 2;
+                       cf_pop->pop_count = 1;
+               }
+
+               /* PRED_SETE_INT __, R0.y, ring */
+               memset(&alu, 0, sizeof(alu));
+               alu.op = ALU_OP2_PRED_SETE_INT;
+               alu.src[0].chan = 1;
+               alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+               alu.src[1].value = ring;
+               alu.execute_mask = 1;
+               alu.update_pred = 1;
+               alu.last = 1;
+               r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
 
-       /* XXX handle clipvertex, streamout? */
-       emit_streamout(&ctx, so);
+               r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
+               cf_jump = ctx.bc->cf_last;
+
+               if (enabled)
+                       emit_streamout(&ctx, so, ring, &cshader->shader.ring_item_sizes[ring]);
+               cshader->shader.ring_item_sizes[ring] = ocnt * 16;
+       }
 
        /* export vertex data */
        /* XXX factor out common code with r600_shader_from_tgsi ? */
        for (i = 0; i < ocnt; ++i) {
                struct r600_shader_io *out = &ctx.shader->output[i];
-
+               bool instream0 = true;
                if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
                        continue;
 
+               for (j = 0; j < so->num_outputs; j++) {
+                       if (so->output[j].register_index == i) {
+                               if (so->output[j].stream == 0)
+                                       break;
+                               if (so->output[j].stream > 0)
+                                       instream0 = false;
+                       }
+               }
+               if (!instream0)
+                       continue;
                memset(&output, 0, sizeof(output));
                output.gpr = out->gpr;
                output.elem_size = 3;
@@ -1720,19 +1780,19 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
        }
 
        gs->gs_copy_shader = cshader;
+       cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
 
        ctx.bc->nstack = 1;
 
-       cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
-       cshader->shader.ring_item_size = ocnt * 16;
-
        return r600_bytecode_build(ctx.bc);
 }
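
The copy shader above now emits one predicated block per vertex stream: R0.y carries the stream id, PRED_SETE_INT compares it against the ring number, a CF JUMP skips the non-matching block, and the jump target is back-patched once the following POP's address is known. The back-patching idiom, reduced to a sketch (struct cf_inst and its fields are illustrative stand-ins for r600_bytecode_cf):

struct cf_inst { unsigned id, cf_addr, pop_count; };

/* Patch a conditional jump once its landing pad exists: both the jump
 * and the POP land two CF slots past the POP, popping one stack level. */
static void patch_stream_jump(struct cf_inst *jump, struct cf_inst *pop)
{
	jump->cf_addr = pop->id + 2;
	jump->pop_count = 1;
	pop->cf_addr = pop->id + 2;
	pop->pop_count = 1;
}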
 
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind)
 {
        struct r600_bytecode_output output;
        int i, k, ring_offset;
+       int effective_stream = stream == -1 ? 0 : stream;
+       int idx = 0;
 
        for (i = 0; i < ctx->shader->noutput; i++) {
                if (ctx->gs_for_vs) {
@@ -1749,15 +1809,18 @@ static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
 
                        if (ring_offset == -1)
                                continue;
-               } else
-                       ring_offset = i * 16;
+               } else {
+                       ring_offset = idx * 16;
+                       idx++;
+               }
 
+               if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION)
+                       continue;
                /* next_ring_offset after parsing input decls contains total size of
                 * single vertex data, gs_next_vertex - current vertex index */
                if (!ind)
                        ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;
 
-               /* get a temp and add the ring offset to the next vertex base in the shader */
                memset(&output, 0, sizeof(struct r600_bytecode_output));
                output.gpr = ctx->shader->output[i].gpr;
                output.elem_size = 3;
@@ -1768,28 +1831,39 @@ static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
                        output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
                else
                        output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
-               output.op = CF_OP_MEM_RING;
 
+               switch (stream) {
+               default:
+               case 0:
+                       output.op = CF_OP_MEM_RING; break;
+               case 1:
+                       output.op = CF_OP_MEM_RING1; break;
+               case 2:
+                       output.op = CF_OP_MEM_RING2; break;
+               case 3:
+                       output.op = CF_OP_MEM_RING3; break;
+               }
 
                if (ind) {
                        output.array_base = ring_offset >> 2; /* in dwords */
                        output.array_size = 0xfff;
-                       output.index_gpr = ctx->gs_export_gpr_treg;
+                       output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream];
                } else
                        output.array_base = ring_offset >> 2; /* in dwords */
                r600_bytecode_add_output(ctx->bc, &output);
        }
 
        if (ind) {
+               /* get a temp and add the ring offset to the next vertex base in the shader */
                struct r600_bytecode_alu alu;
                int r;
 
                memset(&alu, 0, sizeof(struct r600_bytecode_alu));
                alu.op = ALU_OP2_ADD_INT;
-               alu.src[0].sel = ctx->gs_export_gpr_treg;
+               alu.src[0].sel = ctx->gs_export_gpr_tregs[effective_stream];
                alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
                alu.src[1].value = ctx->gs_out_ring_offset >> 4;
-               alu.dst.sel = ctx->gs_export_gpr_treg;
+               alu.dst.sel = ctx->gs_export_gpr_tregs[effective_stream];
                alu.dst.write = 1;
                alu.last = 1;
                r = r600_bytecode_add_alu(ctx->bc, &alu);
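
emit_gs_ring_writes() above now routes each stream to its own MEM_RING op and keeps a separate export base register per stream, bumping it by gs_out_ring_offset >> 4 after each emitted vertex. The stream-to-op selection, rewritten as a table (the enum values are placeholders; the real ones come from the r600 ISA headers):

enum ring_op { MEM_RING0, MEM_RING1, MEM_RING2, MEM_RING3 };	/* placeholders */

static enum ring_op ring_op_for_stream(int stream)
{
	/* stream == -1 means "no specific stream" and behaves like stream 0 */
	return (stream >= 1 && stream <= 3) ? (enum ring_op)stream : MEM_RING0;
}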
@@ -1829,23 +1903,29 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        ctx.shader = shader;
        ctx.native_integers = true;
 
-       shader->vs_as_gs_a = key.vs.as_gs_a;
-       shader->vs_as_es = key.vs.as_es;
 
        r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
                           rscreen->has_compressed_msaa_texturing);
        ctx.tokens = tokens;
        tgsi_scan_shader(tokens, &ctx.info);
        shader->indirect_files = ctx.info.indirect_files;
+
+       shader->uses_doubles = ctx.info.uses_doubles;
+
        indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
        tgsi_parse_init(&ctx.parse, tokens);
        ctx.type = ctx.info.processor;
        shader->processor_type = ctx.type;
        ctx.bc->type = shader->processor_type;
 
-       ring_outputs = key.vs.as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
+       if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+               shader->vs_as_gs_a = key.vs.as_gs_a;
+               shader->vs_as_es = key.vs.as_es;
+       }
+
+       ring_outputs = shader->vs_as_es || ctx.type == TGSI_PROCESSOR_GEOMETRY;
 
-       if (key.vs.as_es) {
+       if (shader->vs_as_es) {
                ctx.gs_for_vs = &rctx->gs_shader->current->shader;
        } else {
                ctx.gs_for_vs = NULL;
@@ -1854,6 +1934,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        ctx.next_ring_offset = 0;
        ctx.gs_out_ring_offset = 0;
        ctx.gs_next_vertex = 0;
+       ctx.gs_stream_output_info = &so;
 
        shader->uses_index_registers = false;
        ctx.face_gpr = -1;
@@ -1865,7 +1946,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        shader->nr_ps_color_exports = 0;
        shader->nr_ps_max_color_exports = 0;
 
-       shader->two_side = key.ps.color_two_side;
+       if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
+               shader->two_side = key.ps.color_two_side;
 
        /* register allocations */
        /* Values [0,127] correspond to GPR[0..127].
@@ -1940,8 +2022,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2;
 
        if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
-               ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 3;
-               ctx.temp_reg = ctx.bc->ar_reg + 4;
+               ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3;
+               ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4;
+               ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5;
+               ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6;
+               ctx.temp_reg = ctx.bc->ar_reg + 7;
        } else {
                ctx.temp_reg = ctx.bc->ar_reg + 3;
        }
@@ -1966,13 +2051,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 
        ctx.nliterals = 0;
        ctx.literals = NULL;
-       shader->fs_write_all = FALSE;
-       if (ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
-               shader->fs_write_all = TRUE;
 
-       shader->vs_position_window_space = FALSE;
-       if (ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION])
-               shader->vs_position_window_space = TRUE;
+       shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
+       shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
 
        if (shader->vs_as_gs_a)
                vs_add_primid_output(&ctx, key.vs.prim_id_out);
@@ -2008,7 +2089,10 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                }
        }
        
-       shader->ring_item_size = ctx.next_ring_offset;
+       shader->ring_item_sizes[0] = ctx.next_ring_offset;
+       shader->ring_item_sizes[1] = 0;
+       shader->ring_item_sizes[2] = 0;
+       shader->ring_item_sizes[3] = 0;
 
        /* Process two side if needed */
        if (shader->two_side && ctx.colors_used) {
@@ -2131,17 +2215,18 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
                        struct r600_bytecode_alu alu;
                        int r;
-
-                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-                       alu.op = ALU_OP1_MOV;
-                       alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
-                       alu.src[0].value = 0;
-                       alu.dst.sel = ctx.gs_export_gpr_treg;
-                       alu.dst.write = 1;
-                       alu.last = 1;
-                       r = r600_bytecode_add_alu(ctx.bc, &alu);
-                       if (r)
-                               return r;
+                       for (j = 0; j < 4; j++) {
+                               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                               alu.op = ALU_OP1_MOV;
+                               alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+                               alu.src[0].value = 0;
+                               alu.dst.sel = ctx.gs_export_gpr_tregs[j];
+                               alu.dst.write = 1;
+                               alu.last = 1;
+                               r = r600_bytecode_add_alu(ctx.bc, &alu);
+                               if (r)
+                                       return r;
+                       }
                }
                if (shader->two_side && ctx.colors_used) {
                        if ((r = process_twoside_color_inputs(&ctx)))
@@ -2223,7 +2308,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                                alu.src[0].chan = j;
 
                                alu.src[1].sel = 512 + i;
-                               alu.src[1].kc_bank = R600_UCP_CONST_BUFFER;
+                               alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
                                alu.src[1].chan = j;
 
                                alu.dst.sel = clipdist_temp[oreg];
@@ -2242,14 +2327,20 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        /* Add stream outputs. */
        if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX &&
            so.num_outputs && !use_llvm)
-               emit_streamout(&ctx, &so);
+               emit_streamout(&ctx, &so, -1, NULL);
 
        pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
        convert_edgeflag_to_int(&ctx);
 
        if (ring_outputs) {
-               if (key.vs.as_es)
-                       emit_gs_ring_writes(&ctx, FALSE);
+               if (shader->vs_as_es) {
+                       ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx);
+                       ctx.gs_export_gpr_tregs[1] = -1;
+                       ctx.gs_export_gpr_tregs[2] = -1;
+                       ctx.gs_export_gpr_tregs[3] = -1;
+
+                       emit_gs_ring_writes(&ctx, &so, -1, FALSE);
+               }
        } else {
                /* Export output */
                next_clip_base = shader->vs_out_misc_write ? 62 : 61;
@@ -2599,6 +2690,167 @@ static int tgsi_last_instruction(unsigned writemask)
        return lasti;
 }
 
+
+
+static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       unsigned write_mask = inst->Dst[0].Register.WriteMask;
+       struct r600_bytecode_alu alu;
+       int i, j, r, lasti = tgsi_last_instruction(write_mask);
+       int use_tmp = 0;
+
+       if (singledest) {
+               switch (write_mask) {
+               case 0x1:
+                       write_mask = 0x3;
+                       break;
+               case 0x2:
+                       use_tmp = 1;
+                       write_mask = 0x3;
+                       break;
+               case 0x4:
+                       write_mask = 0xc;
+                       break;
+               case 0x8:
+                       write_mask = 0xc;
+                       use_tmp = 3;
+                       break;
+               }
+       }
+
+       lasti = tgsi_last_instruction(write_mask);
+       for (i = 0; i <= lasti; i++) {
+
+               if (!(write_mask & (1 << i)))
+                       continue;
+
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+               if (singledest) {
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                       if (use_tmp) {
+                               alu.dst.sel = ctx->temp_reg;
+                               alu.dst.chan = i;
+                               alu.dst.write = 1;
+                       }
+                       if (i == 1 || i == 3)
+                               alu.dst.write = 0;
+               } else
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+               alu.op = ctx->inst_info->op;
+               if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) {
+                       r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+               } else if (!swap) {
+                       for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                               r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
+                       }
+               } else {
+                       r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i));
+                       r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i));
+               }
+
+               /* handle some special cases */
+               if (i == 1 || i == 3) {
+                       switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) {
+                       case TGSI_OPCODE_SUB:
+                               r600_bytecode_src_toggle_neg(&alu.src[1]);
+                               break;
+                       case TGSI_OPCODE_DABS:
+                               r600_bytecode_src_set_abs(&alu.src[0]);
+                               break;
+                       default:
+                               break;
+                       }
+               }
+               if (i == lasti) {
+                       alu.last = 1;
+               }
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       if (use_tmp) {
+               write_mask = inst->Dst[0].Register.WriteMask;
+
+               /* move result from temp to dst */
+               for (i = 0; i <= lasti; i++) {
+                       if (!(write_mask & (1 << i)))
+                               continue;
+
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ALU_OP1_MOV;
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                       alu.src[0].sel = ctx->temp_reg;
+                       alu.src[0].chan = use_tmp - 1;
+                       alu.last = (i == lasti);
+
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+       }
+       return 0;
+}
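+
The new tgsi_op2_64_params() widens single-channel writemasks to the channel pair the double actually occupies; when the requested channel is the second of a pair (y or w), the result is computed into a temp and MOVed back afterwards. The mask expansion in isolation, a sketch of the switch at the top of the function:

/* Writemask widening for 64-bit ops: x->xy, y->xy (result read back
 * from temp chan 0), z->zw, w->zw (read back from temp chan 2);
 * use_tmp records which temp channel, offset by one, holds the result. */
static unsigned expand_fp64_writemask(unsigned write_mask, int *use_tmp)
{
	*use_tmp = 0;
	switch (write_mask) {
	case 0x1: return 0x3;
	case 0x2: *use_tmp = 1; return 0x3;
	case 0x4: return 0xc;
	case 0x8: *use_tmp = 3; return 0xc;
	}
	return write_mask;
}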
+
+static int tgsi_op2_64(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       unsigned write_mask = inst->Dst[0].Register.WriteMask;
+       /* confirm writemasking */
+       if ((write_mask & 0x3) != 0x3 &&
+           (write_mask & 0xc) != 0xc) {
+               fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask);
+               return -1;
+       }
+       return tgsi_op2_64_params(ctx, false, false);
+}
+
+static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx)
+{
+       return tgsi_op2_64_params(ctx, true, false);
+}
+
+static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx)
+{
+       return tgsi_op2_64_params(ctx, true, true);
+}
+
+static int tgsi_op3_64(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       int i, j, r;
+       int lasti = 3;
+       int tmp = r600_get_temp(ctx);
+
+       for (i = 0; i < lasti + 1; i++) {
+
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ctx->inst_info->op;
+               for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                       r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 : 1);
+               }
+
+               if (inst->Dst[0].Register.WriteMask & (1 << i))
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               else
+                       alu.dst.sel = tmp;
+
+               alu.dst.chan = i;
+               alu.is_op3 = 1;
+               if (i == lasti) {
+                       alu.last = 1;
+               }
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+}
+
 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -2715,6 +2967,242 @@ static int tgsi_ineg(struct r600_shader_ctx *ctx)
 
 }
 
+static int tgsi_dneg(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       int i, r;
+       int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+       for (i = 0; i < lasti + 1; i++) {
+
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                       continue;
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+
+               r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+
+               if (i == 1 || i == 3)
+                       r600_bytecode_src_toggle_neg(&alu.src[0]);
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+               if (i == lasti) {
+                       alu.last = 1;
+               }
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+
+}
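+
tgsi_dneg() above toggles negation only on channels 1 and 3 because the sign bit of a double lives in the high 32-bit word of each channel pair. A host-side illustration, assuming a little-endian host where the high word is the second uint32_t:

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	double d = 1.5, neg;
	uint32_t w[2];
	memcpy(w, &d, sizeof(d));
	w[1] ^= 0x80000000u;	/* flip the sign bit in the high word only */
	memcpy(&neg, w, sizeof(neg));
	assert(neg == -1.5);
	return 0;
}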
+
+static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       unsigned write_mask = inst->Dst[0].Register.WriteMask;
+       int i, j, r;
+       int firsti = write_mask == 0xc ? 2 : 0;
+
+       for (i = 0; i <= 3; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ctx->inst_info->op;
+
+               alu.dst.sel = ctx->temp_reg;
+               alu.dst.chan = i;
+               alu.dst.write = 1;
+               for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                       r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
+               }
+
+               if (i == 3)
+                       alu.last = 1;
+
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       /* MOV the fraction result (temp channels 2-3) into dst0 according to its write mask */
+       for (i = 0; i <= 1; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+               alu.src[0].chan = i + 2;
+               alu.src[0].sel = ctx->temp_reg;
+
+               tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
+               alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1;
+               alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       for (i = 0; i <= 3; i++) {
+               if (inst->Dst[1].Register.WriteMask & (1 << i)) {
+                       /* MOV the exponent result (temp channel 1) into the first enabled channel of dst1 */
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ALU_OP1_MOV;
+                       alu.src[0].chan = 1;
+                       alu.src[0].sel = ctx->temp_reg;
+
+                       tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst);
+                       alu.last = 1;
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+                       break;
+               }
+       }
+       return 0;
+}
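
For reference, DFRACEXP has the same split semantics as the C library's frexp(3): the value is decomposed into a fraction and a power-of-two exponent, which is what the two MOV passes above route to dst0 and dst1. A minimal sketch:

#include <math.h>
#include <stdio.h>

int main(void)
{
        int exp;
        double frac = frexp(8.0, &exp);          /* 8.0 == 0.5 * 2^4 */
        printf("frac %f exp %d\n", frac, exp);   /* frac 0.500000 exp 4 */
        return 0;
}
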
+
+
+static int egcm_int_to_double(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       int i, r;
+       int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+       assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D ||
+               inst->Instruction.Opcode == TGSI_OPCODE_U2D);
+
+       for (i = 0; i <= (lasti+1)/2; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ctx->inst_info->op;
+
+               r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+               alu.dst.sel = ctx->temp_reg;
+               alu.dst.chan = i;
+               alu.dst.write = 1;
+               alu.last = 1;
+
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       for (i = 0; i <= lasti; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_FLT32_TO_FLT64;
+
+               alu.src[0].chan = i/2;
+               if (i%2 == 0)
+                       alu.src[0].sel = ctx->temp_reg;
+               else {
+                       alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+                       alu.src[0].value = 0x0;
+               }
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.last = i == lasti;
+
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       return 0;
+}
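
The lowering above goes int -> float32 (INT_TO_FLT) and then float32 -> float64 (FLT32_TO_FLT64), so a host-side analogue is (double)(float)x. A sketch showing the precision consequence of that route for integers above 2^24:

#include <stdio.h>

int main(void)
{
        int i = 16777217;                    /* 2^24 + 1 */
        double via_f32 = (double)(float)i;   /* the two-step path used above */
        printf("%.1f\n", via_f32);           /* 16777216.0 -- last bit lost */
        return 0;
}
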
+
+static int egcm_double_to_int(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       int i, r;
+       int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+       assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I ||
+               inst->Instruction.Opcode == TGSI_OPCODE_D2U);
+
+       for (i = 0; i <= lasti; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_FLT64_TO_FLT32;
+
+               r600_bytecode_src(&alu.src[0], &ctx->src[0], fp64_switch(i));
+               alu.dst.chan = i;
+               alu.dst.sel = ctx->temp_reg;
+               alu.dst.write = i%2 == 0;
+               alu.last = i == lasti;
+
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       for (i = 0; i <= (lasti+1)/2; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ctx->inst_info->op;
+
+               alu.src[0].chan = i*2;
+               alu.src[0].sel = ctx->temp_reg;
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.last = 1;
+
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       return 0;
+}
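
Symmetrically, the double -> int path narrows with FLT64_TO_FLT32 and then truncates with FLT_TO_INT (or FLT_TO_UINT), so a host-side sketch of the semantics is:

#include <stdio.h>

int main(void)
{
        double d = 7.9;
        int i = (int)(float)d;   /* narrow, then truncate toward zero */
        printf("%d\n", i);       /* prints 7 */
        return 0;
}
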
+
+static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       int i, r;
+       struct r600_bytecode_alu alu;
+       int last_slot = 3;
+       int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+       int t1 = ctx->temp_reg;
+
+       /* these ops appear to have to write their result to the X/Y channels */
+       for (i = 0 ; i < last_slot; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ctx->inst_info->op;
+
+               /* there should be only one src reg */
+               assert(inst->Instruction.NumSrcRegs == 1);
+
+               r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
+               r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);
+
+               /* RSQ should take the absolute value of src */
+               if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
+                   ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) {
+                       r600_bytecode_src_set_abs(&alu.src[1]);
+               }
+               alu.dst.sel = t1;
+               alu.dst.chan = i;
+               alu.dst.write = (i == 0 || i == 1);
+
+               if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1)
+                       alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       for (i = 0 ; i <= lasti; i++) {
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                       continue;
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+               alu.src[0].sel = t1;
+               alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1;
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.dst.write = 1;
+               if (i == lasti)
+                       alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+}
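
The special case for DRSQ/DSQRT above forces the source to its absolute value; as a reference sketch of the resulting DRSQ semantics:

#include <math.h>
#include <stdio.h>

int main(void)
{
        double x = -4.0;
        printf("%f\n", 1.0 / sqrt(fabs(x)));   /* prints 0.500000 */
        return 0;
}
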
+
 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -2793,6 +3281,55 @@ static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+
+static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       int i, j, k, r;
+       struct r600_bytecode_alu alu;
+       int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+       int t1 = ctx->temp_reg;
+
+       for (k = 0; k < 2; k++) {
+               if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
+                       continue;
+
+               for (i = 0; i < 4; i++) {
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ctx->inst_info->op;
+                       for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                               r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
+                       }
+                       alu.dst.sel = t1;
+                       alu.dst.chan = i;
+                       alu.dst.write = 1;
+                       if (i == 3)
+                               alu.last = 1;
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+       }
+
+       for (i = 0; i <= lasti; i++) {
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                       continue;
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+               alu.src[0].sel = t1;
+               alu.src[0].chan = i;
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.dst.write = 1;
+               if (i == lasti)
+                       alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       return 0;
+}
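
The outer loop above processes the XY and ZW channel pairs independently, each guarded by its half of the write mask; a standalone sketch of that grouping:

#include <stdio.h>

int main(void)
{
        unsigned write_mask = 0xc;   /* ZW pair only */
        int k;

        for (k = 0; k < 2; k++) {
                if (!(write_mask & (0x3 << (k * 2))))
                        continue;    /* this pair is not written */
                printf("emit 4-slot MUL_64 for pair %s\n", k ? "ZW" : "XY");
        }
        return 0;
}
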
+
 /*
  * r600 - trunc to -PI..PI range
  * r700 - normalize by dividing by 2PI
@@ -5053,7 +5590,8 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
                alu.src[0].sel = vtx.dst_gpr;
                alu.src[0].chan = i;
 
-               alu.src[1].sel = 512 + (id * 2);
+               alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL;
+               alu.src[1].sel += (id * 2);
                alu.src[1].chan = i % 4;
                alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
 
@@ -5075,7 +5613,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
                alu.src[0].sel = vtx.dst_gpr;
                alu.src[0].chan = 3;
 
-               alu.src[1].sel = 512 + (id * 2) + 1;
+               alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
                alu.src[1].chan = 0;
                alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
 
@@ -5096,14 +5634,14 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
 
        memset(&alu, 0, sizeof(struct r600_bytecode_alu));
        alu.op = ALU_OP1_MOV;
-
+       alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
        if (ctx->bc->chip_class >= EVERGREEN) {
                /* channel 0 or 2 of each word */
-               alu.src[0].sel = 512 + (id / 2);
+               alu.src[0].sel += (id / 2);
                alu.src[0].chan = (id % 2) * 2;
        } else {
                /* r600 we have them at channel 2 of the second dword */
-               alu.src[0].sel = 512 + (id * 2) + 1;
+               alu.src[0].sel += (id * 2) + 1;
                alu.src[0].chan = 1;
        }
        alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
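
This hunk replaces the magic constant 512 with the named base R600_SHADER_BUFFER_INFO_SEL (assumed here to still equal 512, since the patch only names it) and adds the per-resource offsets on top. A sketch of the two addressing schemes:

#include <stdio.h>

#define R600_SHADER_BUFFER_INFO_SEL 512   /* assumed value, for illustration */

int main(void)
{
        unsigned id = 5;
        unsigned eg_sel   = R600_SHADER_BUFFER_INFO_SEL + (id / 2);       /* evergreen+ */
        unsigned r600_sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;   /* r600 */
        printf("eg %u r600 %u\n", eg_sel, r600_sel);   /* eg 514 r600 523 */
        return 0;
}
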
@@ -5136,6 +5674,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
        /* Texture fetch instructions can only use gprs as source.
         * Also they cannot negate the source or take the absolute value */
        const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
+                                             inst->Instruction.Opcode != TGSI_OPCODE_TXQS &&
                                               tgsi_tex_src_requires_loading(ctx, 0)) ||
                                             read_compressed_msaa || txf_add_offsets;
 
@@ -5761,13 +6300,14 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                memset(&alu, 0, sizeof(struct r600_bytecode_alu));
                alu.op = ALU_OP1_MOV;
 
+               alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
                if (ctx->bc->chip_class >= EVERGREEN) {
                        /* channel 1 or 3 of each word */
-                       alu.src[0].sel = 512 + (id / 2);
+                       alu.src[0].sel += (id / 2);
                        alu.src[0].chan = ((id % 2) * 2) + 1;
                } else {
                        /* r600 we have them at channel 2 of the second dword */
-                       alu.src[0].sel = 512 + (id * 2) + 1;
+                       alu.src[0].sel += (id * 2) + 1;
                        alu.src[0].chan = 2;
                }
                alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
@@ -5880,6 +6420,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                tex.dst_sel_z = 7;
                tex.dst_sel_w = 7;
        }
+       else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
+               tex.dst_sel_x = 3;
+               tex.dst_sel_y = 7;
+               tex.dst_sel_z = 7;
+               tex.dst_sel_w = 7;
+       }
        else {
                tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
                tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
@@ -5888,7 +6434,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
        }
 
 
-       if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
+       if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ||
+           inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
                tex.src_sel_x = 4;
                tex.src_sel_y = 4;
                tex.src_sel_z = 4;
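
A sketch of the dst_sel convention the TXQS lines above rely on (assumed here from the r600 ISA: values 0-3 select components X-W of the fetch result, 7 masks the channel out), so the sample count lands in dst.x only:

#include <stdio.h>

static const char *dst_sel_name(unsigned sel)
{
        switch (sel) {
        case 0: return "X";
        case 1: return "Y";
        case 2: return "Z";
        case 3: return "W";
        case 7: return "masked";
        default: return "?";
        }
}

int main(void)
{
        unsigned txqs_sel[4] = { 3, 7, 7, 7 };   /* dst_sel_x..w from the TXQS case */
        int i;

        for (i = 0; i < 4; i++)
                printf("dst.%c <- %s\n", "xyzw"[i], dst_sel_name(txqs_sel[i]));
        return 0;
}
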
@@ -7200,10 +7747,17 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
 
 static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
 {
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX];
+       int r;
+
        if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
-               emit_gs_ring_writes(ctx, TRUE);
+               emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE);
 
-       return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+       r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+       if (!r)
+               ctx->bc->cf_last->count = stream; /* the count field of CUT/EMIT_VERTEX selects the stream */
+       return r;
 }
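
The stream index for EMIT/CUT is taken from the instruction's immediate operand; literals are stored four dwords per immediate and indexed by swizzle. A sketch of that lookup with hypothetical values:

#include <stdio.h>

int main(void)
{
        int literals[8] = { 2, 0, 0, 0,  1, 0, 0, 0 };   /* two immediates */
        int reg_index = 1, swizzle_x = 0;                /* hypothetical operand */
        int stream = literals[reg_index * 4 + swizzle_x];
        printf("stream %d\n", stream);                   /* prints "stream 1" */
        return 0;
}
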
 
 static int tgsi_umad(struct r600_shader_ctx *ctx)
@@ -7389,7 +7943,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
        [TGSI_OPCODE_ENDLOOP]   = { ALU_OP0_NOP, tgsi_endloop},
        [TGSI_OPCODE_ENDSUB]    = { ALU_OP0_NOP, tgsi_unsupported},
        [TGSI_OPCODE_TXQ_LZ]    = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
-       [104]                   = { ALU_OP0_NOP, tgsi_unsupported},
+       [TGSI_OPCODE_TXQS]      = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
        [105]                   = { ALU_OP0_NOP, tgsi_unsupported},
        [106]                   = { ALU_OP0_NOP, tgsi_unsupported},
        [TGSI_OPCODE_NOP]       = { ALU_OP0_NOP, tgsi_unsupported},
@@ -7588,7 +8142,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
        [TGSI_OPCODE_ENDLOOP]   = { ALU_OP0_NOP, tgsi_endloop},
        [TGSI_OPCODE_ENDSUB]    = { ALU_OP0_NOP, tgsi_unsupported},
        [TGSI_OPCODE_TXQ_LZ]    = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
-       [104]                   = { ALU_OP0_NOP, tgsi_unsupported},
+       [TGSI_OPCODE_TXQS]      = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
        [105]                   = { ALU_OP0_NOP, tgsi_unsupported},
        [106]                   = { ALU_OP0_NOP, tgsi_unsupported},
        [TGSI_OPCODE_NOP]       = { ALU_OP0_NOP, tgsi_unsupported},
@@ -7679,6 +8233,29 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
        [TGSI_OPCODE_INTERP_CENTROID]   = { ALU_OP0_NOP, tgsi_interp_egcm},
        [TGSI_OPCODE_INTERP_SAMPLE]     = { ALU_OP0_NOP, tgsi_interp_egcm},
        [TGSI_OPCODE_INTERP_OFFSET]     = { ALU_OP0_NOP, tgsi_interp_egcm},
+       [TGSI_OPCODE_F2D]       = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
+       [TGSI_OPCODE_D2F]       = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
+       [TGSI_OPCODE_DABS]      = { ALU_OP1_MOV, tgsi_op2_64},
+       [TGSI_OPCODE_DNEG]      = { ALU_OP2_ADD_64, tgsi_dneg},
+       [TGSI_OPCODE_DADD]      = { ALU_OP2_ADD_64, tgsi_op2_64},
+       [TGSI_OPCODE_DMUL]      = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+       [TGSI_OPCODE_DMAX]      = { ALU_OP2_MAX_64, tgsi_op2_64},
+       [TGSI_OPCODE_DMIN]      = { ALU_OP2_MIN_64, tgsi_op2_64},
+       [TGSI_OPCODE_DSLT]      = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
+       [TGSI_OPCODE_DSGE]      = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
+       [TGSI_OPCODE_DSEQ]      = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
+       [TGSI_OPCODE_DSNE]      = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
+       [TGSI_OPCODE_DRCP]      = { ALU_OP2_RECIP_64, cayman_emit_double_instr},
+       [TGSI_OPCODE_DSQRT]     = { ALU_OP2_SQRT_64, cayman_emit_double_instr},
+       [TGSI_OPCODE_DMAD]      = { ALU_OP3_FMA_64, tgsi_op3_64},
+       [TGSI_OPCODE_DFRAC]     = { ALU_OP1_FRACT_64, tgsi_op2_64},
+       [TGSI_OPCODE_DLDEXP]    = { ALU_OP2_LDEXP_64, tgsi_op2_64},
+       [TGSI_OPCODE_DFRACEXP]  = { ALU_OP1_FREXP_64, tgsi_dfracexp},
+       [TGSI_OPCODE_D2I]       = { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
+       [TGSI_OPCODE_I2D]       = { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
+       [TGSI_OPCODE_D2U]       = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
+       [TGSI_OPCODE_U2D]       = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
+       [TGSI_OPCODE_DRSQ]      = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
        [TGSI_OPCODE_LAST]      = { ALU_OP0_NOP, tgsi_unsupported},
 };
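
These tables use C99 designated initializers keyed by opcode, so any slot that is not listed stays zero-initialized and can be treated as unsupported. A minimal sketch of the pattern:

#include <stdio.h>

enum op { OP_NOP, OP_ADD, OP_MUL, OP_LAST };

struct entry { const char *name; };

static const struct entry table[OP_LAST + 1] = {
        [OP_ADD] = { "add" },
        [OP_MUL] = { "mul" },
};

int main(void)
{
        printf("%s\n", table[OP_MUL].name);                       /* "mul" */
        printf("%s\n", table[OP_NOP].name ? table[OP_NOP].name
                                          : "unsupported");       /* zeroed slot */
        return 0;
}
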
 
@@ -7787,7 +8364,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
        [TGSI_OPCODE_ENDLOOP]   = { ALU_OP0_NOP, tgsi_endloop},
        [TGSI_OPCODE_ENDSUB]    = { ALU_OP0_NOP, tgsi_unsupported},
        [TGSI_OPCODE_TXQ_LZ]    = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
-       [104]                   = { ALU_OP0_NOP, tgsi_unsupported},
+       [TGSI_OPCODE_TXQS]      = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
        [105]                   = { ALU_OP0_NOP, tgsi_unsupported},
        [106]                   = { ALU_OP0_NOP, tgsi_unsupported},
        [TGSI_OPCODE_NOP]       = { ALU_OP0_NOP, tgsi_unsupported},
@@ -7878,5 +8455,28 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
        [TGSI_OPCODE_INTERP_CENTROID]   = { ALU_OP0_NOP, tgsi_interp_egcm},
        [TGSI_OPCODE_INTERP_SAMPLE]     = { ALU_OP0_NOP, tgsi_interp_egcm},
        [TGSI_OPCODE_INTERP_OFFSET]     = { ALU_OP0_NOP, tgsi_interp_egcm},
+       [TGSI_OPCODE_F2D]       = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
+       [TGSI_OPCODE_D2F]       = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
+       [TGSI_OPCODE_DABS]      = { ALU_OP1_MOV, tgsi_op2_64},
+       [TGSI_OPCODE_DNEG]      = { ALU_OP2_ADD_64, tgsi_dneg},
+       [TGSI_OPCODE_DADD]      = { ALU_OP2_ADD_64, tgsi_op2_64},
+       [TGSI_OPCODE_DMUL]      = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+       [TGSI_OPCODE_DMAX]      = { ALU_OP2_MAX_64, tgsi_op2_64},
+       [TGSI_OPCODE_DMIN]      = { ALU_OP2_MIN_64, tgsi_op2_64},
+       [TGSI_OPCODE_DSLT]      = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
+       [TGSI_OPCODE_DSGE]      = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
+       [TGSI_OPCODE_DSEQ]      = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
+       [TGSI_OPCODE_DSNE]      = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
+       [TGSI_OPCODE_DRCP]      = { ALU_OP2_RECIP_64, cayman_emit_double_instr},
+       [TGSI_OPCODE_DSQRT]     = { ALU_OP2_SQRT_64, cayman_emit_double_instr},
+       [TGSI_OPCODE_DMAD]      = { ALU_OP3_FMA_64, tgsi_op3_64},
+       [TGSI_OPCODE_DFRAC]     = { ALU_OP1_FRACT_64, tgsi_op2_64},
+       [TGSI_OPCODE_DLDEXP]    = { ALU_OP2_LDEXP_64, tgsi_op2_64},
+       [TGSI_OPCODE_DFRACEXP]  = { ALU_OP1_FREXP_64, tgsi_dfracexp},
+       [TGSI_OPCODE_D2I]       = { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
+       [TGSI_OPCODE_I2D]       = { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
+       [TGSI_OPCODE_D2U]       = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
+       [TGSI_OPCODE_U2D]       = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
+       [TGSI_OPCODE_DRSQ]      = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
        [TGSI_OPCODE_LAST]      = { ALU_OP0_NOP, tgsi_unsupported},
 };
index f5ca9d6..48de9cd 100644 (file)
@@ -78,8 +78,9 @@ struct r600_shader {
        /* Temporarily workaround SB not handling CF_INDEX_[01] index registers */
        boolean                 uses_index_registers;
 
-       /* size in bytes of a data item in the ring (single vertex data) */
-       unsigned                ring_item_size;
+       /* Size in bytes of a data item in the ring(s) (single vertex data).
+          For stages that use only one ring, entries 1-3 are set to 0. */
+       unsigned                ring_item_sizes[4];
 
        unsigned                indirect_files;
        unsigned                max_arrays;
@@ -88,6 +89,8 @@ struct r600_shader {
        unsigned                vs_as_gs_a;
        unsigned                ps_prim_id_input;
        struct r600_shader_array * arrays;
+
+       boolean                 uses_doubles;
 };
 
 union r600_shader_key {
index 5152763..7084c5f 100644 (file)
@@ -260,7 +260,7 @@ static void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom
        default:;
        }
 
-       r600_write_context_reg_seq(cs, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
+       radeon_set_context_reg_seq(cs, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
        radeon_emit(cs, fui(offset_scale));
        radeon_emit(cs, fui(offset_units));
        radeon_emit(cs, fui(offset_scale));
@@ -757,7 +757,7 @@ static void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *at
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
        struct pipe_clip_state *state = &rctx->clip_state.state;
 
-       r600_write_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4);
+       radeon_set_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4);
        radeon_emit_array(cs, (unsigned*)state, 6*4);
 }
 
@@ -769,21 +769,36 @@ static void r600_set_polygon_stipple(struct pipe_context *ctx,
 static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
-       struct r600_scissor_state *rstate = (struct r600_scissor_state *)atom;
-       struct pipe_scissor_state *state = &rstate->scissor;
-       unsigned offset = rstate->idx * 4 * 2;
-
-       if (rctx->b.chip_class != R600 || rctx->scissor[0].enable) {
-               r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
-               radeon_emit(cs, S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) |
-                                    S_028240_WINDOW_OFFSET_DISABLE(1));
-               radeon_emit(cs, S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy));
-       } else {
-               r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
-               radeon_emit(cs, S_028240_TL_X(0) | S_028240_TL_Y(0) |
-                                    S_028240_WINDOW_OFFSET_DISABLE(1));
-               radeon_emit(cs, S_028244_BR_X(8192) | S_028244_BR_Y(8192));
+       struct r600_scissor_state *rstate = &rctx->scissor;
+       struct pipe_scissor_state *state;
+       bool do_disable_workaround = false;
+       uint32_t dirty_mask;
+       unsigned i, offset;
+       uint32_t tl, br;
+
+       if (rctx->b.chip_class == R600 && !rctx->scissor.enable) {
+               tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1);
+               br = S_028244_BR_X(8192) | S_028244_BR_Y(8192);
+               do_disable_workaround = true;
+       }
+
+       dirty_mask = rstate->dirty_mask;
+       while (dirty_mask != 0) {
+               i = u_bit_scan(&dirty_mask);
+               offset = i * 4 * 2;
+               radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
+               if (!do_disable_workaround) {
+                       state = &rstate->scissor[i];
+                       tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) |
+                               S_028240_WINDOW_OFFSET_DISABLE(1);
+                       br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
+               }
+               radeon_emit(cs, tl);
+               radeon_emit(cs, br);
        }
+       rstate->dirty_mask = 0;
+       rstate->atom.num_dw = 0;
 }
 
 static void r600_set_scissor_states(struct pipe_context *ctx,
@@ -792,18 +807,18 @@ static void r600_set_scissor_states(struct pipe_context *ctx,
                                     const struct pipe_scissor_state *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
+       struct r600_scissor_state *rstate = &rctx->scissor;
        int i;
 
-       for (i = start_slot ; i < start_slot + num_scissors; i++) {
-               rctx->scissor[i].scissor = state[i - start_slot];
-       }
+       for (i = start_slot ; i < start_slot + num_scissors; i++)
+               rstate->scissor[i] = state[i - start_slot];
+       rstate->dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+       rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 4;
 
-       if (rctx->b.chip_class == R600 && !rctx->scissor[0].enable)
+       if (rctx->b.chip_class == R600 && !rstate->enable)
                return;
 
-       for (i = start_slot ; i < start_slot + num_scissors; i++) {
-               r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
-       }
+       r600_mark_atom_dirty(rctx, &rstate->atom);
 }
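
And the matching mask update in r600_set_scissor_states marks num_scissors consecutive slots dirty starting at start_slot; a sketch of that arithmetic:

#include <stdio.h>

int main(void)
{
        unsigned start_slot = 2, num_scissors = 3;
        unsigned dirty = ((1u << num_scissors) - 1) << start_slot;
        printf("0x%x\n", dirty);   /* 0x1c: slots 2, 3 and 4 */
        return 0;
}
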
 
 static struct r600_resource *r600_buffer_create_helper(struct r600_screen *rscreen,
@@ -1007,7 +1022,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
 
                /* CMASK. */
                if (!rctx->dummy_cmask ||
-                   rctx->dummy_cmask->buf->size < cmask.size ||
+                   rctx->dummy_cmask->b.b.width0 < cmask.size ||
                    rctx->dummy_cmask->buf->alignment % cmask.alignment != 0) {
                        struct pipe_transfer *transfer;
                        void *ptr;
@@ -1025,7 +1040,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
 
                /* FMASK. */
                if (!rctx->dummy_fmask ||
-                   rctx->dummy_fmask->buf->size < fmask.size ||
+                   rctx->dummy_fmask->b.b.width0 < fmask.size ||
                    rctx->dummy_fmask->buf->alignment % fmask.alignment != 0) {
                        pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, NULL);
                        rctx->dummy_fmask = r600_buffer_create_helper(rscreen, fmask.size, fmask.alignment);
@@ -1322,15 +1337,15 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
                        nr_samples = 0;
                        break;
                case 2:
-                       r600_write_config_reg(cs, R_008B40_PA_SC_AA_SAMPLE_LOCS_2S, sample_locs_2x[0]);
+                       radeon_set_config_reg(cs, R_008B40_PA_SC_AA_SAMPLE_LOCS_2S, sample_locs_2x[0]);
                        max_dist = max_dist_2x;
                        break;
                case 4:
-                       r600_write_config_reg(cs, R_008B44_PA_SC_AA_SAMPLE_LOCS_4S, sample_locs_4x[0]);
+                       radeon_set_config_reg(cs, R_008B44_PA_SC_AA_SAMPLE_LOCS_4S, sample_locs_4x[0]);
                        max_dist = max_dist_4x;
                        break;
                case 8:
-                       r600_write_config_reg_seq(cs, R_008B48_PA_SC_AA_SAMPLE_LOCS_8S_WD0, 2);
+                       radeon_set_config_reg_seq(cs, R_008B48_PA_SC_AA_SAMPLE_LOCS_8S_WD0, 2);
                        radeon_emit(cs, sample_locs_8x[0]); /* R_008B48_PA_SC_AA_SAMPLE_LOCS_8S_WD0 */
                        radeon_emit(cs, sample_locs_8x[1]); /* R_008B4C_PA_SC_AA_SAMPLE_LOCS_8S_WD1 */
                        max_dist = max_dist_8x;
@@ -1339,25 +1354,25 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
        } else {
                switch (nr_samples) {
                default:
-                       r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
+                       radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
                        radeon_emit(cs, 0); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */
                        radeon_emit(cs, 0); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */
                        nr_samples = 0;
                        break;
                case 2:
-                       r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
+                       radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
                        radeon_emit(cs, sample_locs_2x[0]); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */
                        radeon_emit(cs, sample_locs_2x[1]); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */
                        max_dist = max_dist_2x;
                        break;
                case 4:
-                       r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
+                       radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
                        radeon_emit(cs, sample_locs_4x[0]); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */
                        radeon_emit(cs, sample_locs_4x[1]); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */
                        max_dist = max_dist_4x;
                        break;
                case 8:
-                       r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
+                       radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
                        radeon_emit(cs, sample_locs_8x[0]); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */
                        radeon_emit(cs, sample_locs_8x[1]); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */
                        max_dist = max_dist_8x;
@@ -1366,13 +1381,13 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
        }
 
        if (nr_samples > 1) {
-               r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
+               radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
                radeon_emit(cs, S_028C00_LAST_PIXEL(1) |
                                     S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
                radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
                                     S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
        } else {
-               r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
+               radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
                radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
                radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
        }
@@ -1387,7 +1402,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
        unsigned i, sbu = 0;
 
        /* Colorbuffers. */
-       r600_write_context_reg_seq(cs, R_0280A0_CB_COLOR0_INFO, 8);
+       radeon_set_context_reg_seq(cs, R_0280A0_CB_COLOR0_INFO, 8);
        for (i = 0; i < nr_cbufs; i++) {
                radeon_emit(cs, cb[i] ? cb[i]->cb_color_info : 0);
        }
@@ -1408,9 +1423,9 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
                                continue;
 
                        /* COLOR_BASE */
-                       r600_write_context_reg(cs, R_028040_CB_COLOR0_BASE + i*4, cb[i]->cb_color_base);
+                       radeon_set_context_reg(cs, R_028040_CB_COLOR0_BASE + i*4, cb[i]->cb_color_base);
 
-                       reloc = r600_context_bo_reloc(&rctx->b,
+                       reloc = radeon_add_to_buffer_list(&rctx->b,
                                                      &rctx->b.rings.gfx,
                                                      (struct r600_resource*)cb[i]->base.texture,
                                                      RADEON_USAGE_READWRITE,
@@ -1421,9 +1436,9 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
                        radeon_emit(cs, reloc);
 
                        /* FMASK */
-                       r600_write_context_reg(cs, R_0280E0_CB_COLOR0_FRAG + i*4, cb[i]->cb_color_fmask);
+                       radeon_set_context_reg(cs, R_0280E0_CB_COLOR0_FRAG + i*4, cb[i]->cb_color_fmask);
 
-                       reloc = r600_context_bo_reloc(&rctx->b,
+                       reloc = radeon_add_to_buffer_list(&rctx->b,
                                                      &rctx->b.rings.gfx,
                                                      cb[i]->cb_buffer_fmask,
                                                      RADEON_USAGE_READWRITE,
@@ -1434,9 +1449,9 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
                        radeon_emit(cs, reloc);
 
                        /* CMASK */
-                       r600_write_context_reg(cs, R_0280C0_CB_COLOR0_TILE + i*4, cb[i]->cb_color_cmask);
+                       radeon_set_context_reg(cs, R_0280C0_CB_COLOR0_TILE + i*4, cb[i]->cb_color_cmask);
 
-                       reloc = r600_context_bo_reloc(&rctx->b,
+                       reloc = radeon_add_to_buffer_list(&rctx->b,
                                                      &rctx->b.rings.gfx,
                                                      cb[i]->cb_buffer_cmask,
                                                      RADEON_USAGE_READWRITE,
@@ -1447,17 +1462,17 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
                        radeon_emit(cs, reloc);
                }
 
-               r600_write_context_reg_seq(cs, R_028060_CB_COLOR0_SIZE, nr_cbufs);
+               radeon_set_context_reg_seq(cs, R_028060_CB_COLOR0_SIZE, nr_cbufs);
                for (i = 0; i < nr_cbufs; i++) {
                        radeon_emit(cs, cb[i] ? cb[i]->cb_color_size : 0);
                }
 
-               r600_write_context_reg_seq(cs, R_028080_CB_COLOR0_VIEW, nr_cbufs);
+               radeon_set_context_reg_seq(cs, R_028080_CB_COLOR0_VIEW, nr_cbufs);
                for (i = 0; i < nr_cbufs; i++) {
                        radeon_emit(cs, cb[i] ? cb[i]->cb_color_view : 0);
                }
 
-               r600_write_context_reg_seq(cs, R_028100_CB_COLOR0_MASK, nr_cbufs);
+               radeon_set_context_reg_seq(cs, R_028100_CB_COLOR0_MASK, nr_cbufs);
                for (i = 0; i < nr_cbufs; i++) {
                        radeon_emit(cs, cb[i] ? cb[i]->cb_color_mask : 0);
                }
@@ -1475,7 +1490,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
        /* Zbuffer. */
        if (state->zsbuf) {
                struct r600_surface *surf = (struct r600_surface*)state->zsbuf;
-               unsigned reloc = r600_context_bo_reloc(&rctx->b,
+               unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
                                                       &rctx->b.rings.gfx,
                                                       (struct r600_resource*)state->zsbuf->texture,
                                                       RADEON_USAGE_READWRITE,
@@ -1483,26 +1498,26 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
                                                               RADEON_PRIO_DEPTH_BUFFER_MSAA :
                                                               RADEON_PRIO_DEPTH_BUFFER);
 
-               r600_write_context_reg(cs, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+               radeon_set_context_reg(cs, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
                                       surf->pa_su_poly_offset_db_fmt_cntl);
 
-               r600_write_context_reg_seq(cs, R_028000_DB_DEPTH_SIZE, 2);
+               radeon_set_context_reg_seq(cs, R_028000_DB_DEPTH_SIZE, 2);
                radeon_emit(cs, surf->db_depth_size); /* R_028000_DB_DEPTH_SIZE */
                radeon_emit(cs, surf->db_depth_view); /* R_028004_DB_DEPTH_VIEW */
-               r600_write_context_reg_seq(cs, R_02800C_DB_DEPTH_BASE, 2);
+               radeon_set_context_reg_seq(cs, R_02800C_DB_DEPTH_BASE, 2);
                radeon_emit(cs, surf->db_depth_base); /* R_02800C_DB_DEPTH_BASE */
                radeon_emit(cs, surf->db_depth_info); /* R_028010_DB_DEPTH_INFO */
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, reloc);
 
-               r600_write_context_reg(cs, R_028D34_DB_PREFETCH_LIMIT, surf->db_prefetch_limit);
+               radeon_set_context_reg(cs, R_028D34_DB_PREFETCH_LIMIT, surf->db_prefetch_limit);
 
                sbu |= SURFACE_BASE_UPDATE_DEPTH;
        } else if (rctx->screen->b.info.drm_minor >= 18) {
                /* DRM 2.6.18 allows the INVALID format to disable depth/stencil.
                 * Older kernels are out of luck. */
-               r600_write_context_reg(cs, R_028010_DB_DEPTH_INFO, S_028010_FORMAT(V_028010_DEPTH_INVALID));
+               radeon_set_context_reg(cs, R_028010_DB_DEPTH_INFO, S_028010_FORMAT(V_028010_DEPTH_INVALID));
        }
 
        /* SURFACE_BASE_UPDATE */
@@ -1513,19 +1528,19 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
        }
 
        /* Framebuffer dimensions. */
-       r600_write_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
+       radeon_set_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
        radeon_emit(cs, S_028240_TL_X(0) | S_028240_TL_Y(0) |
                             S_028240_WINDOW_OFFSET_DISABLE(1)); /* R_028204_PA_SC_WINDOW_SCISSOR_TL */
        radeon_emit(cs, S_028244_BR_X(state->width) |
                             S_028244_BR_Y(state->height)); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */
 
        if (rctx->framebuffer.is_msaa_resolve) {
-               r600_write_context_reg(cs, R_0287A0_CB_SHADER_CONTROL, 1);
+               radeon_set_context_reg(cs, R_0287A0_CB_SHADER_CONTROL, 1);
        } else {
                /* Always enable the first colorbuffer in CB_SHADER_CONTROL. This
                 * will assure that the alpha-test will work even if there is
                 * no colorbuffer bound. */
-               r600_write_context_reg(cs, R_0287A0_CB_SHADER_CONTROL,
+               radeon_set_context_reg(cs, R_0287A0_CB_SHADER_CONTROL,
                                       (1ull << MAX2(nr_cbufs, 1)) - 1);
        }
 
@@ -1553,7 +1568,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom
        struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
 
        if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) {
-               r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
+               radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
                if (rctx->b.chip_class == R600) {
                        radeon_emit(cs, 0xff); /* R_028238_CB_TARGET_MASK */
                        radeon_emit(cs, 0xff); /* R_02823C_CB_SHADER_MASK */
@@ -1561,17 +1576,17 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom
                        radeon_emit(cs, 0xf); /* R_028238_CB_TARGET_MASK */
                        radeon_emit(cs, 0xf); /* R_02823C_CB_SHADER_MASK */
                }
-               r600_write_context_reg(cs, R_028808_CB_COLOR_CONTROL, a->cb_color_control);
+               radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, a->cb_color_control);
        } else {
                unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
                unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
                unsigned multiwrite = a->multiwrite && a->nr_cbufs > 1;
 
-               r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
+               radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
                radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */
                /* Always enable the first color output to make sure alpha-test works even without one. */
                radeon_emit(cs, 0xf | (multiwrite ? fb_colormask : ps_colormask)); /* R_02823C_CB_SHADER_MASK */
-               r600_write_context_reg(cs, R_028808_CB_COLOR_CONTROL,
+               radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL,
                                       a->cb_color_control |
                                       S_028808_MULTIWRITE_ENABLE(multiwrite));
        }
@@ -1586,15 +1601,15 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
                struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
                unsigned reloc_idx;
 
-               r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
-               r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
-               r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
-               reloc_idx = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
+               radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
+               radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
+               radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
+               reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
                                                  RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
                cs->buf[cs->cdw++] = reloc_idx;
        } else {
-               r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 0);
+               radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 0);
        }
 }
 
@@ -1658,10 +1673,10 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
                db_render_override |= S_028D10_MAX_TILES_IN_DTT(6);
        }
 
-       r600_write_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
+       radeon_set_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
        radeon_emit(cs, db_render_control); /* R_028D0C_DB_RENDER_CONTROL */
        radeon_emit(cs, db_render_override); /* R_028D10_DB_RENDER_OVERRIDE */
-       r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
+       radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
 }
 
 static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
@@ -1669,8 +1684,8 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
        struct r600_config_state *a = (struct r600_config_state*)atom;
 
-       r600_write_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1);
-       r600_write_config_reg(cs, R_008C08_SQ_GPR_RESOURCE_MGMT_2, a->sq_gpr_resource_mgmt_2);
+       radeon_set_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1);
+       radeon_set_config_reg(cs, R_008C08_SQ_GPR_RESOURCE_MGMT_2, a->sq_gpr_resource_mgmt_2);
 }
 
 static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
@@ -1690,11 +1705,11 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
 
                offset = vb->buffer_offset;
 
-               /* fetch resources start at index 320 */
+               /* fetch resources start at index 320 (OFFSET_FS) */
                radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
-               radeon_emit(cs, (320 + buffer_index) * 7);
+               radeon_emit(cs, (R600_FETCH_CONSTANTS_OFFSET_FS + buffer_index) * 7);
                radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
-               radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+               radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
                radeon_emit(cs, /* RESOURCEi_WORD2 */
                                 S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
                                 S_038008_STRIDE(vb->stride));
@@ -1704,7 +1719,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
                radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-               radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+               radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
        }
 }
@@ -1731,19 +1746,19 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
                offset = cb->buffer_offset;
 
                if (!gs_ring_buffer) {
-                       r600_write_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
+                       radeon_set_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
                                               ALIGN_DIVUP(cb->buffer_size >> 4, 16));
-                       r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);
+                       radeon_set_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);
                }
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-               radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+               radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
 
                radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
                radeon_emit(cs, (buffer_id_base + buffer_index) * 7);
                radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
-               radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+               radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
                radeon_emit(cs, /* RESOURCEi_WORD2 */
                            S_038008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
                            S_038008_STRIDE(gs_ring_buffer ? 4 : 16));
@@ -1753,7 +1768,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
                radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-               radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+               radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
 
                dirty_mask &= ~(1 << buffer_index);
@@ -1763,21 +1778,24 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 
 static void r600_emit_vs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-       r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX], 160,
+       r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX],
+                                  R600_FETCH_CONSTANTS_OFFSET_VS,
                                   R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
                                   R_028980_ALU_CONST_CACHE_VS_0);
 }
 
 static void r600_emit_gs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-       r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY], 336,
+       r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY],
+                                  R600_FETCH_CONSTANTS_OFFSET_GS,
                                   R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0,
                                   R_0289C0_ALU_CONST_CACHE_GS_0);
 }
 
 static void r600_emit_ps_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-       r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT], 0,
+       r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT],
+                                  R600_FETCH_CONSTANTS_OFFSET_PS,
                                   R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
                                   R_028940_ALU_CONST_CACHE_PS_0);
 }
@@ -1801,7 +1819,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
                radeon_emit(cs, (resource_id_base + resource_index) * 7);
                radeon_emit_array(cs, rview->tex_resource_words, 7);
 
-               reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
+               reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
                                              RADEON_USAGE_READ,
                                              rview->tex_resource->b.b.nr_samples > 1 ?
                                                      RADEON_PRIO_SHADER_TEXTURE_MSAA :
@@ -1814,26 +1832,20 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
        state->dirty_mask = 0;
 }
 
-/* Resource IDs:
- *   PS: 0   .. +160
- *   VS: 160 .. +160
- *   FS: 320 .. +16
- *   GS: 336 .. +160
- */
 
 static void r600_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
-       r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, 160 + R600_MAX_CONST_BUFFERS);
+       r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, R600_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS);
 }
 
 static void r600_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
-       r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, 336 + R600_MAX_CONST_BUFFERS);
+       r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, R600_FETCH_CONSTANTS_OFFSET_GS + R600_MAX_CONST_BUFFERS);
 }
 
 static void r600_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
-       r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, R600_MAX_CONST_BUFFERS);
+       r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, R600_FETCH_CONSTANTS_OFFSET_PS + R600_MAX_CONST_BUFFERS);
 }
 
 static void r600_emit_sampler_states(struct r600_context *rctx,
@@ -1878,7 +1890,7 @@ static void r600_emit_sampler_states(struct r600_context *rctx,
 
                        offset = border_color_reg;
                        offset += i * 16;
-                       r600_write_config_reg_seq(cs, offset, 4);
+                       radeon_set_config_reg_seq(cs, offset, 4);
                        radeon_emit_array(cs, rstate->border_color.ui, 4);
                }
        }
@@ -1912,7 +1924,7 @@ static void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_a
        if (!rctx->seamless_cube_map.enabled) {
                tmp |= S_009508_DISABLE_CUBE_WRAP(1);
        }
-       r600_write_config_reg(cs, R_009508_TA_CNTL_AUX, tmp);
+       radeon_set_config_reg(cs, R_009508_TA_CNTL_AUX, tmp);
 }
 
 static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
@@ -1920,7 +1932,7 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a
        struct r600_sample_mask *s = (struct r600_sample_mask*)a;
        uint8_t mask = s->sample_mask;
 
-       r600_write_context_reg(rctx->b.rings.gfx.cs, R_028C48_PA_SC_AA_MASK,
+       radeon_set_context_reg(rctx->b.rings.gfx.cs, R_028C48_PA_SC_AA_MASK,
                               mask | (mask << 8) | (mask << 16) | (mask << 24));
 }
 
@@ -1930,9 +1942,9 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600
        struct r600_cso_state *state = (struct r600_cso_state*)a;
        struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
-       r600_write_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
+       radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-       radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
+       radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
                                              RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
 }
 
@@ -1967,8 +1979,8 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom
                        primid = 1;
        }
 
-       r600_write_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
-       r600_write_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
+       radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
+       radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
 }
 
 static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
@@ -1977,34 +1989,34 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
        struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
        struct r600_resource *rbuffer;
 
-       r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+       radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
        radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
 
        if (state->enable) {
                rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
-               r600_write_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 0);
+               radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 0);
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-               radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+               radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READWRITE,
                                                      RADEON_PRIO_SHADER_RESOURCE_RW));
-               r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
+               radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
                                state->esgs_ring.buffer_size >> 8);
 
                rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
-               r600_write_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 0);
+               radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 0);
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-               radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+               radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
                                                      RADEON_USAGE_READWRITE,
                                                      RADEON_PRIO_SHADER_RESOURCE_RW));
-               r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
+               radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
                                state->gsvs_ring.buffer_size >> 8);
        } else {
-               r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
-               r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
+               radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
+               radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
        }
 
-       r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+       radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
        radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
 }
@@ -2051,7 +2063,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
                        /* always privilege vs stage so that at worst we have the
                         * pixel stage producing wrong output (not the vertex
                         * stage) */
-                       new_num_ps_gprs = max_gprs - ((new_num_vs_gprs - new_num_es_gprs - new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
+                       new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
                        new_num_vs_gprs = num_vs_gprs;
                        new_num_gs_gprs = num_gs_gprs;
                        new_num_es_gprs = num_es_gprs;
@@ -2650,7 +2662,7 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
        struct r600_shader *rshader = &shader->shader;
        struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
        unsigned gsvs_itemsize =
-                       (cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2;
+                       (cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2;
 
        r600_init_command_buffer(cb, 64);
 
@@ -2665,10 +2677,10 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
                               r600_conv_prim_to_gs_out(shader->selector->gs_output_prim));
 
        r600_store_context_reg(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE,
-                              cp_shader->ring_item_size >> 2);
+                              cp_shader->ring_item_sizes[0] >> 2);
 
        r600_store_context_reg(cb, R_0288A8_SQ_ESGS_RING_ITEMSIZE,
-                              (rshader->ring_item_size) >> 2);
+                              (rshader->ring_item_sizes[0]) >> 2);
 
        r600_store_context_reg(cb, R_0288AC_SQ_GSVS_RING_ITEMSIZE,
                               gsvs_itemsize);
@@ -2901,9 +2913,9 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
                cheight = cheight > copy_height ? copy_height : cheight;
                size = (cheight * pitch) / 4;
                /* emit reloc before writing cs so that cs is always in consistent state */
-               r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ,
+               radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ,
                                      RADEON_PRIO_MIN);
-               r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE,
+               radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE,
                                      RADEON_PRIO_MIN);
                cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size);
                cs->buf[cs->cdw++] = base >> 8;
@@ -3016,8 +3028,7 @@ fallback:
 
 void r600_init_state_functions(struct r600_context *rctx)
 {
-       unsigned id = 4;
-       int i;
+       unsigned id = 1;
 
        /* !!!
         *  To avoid GPU lockup registers must be emitted in a specific order
@@ -3065,12 +3076,8 @@ void r600_init_state_functions(struct r600_context *rctx)
        r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
        r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, r600_emit_polygon_offset, 6);
        r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
-       for (i = 0;i < R600_MAX_VIEWPORTS; i++) {
-               r600_init_atom(rctx, &rctx->scissor[i].atom, id++, r600_emit_scissor_state, 4);
-               r600_init_atom(rctx, &rctx->viewport[i].atom, id++, r600_emit_viewport_state, 8);
-               rctx->scissor[i].idx = i;
-               rctx->viewport[i].idx = i;
-       }
+       r600_init_atom(rctx, &rctx->scissor.atom, id++, r600_emit_scissor_state, 0);
+       r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 0);
        r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3);
        r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
        r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
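
The atom ids assigned above now index a single 64-bit dirty mask rather than a per-atom dirty flag (see the removal of atom->dirty below and the u_bit_scan64 draw loop further down). A minimal standalone sketch of that idiom, assuming u_bit_scan64 behaves like Mesa's helper (return the lowest set bit's index, then clear it):

#include <stdint.h>
#include <stdio.h>

static unsigned u_bit_scan64(uint64_t *mask)
{
        unsigned i = (unsigned)__builtin_ctzll(*mask); /* lowest set bit */
        *mask &= *mask - 1;                            /* clear that bit */
        return i;
}

int main(void)
{
        uint64_t dirty_atoms = 0;

        dirty_atoms |= 1ull << 3;  /* as r600_mark_atom_dirty() would for id 3 */
        dirty_atoms |= 1ull << 7;  /* and for id 7 */

        uint64_t mask = dirty_atoms;
        while (mask != 0)
                printf("emit atom %u\n", u_bit_scan64(&mask)); /* 3, then 7 */
        return 0;
}
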
index a650649..efce852 100644
@@ -56,7 +56,6 @@ void r600_add_atom(struct r600_context *rctx,
        assert(rctx->atoms[id] == NULL);
        rctx->atoms[id] = atom;
        atom->id = id;
-       atom->dirty = false;
 }
 
 void r600_init_atom(struct r600_context *rctx,
@@ -85,10 +84,10 @@ void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom
                alpha_ref &= ~0x1FFF;
        }
 
-       r600_write_context_reg(cs, R_028410_SX_ALPHA_TEST_CONTROL,
+       radeon_set_context_reg(cs, R_028410_SX_ALPHA_TEST_CONTROL,
                               a->sx_alpha_test_control |
                               S_028410_ALPHA_TEST_BYPASS(a->bypass));
-       r600_write_context_reg(cs, R_028438_SX_ALPHA_REF, alpha_ref);
+       radeon_set_context_reg(cs, R_028438_SX_ALPHA_REF, alpha_ref);
 }
 
 static void r600_texture_barrier(struct pipe_context *ctx)
@@ -215,7 +214,7 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
        struct pipe_blend_color *state = &rctx->blend_color.state;
 
-       r600_write_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
+       radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
        radeon_emit(cs, fui(state->color[0])); /* R_028414_CB_BLEND_RED */
        radeon_emit(cs, fui(state->color[1])); /* R_028418_CB_BLEND_GREEN */
        radeon_emit(cs, fui(state->color[2])); /* R_02841C_CB_BLEND_BLUE */
@@ -227,13 +226,13 @@ void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom)
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
        struct r600_vgt_state *a = (struct r600_vgt_state *)atom;
 
-       r600_write_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
-       r600_write_context_reg_seq(cs, R_028408_VGT_INDX_OFFSET, 2);
+       radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
+       radeon_set_context_reg_seq(cs, R_028408_VGT_INDX_OFFSET, 2);
        radeon_emit(cs, a->vgt_indx_offset); /* R_028408_VGT_INDX_OFFSET */
        radeon_emit(cs, a->vgt_multi_prim_ib_reset_indx); /* R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX */
        if (a->last_draw_was_indirect) {
                a->last_draw_was_indirect = false;
-               r600_write_ctl_const(cs, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
+               radeon_set_ctl_const(cs, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
        }
 }
 
@@ -241,17 +240,10 @@ static void r600_set_clip_state(struct pipe_context *ctx,
                                const struct pipe_clip_state *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct pipe_constant_buffer cb;
 
        rctx->clip_state.state = *state;
        r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
-
-       cb.buffer = NULL;
-       cb.user_buffer = state->ucp;
-       cb.buffer_offset = 0;
-       cb.buffer_size = 4*4*8;
-       ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, R600_UCP_CONST_BUFFER, &cb);
-       pipe_resource_reference(&cb.buffer, NULL);
+       rctx->driver_consts[PIPE_SHADER_VERTEX].vs_ucp_dirty = true;
 }
 
 static void r600_set_stencil_ref(struct pipe_context *ctx,
@@ -268,7 +260,7 @@ void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
        struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom;
 
-       r600_write_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
+       radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
        radeon_emit(cs, /* R_028430_DB_STENCILREFMASK */
                         S_028430_STENCILREF(a->state.ref_value[0]) |
                         S_028430_STENCILMASK(a->state.valuemask[0]) |
@@ -372,9 +364,11 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
 
        /* Workaround for a missing scissor enable on r600. */
        if (rctx->b.chip_class == R600 &&
-           rs->scissor_enable != rctx->scissor[0].enable) {
-               rctx->scissor[0].enable = rs->scissor_enable;
-               r600_mark_atom_dirty(rctx, &rctx->scissor[0].atom);
+           rs->scissor_enable != rctx->scissor.enable) {
+               rctx->scissor.enable = rs->scissor_enable;
+               rctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+               rctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4;
+               r600_mark_atom_dirty(rctx, &rctx->scissor.atom);
        }
 
        /* Re-emit PA_SC_LINE_STIPPLE. */
@@ -703,28 +697,39 @@ static void r600_set_viewport_states(struct pipe_context *ctx,
                                      const struct pipe_viewport_state *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
+       struct r600_viewport_state *rstate = &rctx->viewport;
        int i;
 
-       for (i = start_slot; i < start_slot + num_viewports; i++) {
-               rctx->viewport[i].state = state[i - start_slot];
-               r600_mark_atom_dirty(rctx, &rctx->viewport[i].atom);
-       }
+       for (i = start_slot; i < start_slot + num_viewports; i++)
+               rstate->state[i] = state[i - start_slot];
+       rstate->dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
+       rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 8;
+       r600_mark_atom_dirty(rctx, &rctx->viewport.atom);
 }
 
 void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
-       struct r600_viewport_state *rstate = (struct r600_viewport_state *)atom;
-       struct pipe_viewport_state *state = &rstate->state;
-       int offset = rstate->idx * 6 * 4;
-
-       r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, 6);
-       radeon_emit(cs, fui(state->scale[0]));     /* R_02843C_PA_CL_VPORT_XSCALE_0  */
-       radeon_emit(cs, fui(state->translate[0])); /* R_028440_PA_CL_VPORT_XOFFSET_0 */
-       radeon_emit(cs, fui(state->scale[1]));     /* R_028444_PA_CL_VPORT_YSCALE_0  */
-       radeon_emit(cs, fui(state->translate[1])); /* R_028448_PA_CL_VPORT_YOFFSET_0 */
-       radeon_emit(cs, fui(state->scale[2]));     /* R_02844C_PA_CL_VPORT_ZSCALE_0  */
-       radeon_emit(cs, fui(state->translate[2])); /* R_028450_PA_CL_VPORT_ZOFFSET_0 */
+       struct r600_viewport_state *rstate = &rctx->viewport;
+       struct pipe_viewport_state *state;
+       uint32_t dirty_mask;
+       unsigned i, offset;
+
+       dirty_mask = rstate->dirty_mask;
+       while (dirty_mask != 0) {
+               i = u_bit_scan(&dirty_mask);
+               offset = i * 6 * 4;
+               radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, 6);
+               state = &rstate->state[i];
+               radeon_emit(cs, fui(state->scale[0]));     /* R_02843C_PA_CL_VPORT_XSCALE_0  */
+               radeon_emit(cs, fui(state->translate[0])); /* R_028440_PA_CL_VPORT_XOFFSET_0 */
+               radeon_emit(cs, fui(state->scale[1]));     /* R_028444_PA_CL_VPORT_YSCALE_0  */
+               radeon_emit(cs, fui(state->translate[1])); /* R_028448_PA_CL_VPORT_YOFFSET_0 */
+               radeon_emit(cs, fui(state->scale[2]));     /* R_02844C_PA_CL_VPORT_ZSCALE_0  */
+               radeon_emit(cs, fui(state->translate[2])); /* R_028450_PA_CL_VPORT_ZOFFSET_0 */
+       }
+       rstate->dirty_mask = 0;
+       rstate->atom.num_dw = 0;
 }
 
 /* Compute the key for the hw shader variant */
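
The six per-viewport atoms collapse into one atom sized from the dirty mask: each dirty slot costs a 2-dword SET_CONTEXT_REG header plus the six XSCALE..ZOFFSET values, which is where util_bitcount(rstate->dirty_mask) * 8 comes from. A standalone sketch of that sizing, with util_bitcount standing in for Mesa's popcount helper:

#include <stdint.h>

static unsigned util_bitcount(uint32_t v)
{
        return (unsigned)__builtin_popcount(v); /* number of set bits */
}

/* 2 header dwords + 6 register values per dirty viewport slot */
unsigned viewport_atom_dwords(uint32_t dirty_mask)
{
        return util_bitcount(dirty_mask) * 8;
}
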
@@ -1042,6 +1047,74 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
        r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom);
 }
 
+static void r600_update_driver_const_buffers(struct r600_context *rctx)
+{
+       int sh, size;
+       void *ptr;
+       struct pipe_constant_buffer cb;
+       for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
+               struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh];
+               if (!info->vs_ucp_dirty &&
+                   !info->texture_const_dirty &&
+                   !info->ps_sample_pos_dirty)
+                       continue;
+
+               ptr = info->constants;
+               size = info->alloc_size;
+               if (info->vs_ucp_dirty) {
+                       assert(sh == PIPE_SHADER_VERTEX);
+                       if (!size) {
+                               ptr = rctx->clip_state.state.ucp;
+                               size = R600_UCP_SIZE;
+                       } else {
+                               memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
+                       }
+                       info->vs_ucp_dirty = false;
+               }
+
+               if (info->ps_sample_pos_dirty) {
+                       assert(sh == PIPE_SHADER_FRAGMENT);
+                       if (!size) {
+                               ptr = rctx->sample_positions;
+                               size = R600_UCP_SIZE;
+                       } else {
+                               memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
+                       }
+                       info->ps_sample_pos_dirty = false;
+               }
+
+               if (info->texture_const_dirty) {
+                       assert(ptr);
+                       assert(size);
+                       if (sh == PIPE_SHADER_VERTEX)
+                               memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
+                       if (sh == PIPE_SHADER_FRAGMENT)
+                               memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
+               }
+               info->texture_const_dirty = false;
+
+               cb.buffer = NULL;
+               cb.user_buffer = ptr;
+               cb.buffer_offset = 0;
+               cb.buffer_size = size;
+               rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, &cb);
+               pipe_resource_reference(&cb.buffer, NULL);
+       }
+}
+
+static void *r600_alloc_buf_consts(struct r600_context *rctx, int shader_type,
+                                  int array_size, uint32_t *base_offset)
+{
+       struct r600_shader_driver_constants_info *info = &rctx->driver_consts[shader_type];
+       if (array_size + R600_UCP_SIZE > info->alloc_size) {
+               info->constants = realloc(info->constants, array_size + R600_UCP_SIZE);
+               info->alloc_size = array_size + R600_UCP_SIZE;
+       }
+       memset(info->constants + (R600_UCP_SIZE / 4), 0, array_size);
+       info->texture_const_dirty = true;
+       *base_offset = R600_UCP_SIZE;
+       return info->constants;
+}
+
 /*
  * On r600/700 hw we don't have vertex fetch swizzle, though TBO
  * doesn't require full swizzles, it does need masking and setting alpha
@@ -1056,9 +1129,9 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
        struct r600_textures_info *samplers = &rctx->samplers[shader_type];
        int bits;
        uint32_t array_size;
-       struct pipe_constant_buffer cb;
        int i, j;
-
+       uint32_t *constants;
+       uint32_t base_offset;
        if (!samplers->views.dirty_buffer_constants)
                return;
 
@@ -1066,38 +1139,33 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
 
        bits = util_last_bit(samplers->views.enabled_mask);
        array_size = bits * 8 * sizeof(uint32_t) * 4;
-       samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
-       memset(samplers->buffer_constants, 0, array_size);
+
+       constants = r600_alloc_buf_consts(rctx, shader_type, array_size, &base_offset);
+
        for (i = 0; i < bits; i++) {
                if (samplers->views.enabled_mask & (1 << i)) {
-                       int offset = i * 8;
+                       int offset = (base_offset / 4) + i * 8;
                        const struct util_format_description *desc;
                        desc = util_format_description(samplers->views.views[i]->base.format);
 
                        for (j = 0; j < 4; j++)
                                if (j < desc->nr_channels)
-                                       samplers->buffer_constants[offset+j] = 0xffffffff;
+                                       constants[offset+j] = 0xffffffff;
                                else
-                                       samplers->buffer_constants[offset+j] = 0x0;
+                                       constants[offset+j] = 0x0;
                        if (desc->nr_channels < 4) {
                                if (desc->channel[0].pure_integer)
-                                       samplers->buffer_constants[offset+4] = 1;
+                                       constants[offset+4] = 1;
                                else
-                                       samplers->buffer_constants[offset+4] = fui(1.0);
+                                       constants[offset+4] = fui(1.0);
                        } else
-                               samplers->buffer_constants[offset + 4] = 0;
+                               constants[offset + 4] = 0;
 
-                       samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
-                       samplers->buffer_constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
+                       constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+                       constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
                }
        }
 
-       cb.buffer = NULL;
-       cb.user_buffer = samplers->buffer_constants;
-       cb.buffer_offset = 0;
-       cb.buffer_size = array_size;
-       rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
-       pipe_resource_reference(&cb.buffer, NULL);
 }
 
 /* On evergreen we store two values
@@ -1109,9 +1177,9 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
        struct r600_textures_info *samplers = &rctx->samplers[shader_type];
        int bits;
        uint32_t array_size;
-       struct pipe_constant_buffer cb;
        int i;
-
+       uint32_t *constants;
+       uint32_t base_offset;
        if (!samplers->views.dirty_buffer_constants)
                return;
 
@@ -1119,45 +1187,37 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
 
        bits = util_last_bit(samplers->views.enabled_mask);
        array_size = bits * 2 * sizeof(uint32_t) * 4;
-       samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
-       memset(samplers->buffer_constants, 0, array_size);
+
+       constants = r600_alloc_buf_consts(rctx, shader_type, array_size,
+                                         &base_offset);
+
        for (i = 0; i < bits; i++) {
                if (samplers->views.enabled_mask & (1 << i)) {
-                       uint32_t offset = i * 2;
-                       samplers->buffer_constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
-                       samplers->buffer_constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
+                       uint32_t offset = (base_offset / 4) + i * 2;
+                       constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+                       constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
                }
        }
-
-       cb.buffer = NULL;
-       cb.user_buffer = samplers->buffer_constants;
-       cb.buffer_offset = 0;
-       cb.buffer_size = array_size;
-       rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
-       pipe_resource_reference(&cb.buffer, NULL);
 }
 
 /* set sample xy locations as array of fragment shader constants */
 void r600_set_sample_locations_constant_buffer(struct r600_context *rctx)
 {
-       struct pipe_constant_buffer constbuf = {0};
-       float values[4*16] = {0.0f};
        int i;
        struct pipe_context *ctx = &rctx->b.b;
 
-       assert(rctx->framebuffer.nr_samples <= Elements(values)/4);
+       assert(rctx->framebuffer.nr_samples < R600_UCP_SIZE);
+       assert(rctx->framebuffer.nr_samples <= Elements(rctx->sample_positions)/4);
+
+       memset(rctx->sample_positions, 0, 4 * 4 * 16);
        for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
-               ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &values[4*i]);
+               ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &rctx->sample_positions[4*i]);
                /* Also fill in center-zeroed positions used for interpolateAtSample */
-               values[4*i + 2] = values[4*i + 0] - 0.5f;
-               values[4*i + 3] = values[4*i + 1] - 0.5f;
+               rctx->sample_positions[4*i + 2] = rctx->sample_positions[4*i + 0] - 0.5f;
+               rctx->sample_positions[4*i + 3] = rctx->sample_positions[4*i + 1] - 0.5f;
        }
 
-       constbuf.user_buffer = values;
-       constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4;
-       ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
-               R600_SAMPLE_POSITIONS_CONST_BUFFER, &constbuf);
-       pipe_resource_reference(&constbuf.buffer, NULL);
+       rctx->driver_consts[PIPE_SHADER_FRAGMENT].ps_sample_pos_dirty = true;
 }
 
 static void update_shader_atom(struct pipe_context *ctx,
@@ -1376,6 +1436,8 @@ static bool r600_update_derived_state(struct r600_context *rctx)
                }
        }
 
+       r600_update_driver_const_buffers(rctx);
+
        if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
                if (!r600_adjust_gprs(rctx)) {
                        /* discard rendering */
@@ -1401,11 +1463,11 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
        struct r600_clip_misc_state *state = &rctx->clip_misc_state;
 
-       r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
+       radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
                               state->pa_cl_clip_cntl |
                               (state->clip_dist_write ? 0 : state->clip_plane_enable & 0x3F) |
                                S_028810_CLIP_DISABLE(state->clip_disable));
-       r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
+       radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
                               state->pa_cl_vs_out_cntl |
                               (state->clip_plane_enable & state->clip_dist_write));
 }
@@ -1415,8 +1477,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct pipe_draw_info info = *dinfo;
        struct pipe_index_buffer ib = {};
-       unsigned i;
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+       uint64_t mask;
 
        if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
                return;
@@ -1526,10 +1588,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
        r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
        r600_flush_emit(rctx);
 
-       i = r600_next_dirty_atom(rctx, 0);
-       while (i < R600_NUM_ATOMS) {
-               r600_emit_atom(rctx, rctx->atoms[i]);
-               i = r600_next_dirty_atom(rctx, i + 1);
+       mask = rctx->dirty_atoms;
+       while (mask != 0) {
+               r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]);
        }
 
        if (rctx->b.chip_class == CAYMAN) {
@@ -1550,7 +1611,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                    rctx->b.streamout.prims_gen_query_enabled)
                        partial_vs_wave = true;
 
-               r600_write_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM,
+               radeon_set_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM,
                                       S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
                                       S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
                                       S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1));
@@ -1572,12 +1633,12 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                    info.mode == R600_PRIM_RECTANGLE_LIST) {
                        su_sc_mode_cntl &= C_028814_CULL_FRONT;
                }
-               r600_write_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, su_sc_mode_cntl);
+               radeon_set_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, su_sc_mode_cntl);
        }
 
        /* Update start instance. */
        if (!info.indirect && rctx->last_start_instance != info.start_instance) {
-               r600_write_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance);
+               radeon_set_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance);
                rctx->last_start_instance = info.start_instance;
        }
 
@@ -1591,10 +1652,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                         info.mode == PIPE_PRIM_LINE_LOOP)
                        ls_mask = 2;
 
-               r600_write_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
+               radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
                                       S_028A0C_AUTO_RESET_CNTL(ls_mask) |
                                       (rctx->rasterizer ? rctx->rasterizer->pa_sc_line_stipple : 0));
-               r600_write_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE,
+               radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE,
                                      r600_conv_pipe_prim(info.mode));
 
                rctx->last_primitive_type = info.mode;
@@ -1620,7 +1681,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
 
                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
-               cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+               cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                                           (struct r600_resource*)info.indirect,
                                                           RADEON_USAGE_READ, RADEON_PRIO_MIN);
        }
@@ -1649,7 +1710,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                                cs->buf[cs->cdw++] = info.count;
                                cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
                                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
-                               cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+                               cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                                                           (struct r600_resource*)ib.buffer,
                                                                           RADEON_USAGE_READ, RADEON_PRIO_MIN);
                        }
@@ -1661,7 +1722,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                                cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
 
                                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
-                               cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+                               cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                                                           (struct r600_resource*)ib.buffer,
                                                                           RADEON_USAGE_READ, RADEON_PRIO_MIN);
 
@@ -1678,7 +1739,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                        struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;
                        uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset;
 
-                       r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
+                       radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
 
                        cs->buf[cs->cdw++] = PKT3(PKT3_COPY_DW, 4, 0);
                        cs->buf[cs->cdw++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG;
@@ -1688,7 +1749,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                        cs->buf[cs->cdw++] = 0; /* unused */
 
                        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                       cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+                       cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
                                                                   t->buf_filled_size, RADEON_USAGE_READ,
                                                                   RADEON_PRIO_MIN);
                }
@@ -1879,7 +1940,7 @@ void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
 
        r600_emit_command_buffer(cs, &shader->command_buffer);
        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-       radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->bo,
+       radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->bo,
                                              RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
 }
 
@@ -2607,7 +2668,7 @@ void r600_trace_emit(struct r600_context *rctx)
        uint32_t reloc;
 
        va = rscreen->b.trace_bo->gpu_address;
-       reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo,
+       reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo,
                                      RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
        radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
        radeon_emit(cs, va & 0xFFFFFFFFUL);
index 3c08ba5..6bba88c 100644
 #define DMA_PACKET_CONSTANT_FILL       0xd /* 7xx only */
 #define DMA_PACKET_NOP                 0xf
 
+
+/* Resource IDs:
+ *   PS: 0   .. +160
+ *   VS: 160 .. +160
+ *   FS: 320 .. +16
+ *   GS: 336 .. +160
+ */
+#define R600_FETCH_CONSTANTS_OFFSET_PS 0
+#define R600_FETCH_CONSTANTS_OFFSET_VS 160
+#define R600_FETCH_CONSTANTS_OFFSET_FS 320
+#define R600_FETCH_CONSTANTS_OFFSET_GS 336
 #endif
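
The comment above fixes each stage's fetch-resource window, so buffer resource i of a stage lands at the stage base plus i. A hypothetical helper (not part of the patch) just to make the arithmetic concrete:

/* Illustrative only: slot i of a stage's fetch constants is base + i,
 * e.g. VS resource 2 -> R600_FETCH_CONSTANTS_OFFSET_VS + 2 = 162. */
static unsigned fetch_resource_id(unsigned stage_base, unsigned i)
{
        return stage_base + i;
}
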
index 5232782..0fc73c4 100644
@@ -182,6 +182,9 @@ void bc_dump::dump(cf_node& n) {
 
                if (n.bc.pop_count)
                        s << " POP:" << n.bc.pop_count;
+
+               if (n.bc.count && (n.bc.op_ptr->flags & CF_EMIT))
+                       s << " STREAM" << n.bc.count;
        }
 
        if (!n.bc.barrier)
@@ -466,6 +469,10 @@ void bc_dump::dump(fetch_node& n) {
                for (unsigned k = 0; k < 3; ++k)
                        if (n.bc.offset[k])
                                s << " O" << chans[k] << ":" << n.bc.offset[k];
+               if (ctx.is_egcm() && n.bc.resource_index_mode)
+                       s << " RIM:SQ_CF_INDEX_" << n.bc.resource_index_mode;
+               if (ctx.is_egcm() && n.bc.sampler_index_mode)
+                       s << " SID:SQ_CF_INDEX_" << n.bc.sampler_index_mode;
        }
 
        sblog << s.str() << "\n";
index dadee45..522ff9d 100644
@@ -764,8 +764,6 @@ void bc_finalizer::finalize_cf(cf_node* c) {
                        mask |= (1 << chan);
                }
 
-               assert(reg >= 0 && mask);
-
                if (reg >= 0)
                        update_ngpr(reg);
 
index c479927..19bd078 100644
@@ -757,10 +757,22 @@ int bc_parser::prepare_ir() {
                        c->bc.end_of_program = eop;
 
                } else if (flags & CF_EMIT) {
-                       c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;
+                       /* quick peephole */
+                       cf_node *prev = static_cast<cf_node *>(c->prev);
+                       if (c->bc.op == CF_OP_CUT_VERTEX &&
+                               prev && prev->is_valid() &&
+                               prev->bc.op == CF_OP_EMIT_VERTEX &&
+                               c->bc.count == prev->bc.count) {
+                               prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX);
+                               prev->bc.end_of_program = c->bc.end_of_program;
+                               c->remove();
+                       } else {
+                               c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;
 
-                       c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
-                       c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+                               c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+                               c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+                       }
                }
        }
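
The effect of the peephole on adjacent emit/cut pairs, shown illustratively:

/* Illustrative CF stream, before and after the peephole:
 *
 *   before:  CF_OP_EMIT_VERTEX     (stream n)
 *            CF_OP_CUT_VERTEX      (stream n)
 *   after:   CF_OP_EMIT_CUT_VERTEX (stream n)
 *
 * end_of_program is carried over from the removed CUT_VERTEX so the
 * merged instruction can still terminate the shader. */
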
 
index 12a5f60..c6afa82 100644
@@ -144,19 +144,19 @@ void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
 {
        switch (nr_samples) {
        case 2:
-               r600_write_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
-               r600_write_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
-               r600_write_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
-               r600_write_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
+               radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
+               radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
+               radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
+               radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
                break;
        case 4:
-               r600_write_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
-               r600_write_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
-               r600_write_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
-               r600_write_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
+               radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
+               radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
+               radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
+               radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
                break;
        case 8:
-               r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
+               radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
                radeon_emit(cs, cm_sample_locs_8x[0]);
                radeon_emit(cs, cm_sample_locs_8x[4]);
                radeon_emit(cs, 0);
@@ -173,7 +173,7 @@ void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
                radeon_emit(cs, cm_sample_locs_8x[7]);
                break;
        case 16:
-               r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
+               radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
                radeon_emit(cs, cm_sample_locs_16x[0]);
                radeon_emit(cs, cm_sample_locs_16x[4]);
                radeon_emit(cs, cm_sample_locs_16x[8]);
@@ -213,7 +213,7 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
                unsigned log_ps_iter_samples =
                        util_logbase2(util_next_power_of_two(ps_iter_samples));
 
-               r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+               radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
                radeon_emit(cs, S_028BDC_LAST_PIXEL(1) |
                            S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
                radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
@@ -221,30 +221,30 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
                            S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
 
                if (nr_samples > 1) {
-                       r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
+                       radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
                                               S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
                                               S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
                                               S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
                                               S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
                                               S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
                                               S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
-                       r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+                       radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
                                             EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
                } else if (overrast_samples > 1) {
-                       r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
+                       radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
                                               S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
                                               S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
                                               S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
-                       r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
+                       radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
                }
        } else {
-               r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+               radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
                radeon_emit(cs, S_028BDC_LAST_PIXEL(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
                radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
 
-               r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
+               radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
                                       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
                                       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
-               r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
+               radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
        }
 }
index cb9809f..f341ecb 100644
@@ -305,12 +305,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                data += box->x % R600_MAP_BUFFER_ALIGNMENT;
                                return r600_buffer_get_transfer(ctx, resource, level, usage, box,
                                                                ptransfer, data, staging, offset);
-                       } else {
-                               return NULL; /* error, shouldn't occur though */
                        }
+               } else {
+                       /* At this point, the buffer is always idle (we checked it above). */
+                       usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
                }
-               /* At this point, the buffer is always idle (we checked it above). */
-               usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
        }
        /* Using a staging buffer in GTT for larger reads is much faster. */
        else if ((usage & PIPE_TRANSFER_READ) &&
@@ -346,37 +345,59 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                        ptransfer, data, NULL, 0);
 }
 
-static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
-                                      struct pipe_transfer *transfer)
+static void r600_buffer_do_flush_region(struct pipe_context *ctx,
+                                       struct pipe_transfer *transfer,
+                                       const struct pipe_box *box)
 {
        struct r600_common_context *rctx = (struct r600_common_context*)ctx;
        struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
        struct r600_resource *rbuffer = r600_resource(transfer->resource);
 
        if (rtransfer->staging) {
-               if (rtransfer->transfer.usage & PIPE_TRANSFER_WRITE) {
-                       struct pipe_resource *dst, *src;
-                       unsigned soffset, doffset, size;
-                       struct pipe_box box;
+               struct pipe_resource *dst, *src;
+               unsigned soffset;
+               struct pipe_box dma_box;
 
-                       dst = transfer->resource;
-                       src = &rtransfer->staging->b.b;
-                       size = transfer->box.width;
-                       doffset = transfer->box.x;
-                       soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;
+               dst = transfer->resource;
+               src = &rtransfer->staging->b.b;
+               soffset = rtransfer->offset + box->x % R600_MAP_BUFFER_ALIGNMENT;
 
-                       u_box_1d(soffset, size, &box);
+               u_box_1d(soffset, box->width, &dma_box);
 
-                       /* Copy the staging buffer into the original one. */
-                       rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
-               }
-               pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
+               /* Copy the staging buffer into the original one. */
+               rctx->dma_copy(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box);
        }
 
-       if (transfer->usage & PIPE_TRANSFER_WRITE) {
-               util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
-                              transfer->box.x + transfer->box.width);
+       util_range_add(&rbuffer->valid_buffer_range, box->x,
+                      box->x + box->width);
+}
+
+static void r600_buffer_flush_region(struct pipe_context *ctx,
+                                    struct pipe_transfer *transfer,
+                                    const struct pipe_box *rel_box)
+{
+       if (transfer->usage & (PIPE_TRANSFER_WRITE |
+                              PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+               struct pipe_box box;
+
+               u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
+               r600_buffer_do_flush_region(ctx, transfer, &box);
        }
+}
+
+static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
+                                      struct pipe_transfer *transfer)
+{
+       struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+       struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+
+       if (transfer->usage & PIPE_TRANSFER_WRITE &&
+           !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
+               r600_buffer_do_flush_region(ctx, transfer, &transfer->box);
+
+       if (rtransfer->staging)
+               pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
+
        util_slab_free(&rctx->pool_transfers, transfer);
 }
 
@@ -385,7 +406,7 @@ static const struct u_resource_vtbl r600_buffer_vtbl =
        NULL,                           /* get_handle */
        r600_buffer_destroy,            /* resource_destroy */
        r600_buffer_transfer_map,       /* transfer_map */
-       NULL,                           /* transfer_flush_region */
+       r600_buffer_flush_region,       /* transfer_flush_region */
        r600_buffer_transfer_unmap,     /* transfer_unmap */
        NULL                            /* transfer_inline_write */
 };
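
With the new hook, a mapping made with PIPE_TRANSFER_FLUSH_EXPLICIT copies back only the ranges the caller flushes; unmap alone writes nothing. A hedged state-tracker-side sketch, assuming the standard Gallium entry points (pipe, buf, map_box and data are placeholders):

#include <stdint.h>
#include <string.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "util/u_box.h"

static void update_subrange(struct pipe_context *pipe,
                            struct pipe_resource *buf,
                            const struct pipe_box *map_box,
                            const void *data)
{
        struct pipe_transfer *t;
        struct pipe_box box;
        uint8_t *map;

        map = pipe->transfer_map(pipe, buf, 0,
                                 PIPE_TRANSFER_WRITE |
                                 PIPE_TRANSFER_FLUSH_EXPLICIT,
                                 map_box, &t);
        if (!map)
                return;

        memcpy(map + 16, data, 64);   /* touch bytes 16..79 of the mapping */
        u_box_1d(16, 64, &box);       /* box is relative to transfer->box */
        pipe->transfer_flush_region(pipe, t, &box);
        pipe->transfer_unmap(pipe, t);
}
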
index 03a04b7..fa40dc4 100644
 #include "r600_pipe_common.h"
 #include "r600d_common.h"
 
-static inline unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
-                                            struct r600_ring *ring,
-                                            struct r600_resource *rbo,
-                                            enum radeon_bo_usage usage,
-                                            enum radeon_bo_priority priority)
+/**
+ * Add a buffer to the buffer list for the given command stream (CS).
+ *
+ * All buffers used by a CS must be added to the list. This tells the kernel
+ * driver which buffers are used by GPU commands. Other buffers can
+ * be swapped out (not accessible) during execution.
+ *
+ * The buffer list becomes empty after every context flush and must be
+ * rebuilt.
+ */
+static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rctx,
+                                                struct r600_ring *ring,
+                                                struct r600_resource *rbo,
+                                                enum radeon_bo_usage usage,
+                                                enum radeon_bo_priority priority)
 {
        assert(usage);
 
@@ -66,7 +76,7 @@ static inline void r600_emit_reloc(struct r600_common_context *rctx,
 {
        struct radeon_winsys_cs *cs = ring->cs;
        bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address;
-       unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage, priority);
+       unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
 
        if (!has_vm) {
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
@@ -74,7 +84,7 @@ static inline void r600_emit_reloc(struct r600_common_context *rctx,
        }
 }
 
-static inline void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
        assert(reg < R600_CONTEXT_REG_OFFSET);
        assert(cs->cdw+2+num <= cs->max_dw);
@@ -82,13 +92,13 @@ static inline void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsign
        radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
 }
 
-static inline void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-       r600_write_config_reg_seq(cs, reg, 1);
+       radeon_set_config_reg_seq(cs, reg, 1);
        radeon_emit(cs, value);
 }
 
-static inline void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
        assert(reg >= R600_CONTEXT_REG_OFFSET);
        assert(cs->cdw+2+num <= cs->max_dw);
@@ -96,13 +106,13 @@ static inline void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsig
        radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
 }
 
-static inline void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-       r600_write_context_reg_seq(cs, reg, 1);
+       radeon_set_context_reg_seq(cs, reg, 1);
        radeon_emit(cs, value);
 }
 
-static inline void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
        assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
        assert(cs->cdw+2+num <= cs->max_dw);
@@ -110,13 +120,13 @@ static inline void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg
        radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
 }
 
-static inline void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-       si_write_sh_reg_seq(cs, reg, 1);
+       radeon_set_sh_reg_seq(cs, reg, 1);
        radeon_emit(cs, value);
 }
 
-static inline void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
        assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
        assert(cs->cdw+2+num <= cs->max_dw);
@@ -124,9 +134,9 @@ static inline void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsign
        radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
 }
 
-static inline void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-       cik_write_uconfig_reg_seq(cs, reg, 1);
+       radeon_set_uconfig_reg_seq(cs, reg, 1);
        radeon_emit(cs, value);
 }
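
The renamed helpers keep the established pattern for referencing a BO from the command stream: add it to the buffer list, then point at its list index through a NOP relocation packet. A fragment of that pattern as it appears throughout this patch (rctx, cs and rbuffer come from the surrounding emit function):

radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(rctx, &rctx->rings.gfx, rbuffer,
                                          RADEON_USAGE_READ, RADEON_PRIO_MIN));
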
 
index ed5d1da..0883934 100644
@@ -78,6 +78,9 @@ void r600_draw_rectangle(struct blitter_context *blitter,
         * I guess the 4th one is derived from the first 3.
         * The vertex specification should match u_blitter's vertex element state. */
        u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, &offset, &buf, (void**)&vb);
+       if (!buf)
+               return;
+
        vb[0] = x1;
        vb[1] = y1;
        vb[2] = depth;
@@ -231,7 +234,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
                rctx->max_db = 4;
 
        rctx->b.transfer_map = u_transfer_map_vtbl;
-       rctx->b.transfer_flush_region = u_default_transfer_flush_region;
+       rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
        rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
        rctx->b.transfer_inline_write = u_default_transfer_inline_write;
         rctx->b.memory_barrier = r600_memory_barrier;
@@ -531,11 +534,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
        case CHIP_KAVERI: return "kaveri";
        case CHIP_HAWAII: return "hawaii";
        case CHIP_MULLINS:
-#if HAVE_LLVM >= 0x0305
                return "mullins";
-#else
-               return "kabini";
-#endif
        case CHIP_TONGA: return "tonga";
        case CHIP_ICELAND: return "iceland";
        case CHIP_CARRIZO: return "carrizo";
@@ -722,7 +721,7 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
                {"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
                {"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
                {"GPU-load", R600_QUERY_GPU_LOAD, {100}},
-               {"temperature", R600_QUERY_GPU_TEMPERATURE, {100}},
+               {"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
                {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
                {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
        };
index 29db1cc..534b987 100644
@@ -316,8 +316,7 @@ struct r600_common_screen {
 struct r600_atom {
        void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
        unsigned                num_dw;
-       unsigned short          id;     /* used by r600 only */
-       bool                    dirty;
+       unsigned short          id;
 };
 
 struct r600_so_target {
@@ -562,7 +561,7 @@ unsigned r600_translate_colorswap(enum pipe_format format);
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                                   struct pipe_framebuffer_state *fb,
                                   struct r600_atom *fb_state,
-                                  unsigned *buffers,
+                                  unsigned *buffers, unsigned *dirty_cbufs,
                                   const union pipe_color_union *color);
 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
 void r600_init_context_texture_functions(struct r600_common_context *rctx);
index 0853f63..5198f1e 100644
@@ -165,9 +165,9 @@ static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
        }
 
        if (rctx->chip_class >= CIK) {
-               cik_write_uconfig_reg(cs, reg_strmout_cntl, 0);
+               radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
        } else {
-               r600_write_config_reg(cs, reg_strmout_cntl, 0);
+               radeon_set_config_reg(cs, reg_strmout_cntl, 0);
        }
 
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@@ -201,7 +201,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
                        /* SI binds streamout buffers as shader resources.
                         * VGT only counts primitives and tells the shader
                         * through SGPRs what to do. */
-                       r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
+                       radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
                        radeon_emit(cs, (t[i]->b.buffer_offset +
                                         t[i]->b.buffer_size) >> 2);    /* BUFFER_SIZE (in DW) */
                        radeon_emit(cs, stride_in_dw[i]);               /* VTX_STRIDE (in DW) */
@@ -210,7 +210,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
 
                        update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
 
-                       r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
+                       radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
                        radeon_emit(cs, (t[i]->b.buffer_offset +
                                         t[i]->b.buffer_size) >> 2);    /* BUFFER_SIZE (in DW) */
                        radeon_emit(cs, stride_in_dw[i]);               /* VTX_STRIDE (in DW) */
@@ -295,7 +295,7 @@ void r600_emit_streamout_end(struct r600_common_context *rctx)
                 * primitives emitted) may be enabled even if there is no
                 * buffer bound. This ensures that the primitives-emitted query
                 * won't increment. */
-               r600_write_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+               radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
 
                t[i]->buf_filled_size_valid = true;
        }
@@ -336,8 +336,8 @@ static void r600_emit_streamout_enable(struct r600_common_context *rctx,
                        S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
                        S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
        }
-       r600_write_context_reg(rctx->rings.gfx.cs, strmout_buffer_reg, strmout_buffer_val);
-       r600_write_context_reg(rctx->rings.gfx.cs, strmout_config_reg, strmout_config_val);
+       radeon_set_context_reg(rctx->rings.gfx.cs, strmout_buffer_reg, strmout_buffer_val);
+       radeon_set_context_reg(rctx->rings.gfx.cs, strmout_config_reg, strmout_config_val);
 }
 
 static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
index 5469691..fc69f48 100644
@@ -989,6 +989,11 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
 
                        if (usage & PIPE_TRANSFER_READ) {
                                struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
+                               if (!temp) {
+                                       R600_ERR("failed to create a temporary depth texture\n");
+                                       FREE(trans);
+                                       return NULL;
+                               }
 
                                r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
                                rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
@@ -1092,7 +1097,7 @@ static const struct u_resource_vtbl r600_texture_vtbl =
        NULL,                           /* get_handle */
        r600_texture_destroy,           /* resource_destroy */
        r600_texture_transfer_map,      /* transfer_map */
-       NULL,                           /* transfer_flush_region */
+       u_default_transfer_flush_region, /* transfer_flush_region */
        r600_texture_transfer_unmap,    /* transfer_unmap */
        NULL                            /* transfer_inline_write */
 };
@@ -1217,7 +1222,7 @@ static void evergreen_set_clear_color(struct r600_texture *rtex,
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                                   struct pipe_framebuffer_state *fb,
                                   struct r600_atom *fb_state,
-                                  unsigned *buffers,
+                                  unsigned *buffers, unsigned *dirty_cbufs,
                                   const union pipe_color_union *color)
 {
        int i;
@@ -1279,6 +1284,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                                   tex->cmask.offset, tex->cmask.size, 0, true);
 
                tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+               if (dirty_cbufs)
+                       *dirty_cbufs |= 1 << i;
                rctx->set_atom_dirty(rctx, fb_state, true);
                *buffers &= ~clear_bit;
        }
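
The clear path now records which colorbuffers were fast-cleared in a per-framebuffer bitmask, so later state emission can be restricted to those surfaces. A hedged consumer sketch using u_bit_scan() from u_math.h (si_emit_one_cbuf() is a hypothetical helper named only for illustration):

    unsigned mask = sctx->framebuffer.dirty_cbufs;

    while (mask) {
        int i = u_bit_scan(&mask);    /* pops the lowest set bit */
        si_emit_one_cbuf(sctx, i);    /* hypothetical per-cbuf re-emit */
    }
    sctx->framebuffer.dirty_cbufs = 0;
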
index 0002559..3acbd02 100644
@@ -122,8 +122,6 @@ LLVMTargetRef radeon_llvm_get_r600_target(const char *triple)
        return target;
 }
 
-#if HAVE_LLVM >= 0x0305
-
 static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
 {
        if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) {
@@ -136,8 +134,6 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
        }
 }
 
-#endif
-
 /**
  * Compile an LLVM module to machine code.
  *
@@ -180,9 +176,7 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
        /* Setup Diagnostic Handler*/
        llvm_ctx = LLVMGetModuleContext(M);
 
-#if HAVE_LLVM >= 0x0305
        LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &rval);
-#endif
        rval = 0;
 
        /* Compile IR*/
index 5669470..2e9a013 100644
@@ -1520,8 +1520,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
        bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
        bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
        bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
-       bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name =
-               HAVE_LLVM >= 0x0305 ? "llvm.AMDGPU.rsq.clamped.f32" : "llvm.AMDGPU.rsq";
+       bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name = "llvm.AMDGPU.rsq.clamped.f32";
        bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
        bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
        bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
index e64fbc7..c005659 100644
@@ -233,6 +233,9 @@ static void vui(struct rvce_encoder *enc)
 {
        int i;
 
+       if (!enc->pic.rate_ctrl.frame_rate_num)
+               return;
+
        RVCE_BEGIN(0x04000009); // vui
        RVCE_CS(0x00000000); //aspectRatioInfoPresentFlag
        RVCE_CS(0x00000000); //aspectRatioInfo.aspectRatioIdc
index a4a2ae1..00accd5 100644
@@ -223,6 +223,7 @@ struct radeon_info {
     boolean                     has_uvd;
     uint32_t                    vce_fw_version;
     boolean                     has_userptr;
+    bool                        gfx_ib_pad_with_type2;
 
     uint32_t                    r300_num_gb_pipes;
     uint32_t                    r300_num_z_pipes;
index 57f3bef..7e5e54b 100644
@@ -23,7 +23,7 @@
 
 LOCAL_PATH := $(call my-dir)
 
-# get C_SOURCES
+# get C_SOURCES and GENERATED_SOURCES
 include $(LOCAL_PATH)/Makefile.sources
 
 include $(CLEAR_VARS)
@@ -33,5 +33,16 @@ LOCAL_SRC_FILES := $(C_SOURCES)
 LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
 LOCAL_MODULE := libmesa_pipe_radeonsi
 
+# generate sources
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+intermediates := $(call local-generated-sources-dir)
+LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, $(GENERATED_SOURCES))
+
+$(LOCAL_GENERATED_SOURCES): PRIVATE_PYTHON := $(MESA_PYTHON2)
+$(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ > $@
+
+$(intermediates)/sid_tables.h:  $(intermediates)/%.h: $(LOCAL_PATH)/%.py $(LOCAL_PATH)/sid.h
+       $(transform-generated-source)
+
 include $(GALLIUM_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
index ae5035a..c506666 100644
@@ -30,7 +30,7 @@ AM_CFLAGS = \
 
 noinst_LTLIBRARIES = libradeonsi.la
 
-libradeonsi_la_SOURCES = $(C_SOURCES)
+libradeonsi_la_SOURCES = $(C_SOURCES) $(GENERATED_SOURCES)
 
 sid_tables.h: $(srcdir)/sid_tables.py $(srcdir)/sid.h
        $(AM_V_GEN) $(PYTHON2) $(srcdir)/sid_tables.py $(srcdir)/sid.h > $@
index fd44807..7e997c6 100644
@@ -1,13 +1,11 @@
 C_SOURCES := \
        cik_sdma.c \
        si_blit.c \
-       si_commands.c \
        si_compute.c \
        si_cp_dma.c \
        si_debug.c \
        si_descriptors.c \
        sid.h \
-       sid_tables.h \
        si_dma.c \
        si_hw_context.c \
        si_pipe.c \
@@ -22,3 +20,6 @@ C_SOURCES := \
        si_state_shaders.c \
        si_state.h \
        si_uvd.c
+
+GENERATED_SOURCES := \
+       sid_tables.h
index 47b586f..8b0ce9f 100644
@@ -61,9 +61,9 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx,
        ncopy = (size + CIK_SDMA_COPY_MAX_SIZE - 1) / CIK_SDMA_COPY_MAX_SIZE;
        r600_need_dma_space(&ctx->b, ncopy * 7);
 
-       r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+       radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
                              RADEON_PRIO_MIN);
-       r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+       radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
                              RADEON_PRIO_MIN);
 
        for (i = 0; i < ncopy; i++) {
@@ -171,9 +171,9 @@ static void cik_sdma_copy_tile(struct si_context *ctx,
        ncopy = (copy_height + cheight - 1) / cheight;
        r600_need_dma_space(&ctx->b, ncopy * 12);
 
-       r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
+       radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
                              RADEON_USAGE_READ, RADEON_PRIO_MIN);
-       r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
+       radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
                              RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 
        copy_height = size * 4 / pitch;
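
This file also shows the other pervasive rename in the series: r600_context_bo_reloc() becomes radeon_add_to_buffer_list(). The new name describes what the helper actually does: it writes no relocation into the command stream, it merely appends the buffer to the CS buffer list with usage and priority flags so the winsys can validate memory usage and fence the buffer at submit. A sketch of the contract, assuming the pre-rename shape from r600_pipe_common.h:

    static inline unsigned
    radeon_add_to_buffer_list(struct r600_common_context *rctx,
                              struct r600_ring *ring,
                              struct r600_resource *rbo,
                              enum radeon_bo_usage usage,
                              enum radeon_bo_priority priority)
    {
        assert(usage);
        /* No CS dwords are emitted; the winsys only records the buffer. */
        return rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage,
                                      rbo->domains, priority) * 4;
    }
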
index b7450b6..93fa67a 100644
@@ -53,7 +53,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
 
        util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
        util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
-       util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref);
+       util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
        util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
        util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
        util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader);
@@ -61,16 +61,9 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
        util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader);
        util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
        util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
-       if (sctx->queued.named.sample_mask) {
-               util_blitter_save_sample_mask(sctx->blitter,
-                                             sctx->queued.named.sample_mask->sample_mask);
-       }
-       if (sctx->queued.named.viewport[0]) {
-               util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
-       }
-       if (sctx->queued.named.scissor[0]) {
-               util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor);
-       }
+       util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
+       util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
+       util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
        util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
        util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
                                     (struct pipe_stream_output_target**)sctx->b.streamout.targets);
@@ -340,8 +333,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
                zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
 
        if (buffers & PIPE_CLEAR_COLOR) {
-               evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom,
-                                             &buffers, color);
+               evergreen_do_fast_color_clear(&sctx->b, fb,
+                                             &sctx->framebuffer.atom, &buffers,
+                                             &sctx->framebuffer.dirty_cbufs,
+                                             color);
                if (!buffers)
                        return; /* all buffers have been fast cleared */
        }
@@ -378,6 +373,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
                }
 
                zstex->depth_clear_value = depth;
+               sctx->framebuffer.dirty_zsbuf = true;
                si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
                sctx->db_depth_clear = true;
                si_mark_atom_dirty(sctx, &sctx->db_render_state);
diff --git a/src/gallium/drivers/radeonsi/si_commands.c b/src/gallium/drivers/radeonsi/si_commands.c
deleted file mode 100644
index 04bc5b9..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2012 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Christian König <christian.koenig@amd.com>
- */
-
-#include "sid.h"
-#include "si_pipe.h"
-
-void si_cmd_context_control(struct si_pm4_state *pm4)
-{
-       si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
-       si_pm4_cmd_add(pm4, 0x80000000);
-       si_pm4_cmd_add(pm4, 0x80000000);
-       si_pm4_cmd_end(pm4, false);
-}
index 0cdecd6..e1849ba 100644
 #include "sid.h"
 
 #define MAX_GLOBAL_BUFFERS 20
-#if HAVE_LLVM < 0x0305
-#define NUM_USER_SGPRS 2
-#else
+
 /* XXX: Even though we don't pass the scratch buffer via user sgprs any more,
  * LLVM still expects that we specify 4 USER_SGPRS so it can remain compatible
  * with older mesa. */
 #define NUM_USER_SGPRS 4
-#endif
 
 struct si_compute {
        struct si_context *ctx;
@@ -262,7 +259,7 @@ static void si_launch_grid(
                         SI_CONTEXT_INV_KCACHE |
                         SI_CONTEXT_FLUSH_WITH_INV_L2 |
                         SI_CONTEXT_FLAG_COMPUTE;
-       si_emit_cache_flush(&sctx->b, NULL);
+       si_emit_cache_flush(sctx, NULL);
 
        pm4->compute_pkt = true;
 
@@ -297,9 +294,10 @@ static void si_launch_grid(
                            shader->scratch_bytes_per_wave *
                            num_waves_for_scratch);
 
-               si_pm4_add_bo(pm4, shader->scratch_bo,
-                               RADEON_USAGE_READWRITE,
-                               RADEON_PRIO_SHADER_RESOURCE_RW);
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+                                         shader->scratch_bo,
+                                         RADEON_USAGE_READWRITE,
+                                         RADEON_PRIO_SHADER_RESOURCE_RW);
 
                scratch_buffer_va = shader->scratch_bo->gpu_address;
        }
@@ -312,8 +310,8 @@ static void si_launch_grid(
        kernel_args_va = input_buffer->gpu_address;
        kernel_args_va += kernel_args_offset;
 
-       si_pm4_add_bo(pm4, input_buffer, RADEON_USAGE_READ,
-               RADEON_PRIO_SHADER_DATA);
+       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, input_buffer,
+                                 RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
 
        si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
        si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
@@ -340,7 +338,9 @@ static void si_launch_grid(
                if (!buffer) {
                        continue;
                }
-               si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffer,
+                                         RADEON_USAGE_READWRITE,
+                                         RADEON_PRIO_SHADER_RESOURCE_RW);
        }
 
        /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
@@ -361,7 +361,8 @@ static void si_launch_grid(
 #if HAVE_LLVM >= 0x0306
        shader_va += pc;
 #endif
-       si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->bo,
+                                 RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
        si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
        si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
 
@@ -453,7 +454,7 @@ static void si_launch_grid(
                         SI_CONTEXT_INV_ICACHE |
                         SI_CONTEXT_INV_KCACHE |
                         SI_CONTEXT_FLAG_COMPUTE;
-       si_emit_cache_flush(&sctx->b, NULL);
+       si_emit_cache_flush(sctx, NULL);
 }
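
With the LLVM < 3.5 path gone, the driver now always reserves 4 user SGPRs. User SGPRs are preloaded from the COMPUTE_USER_DATA_* registers, which sit 4 bytes apart — hence the 64-bit kernel-args pointer above being split across R_00B900_COMPUTE_USER_DATA_0 and R_00B900_COMPUTE_USER_DATA_0 + 4. The general pattern, as a sketch (user_data[] is hypothetical):

    /* One dword per user SGPR; consecutive USER_DATA registers are 4 bytes apart. */
    uint32_t user_data[NUM_USER_SGPRS] = {0};   /* e.g. a 64-bit pointer in [0..1] */

    for (unsigned i = 0; i < NUM_USER_SGPRS; i++)
        si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + i * 4, user_data[i]);
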
 
 
index 7bdac97..32ab6a9 100644
@@ -155,18 +155,17 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
                unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
                unsigned dma_flags = tc_l2_flag;
 
-               si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0),
-                                FALSE);
+               si_need_cs_space(sctx);
 
                /* This must be done after need_cs_space. */
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource*)dst, RADEON_USAGE_WRITE,
                                      RADEON_PRIO_MIN);
 
                /* Flush the caches for the first copy only.
                 * Also wait for the previous CP DMA operations. */
                if (sctx->b.flags) {
-                       si_emit_cache_flush(&sctx->b, NULL);
+                       si_emit_cache_flush(sctx, NULL);
                        dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
                }
 
@@ -226,11 +225,11 @@ void si_copy_buffer(struct si_context *sctx,
                unsigned sync_flags = tc_l2_flag;
                unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
 
-               si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE);
+               si_need_cs_space(sctx);
 
                /* Flush the caches for the first copy only. Also wait for old CP DMA packets to complete. */
                if (sctx->b.flags) {
-                       si_emit_cache_flush(&sctx->b, NULL);
+                       si_emit_cache_flush(sctx, NULL);
                        sync_flags |= SI_CP_DMA_RAW_WAIT;
                }
 
@@ -240,9 +239,9 @@ void si_copy_buffer(struct si_context *sctx,
                }
 
                /* This must be done after r600_need_cs_space. */
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
                                      RADEON_USAGE_READ, RADEON_PRIO_MIN);
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
                                      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 
                si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
index b74c893..b07ab3b 100644
@@ -117,7 +117,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
 
        util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
 
-       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
+       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
                              RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
 
        desc->list_dirty = false;
@@ -163,14 +163,14 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
                if (!rview->resource)
                        continue;
 
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      rview->resource, RADEON_USAGE_READ,
                                      si_get_resource_ro_priority(rview->resource));
        }
 
        if (!views->desc.buffer)
                return;
-       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
+       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
                              RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
 }
 
@@ -188,7 +188,7 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
                        (struct si_sampler_view*)view;
 
                if (rview->resource)
-                       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+                       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                rview->resource, RADEON_USAGE_READ,
                                si_get_resource_ro_priority(rview->resource));
 
@@ -269,17 +269,21 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx,
 {
        if (!states->desc.buffer)
                return;
-       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
+       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
                              RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
 }
 
-void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
-                               unsigned start, unsigned count, void **states)
+static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
+                                   unsigned start, unsigned count, void **states)
 {
+       struct si_context *sctx = (struct si_context *)ctx;
        struct si_sampler_states *samplers = &sctx->samplers[shader].states;
        struct si_sampler_state **sstates = (struct si_sampler_state**)states;
        int i;
 
+       if (!count || shader >= SI_NUM_SHADERS)
+               return;
+
        if (start == 0)
                samplers->saved_states[0] = states[0];
        if (start == 1)
@@ -335,14 +339,14 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
        while (mask) {
                int i = u_bit_scan64(&mask);
 
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource*)buffers->buffers[i],
                                      buffers->shader_usage, buffers->priority);
        }
 
        if (!buffers->desc.buffer)
                return;
-       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                              buffers->desc.buffer, RADEON_USAGE_READWRITE,
                              RADEON_PRIO_SHADER_DATA);
 }
@@ -363,14 +367,14 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
                if (!sctx->vertex_buffer[vb].buffer)
                        continue;
 
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource*)sctx->vertex_buffer[vb].buffer,
                                      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
        }
 
        if (!desc->buffer)
                return;
-       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                              desc->buffer, RADEON_USAGE_READ,
                              RADEON_PRIO_SHADER_DATA);
 }
@@ -397,7 +401,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
        if (!desc->buffer)
                return false;
 
-       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                              desc->buffer, RADEON_USAGE_READ,
                              RADEON_PRIO_SHADER_DATA);
 
@@ -441,7 +445,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
                desc[3] = sctx->vertex_elements->rsrc_word3[i];
 
                if (!bound[ve->vertex_buffer_index]) {
-                       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+                       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                              (struct r600_resource*)vb->buffer,
                                              RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
                        bound[ve->vertex_buffer_index] = true;
@@ -468,7 +472,8 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf
 
        u_upload_alloc(sctx->b.uploader, 0, size, const_offset,
                       (struct pipe_resource**)rbuffer, &tmp);
-       util_memcpy_cpu_to_le32(tmp, ptr, size);
+       if (*rbuffer)
+               util_memcpy_cpu_to_le32(tmp, ptr, size);
 }
 
 static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint slot,
@@ -500,6 +505,11 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
                        si_upload_const_buffer(sctx,
                                               (struct r600_resource**)&buffer, input->user_buffer,
                                               input->buffer_size, &buffer_offset);
+                       if (!buffer) {
+                               /* Just unbind on failure. */
+                               si_set_constant_buffer(ctx, shader, slot, NULL);
+                               return;
+                       }
                        va = r600_resource(buffer)->gpu_address + buffer_offset;
                } else {
                        pipe_resource_reference(&buffer, input->buffer);
@@ -520,7 +530,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
                          S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
                buffers->buffers[slot] = buffer;
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource*)buffer,
                                      buffers->shader_usage, buffers->priority);
                buffers->desc.enabled_mask |= 1llu << slot;
@@ -615,7 +625,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
                          S_008F0C_ADD_TID_ENABLE(add_tid);
 
                pipe_resource_reference(&buffers->buffers[slot], buffer);
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource*)buffer,
                                      buffers->shader_usage, buffers->priority);
                buffers->desc.enabled_mask |= 1llu << slot;
@@ -705,7 +715,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
                        /* Set the resource. */
                        pipe_resource_reference(&buffers->buffers[bufidx],
                                                buffer);
-                       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+                       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                              (struct r600_resource*)buffer,
                                              buffers->shader_usage, buffers->priority);
                        buffers->desc.enabled_mask |= 1llu << bufidx;
@@ -804,7 +814,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
                                                            old_va, buf);
                                buffers->desc.list_dirty = true;
 
-                               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+                               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                                      rbuffer, buffers->shader_usage,
                                                      buffers->priority);
 
@@ -833,7 +843,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
                                                            old_va, buf);
                                buffers->desc.list_dirty = true;
 
-                               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+                               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                                      rbuffer, buffers->shader_usage,
                                                      buffers->priority);
                        }
@@ -858,7 +868,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
                                                            old_va, buf);
                                views->desc.list_dirty = true;
 
-                               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+                               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                                      rbuffer, RADEON_USAGE_READ,
                                                      RADEON_PRIO_SHADER_BUFFER_RO);
                        }
@@ -960,8 +970,7 @@ static void si_emit_shader_pointer(struct si_context *sctx,
        desc->pointer_dirty = keep_dirty;
 }
 
-static void si_emit_shader_userdata(struct si_context *sctx,
-                                   struct r600_atom *atom)
+void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
 {
        unsigned i;
        uint32_t *sh_base = sctx->shader_userdata.sh_base;
@@ -1023,17 +1032,15 @@ void si_init_all_descriptors(struct si_context *sctx)
                            4, SI_NUM_VERTEX_BUFFERS);
 
        /* Set pipe_context functions. */
+       sctx->b.b.bind_sampler_states = si_bind_sampler_states;
        sctx->b.b.set_constant_buffer = si_set_constant_buffer;
        sctx->b.b.set_sampler_views = si_set_sampler_views;
        sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
        sctx->b.invalidate_buffer = si_invalidate_buffer;
 
        /* Shader user data. */
-       sctx->atoms.s.shader_userdata = &sctx->shader_userdata.atom;
-       sctx->shader_userdata.atom.emit = (void*)si_emit_shader_userdata;
-
-       /* Upper bound, 4 pointers per shader, +1 for vertex buffers, +2 for the VS copy shader. */
-       sctx->shader_userdata.atom.num_dw = (SI_NUM_SHADERS * 4 + 1 + 2) * 4;
+       si_init_atom(sctx, &sctx->shader_userdata.atom, &sctx->atoms.s.shader_userdata,
+                    si_emit_shader_userdata);
 
        /* Set default and immutable mappings. */
        si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
index 1a7eeae..309ae04 100644
@@ -78,9 +78,9 @@ static void si_dma_copy_buffer(struct si_context *ctx,
 
        r600_need_dma_space(&ctx->b, ncopy * 5);
 
-       r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+       radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
                              RADEON_PRIO_MIN);
-       r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+       radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
                              RADEON_PRIO_MIN);
 
        for (i = 0; i < ncopy; i++) {
@@ -177,9 +177,9 @@ static void si_dma_copy_tile(struct si_context *ctx,
        ncopy = (size / SI_DMA_COPY_MAX_SIZE_DW) + !!(size % SI_DMA_COPY_MAX_SIZE_DW);
        r600_need_dma_space(&ctx->b, ncopy * 9);
 
-       r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
+       radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
                              RADEON_USAGE_READ, RADEON_PRIO_MIN);
-       r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
+       radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
                              RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 
        for (i = 0; i < ncopy; i++) {
index 110e316..1d5d426 100644
 #include "si_pipe.h"
 
 /* initialize */
-void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
-                       boolean count_draw_in)
+void si_need_cs_space(struct si_context *ctx)
 {
        struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
-       int i;
-
-       /* If the CS is sufficiently large, don't count the space needed
-        * and just flush if there is less than 8096 dwords left. */
-       if (cs->max_dw >= 24 * 1024) {
-               if (cs->cdw > cs->max_dw - 8 * 1024)
-                       ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-               return;
-       }
 
        /* There are two memory usage counters in the winsys for all buffers
         * that have been added (cs_add_reloc) and two counters in the pipe
         * driver for those that haven't been added yet.
-        * */
-       if (!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs, ctx->b.vram, ctx->b.gtt)) {
+        */
+       if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs,
+                                                      ctx->b.vram, ctx->b.gtt))) {
                ctx->b.gtt = 0;
                ctx->b.vram = 0;
                ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
@@ -54,47 +45,11 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
        ctx->b.gtt = 0;
        ctx->b.vram = 0;
 
-       /* The number of dwords we already used in the CS so far. */
-       num_dw += cs->cdw;
-
-       if (count_draw_in) {
-               for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
-                       if (ctx->atoms.array[i]->dirty) {
-                               num_dw += ctx->atoms.array[i]->num_dw;
-                       }
-               }
-
-               /* The number of dwords all the dirty states would take. */
-               num_dw += si_pm4_dirty_dw(ctx);
-
-               /* The upper-bound of how much a draw command would take. */
-               num_dw += SI_MAX_DRAW_CS_DWORDS;
-       }
-
-       /* Count in queries_suspend. */
-       num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
-                 ctx->b.num_cs_dw_timer_queries_suspend;
-
-       /* Count in streamout_end at the end of CS. */
-       if (ctx->b.streamout.begin_emitted) {
-               num_dw += ctx->b.streamout.num_dw_for_end;
-       }
-
-       /* Count in render_condition(NULL) at the end of CS. */
-       if (ctx->b.predicate_drawing) {
-               num_dw += 3;
-       }
-
-       /* Count in framebuffer cache flushes at the end of CS. */
-       num_dw += ctx->atoms.s.cache_flush->num_dw;
-
-       if (ctx->screen->b.trace_bo)
-               num_dw += SI_TRACE_CS_DWORDS * 2;
-
-       /* Flush if there's not enough space. */
-       if (num_dw > cs->max_dw) {
+       /* If the CS is sufficiently large, don't count the space needed
+        * and just flush if there is not enough space left.
+        */
+       if (unlikely(cs->cdw > cs->max_dw - 2048))
                ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-       }
 }
 
 void si_context_gfx_flush(void *context, unsigned flags,
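
si_need_cs_space() thus loses its per-call dword accounting: instead of summing the worst case of every dirty atom, PM4 state, suspended query and streamout-end packet, it flushes whenever fewer than 2048 spare dwords remain (or the winsys memory limit is exceeded). The implied contract, sketched with an illustrative assert:

    si_need_cs_space(sctx);
    /* From here on, the gfx CS has at least 2048 free dwords, so one draw
     * together with all of its dirty state must fit in that reserve. */
    assert(cs->max_dw - cs->cdw >= 2048);
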
@@ -122,7 +77,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
                        SI_CONTEXT_INV_TC_L2 |
                        /* this is probably not needed anymore */
                        SI_CONTEXT_PS_PARTIAL_FLUSH;
-       si_emit_cache_flush(&ctx->b, NULL);
+       si_emit_cache_flush(ctx, NULL);
 
        /* force to keep tiling flags */
        flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
@@ -185,14 +140,28 @@ void si_begin_new_cs(struct si_context *ctx)
        /* The CS initialization should be emitted before everything else. */
        si_pm4_emit(ctx, ctx->init_config);
 
-       si_mark_atom_dirty(ctx, &ctx->clip_regs);
+       ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
+       ctx->framebuffer.dirty_zsbuf = true;
        si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+
+       si_mark_atom_dirty(ctx, &ctx->clip_regs);
+       si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
        si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
        si_mark_atom_dirty(ctx, &ctx->msaa_config);
+       si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
+       si_mark_atom_dirty(ctx, &ctx->cb_target_mask);
+       si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
        si_mark_atom_dirty(ctx, &ctx->db_render_state);
+       si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
+       si_mark_atom_dirty(ctx, &ctx->spi_map);
        si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
        si_all_descriptors_begin_new_cs(ctx);
 
+       ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+       ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+       si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+       si_mark_atom_dirty(ctx, &ctx->viewports.atom);
+
        r600_postflush_resume_features(&ctx->b);
 
        ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;
index 92c6ae3..9edee50 100644
@@ -44,13 +44,12 @@ static void si_destroy_context(struct pipe_context *context)
        pipe_resource_reference(&sctx->gsvs_ring, NULL);
        pipe_resource_reference(&sctx->tf_ring, NULL);
        pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
-       r600_resource_reference(&sctx->border_color_table, NULL);
+       r600_resource_reference(&sctx->border_color_buffer, NULL);
+       free(sctx->border_color_table);
        r600_resource_reference(&sctx->scratch_buffer, NULL);
        sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
 
        si_pm4_free_state(sctx, sctx->init_config, ~0);
-       si_pm4_delete_state(sctx, gs_rings, sctx->gs_rings);
-       si_pm4_delete_state(sctx, tf_ring, sctx->tf_state);
        for (i = 0; i < Elements(sctx->vgt_shader_config); i++)
                si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
 
@@ -73,8 +72,6 @@ static void si_destroy_context(struct pipe_context *context)
        if (sctx->blitter)
                util_blitter_destroy(sctx->blitter);
 
-       si_pm4_cleanup(sctx);
-
        r600_common_context_cleanup(&sctx->b);
 
 #if HAVE_LLVM >= 0x0306
@@ -141,21 +138,26 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
                                                sscreen->b.trace_bo->cs_buf : NULL);
        sctx->b.rings.gfx.flush = si_context_gfx_flush;
 
-       si_init_all_descriptors(sctx);
-
-       /* Initialize cache_flush. */
-       sctx->cache_flush = si_atom_cache_flush;
-       sctx->atoms.s.cache_flush = &sctx->cache_flush;
-
-       sctx->msaa_sample_locs = si_atom_msaa_sample_locs;
-       sctx->atoms.s.msaa_sample_locs = &sctx->msaa_sample_locs;
+       /* Border colors. */
+       sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
+                                         sizeof(*sctx->border_color_table));
+       if (!sctx->border_color_table)
+               goto fail;
 
-       sctx->msaa_config = si_atom_msaa_config;
-       sctx->atoms.s.msaa_config = &sctx->msaa_config;
+       sctx->border_color_buffer = (struct r600_resource*)
+               pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT,
+                                  SI_MAX_BORDER_COLORS *
+                                  sizeof(*sctx->border_color_table));
+       if (!sctx->border_color_buffer)
+               goto fail;
 
-       sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom;
-       sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom;
+       sctx->border_color_map =
+               ws->buffer_map(sctx->border_color_buffer->cs_buf,
+                              NULL, PIPE_TRANSFER_WRITE);
+       if (!sctx->border_color_map)
+               goto fail;
 
+       si_init_all_descriptors(sctx);
        si_init_state_functions(sctx);
        si_init_shader_functions(sctx);
 
@@ -167,6 +169,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
                goto fail;
        sctx->blitter->draw_rectangle = r600_draw_rectangle;
 
+       sctx->sample_mask.sample_mask = 0xffff;
+
        /* these must be last */
        si_begin_new_cs(sctx);
        r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */
@@ -176,6 +180,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        if (sctx->b.chip_class == CIK) {
                sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
                                                                 PIPE_USAGE_DEFAULT, 16);
+               if (!sctx->null_const_buf.buffer)
+                       goto fail;
                sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;
 
                for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
@@ -201,9 +207,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        r600_target = radeon_llvm_get_r600_target(triple);
        sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
                                           r600_get_llvm_processor_name(sscreen->b.family),
-                                          sctx->b.chip_class >= VI ?
-                                                  "+DumpCode" :
-                                                  "+DumpCode,+vgpr-spilling",
+                                          "+DumpCode,+vgpr-spilling",
                                           LLVMCodeGenLevelDefault,
                                           LLVMRelocDefault,
                                           LLVMCodeModelDefault);
@@ -211,6 +215,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
        return &sctx->b.b;
 fail:
+       fprintf(stderr, "radeonsi: Failed to create a context.\n");
        si_destroy_context(&sctx->b.b);
        return NULL;
 }
@@ -279,6 +284,9 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
        case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
        case PIPE_CAP_DEPTH_BOUNDS_TEST:
+       case PIPE_CAP_TEXTURE_QUERY_LOD:
+       case PIPE_CAP_TEXTURE_GATHER_SM5:
+       case PIPE_CAP_TGSI_TXQS:
                return 1;
 
        case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -301,6 +309,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 
        case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
        case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+       case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
                return 4;
 
        case PIPE_CAP_GLSL_FEATURE_LEVEL:
@@ -309,12 +318,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
                return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
 
-       case PIPE_CAP_TEXTURE_QUERY_LOD:
-       case PIPE_CAP_TEXTURE_GATHER_SM5:
-               return HAVE_LLVM >= 0x0305;
-       case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
-               return HAVE_LLVM >= 0x0305 ? 4 : 0;
-
        /* Unsupported features. */
        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
        case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
@@ -369,7 +372,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
                return 8;
 
        case PIPE_CAP_MAX_VIEWPORTS:
-               return 16;
+               return SI_MAX_VIEWPORTS;
 
        /* Timer queries, present when the clock frequency is non zero. */
        case PIPE_CAP_QUERY_TIMESTAMP:
index 52167f2..847853e 100644
 #define SI_RESTART_INDEX_UNKNOWN INT_MIN
 #define SI_NUM_SMOOTH_AA_SAMPLES 8
 
-#define SI_TRACE_CS_DWORDS             7
-
-#define SI_MAX_DRAW_CS_DWORDS \
-       (/*scratch:*/ 3 + /*derived prim state:*/ 3 + \
-        /*draw regs:*/ 18 + /*draw packets:*/ 31 +\
-        /*derived tess state:*/ 19)
-
 /* Instruction cache. */
 #define SI_CONTEXT_INV_ICACHE          (R600_CONTEXT_PRIVATE_FLAG << 0)
 /* Cache used by scalar memory (SMEM) instructions. They also use TC
 #define SI_IS_TRACE_POINT(x)           (((x) & 0xcafe0000) == 0xcafe0000)
 #define SI_GET_TRACE_POINT_ID(x)       ((x) & 0xffff)
 
+#define SI_MAX_VIEWPORTS       16
+#define SI_MAX_BORDER_COLORS   4096
+
 struct si_compute;
 
 struct si_screen {
        struct r600_common_screen       b;
 };
 
+struct si_blend_color {
+       struct r600_atom                atom;
+       struct pipe_blend_color         state;
+};
+
 struct si_sampler_view {
        struct pipe_sampler_view        base;
        struct list_head                list;
@@ -103,7 +104,6 @@ struct si_sampler_view {
 
 struct si_sampler_state {
        uint32_t                        val[4];
-       uint32_t                        border_color[4];
 };
 
 struct si_cs_shader_state {
@@ -125,9 +125,31 @@ struct si_framebuffer {
        unsigned                        cb0_is_integer;
        unsigned                        compressed_cb_mask;
        unsigned                        export_16bpc;
+       unsigned                        dirty_cbufs;
+       bool                            dirty_zsbuf;
 };
 
-#define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0]))
+struct si_clip_state {
+       struct r600_atom                atom;
+       struct pipe_clip_state          state;
+};
+
+struct si_sample_mask {
+       struct r600_atom        atom;
+       uint16_t                sample_mask;
+};
+
+struct si_scissors {
+       struct r600_atom                atom;
+       unsigned                        dirty_mask;
+       struct pipe_scissor_state       states[SI_MAX_VIEWPORTS];
+};
+
+struct si_viewports {
+       struct r600_atom                atom;
+       unsigned                        dirty_mask;
+       struct pipe_viewport_state      states[SI_MAX_VIEWPORTS];
+};
 
 struct si_context {
        struct r600_common_context      b;
@@ -138,30 +160,41 @@ struct si_context {
        void                            *custom_blend_fastclear;
        void                            *pstipple_sampler_state;
        struct si_screen                *screen;
-       struct si_pm4_state             *init_config;
        struct pipe_fence_handle        *last_gfx_fence;
        struct si_shader_selector       *fixed_func_tcs_shader;
+       LLVMTargetMachineRef            tm;
 
-       union {
-               struct {
-                       /* The order matters. */
-                       struct r600_atom *cache_flush;
-                       struct r600_atom *streamout_begin;
-                       struct r600_atom *streamout_enable; /* must be after streamout_begin */
-                       struct r600_atom *framebuffer;
-                       struct r600_atom *msaa_sample_locs;
-                       struct r600_atom *db_render_state;
-                       struct r600_atom *msaa_config;
-                       struct r600_atom *clip_regs;
-                       struct r600_atom *shader_userdata;
-               } s;
-               struct r600_atom *array[0];
-       } atoms;
+       /* Atoms (direct states). */
+       union si_state_atoms            atoms;
+       unsigned                        dirty_atoms; /* mask */
+       /* PM4 states (precomputed immutable states) */
+       union si_state                  queued;
+       union si_state                  emitted;
 
+       /* Atom declarations. */
+       struct r600_atom                cache_flush;
        struct si_framebuffer           framebuffer;
-       struct si_vertex_element        *vertex_elements;
-       /* for saving when using blitter */
-       struct pipe_stencil_ref         stencil_ref;
+       struct r600_atom                msaa_sample_locs;
+       struct r600_atom                db_render_state;
+       struct r600_atom                msaa_config;
+       struct si_sample_mask           sample_mask;
+       struct r600_atom                cb_target_mask;
+       struct si_blend_color           blend_color;
+       struct r600_atom                clip_regs;
+       struct si_clip_state            clip_state;
+       struct si_shader_data           shader_userdata;
+       struct si_scissors              scissors;
+       struct si_viewports             viewports;
+       struct si_stencil_ref           stencil_ref;
+       struct r600_atom                spi_map;
+
+       /* Precomputed states. */
+       struct si_pm4_state             *init_config;
+       struct si_pm4_state             *vgt_shader_config[4];
+       /* With rasterizer discard, there doesn't have to be a pixel shader.
+        * In that case, we bind this one: */
+       void                            *dummy_pixel_shader;
+
        /* shaders */
        struct si_shader_selector       *ps_shader;
        struct si_shader_selector       *gs_shader;
@@ -169,51 +202,38 @@ struct si_context {
        struct si_shader_selector       *tcs_shader;
        struct si_shader_selector       *tes_shader;
        struct si_cs_shader_state       cs_shader_state;
-       struct si_shader_data           shader_userdata;
+
        /* shader information */
+       struct si_vertex_element        *vertex_elements;
        unsigned                        sprite_coord_enable;
        bool                            flatshade;
+
+       /* shader descriptors */
        struct si_descriptors           vertex_buffers;
        struct si_buffer_resources      const_buffers[SI_NUM_SHADERS];
        struct si_buffer_resources      rw_buffers[SI_NUM_SHADERS];
        struct si_textures_info         samplers[SI_NUM_SHADERS];
-       struct r600_resource            *scratch_buffer;
-       struct r600_resource            *border_color_table;
-       unsigned                        border_color_offset;
 
-       struct r600_atom                clip_regs;
-       struct r600_atom                msaa_sample_locs;
-       struct r600_atom                msaa_config;
-       int                             ps_iter_samples;
-       bool                            smoothing_enabled;
+       /* other shader resources */
+       struct pipe_constant_buffer     null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
+       struct pipe_resource            *esgs_ring;
+       struct pipe_resource            *gsvs_ring;
+       struct pipe_resource            *tf_ring;
+       union pipe_color_union          *border_color_table; /* in CPU memory, any endian */
+       struct r600_resource            *border_color_buffer;
+       union pipe_color_union          *border_color_map; /* in VRAM (slow access), little endian */
+       unsigned                        border_color_count;
 
        /* Vertex and index buffers. */
-       bool                    vertex_buffers_dirty;
-       struct pipe_index_buffer index_buffer;
-       struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS];
-
-       /* With rasterizer discard, there doesn't have to be a pixel shader.
-        * In that case, we bind this one: */
-       void                    *dummy_pixel_shader;
-       struct r600_atom        cache_flush;
-       struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
-
-       /* VGT states. */
-       struct si_pm4_state     *vgt_shader_config[4];
-       struct si_pm4_state     *gs_rings;
-       struct pipe_resource    *esgs_ring;
-       struct pipe_resource    *gsvs_ring;
-       struct si_pm4_state     *tf_state;
-       struct pipe_resource    *tf_ring;
+       bool                            vertex_buffers_dirty;
+       struct pipe_index_buffer        index_buffer;
+       struct pipe_vertex_buffer       vertex_buffer[SI_NUM_VERTEX_BUFFERS];
 
-       LLVMTargetMachineRef            tm;
-
-       /* SI state handling */
-       union si_state  queued;
-       union si_state  emitted;
+       /* MSAA config state. */
+       int                             ps_iter_samples;
+       bool                            smoothing_enabled;
 
        /* DB render state. */
-       struct r600_atom        db_render_state;
        bool                    dbcb_depth_copy_enabled;
        bool                    dbcb_stencil_copy_enabled;
        unsigned                dbcb_copy_sample;
@@ -235,8 +255,10 @@ struct si_context {
        int                     last_rast_prim;
        unsigned                last_sc_line_stipple;
        int                     current_rast_prim; /* primitive type after TES, GS */
+       unsigned                last_gsvs_itemsize;
 
        /* Scratch buffer */
+       struct r600_resource    *scratch_buffer;
        boolean                 emit_scratch_reloc;
        unsigned                scratch_waves;
        unsigned                spi_tmpring_size;
@@ -302,7 +324,7 @@ void si_dma_copy(struct pipe_context *ctx,
 void si_context_gfx_flush(void *context, unsigned flags,
                          struct pipe_fence_handle **fence);
 void si_begin_new_cs(struct si_context *ctx);
-void si_need_cs_space(struct si_context *ctx, unsigned num_dw, boolean count_draw_in);
+void si_need_cs_space(struct si_context *ctx);
 
 /* si_compute.c */
 void si_init_compute_functions(struct si_context *sctx);
@@ -339,7 +361,12 @@ static inline void
 si_set_atom_dirty(struct si_context *sctx,
                  struct r600_atom *atom, bool dirty)
 {
-       atom->dirty = dirty;
+       unsigned bit = 1 << (atom->id - 1);
+
+       if (dirty)
+               sctx->dirty_atoms |= bit;
+       else
+               sctx->dirty_atoms &= ~bit;
 }
 
 static inline void
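
Atom dirtiness moves from a per-atom bool into the single sctx->dirty_atoms bitmask, where bit (id - 1) stands for the atom with that id. The draw path can then visit only the set bits; a sketch of such a consumer, assuming the union above exposes a flat array[] view indexed by id - 1:

    unsigned mask = sctx->dirty_atoms;

    while (mask) {
        /* bit i corresponds to the atom with id == i + 1 */
        struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
        atom->emit(&sctx->b, atom);
    }
    sctx->dirty_atoms = 0;
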
index 036d90c..b1834af 100644
@@ -107,6 +107,7 @@ void si_pm4_free_state_simple(struct si_pm4_state *state)
 {
        for (int i = 0; i < state->nbo; ++i)
                r600_resource_reference(&state->bo[i], NULL);
+       r600_resource_reference(&state->indirect_buffer, NULL);
        FREE(state);
 }
 
@@ -124,37 +125,28 @@ void si_pm4_free_state(struct si_context *sctx,
        si_pm4_free_state_simple(state);
 }
 
-unsigned si_pm4_dirty_dw(struct si_context *sctx)
-{
-       unsigned count = 0;
-
-       for (int i = 0; i < NUMBER_OF_STATES; ++i) {
-               struct si_pm4_state *state = sctx->queued.array[i];
-
-               if (!state || sctx->emitted.array[i] == state)
-                       continue;
-
-               count += state->ndw;
-       }
-
-       return count;
-}
-
 void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
 {
        struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+
        for (int i = 0; i < state->nbo; ++i) {
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, state->bo[i],
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, state->bo[i],
                                      state->bo_usage[i], state->bo_priority[i]);
        }
 
-       memcpy(&cs->buf[cs->cdw], state->pm4, state->ndw * 4);
+       if (!state->indirect_buffer) {
+               radeon_emit_array(cs, state->pm4, state->ndw);
+       } else {
+               struct r600_resource *ib = state->indirect_buffer;
 
-       for (int i = 0; i < state->nrelocs; ++i) {
-               cs->buf[cs->cdw + state->relocs[i]] += cs->cdw << 2;
-       }
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib,
+                                         RADEON_USAGE_READ, RADEON_PRIO_MIN);
 
-       cs->cdw += state->ndw;
+               radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+               radeon_emit(cs, ib->gpu_address);
+               radeon_emit(cs, (ib->gpu_address >> 32) & 0xffff);
+               radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
+       }
 }
 
 void si_pm4_emit_dirty(struct si_context *sctx)
@@ -175,9 +167,35 @@ void si_pm4_reset_emitted(struct si_context *sctx)
        memset(&sctx->emitted, 0, sizeof(sctx->emitted));
 }
 
-void si_pm4_cleanup(struct si_context *sctx)
+void si_pm4_upload_indirect_buffer(struct si_context *sctx,
+                                  struct si_pm4_state *state)
 {
-       for (int i = 0; i < NUMBER_OF_STATES; ++i) {
-               si_pm4_free_state(sctx, sctx->queued.array[i], i);
+       struct pipe_screen *screen = sctx->b.b.screen;
+       unsigned aligned_ndw = align(state->ndw, 8);
+
+       /* only supported on CIK and later */
+       if (sctx->b.chip_class < CIK)
+               return;
+
+       assert(state->ndw);
+       assert(aligned_ndw <= SI_PM4_MAX_DW);
+
+       r600_resource_reference(&state->indirect_buffer, NULL);
+       state->indirect_buffer = (struct r600_resource*)
+               pipe_buffer_create(screen, PIPE_BIND_CUSTOM,
+                                  PIPE_USAGE_DEFAULT, aligned_ndw * 4);
+       if (!state->indirect_buffer)
+               return;
+
+       /* Pad the IB to 8 DWs to meet CP fetch alignment requirements. */
+       if (sctx->screen->b.info.gfx_ib_pad_with_type2) {
+               for (int i = state->ndw; i < aligned_ndw; i++)
+                       state->pm4[i] = 0x80000000; /* type2 nop packet */
+       } else {
+               for (int i = state->ndw; i < aligned_ndw; i++)
+                       state->pm4[i] = 0xffff1000; /* type3 nop packet */
        }
+
+       pipe_buffer_write(&sctx->b.b, &state->indirect_buffer->b.b,
+                         0, aligned_ndw * 4, state->pm4);
 }
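
The padding uses two single-dword filler encodings, selected by the gfx_ib_pad_with_type2 flag this series adds to struct radeon_info: some command processors skip only type-2 NOPs in gfx IBs, others accept the type-3 form. Decoding the magic numbers under the usual PKT3 layout from sid.h (the macro names here are hypothetical):

    #define NOP_TYPE2  0x80000000u                /* bare type-2 header, 1 dword  */
    #define NOP_TYPE3  PKT3(PKT3_NOP, 0x3fff, 0)  /* == 0xffff1000; count 0x3fff  */
                                                  /* encodes an empty payload, so */
                                                  /* it also pads exactly 1 dword */
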
index d215882..309a596 100644
@@ -29,9 +29,8 @@
 
 #include "radeon/radeon_winsys.h"
 
-#define SI_PM4_MAX_DW          256
-#define SI_PM4_MAX_BO          32
-#define SI_PM4_MAX_RELOCS      4
+#define SI_PM4_MAX_DW          160
+#define SI_PM4_MAX_BO          1
 
 // forward defines
 struct si_context;
@@ -39,6 +38,9 @@ enum chip_class;
 
 struct si_pm4_state
 {
+       /* optional indirect buffer */
+       struct r600_resource    *indirect_buffer;
+
        /* PKT3_SET_*_REG handling */
        unsigned        last_opcode;
        unsigned        last_reg;
@@ -54,10 +56,6 @@ struct si_pm4_state
        enum radeon_bo_usage    bo_usage[SI_PM4_MAX_BO];
        enum radeon_bo_priority bo_priority[SI_PM4_MAX_BO];
 
-       /* relocs for shader data */
-       unsigned        nrelocs;
-       unsigned        relocs[SI_PM4_MAX_RELOCS];
-
        bool compute_pkt;
 };
 
@@ -70,16 +68,16 @@ void si_pm4_add_bo(struct si_pm4_state *state,
                   struct r600_resource *bo,
                   enum radeon_bo_usage usage,
                   enum radeon_bo_priority priority);
+void si_pm4_upload_indirect_buffer(struct si_context *sctx,
+                                  struct si_pm4_state *state);
 
 void si_pm4_free_state_simple(struct si_pm4_state *state);
 void si_pm4_free_state(struct si_context *sctx,
                       struct si_pm4_state *state,
                       unsigned idx);
 
-unsigned si_pm4_dirty_dw(struct si_context *sctx);
 void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state);
 void si_pm4_emit_dirty(struct si_context *sctx);
 void si_pm4_reset_emitted(struct si_context *sctx);
-void si_pm4_cleanup(struct si_context *sctx);
 
 #endif
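si_pm4_emit() above chains to the uploaded buffer with PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0): a three-dword payload of address low bits, address high bits, and size in dwords. As a hedged sketch of the assumed type-3 header layout (packet type in bits 31:30, payload dword count minus one in bits 29:16, opcode in bits 15:8; sid.h holds the authoritative PKT3() macro):

#include <stdint.h>

static uint32_t pkt3_header(unsigned opcode, unsigned count_minus_one)
{
	return (3u << 30) |                         /* packet type 3 */
	       ((count_minus_one & 0x3fff) << 16) | /* payload size  */
	       ((opcode & 0xff) << 8);              /* IT opcode     */
}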
index ab5b3ee..a3df648 100644
@@ -2245,39 +2245,77 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
                                struct lp_build_tgsi_context * bld_base,
                                struct lp_build_emit_data * emit_data);
 
-static bool tgsi_is_shadow_sampler(unsigned target)
+static bool tgsi_is_array_sampler(unsigned target)
 {
-       return target == TGSI_TEXTURE_SHADOW1D ||
+       return target == TGSI_TEXTURE_1D_ARRAY ||
               target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
-              target == TGSI_TEXTURE_SHADOW2D ||
+              target == TGSI_TEXTURE_2D_ARRAY ||
               target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
-              target == TGSI_TEXTURE_SHADOWCUBE ||
+              target == TGSI_TEXTURE_CUBE_ARRAY ||
               target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
-              target == TGSI_TEXTURE_SHADOWRECT;
+              target == TGSI_TEXTURE_2D_ARRAY_MSAA;
+}
+
+static void set_tex_fetch_args(struct gallivm_state *gallivm,
+                              struct lp_build_emit_data *emit_data,
+                              unsigned opcode, unsigned target,
+                              LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
+                              LLVMValueRef *param, unsigned count,
+                              unsigned dmask)
+{
+       unsigned num_args;
+       unsigned is_rect = target == TGSI_TEXTURE_RECT;
+       LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+
+       /* Pad the coordinate vector to a power-of-two size. */
+       while (count < util_next_power_of_two(count))
+               param[count++] = LLVMGetUndef(i32);
+
+       /* Texture coordinates. */
+       if (count > 1)
+               emit_data->args[0] = lp_build_gather_values(gallivm, param, count);
+       else
+               emit_data->args[0] = param[0];
+
+       /* Resource. */
+       emit_data->args[1] = res_ptr;
+       num_args = 2;
+
+       if (opcode == TGSI_OPCODE_TXF || opcode == TGSI_OPCODE_TXQ)
+               emit_data->dst_type = LLVMVectorType(i32, 4);
+       else {
+               emit_data->dst_type = LLVMVectorType(
+                       LLVMFloatTypeInContext(gallivm->context), 4);
+
+               emit_data->args[num_args++] = samp_ptr;
+       }
+
+       emit_data->args[num_args++] = lp_build_const_int32(gallivm, dmask);
+       emit_data->args[num_args++] = lp_build_const_int32(gallivm, is_rect); /* unorm */
+       emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* r128 */
+       emit_data->args[num_args++] = lp_build_const_int32(gallivm,
+                                       tgsi_is_array_sampler(target)); /* da */
+       emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* glc */
+       emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* slc */
+       emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* tfe */
+       emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* lwe */
+
+       emit_data->arg_count = num_args;
 }
 
 static const struct lp_build_tgsi_action tex_action;
 
-static void tex_fetch_args(
+static void tex_fetch_ptrs(
        struct lp_build_tgsi_context * bld_base,
-       struct lp_build_emit_data * emit_data)
+       struct lp_build_emit_data * emit_data,
+       LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
 {
        struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        const struct tgsi_full_instruction * inst = emit_data->inst;
-       unsigned opcode = inst->Instruction.Opcode;
        unsigned target = inst->Texture.Texture;
-       LLVMValueRef coords[5], derivs[6];
-       LLVMValueRef address[16];
-       int ref_pos;
-       unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
-       unsigned count = 0;
-       unsigned chan;
        unsigned sampler_src;
        unsigned sampler_index;
-       unsigned num_deriv_channels = 0;
-       bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false;
-       LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
 
        sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
        sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
@@ -2288,24 +2326,86 @@ static void tex_fetch_args(
 
                ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
 
-               res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
-               res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index);
+               *res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+               *res_ptr = build_indexed_load_const(si_shader_ctx, *res_ptr, ind_index);
 
-               samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
-               samp_ptr = build_indexed_load_const(si_shader_ctx, samp_ptr, ind_index);
+               *samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
+               *samp_ptr = build_indexed_load_const(si_shader_ctx, *samp_ptr, ind_index);
 
                if (target == TGSI_TEXTURE_2D_MSAA ||
                    target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
                        ind_index = LLVMBuildAdd(gallivm->builder, ind_index,
                                                 lp_build_const_int32(gallivm,
                                                                      SI_FMASK_TEX_OFFSET), "");
-                       fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
-                       fmask_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index);
+                       *fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+                       *fmask_ptr = build_indexed_load_const(si_shader_ctx, *fmask_ptr, ind_index);
                }
        } else {
-               res_ptr = si_shader_ctx->resources[sampler_index];
-               samp_ptr = si_shader_ctx->samplers[sampler_index];
-               fmask_ptr = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
+               *res_ptr = si_shader_ctx->resources[sampler_index];
+               *samp_ptr = si_shader_ctx->samplers[sampler_index];
+               *fmask_ptr = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
+       }
+}
+
+static void tex_fetch_args(
+       struct lp_build_tgsi_context * bld_base,
+       struct lp_build_emit_data * emit_data)
+{
+       struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       LLVMBuilderRef builder = gallivm->builder;
+       const struct tgsi_full_instruction * inst = emit_data->inst;
+       unsigned opcode = inst->Instruction.Opcode;
+       unsigned target = inst->Texture.Texture;
+       LLVMValueRef coords[5], derivs[6];
+       LLVMValueRef address[16];
+       int ref_pos;
+       unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
+       unsigned count = 0;
+       unsigned chan;
+       unsigned num_deriv_channels = 0;
+       bool has_offset = inst->Texture.NumOffsets > 0;
+       LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
+       LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+       unsigned dmask = 0xf;
+
+       tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
+
+       if (opcode == TGSI_OPCODE_TXQ) {
+               if (target == TGSI_TEXTURE_BUFFER) {
+                       LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
+
+                       /* Read the size from the buffer descriptor directly. */
+                       LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
+                       LLVMValueRef size = LLVMBuildExtractElement(builder, res,
+                                                       lp_build_const_int32(gallivm, 6), "");
+
+                       if (si_shader_ctx->screen->b.chip_class >= VI) {
+                               /* On VI, the descriptor contains the size in bytes,
+                                * but TXQ must return the size in elements.
+                                * The stride is always non-zero for resources using TXQ.
+                                */
+                               LLVMValueRef stride =
+                                       LLVMBuildExtractElement(builder, res,
+                                                               lp_build_const_int32(gallivm, 5), "");
+                               stride = LLVMBuildLShr(builder, stride,
+                                                      lp_build_const_int32(gallivm, 16), "");
+                               stride = LLVMBuildAnd(builder, stride,
+                                                     lp_build_const_int32(gallivm, 0x3FFF), "");
+
+                               size = LLVMBuildUDiv(builder, size, stride, "");
+                       }
+
+                       emit_data->args[0] = size;
+                       return;
+               }
+
+               /* Textures - set the mip level. */
+               address[count++] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
+
+               set_tex_fetch_args(gallivm, emit_data, opcode, target, res_ptr,
+                                  NULL, address, count, 0xf);
+               return;
        }
 
        if (target == TGSI_TEXTURE_BUFFER) {
@@ -2375,7 +2475,7 @@ static void tex_fetch_args(
                address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
 
        /* Pack depth comparison value */
-       if (tgsi_is_shadow_sampler(target) && opcode != TGSI_OPCODE_LODQ) {
+       if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
                if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
                        address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
                } else {
@@ -2457,9 +2557,7 @@ static void tex_fetch_args(
 
        for (chan = 0; chan < count; chan++ ) {
                address[chan] = LLVMBuildBitCast(gallivm->builder,
-                                                address[chan],
-                                                LLVMInt32TypeInContext(gallivm->context),
-                                                "");
+                                                address[chan], i32, "");
        }
 
        /* Adjust the sample index according to FMASK.
@@ -2491,22 +2589,14 @@ static void tex_fetch_args(
                }
                txf_address[3] = bld_base->uint_bld.zero;
 
-               /* Pad to a power-of-two size. */
-               while (txf_count < util_next_power_of_two(txf_count))
-                       txf_address[txf_count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-
                /* Read FMASK using TXF. */
                inst.Instruction.Opcode = TGSI_OPCODE_TXF;
-               inst.Texture.Texture = target == TGSI_TEXTURE_2D_MSAA ? TGSI_TEXTURE_2D : TGSI_TEXTURE_2D_ARRAY;
+               inst.Texture.Texture = target;
                txf_emit_data.inst = &inst;
                txf_emit_data.chan = 0;
-               txf_emit_data.dst_type = LLVMVectorType(
-                       LLVMInt32TypeInContext(gallivm->context), 4);
-               txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count);
-               txf_emit_data.args[1] = fmask_ptr;
-               txf_emit_data.args[2] = lp_build_const_int32(gallivm, inst.Texture.Texture);
-               txf_emit_data.arg_count = 3;
-
+               set_tex_fetch_args(gallivm, &txf_emit_data, TGSI_OPCODE_TXF,
+                                  target, fmask_ptr, NULL,
+                                  txf_address, txf_count, 0xf);
                build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data);
 
                /* Initialize some constants. */
@@ -2551,9 +2641,6 @@ static void tex_fetch_args(
                                        final_sample, address[sample_chan], "");
        }
 
-       /* Resource */
-       emit_data->args[1] = res_ptr;
-
        if (opcode == TGSI_OPCODE_TXF) {
                /* add tex offsets */
                if (inst->Texture.NumOffsets) {
@@ -2589,89 +2676,37 @@ static void tex_fetch_args(
                                /* texture offsets do not apply to other texture targets */
                        }
                }
+       }
 
-               emit_data->args[2] = lp_build_const_int32(gallivm, target);
-               emit_data->arg_count = 3;
+       if (opcode == TGSI_OPCODE_TG4) {
+               unsigned gather_comp = 0;
 
-               emit_data->dst_type = LLVMVectorType(
-                       LLVMInt32TypeInContext(gallivm->context),
-                       4);
-       } else if (opcode == TGSI_OPCODE_TG4 ||
-                  opcode == TGSI_OPCODE_LODQ ||
-                  has_offset) {
-               unsigned is_array = target == TGSI_TEXTURE_1D_ARRAY ||
-                                   target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
-                                   target == TGSI_TEXTURE_2D_ARRAY ||
-                                   target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
-                                   target == TGSI_TEXTURE_CUBE_ARRAY ||
-                                   target == TGSI_TEXTURE_SHADOWCUBE_ARRAY;
-               unsigned is_rect = target == TGSI_TEXTURE_RECT;
-               unsigned dmask = 0xf;
-
-               if (opcode == TGSI_OPCODE_TG4) {
-                       unsigned gather_comp = 0;
-
-                       /* DMASK was repurposed for GATHER4. 4 components are always
-                        * returned and DMASK works like a swizzle - it selects
-                        * the component to fetch. The only valid DMASK values are
-                        * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
-                        * (red,red,red,red) etc.) The ISA document doesn't mention
-                        * this.
-                        */
+               /* DMASK was repurposed for GATHER4. 4 components are always
+                * returned and DMASK works like a swizzle - it selects
+                * the component to fetch. The only valid DMASK values are
+                * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
+                * (red,red,red,red) etc.) The ISA document doesn't mention
+                * this.
+                */
 
-                       /* Get the component index from src1.x for Gather4. */
-                       if (!tgsi_is_shadow_sampler(target)) {
-                               LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
-                               LLVMValueRef comp_imm;
-                               struct tgsi_src_register src1 = inst->Src[1].Register;
+               /* Get the component index from src1.x for Gather4. */
+               if (!tgsi_is_shadow_target(target)) {
+                       LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
+                       LLVMValueRef comp_imm;
+                       struct tgsi_src_register src1 = inst->Src[1].Register;
 
-                               assert(src1.File == TGSI_FILE_IMMEDIATE);
+                       assert(src1.File == TGSI_FILE_IMMEDIATE);
 
-                               comp_imm = imms[src1.Index][src1.SwizzleX];
-                               gather_comp = LLVMConstIntGetZExtValue(comp_imm);
-                               gather_comp = CLAMP(gather_comp, 0, 3);
-                       }
-
-                       dmask = 1 << gather_comp;
+                       comp_imm = imms[src1.Index][src1.SwizzleX];
+                       gather_comp = LLVMConstIntGetZExtValue(comp_imm);
+                       gather_comp = CLAMP(gather_comp, 0, 3);
                }
 
-               emit_data->args[2] = samp_ptr;
-               emit_data->args[3] = lp_build_const_int32(gallivm, dmask);
-               emit_data->args[4] = lp_build_const_int32(gallivm, is_rect); /* unorm */
-               emit_data->args[5] = lp_build_const_int32(gallivm, 0); /* r128 */
-               emit_data->args[6] = lp_build_const_int32(gallivm, is_array); /* da */
-               emit_data->args[7] = lp_build_const_int32(gallivm, 0); /* glc */
-               emit_data->args[8] = lp_build_const_int32(gallivm, 0); /* slc */
-               emit_data->args[9] = lp_build_const_int32(gallivm, 0); /* tfe */
-               emit_data->args[10] = lp_build_const_int32(gallivm, 0); /* lwe */
-
-               emit_data->arg_count = 11;
-
-               emit_data->dst_type = LLVMVectorType(
-                       LLVMFloatTypeInContext(gallivm->context),
-                       4);
-       } else {
-               emit_data->args[2] = samp_ptr;
-               emit_data->args[3] = lp_build_const_int32(gallivm, target);
-               emit_data->arg_count = 4;
-
-               emit_data->dst_type = LLVMVectorType(
-                       LLVMFloatTypeInContext(gallivm->context),
-                       4);
+               dmask = 1 << gather_comp;
        }
 
-       /* The fetch opcode has been converted to a 2D array fetch.
-        * This simplifies the LLVM backend. */
-       if (target == TGSI_TEXTURE_CUBE_ARRAY)
-               target = TGSI_TEXTURE_2D_ARRAY;
-       else if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
-               target = TGSI_TEXTURE_SHADOW2D_ARRAY;
-
-       /* Pad to power of two vector */
-       while (count < util_next_power_of_two(count))
-               address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-
-       emit_data->args[0] = lp_build_gather_values(gallivm, address, count);
+       set_tex_fetch_args(gallivm, emit_data, opcode, target, res_ptr,
+                          samp_ptr, address, count, dmask);
 }
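The DMASK-as-swizzle convention described in the comment above deserves a concrete value: gathering the green channel means dmask = 1 << 1 = 2, a component selector rather than a write mask. A one-liner for the mapping:

/* GATHER4 dmask selects the component to fetch (0=r, 1=g, 2=b, 3=a),
 * so the only valid values are 1, 2, 4, and 8. */
static unsigned gather4_dmask(unsigned gather_comp)
{
	return 1u << (gather_comp & 3);
}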
 
 static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
@@ -2682,8 +2717,17 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
        unsigned opcode = emit_data->inst->Instruction.Opcode;
        unsigned target = emit_data->inst->Texture.Texture;
        char intr_name[127];
-       bool has_offset = HAVE_LLVM >= 0x0305 ?
-                               emit_data->inst->Texture.NumOffsets > 0 : false;
+       bool has_offset = emit_data->inst->Texture.NumOffsets > 0;
+       bool is_shadow = tgsi_is_shadow_target(target);
+       char type[64];
+       const char *name = "llvm.SI.image.sample";
+       const char *infix = "";
+
+       if (opcode == TGSI_OPCODE_TXQ && target == TGSI_TEXTURE_BUFFER) {
+               /* Just return the buffer size. */
+               emit_data->output[emit_data->chan] = emit_data->args[0];
+               return;
+       }
 
        if (target == TGSI_TEXTURE_BUFFER) {
                emit_data->output[emit_data->chan] = lp_build_intrinsic(
@@ -2694,202 +2738,109 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
                return;
        }
 
-       if (opcode == TGSI_OPCODE_TG4 ||
-           opcode == TGSI_OPCODE_LODQ ||
-           (opcode != TGSI_OPCODE_TXF && has_offset)) {
-               bool is_shadow = tgsi_is_shadow_sampler(target);
-               const char *name = "llvm.SI.image.sample";
-               const char *infix = "";
-
-               switch (opcode) {
-               case TGSI_OPCODE_TEX:
-               case TGSI_OPCODE_TEX2:
-               case TGSI_OPCODE_TXP:
-                       break;
-               case TGSI_OPCODE_TXB:
-               case TGSI_OPCODE_TXB2:
-                       infix = ".b";
-                       break;
-               case TGSI_OPCODE_TXL:
-               case TGSI_OPCODE_TXL2:
-                       infix = ".l";
-                       break;
-               case TGSI_OPCODE_TXD:
-                       infix = ".d";
-                       break;
-               case TGSI_OPCODE_TG4:
-                       name = "llvm.SI.gather4";
-                       break;
-               case TGSI_OPCODE_LODQ:
-                       name = "llvm.SI.getlod";
-                       is_shadow = false;
-                       has_offset = false;
-                       break;
-               default:
-                       assert(0);
-                       return;
-               }
+       switch (opcode) {
+       case TGSI_OPCODE_TXF:
+               name = target == TGSI_TEXTURE_2D_MSAA ||
+                      target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
+                              "llvm.SI.image.load" :
+                              "llvm.SI.image.load.mip";
+               is_shadow = false;
+               has_offset = false;
+               break;
+       case TGSI_OPCODE_TXQ:
+               name = "llvm.SI.getresinfo";
+               is_shadow = false;
+               has_offset = false;
+               break;
+       case TGSI_OPCODE_LODQ:
+               name = "llvm.SI.getlod";
+               is_shadow = false;
+               has_offset = false;
+               break;
+       case TGSI_OPCODE_TEX:
+       case TGSI_OPCODE_TEX2:
+       case TGSI_OPCODE_TXP:
+               break;
+       case TGSI_OPCODE_TXB:
+       case TGSI_OPCODE_TXB2:
+               infix = ".b";
+               break;
+       case TGSI_OPCODE_TXL:
+       case TGSI_OPCODE_TXL2:
+               infix = ".l";
+               break;
+       case TGSI_OPCODE_TXD:
+               infix = ".d";
+               break;
+       case TGSI_OPCODE_TG4:
+               name = "llvm.SI.gather4";
+               break;
+       default:
+               assert(0);
+               return;
+       }
 
-               /* Add the type and suffixes .c, .o if needed. */
-               sprintf(intr_name, "%s%s%s%s.v%ui32", name,
-                       is_shadow ? ".c" : "", infix, has_offset ? ".o" : "",
+       if (LLVMGetTypeKind(LLVMTypeOf(emit_data->args[0])) == LLVMVectorTypeKind)
+               sprintf(type, ".v%ui32",
                        LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
+       else
+               strcpy(type, ".i32");
 
-               emit_data->output[emit_data->chan] = lp_build_intrinsic(
-                       base->gallivm->builder, intr_name, emit_data->dst_type,
-                       emit_data->args, emit_data->arg_count,
-                       LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
-       } else {
-               LLVMTypeRef i8, v16i8, v32i8;
-               const char *name;
-
-               switch (opcode) {
-               case TGSI_OPCODE_TEX:
-               case TGSI_OPCODE_TEX2:
-               case TGSI_OPCODE_TXP:
-                       name = "llvm.SI.sample";
-                       break;
-               case TGSI_OPCODE_TXB:
-               case TGSI_OPCODE_TXB2:
-                       name = "llvm.SI.sampleb";
-                       break;
-               case TGSI_OPCODE_TXD:
-                       name = "llvm.SI.sampled";
-                       break;
-               case TGSI_OPCODE_TXF:
-                       name = "llvm.SI.imageload";
-                       break;
-               case TGSI_OPCODE_TXL:
-               case TGSI_OPCODE_TXL2:
-                       name = "llvm.SI.samplel";
-                       break;
-               default:
-                       assert(0);
-                       return;
-               }
+       /* Add the type and suffixes .c, .o if needed. */
+       sprintf(intr_name, "%s%s%s%s%s",
+               name, is_shadow ? ".c" : "", infix,
+               has_offset ? ".o" : "", type);
 
-               i8 = LLVMInt8TypeInContext(base->gallivm->context);
-               v16i8 = LLVMVectorType(i8, 16);
-               v32i8 = LLVMVectorType(i8, 32);
+       emit_data->output[emit_data->chan] = lp_build_intrinsic(
+               base->gallivm->builder, intr_name, emit_data->dst_type,
+               emit_data->args, emit_data->arg_count,
+               LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
 
-               emit_data->args[1] = LLVMBuildBitCast(base->gallivm->builder,
-                                               emit_data->args[1], v32i8, "");
-               if (opcode != TGSI_OPCODE_TXF) {
-                       emit_data->args[2] = LLVMBuildBitCast(base->gallivm->builder,
-                                               emit_data->args[2], v16i8, "");
-               }
+       /* Divide the number of layers by 6 to get the number of cubes. */
+       if (opcode == TGSI_OPCODE_TXQ &&
+           (target == TGSI_TEXTURE_CUBE_ARRAY ||
+            target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)) {
+               LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+               LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
+               LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);
 
-               sprintf(intr_name, "%s.v%ui32", name,
-                       LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
+               LLVMValueRef v4 = emit_data->output[emit_data->chan];
+               LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
+               z = LLVMBuildSDiv(builder, z, six, "");
 
-               emit_data->output[emit_data->chan] = lp_build_intrinsic(
-                       base->gallivm->builder, intr_name, emit_data->dst_type,
-                       emit_data->args, emit_data->arg_count,
-                       LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+               emit_data->output[emit_data->chan] =
+                       LLVMBuildInsertElement(builder, v4, z, two, "");
        }
 }
 
-static void txq_fetch_args(
+static void si_llvm_emit_txqs(
+       const struct lp_build_tgsi_action * action,
        struct lp_build_tgsi_context * bld_base,
        struct lp_build_emit_data * emit_data)
 {
-       struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
-       const struct tgsi_full_instruction *inst = emit_data->inst;
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
-       unsigned target = inst->Texture.Texture;
-       LLVMValueRef res_ptr;
-
-       if (inst->Src[1].Register.Indirect) {
-               const struct tgsi_full_src_register *reg = &inst->Src[1];
-               LLVMValueRef ind_index;
-
-               ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
-
-               res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
-               res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr,
-                                                  ind_index);
-       } else
-               res_ptr = si_shader_ctx->resources[inst->Src[1].Register.Index];
-
-       if (target == TGSI_TEXTURE_BUFFER) {
-               LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
-               LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
-
-               /* Read the size from the buffer descriptor directly. */
-               LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
-               LLVMValueRef size = LLVMBuildExtractElement(builder, res,
-                                               lp_build_const_int32(gallivm, 6), "");
-
-               if (si_shader_ctx->screen->b.chip_class >= VI) {
-                       /* On VI, the descriptor contains the size in bytes,
-                        * but TXQ must return the size in elements.
-                        * The stride is always non-zero for resources using TXQ.
-                        */
-                       LLVMValueRef stride =
-                               LLVMBuildExtractElement(builder, res,
-                                                       lp_build_const_int32(gallivm, 5), "");
-                       stride = LLVMBuildLShr(builder, stride,
-                                              lp_build_const_int32(gallivm, 16), "");
-                       stride = LLVMBuildAnd(builder, stride,
-                                             lp_build_const_int32(gallivm, 0x3FFF), "");
-
-                       size = LLVMBuildUDiv(builder, size, stride, "");
-               }
-
-               emit_data->args[0] = size;
-               return;
-       }
-
-       /* Mip level */
-       emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
-
-       /* Resource */
-       emit_data->args[1] = res_ptr;
-
-       /* Texture target */
-       if (target == TGSI_TEXTURE_CUBE_ARRAY ||
-           target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
-               target = TGSI_TEXTURE_2D_ARRAY;
-
-       emit_data->args[2] = lp_build_const_int32(bld_base->base.gallivm,
-                                                 target);
-
-       emit_data->arg_count = 3;
-
-       emit_data->dst_type = LLVMVectorType(
-               LLVMInt32TypeInContext(bld_base->base.gallivm->context),
-               4);
-}
-
-static void build_txq_intrinsic(const struct lp_build_tgsi_action * action,
-                               struct lp_build_tgsi_context * bld_base,
-                               struct lp_build_emit_data * emit_data)
-{
-       unsigned target = emit_data->inst->Texture.Texture;
+       LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+       LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
+       LLVMValueRef res, samples;
+       LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
 
-       if (target == TGSI_TEXTURE_BUFFER) {
-               /* Just return the buffer size. */
-               emit_data->output[emit_data->chan] = emit_data->args[0];
-               return;
-       }
+       tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
 
-       build_tgsi_intrinsic_nomem(action, bld_base, emit_data);
 
-       /* Divide the number of layers by 6 to get the number of cubes. */
-       if (target == TGSI_TEXTURE_CUBE_ARRAY ||
-           target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
-               LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-               LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
-               LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);
+       /* Read the samples from the descriptor directly. */
+       res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
+       samples = LLVMBuildExtractElement(
+               builder, res,
+               lp_build_const_int32(gallivm, 3), "");
+       samples = LLVMBuildLShr(builder, samples,
+                               lp_build_const_int32(gallivm, 16), "");
+       samples = LLVMBuildAnd(builder, samples,
+                              lp_build_const_int32(gallivm, 0xf), "");
+       samples = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1),
+                              samples, "");
 
-               LLVMValueRef v4 = emit_data->output[emit_data->chan];
-               LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
-               z = LLVMBuildSDiv(builder, z, six, "");
-
-               emit_data->output[emit_data->chan] =
-                       LLVMBuildInsertElement(builder, v4, z, two, "");
-       }
+       emit_data->output[emit_data->chan] = samples;
 }
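The IR built by si_llvm_emit_txqs() is a plain bitfield decode: descriptor dword 3 is assumed to hold log2(samples) in bits 19:16, and the sample count is 1 shifted left by that field. The same computation in scalar C:

#include <stdint.h>

static unsigned txqs_sample_count(uint32_t desc_dword3)
{
	unsigned log2_samples = (desc_dword3 >> 16) & 0xf;

	return 1u << log2_samples; /* field value 3 -> 8 samples */
}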
 
 /*
@@ -3355,12 +3306,6 @@ static const struct lp_build_tgsi_action tex_action = {
        .emit = build_tex_intrinsic,
 };
 
-static const struct lp_build_tgsi_action txq_action = {
-       .fetch_args = txq_fetch_args,
-       .emit = build_txq_intrinsic,
-       .intr_name = "llvm.SI.resinfo"
-};
-
 static const struct lp_build_tgsi_action interp_action = {
        .fetch_args = interp_fetch_args,
        .emit = build_interp_intrinsic,
@@ -3829,11 +3774,14 @@ int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
 {
        const struct radeon_shader_binary *binary = &shader->binary;
        unsigned i;
+       int r;
        bool dump  = r600_can_dump_shader(&sscreen->b,
                shader->selector ? shader->selector->tokens : NULL);
 
        si_shader_binary_read_config(sscreen, shader, 0);
-       si_shader_binary_upload(sscreen, shader);
+       r = si_shader_binary_upload(sscreen, shader);
+       if (r)
+               return r;
 
        if (dump) {
                if (!(sscreen->b.debug_flags & DBG_NO_ASM)) {
@@ -4070,9 +4018,10 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
        bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action;
        bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action;
        bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
-       bld_base->op_actions[TGSI_OPCODE_TXQ] = txq_action;
+       bld_base->op_actions[TGSI_OPCODE_TXQ] = tex_action;
        bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
        bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
+       bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
 
        bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
        bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
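The VI-specific TXQ path for buffers earlier in this file works the same way: the element stride sits in bits 29:16 of descriptor dword 5, and the byte size from dword 6 is divided by it so TXQ returns elements. A scalar sketch of that decode:

#include <stdint.h>

static uint32_t txq_buffer_elements_vi(uint32_t desc_dword5,
				       uint32_t size_in_bytes)
{
	uint32_t stride = (desc_dword5 >> 16) & 0x3fff;

	/* The stride is always non-zero for resources used with TXQ. */
	return size_in_bytes / stride; /* e.g. 256 bytes / 16 = 16 elements */
}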
index 423b849..2305b99 100644
@@ -185,11 +185,13 @@ struct si_shader_selector {
        unsigned        gs_output_prim;
        unsigned        gs_max_out_vertices;
        unsigned        gs_num_invocations;
+       unsigned        gsvs_itemsize;
 
        /* masks of "get_unique_index" bits */
        uint64_t        inputs_read;
        uint64_t        outputs_written;
        uint32_t        patch_outputs_written;
+       uint32_t        ps_colors_written;
 };
 
 /* Valid shader configurations:
@@ -277,8 +279,10 @@ static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
                return &sctx->gs_shader->info;
        else if (sctx->tes_shader)
                return &sctx->tes_shader->info;
-       else
+       else if (sctx->vs_shader)
                return &sctx->vs_shader->info;
+       else
+               return NULL;
 }
 
 static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
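Since si_get_vs_info() can now return NULL when no VS-stage shader is bound, callers that dereference its result unconditionally rely on a shader being bound. A defensive wrapper in the spirit of the change (vs_writes_viewport_index() is hypothetical and reuses the driver's own types):

static bool vs_writes_viewport_index(struct si_context *sctx)
{
	struct tgsi_shader_info *info = si_get_vs_info(sctx);

	return info && info->writes_viewport_index;
}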
index 806ab5f..d74f6e8 100644
 #include "sid.h"
 #include "radeon/r600_cs.h"
 
+#include "util/u_dual_blend.h"
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
 #include "util/u_pstipple.h"
 
-static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
-                        void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
-                        unsigned num_dw)
+/* Initialize an external atom (owned by ../radeon). */
+static void
+si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
+                     struct r600_atom **list_elem)
+{
+       atom->id = list_elem - sctx->atoms.array + 1;
+       *list_elem = atom;
+}
+
+/* Initialize an atom owned by radeonsi.  */
+void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
+                 struct r600_atom **list_elem,
+                 void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
 {
        atom->emit = (void*)emit_func;
-       atom->num_dw = num_dw;
-       atom->dirty = false;
+       atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
        *list_elem = atom;
 }
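Both initializers derive the atom id from the position of the pointer slot: list_elem points into sctx->atoms.array, so list_elem - sctx->atoms.array is the array index, and storing index + 1 leaves id 0 free to mean "no atom". Reduced to a self-contained example:

struct atom { unsigned id; };

struct atom_list { struct atom *array[4]; };

static void init_atom(struct atom_list *list, struct atom *atom,
		      struct atom **list_elem)
{
	atom->id = list_elem - list->array + 1; /* index + 1; 0 = unused */
	*list_elem = atom;
}

/* init_atom(&l, &a, &l.array[2]) stores id 3 in a. */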
 
@@ -233,27 +243,33 @@ static unsigned si_pack_float_12p4(float x)
  * - The COLOR1 format isn't INVALID because of possible dual-source blending,
  *   so COLOR1 is enabled pretty much all the time.
  * So CB_TARGET_MASK is the only register that can disable COLOR1.
+ *
+ * Another reason is to avoid a hang with dual source blending.
  */
-static void si_update_fb_blend_state(struct si_context *sctx)
+static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *atom)
 {
-       struct si_pm4_state *pm4;
+       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
        struct si_state_blend *blend = sctx->queued.named.blend;
        uint32_t mask = 0, i;
 
-       if (blend == NULL)
-               return;
-
-       pm4 = CALLOC_STRUCT(si_pm4_state);
-       if (pm4 == NULL)
-               return;
-
        for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
                if (sctx->framebuffer.state.cbufs[i])
                        mask |= 0xf << (4*i);
-       mask &= blend->cb_target_mask;
 
-       si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
-       si_pm4_set_state(sctx, fb_blend, pm4);
+       if (blend)
+               mask &= blend->cb_target_mask;
+
+       /* Avoid a hang that happens when dual source blending is enabled
+        * but there are not enough color outputs. This is undefined behavior,
+        * so disable color writes completely.
+        *
+        * Reproducible with Unigine Heaven 4.0 when drirc is missing.
+        */
+       if (blend->dual_src_blend &&
+           (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
+               mask = 0;
+
+       radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
 }
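The emit function reduces to mask arithmetic: each bound color buffer contributes a 4-bit channel mask, the blend state's cb_target_mask is ANDed in, and the dual-source workaround zeroes the register when fewer than two PS color outputs exist. As a standalone sketch (ignoring the NULL-attachment check the real loop performs):

#include <stdbool.h>
#include <stdint.h>

static uint32_t compute_cb_target_mask(unsigned nr_cbufs,
				       uint32_t blend_cb_target_mask,
				       bool dual_src_blend,
				       uint32_t ps_colors_written)
{
	uint32_t mask = 0;

	for (unsigned i = 0; i < nr_cbufs; i++)
		mask |= 0xfu << (4 * i);
	mask &= blend_cb_target_mask;

	/* Dual-source blending needs both COLOR0 and COLOR1 written. */
	if (dual_src_blend && (ps_colors_written & 0x3) != 0x3)
		mask = 0;

	return mask;
}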
 
 /*
@@ -343,6 +359,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
                return NULL;
 
        blend->alpha_to_one = state->alpha_to_one;
+       blend->dual_src_blend = util_blend_state_is_dual(state, 0);
 
        if (state->logicop_enable) {
                color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
@@ -413,7 +430,7 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
        si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
-       si_update_fb_blend_state(sctx);
+       si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
 }
 
 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
@@ -426,17 +443,20 @@ static void si_set_blend_color(struct pipe_context *ctx,
                               const struct pipe_blend_color *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
-       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
 
-        if (pm4 == NULL)
-                return;
+       if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0)
+               return;
+
+       sctx->blend_color.state = *state;
+       si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
+}
 
-       si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0]));
-       si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1]));
-       si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2]));
-       si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3]));
+static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
+{
+       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 
-       si_pm4_set_state(sctx, blend_color, pm4);
+       radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
+       radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
 }
 
 /*
@@ -447,22 +467,13 @@ static void si_set_clip_state(struct pipe_context *ctx,
                              const struct pipe_clip_state *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
-       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
        struct pipe_constant_buffer cb;
 
-       if (pm4 == NULL)
+       if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
                return;
 
-       for (int i = 0; i < 6; i++) {
-               si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16,
-                              fui(state->ucp[i][0]));
-               si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16,
-                              fui(state->ucp[i][1]));
-               si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16,
-                              fui(state->ucp[i][2]));
-               si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16,
-                              fui(state->ucp[i][3]));
-        }
+       sctx->clip_state.state = *state;
+       si_mark_atom_dirty(sctx, &sctx->clip_state.atom);
 
        cb.buffer = NULL;
        cb.user_buffer = state->ucp;
@@ -470,8 +481,14 @@ static void si_set_clip_state(struct pipe_context *ctx,
        cb.buffer_size = 4*4*8;
        ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb);
        pipe_resource_reference(&cb.buffer, NULL);
+}
+
+static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
+{
+       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 
-       si_pm4_set_state(sctx, clip, pm4);
+       radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
+       radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
 }
 
 #define SIX_BITS 0x3F
@@ -485,7 +502,7 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
        unsigned clipdist_mask =
                info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
 
-       r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
+       radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
                S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
                S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
                S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
@@ -499,7 +516,7 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
                S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) |
                (sctx->queued.named.rasterizer->clip_plane_enable &
                 clipdist_mask));
-       r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
+       radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
                sctx->queued.named.rasterizer->pa_cl_clip_cntl |
                (clipdist_mask ? 0 :
                 sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) |
@@ -512,26 +529,50 @@ static void si_set_scissor_states(struct pipe_context *ctx,
                                   const struct pipe_scissor_state *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
-       struct si_state_scissor *scissor;
-       struct si_pm4_state *pm4;
        int i;
 
-       for (i = start_slot; i < start_slot + num_scissors; i++) {
-               int idx = i - start_slot;
-               int offset = i * 4 * 2;
+       for (i = 0; i < num_scissors; i++)
+               sctx->scissors.states[start_slot + i] = state[i];
 
-               scissor = CALLOC_STRUCT(si_state_scissor);
-               if (scissor == NULL)
-                       return;
-               pm4 = &scissor->pm4;
-               scissor->scissor = state[idx];
-               si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset,
-                              S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) |
-                              S_028250_WINDOW_OFFSET_DISABLE(1));
-               si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset,
-                              S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy));
-               si_pm4_set_state(sctx, scissor[i], scissor);
+       sctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+       si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+}
+
+static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
+{
+       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+       struct pipe_scissor_state *states = sctx->scissors.states;
+       unsigned mask = sctx->scissors.dirty_mask;
+
+       /* The simple case: Only 1 viewport is active. */
+       if (mask & 1 &&
+           !si_get_vs_info(sctx)->writes_viewport_index) {
+               radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
+               radeon_emit(cs, S_028250_TL_X(states[0].minx) |
+                               S_028250_TL_Y(states[0].miny) |
+                               S_028250_WINDOW_OFFSET_DISABLE(1));
+               radeon_emit(cs, S_028254_BR_X(states[0].maxx) |
+                               S_028254_BR_Y(states[0].maxy));
+               sctx->scissors.dirty_mask &= ~1; /* clear one bit */
+               return;
        }
+
+       while (mask) {
+               int start, count, i;
+
+               u_bit_scan_consecutive_range(&mask, &start, &count);
+
+               radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
+                                              start * 4 * 2, count * 2);
+               for (i = start; i < start+count; i++) {
+                       radeon_emit(cs, S_028250_TL_X(states[i].minx) |
+                                       S_028250_TL_Y(states[i].miny) |
+                                       S_028250_WINDOW_OFFSET_DISABLE(1));
+                       radeon_emit(cs, S_028254_BR_X(states[i].maxx) |
+                                       S_028254_BR_Y(states[i].maxy));
+               }
+       }
+       sctx->scissors.dirty_mask = 0;
 }
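The emit path walks its dirty mask with u_bit_scan_consecutive_range(), which peels off one run of consecutive set bits per call so each run becomes a single SET_CONTEXT_REG burst. A sketch of that helper's behavior, assuming a mask that is neither zero nor all ones:

#include <stdint.h>

static void scan_consecutive_range(uint32_t *mask, int *start, int *count)
{
	*start = __builtin_ctz(*mask);              /* first set bit     */
	*count = __builtin_ctz(~(*mask >> *start)); /* length of the run */
	*mask &= ~(((1u << *count) - 1) << *start); /* clear that run    */
}

/* mask 0b01101100 yields (start 2, count 2), then (start 5, count 2). */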
 
 static void si_set_viewport_states(struct pipe_context *ctx,
@@ -540,76 +581,76 @@ static void si_set_viewport_states(struct pipe_context *ctx,
                                    const struct pipe_viewport_state *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
-       struct si_state_viewport *viewport;
-       struct si_pm4_state *pm4;
        int i;
 
-       for (i = start_slot; i < start_slot + num_viewports; i++) {
-               int idx = i - start_slot;
-               int offset = i * 4 * 6;
+       for (i = 0; i < num_viewports; i++)
+               sctx->viewports.states[start_slot + i] = state[i];
 
-               viewport = CALLOC_STRUCT(si_state_viewport);
-               if (!viewport)
-                       return;
-               pm4 = &viewport->pm4;
+       sctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
+       si_mark_atom_dirty(sctx, &sctx->viewports.atom);
+}
+
+static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
+{
+       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+       struct pipe_viewport_state *states = sctx->viewports.states;
+       unsigned mask = sctx->viewports.dirty_mask;
+
+       /* The simple case: Only 1 viewport is active. */
+       if (mask & 1 &&
+           !si_get_vs_info(sctx)->writes_viewport_index) {
+               radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
+               radeon_emit(cs, fui(states[0].scale[0]));
+               radeon_emit(cs, fui(states[0].translate[0]));
+               radeon_emit(cs, fui(states[0].scale[1]));
+               radeon_emit(cs, fui(states[0].translate[1]));
+               radeon_emit(cs, fui(states[0].scale[2]));
+               radeon_emit(cs, fui(states[0].translate[2]));
+               sctx->viewports.dirty_mask &= ~1; /* clear one bit */
+               return;
+       }
+
+       while (mask) {
+               int start, count, i;
 
-               viewport->viewport = state[idx];
-               si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE + offset, fui(state[idx].scale[0]));
-               si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET + offset, fui(state[idx].translate[0]));
-               si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE + offset, fui(state[idx].scale[1]));
-               si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET + offset, fui(state[idx].translate[1]));
-               si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE + offset, fui(state[idx].scale[2]));
-               si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET + offset, fui(state[idx].translate[2]));
+               u_bit_scan_consecutive_range(&mask, &start, &count);
 
-               si_pm4_set_state(sctx, viewport[i], viewport);
+               radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
+                                              start * 4 * 6, count * 6);
+               for (i = start; i < start+count; i++) {
+                       radeon_emit(cs, fui(states[i].scale[0]));
+                       radeon_emit(cs, fui(states[i].translate[0]));
+                       radeon_emit(cs, fui(states[i].scale[1]));
+                       radeon_emit(cs, fui(states[i].translate[1]));
+                       radeon_emit(cs, fui(states[i].scale[2]));
+                       radeon_emit(cs, fui(states[i].translate[2]));
+               }
        }
+       sctx->viewports.dirty_mask = 0;
 }
 
 /*
  * inferred state between framebuffer and rasterizer
  */
-static void si_update_fb_rs_state(struct si_context *sctx)
+static void si_update_poly_offset_state(struct si_context *sctx)
 {
        struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
-       struct si_pm4_state *pm4;
-       float offset_units;
 
-       if (!rs || !sctx->framebuffer.state.zsbuf)
+       if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
                return;
 
-       offset_units = sctx->queued.named.rasterizer->offset_units;
        switch (sctx->framebuffer.state.zsbuf->texture->format) {
-       case PIPE_FORMAT_S8_UINT_Z24_UNORM:
-       case PIPE_FORMAT_X8Z24_UNORM:
-       case PIPE_FORMAT_Z24X8_UNORM:
-       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-               offset_units *= 2.0f;
+       case PIPE_FORMAT_Z16_UNORM:
+               si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
+               break;
+       default: /* 24-bit */
+               si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
                break;
        case PIPE_FORMAT_Z32_FLOAT:
        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-               offset_units *= 1.0f;
-               break;
-       case PIPE_FORMAT_Z16_UNORM:
-               offset_units *= 4.0f;
+               si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
                break;
-       default:
-               return;
        }
-
-       pm4 = CALLOC_STRUCT(si_pm4_state);
-
-       if (pm4 == NULL)
-               return;
-
-       /* FIXME some of those reg can be computed with cso */
-       si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
-                      fui(sctx->queued.named.rasterizer->offset_scale));
-       si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
-       si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
-                      fui(sctx->queued.named.rasterizer->offset_scale));
-       si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
-
-       si_pm4_set_state(sctx, fb_rs, pm4);
 }
 
 /*
@@ -636,7 +677,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
 {
        struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
        struct si_pm4_state *pm4 = &rs->pm4;
-       unsigned tmp;
+       unsigned tmp, i;
        float psize_min, psize_max;
 
        if (rs == NULL) {
@@ -650,6 +691,8 @@ static void *si_create_rs_state(struct pipe_context *ctx,
        rs->poly_stipple_enable = state->poly_stipple_enable;
        rs->line_smooth = state->line_smooth;
        rs->poly_smooth = state->poly_smooth;
+       rs->uses_poly_offset = state->offset_point || state->offset_line ||
+                              state->offset_tri;
 
        rs->flatshade = state->flatshade;
        rs->sprite_coord_enable = state->sprite_coord_enable;
@@ -664,10 +707,6 @@ static void *si_create_rs_state(struct pipe_context *ctx,
                S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
                S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
 
-       /* offset */
-       rs->offset_units = state->offset_units;
-       rs->offset_scale = state->offset_scale * 16.0f;
-
        si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
                S_0286D4_FLAT_SHADE_ENA(1) |
                S_0286D4_PNT_SPRITE_ENA(1) |
@@ -720,6 +759,35 @@ static void *si_create_rs_state(struct pipe_context *ctx,
                                   state->fill_back != PIPE_POLYGON_MODE_FILL) |
                S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
                S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
+
+       /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
+       for (i = 0; i < 3; i++) {
+               struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
+               float offset_units = state->offset_units;
+               float offset_scale = state->offset_scale * 16.0f;
+
+               switch (i) {
+               case 0: /* 16-bit zbuffer */
+                       offset_units *= 4.0f;
+                       break;
+               case 1: /* 24-bit zbuffer */
+                       offset_units *= 2.0f;
+                       break;
+               case 2: /* 32-bit zbuffer */
+                       offset_units *= 1.0f;
+                       break;
+               }
+
+               si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
+                              fui(offset_scale));
+               si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
+                              fui(offset_units));
+               si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
+                              fui(offset_scale));
+               si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
+                              fui(offset_units));
+       }
+
        return rs;
 }
 
@@ -738,7 +806,7 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
                si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
        si_pm4_bind_state(sctx, rasterizer, rs);
-       si_update_fb_rs_state(sctx);
+       si_update_poly_offset_state(sctx);
 
        si_mark_atom_dirty(sctx, &sctx->clip_regs);
 }
@@ -746,45 +814,42 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
+
+       if (sctx->queued.named.rasterizer == state)
+               si_pm4_bind_state(sctx, poly_offset, NULL);
        si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
 }
 
 /*
 * inferred state between dsa and stencil ref
  */
-static void si_update_dsa_stencil_ref(struct si_context *sctx)
+static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
 {
-       struct si_pm4_state *pm4;
-       struct pipe_stencil_ref *ref = &sctx->stencil_ref;
-       struct si_state_dsa *dsa = sctx->queued.named.dsa;
-
-       if (!dsa)
-               return;
-
-       pm4 = CALLOC_STRUCT(si_pm4_state);
-       if (pm4 == NULL)
-               return;
-
-       si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK,
-                      S_028430_STENCILTESTVAL(ref->ref_value[0]) |
-                      S_028430_STENCILMASK(dsa->valuemask[0]) |
-                      S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
-                      S_028430_STENCILOPVAL(1));
-       si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF,
-                      S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
-                      S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
-                      S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
-                      S_028434_STENCILOPVAL_BF(1));
+       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+       struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
+       struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
 
-       si_pm4_set_state(sctx, dsa_stencil_ref, pm4);
+       radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
+       radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
+                       S_028430_STENCILMASK(dsa->valuemask[0]) |
+                       S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
+                       S_028430_STENCILOPVAL(1));
+       radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
+                       S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
+                       S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
+                       S_028434_STENCILOPVAL_BF(1));
 }
 
-static void si_set_pipe_stencil_ref(struct pipe_context *ctx,
-                                   const struct pipe_stencil_ref *state)
+static void si_set_stencil_ref(struct pipe_context *ctx,
+                              const struct pipe_stencil_ref *state)
 {
         struct si_context *sctx = (struct si_context *)ctx;
-        sctx->stencil_ref = *state;
-       si_update_dsa_stencil_ref(sctx);
+
+       if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
+               return;
+
+       sctx->stencil_ref.state = *state;
+       si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
 }
 
 
@@ -831,10 +896,10 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
                return NULL;
        }
 
-       dsa->valuemask[0] = state->stencil[0].valuemask;
-       dsa->valuemask[1] = state->stencil[1].valuemask;
-       dsa->writemask[0] = state->stencil[0].writemask;
-       dsa->writemask[1] = state->stencil[1].writemask;
+       dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
+       dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
+       dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
+       dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
 
        db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
                S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
@@ -887,7 +952,12 @@ static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
                 return;
 
        si_pm4_bind_state(sctx, dsa, dsa);
-       si_update_dsa_stencil_ref(sctx);
+
+       if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
+                  sizeof(struct si_dsa_stencil_ref_part)) != 0) {
+               sctx->stencil_ref.dsa_part = dsa->stencil_ref;
+               si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
+       }
 }
 
 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
@@ -918,7 +988,7 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
        struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
        unsigned db_shader_control;
 
-       r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
+       radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
 
        /* DB_RENDER_CONTROL */
        if (sctx->dbcb_depth_copy_enabled ||
@@ -963,10 +1033,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 
        /* DB_RENDER_OVERRIDE2 */
        if (sctx->db_depth_disable_expclear) {
-               r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
+               radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
                        S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
        } else {
-               r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
+               radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
        }
 
        db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
@@ -982,7 +1052,7 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
        if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
                db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
 
-       r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
+       radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
                               db_shader_control);
 }
 
@@ -2036,6 +2106,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                         SI_CONTEXT_INV_TC_L2 |
                         SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
 
+       /* Take the maximum of the old and new count. If the new count is lower,
+        * dirtying is needed to disable the unbound colorbuffers.
+        */
+       sctx->framebuffer.dirty_cbufs |=
+               (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
+       sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
+
        util_copy_framebuffer_state(&sctx->framebuffer.state, state);
 
        sctx->framebuffer.export_16bpc = 0;
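A worked example of the dirty_cbufs computation above: if 4 colorbuffers were bound and the new state binds only 2, MAX2(4, 2) = 4, so the mask becomes (1 << 4) - 1 = 0xF and slots 0-3 are all marked dirty; the emit path then writes COLOR_INVALID into the now-unbound slots 2-3 instead of leaving stale state.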
@@ -2084,13 +2161,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                r600_context_add_resource_size(ctx, surf->base.texture);
        }
 
-       si_update_fb_rs_state(sctx);
-       si_update_fb_blend_state(sctx);
-
-       sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3;
-       sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
-       sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
-       sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
+       si_update_poly_offset_state(sctx);
+       si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
        si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
 
        if (sctx->framebuffer.nr_samples != old_nr_samples) {
@@ -2146,27 +2218,30 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 
        /* Colorbuffers. */
        for (i = 0; i < nr_cbufs; i++) {
+               if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
+                       continue;
+
                cb = (struct r600_surface*)state->cbufs[i];
                if (!cb) {
-                       r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
+                       radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
                                               S_028C70_FORMAT(V_028C70_COLOR_INVALID));
                        continue;
                }
 
                tex = (struct r600_texture *)cb->base.texture;
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      &tex->resource, RADEON_USAGE_READWRITE,
                                      tex->surface.nsamples > 1 ?
                                              RADEON_PRIO_COLOR_BUFFER_MSAA :
                                              RADEON_PRIO_COLOR_BUFFER);
 
                if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
-                       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+                       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                tex->cmask_buffer, RADEON_USAGE_READWRITE,
                                RADEON_PRIO_COLOR_META);
                }
 
-               r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
+               radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
                                           sctx->b.chip_class >= VI ? 14 : 13);
                radeon_emit(cs, cb->cb_color_base);     /* R_028C60_CB_COLOR0_BASE */
                radeon_emit(cs, cb->cb_color_pitch);    /* R_028C64_CB_COLOR0_PITCH */
@@ -2186,36 +2261,37 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
                        radeon_emit(cs, 0);     /* R_028C94_CB_COLOR0_DCC_BASE */
        }
        /* set CB_COLOR1_INFO for possible dual-src blending */
-       if (i == 1 && state->cbufs[0]) {
-               r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
+       if (i == 1 && state->cbufs[0] &&
+           sctx->framebuffer.dirty_cbufs & (1 << 0)) {
+               radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
                                       cb->cb_color_info | tex->cb_color_info);
                i++;
        }
-       for (; i < 8 ; i++) {
-               r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
-       }
+       for (; i < 8 ; i++)
+               if (sctx->framebuffer.dirty_cbufs & (1 << i))
+                       radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
 
        /* ZS buffer. */
-       if (state->zsbuf) {
+       if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
                struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
                struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
 
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      &rtex->resource, RADEON_USAGE_READWRITE,
                                      zb->base.texture->nr_samples > 1 ?
                                              RADEON_PRIO_DEPTH_BUFFER_MSAA :
                                              RADEON_PRIO_DEPTH_BUFFER);
 
                if (zb->db_htile_data_base) {
-                       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+                       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                              rtex->htile_buffer, RADEON_USAGE_READWRITE,
                                              RADEON_PRIO_DEPTH_META);
                }
 
-               r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
-               r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
+               radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
+               radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
 
-               r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
+               radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
                radeon_emit(cs, zb->db_depth_info);     /* R_02803C_DB_DEPTH_INFO */
                radeon_emit(cs, zb->db_z_info |         /* R_028040_DB_Z_INFO */
                            S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
@@ -2227,26 +2303,28 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
                radeon_emit(cs, zb->db_depth_size);     /* R_028058_DB_DEPTH_SIZE */
                radeon_emit(cs, zb->db_depth_slice);    /* R_02805C_DB_DEPTH_SLICE */
 
-               r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
-               r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
-               r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+               radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
+               radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
+               radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
                                       zb->pa_su_poly_offset_db_fmt_cntl);
-       } else {
-               r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
+       } else if (sctx->framebuffer.dirty_zsbuf) {
+               radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
                radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
                radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
        }
 
        /* Framebuffer dimensions. */
         /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
-       r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
+       radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
                               S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
+
+       sctx->framebuffer.dirty_cbufs = 0;
+       sctx->framebuffer.dirty_zsbuf = false;
 }
 
-static void si_emit_msaa_sample_locs(struct r600_common_context *rctx,
+static void si_emit_msaa_sample_locs(struct si_context *sctx,
                                     struct r600_atom *atom)
 {
-       struct si_context *sctx = (struct si_context *)rctx;
        struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
        unsigned nr_samples = sctx->framebuffer.nr_samples;
 
@@ -2254,11 +2332,8 @@ static void si_emit_msaa_sample_locs(struct r600_common_context *rctx,
                                                SI_NUM_SMOOTH_AA_SAMPLES);
 }
 
-const struct r600_atom si_atom_msaa_sample_locs = { si_emit_msaa_sample_locs, 18 }; /* number of CS dwords */
-
-static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom)
+static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
 {
-       struct si_context *sctx = (struct si_context *)rctx;
        struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 
        cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
@@ -2266,7 +2341,6 @@ static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_at
                                sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0);
 }
 
-const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */
 
 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
 {
@@ -2628,18 +2702,63 @@ static bool sampler_state_needs_border_color(const struct pipe_sampler_state *st
 static void *si_create_sampler_state(struct pipe_context *ctx,
                                     const struct pipe_sampler_state *state)
 {
+       struct si_context *sctx = (struct si_context *)ctx;
        struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
        unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
-       unsigned border_color_type;
+       unsigned border_color_type, border_color_index = 0;
 
        if (rstate == NULL) {
                return NULL;
        }
 
-       if (sampler_state_needs_border_color(state))
-               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
-       else
+       if (!sampler_state_needs_border_color(state))
+               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+       else if (state->border_color.f[0] == 0 &&
+                state->border_color.f[1] == 0 &&
+                state->border_color.f[2] == 0 &&
+                state->border_color.f[3] == 0)
                border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+       else if (state->border_color.f[0] == 0 &&
+                state->border_color.f[1] == 0 &&
+                state->border_color.f[2] == 0 &&
+                state->border_color.f[3] == 1)
+               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
+       else if (state->border_color.f[0] == 1 &&
+                state->border_color.f[1] == 1 &&
+                state->border_color.f[2] == 1 &&
+                state->border_color.f[3] == 1)
+               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
+       else {
+               int i;
+
+               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
+
+               /* Check if the border has been uploaded already. */
+               for (i = 0; i < sctx->border_color_count; i++)
+                       if (memcmp(&sctx->border_color_table[i], &state->border_color,
+                                  sizeof(state->border_color)) == 0)
+                               break;
+
+               if (i >= SI_MAX_BORDER_COLORS) {
+                       /* Getting 4096 unique border colors is very unlikely. */
+                       fprintf(stderr, "radeonsi: The border color table is full. "
+                               "Any new border colors will be just black. "
+                               "Please file a bug.\n");
+                       border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+               } else {
+                       if (i == sctx->border_color_count) {
+                               /* Upload a new border color. */
+                               memcpy(&sctx->border_color_table[i], &state->border_color,
+                                      sizeof(state->border_color));
+                               util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
+                                                       &state->border_color,
+                                                       sizeof(state->border_color));
+                               sctx->border_color_count++;
+                       }
+
+                       border_color_index = i;
+               }
+       }
 
        rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
                          S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
@@ -2654,104 +2773,30 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
                          S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
                          S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
                          S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
-       rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
-
-       if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
-               memcpy(rstate->border_color, state->border_color.ui,
-                      sizeof(rstate->border_color));
-       }
-
+       rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
+                        S_008F3C_BORDER_COLOR_TYPE(border_color_type);
        return rstate;
 }
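The sampler-state path above replaces per-bind border-color uploads with a per-context table: the three common colors map to fixed hardware enums, and anything else is deduplicated against border_color_table and uploaded at most once. A sketch of the lookup under the assumptions that SI_MAX_BORDER_COLORS is 4096 (as the error message implies) and that the table entries are pipe_color_union (helper name hypothetical):

static int si_lookup_border_color(struct si_context *sctx,
				  const union pipe_color_union *color)
{
	int i;

	/* Linear search; apps reuse a handful of border colors,
	 * so the table stays short in practice. */
	for (i = 0; i < sctx->border_color_count; i++)
		if (!memcmp(&sctx->border_color_table[i], color, sizeof(*color)))
			return i;		/* already uploaded */

	if (i >= SI_MAX_BORDER_COLORS)
		return -1;			/* full: fall back to black */

	sctx->border_color_table[i] = *color;	/* CPU copy for future compares */
	sctx->border_color_count++;
	return i;				/* caller also writes the GPU map */
}

Note that the XXX from the removed si_set_border_colors() ("broken if the buffer gets reallocated") disappears: the table now lives in one buffer whose address is programmed once in si_init_config().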
 
-/* Upload border colors and update the pointers in resource descriptors.
- * There can only be 4096 border colors per context.
- *
- * XXX: This is broken if the buffer gets reallocated.
- */
-static void si_set_border_colors(struct si_context *sctx, unsigned count,
-                                void **states)
-{
-       struct si_sampler_state **rstates = (struct si_sampler_state **)states;
-       uint32_t *border_color_table = NULL;
-       int i, j;
-
-       for (i = 0; i < count; i++) {
-               if (rstates[i] &&
-                   G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
-                   V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
-                       if (!sctx->border_color_table ||
-                           ((sctx->border_color_offset + count - i) &
-                            C_008F3C_BORDER_COLOR_PTR)) {
-                               r600_resource_reference(&sctx->border_color_table, NULL);
-                               sctx->border_color_offset = 0;
-
-                               sctx->border_color_table =
-                                       si_resource_create_custom(&sctx->screen->b.b,
-                                                                 PIPE_USAGE_DYNAMIC,
-                                                                 4096 * 4 * 4);
-                       }
-
-                       if (!border_color_table) {
-                               border_color_table =
-                                       sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf,
-                                                            sctx->b.rings.gfx.cs,
-                                                            PIPE_TRANSFER_WRITE |
-                                                            PIPE_TRANSFER_UNSYNCHRONIZED);
-                       }
-
-                       for (j = 0; j < 4; j++) {
-                               border_color_table[4 * sctx->border_color_offset + j] =
-                                       util_le32_to_cpu(rstates[i]->border_color[j]);
-                       }
-
-                       rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
-                       rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++);
-               }
-       }
-
-       if (border_color_table) {
-               struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-
-               uint64_t va_offset = sctx->border_color_table->gpu_address;
-
-               si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
-               if (sctx->b.chip_class >= CIK)
-                       si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
-               si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ,
-                             RADEON_PRIO_SHADER_DATA);
-               si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
-       }
-}
-
-static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
-                                   unsigned start, unsigned count,
-                                   void **states)
+static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
 {
        struct si_context *sctx = (struct si_context *)ctx;
 
-       if (!count || shader >= SI_NUM_SHADERS)
+       if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
                return;
 
-       si_set_border_colors(sctx, count, states);
-       si_set_sampler_descriptors(sctx, shader, start, count, states);
+       sctx->sample_mask.sample_mask = sample_mask;
+       si_mark_atom_dirty(sctx, &sctx->sample_mask.atom);
 }
 
-static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
+static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
 {
-       struct si_context *sctx = (struct si_context *)ctx;
-       struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask);
-       struct si_pm4_state *pm4 = &state->pm4;
-       uint16_t mask = sample_mask;
-
-        if (state == NULL)
-                return;
-
-       state->sample_mask = mask;
-       si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16));
-       si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16));
+       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+       unsigned mask = sctx->sample_mask.sample_mask;
 
-       si_pm4_set_state(sctx, sample_mask, state);
+       radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
+       radeon_emit(cs, mask | (mask << 16));
+       radeon_emit(cs, mask | (mask << 16));
 }
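Each 32-bit PA_SC_AA_MASK register holds the 16-bit sample masks for two pixels of the 2x2 quad, so mask | (mask << 16) duplicates the application's mask into both halves; a 4-sample mask of 0xF, for instance, is emitted as 0x000F000F to both registers.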
 
 static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
@@ -2770,7 +2815,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
        struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
        int i;
 
-       assert(count < PIPE_MAX_ATTRIBS);
+       assert(count < SI_MAX_ATTRIBS);
        if (!v)
                return NULL;
 
@@ -2962,16 +3007,29 @@ static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
 static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
                                 bool include_draw_vbo)
 {
-       si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo);
+       si_need_cs_space((struct si_context*)ctx);
 }
 
 static void si_init_config(struct si_context *sctx);
 
 void si_init_state_functions(struct si_context *sctx)
 {
-       si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
-       si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
-       si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
+       si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
+       si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
+
+       si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush);
+       si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
+       si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
+       si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
+       si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
+       si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
+       si_init_atom(sctx, &sctx->cb_target_mask, &sctx->atoms.s.cb_target_mask, si_emit_cb_target_mask);
+       si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
+       si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
+       si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
+       si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors);
+       si_init_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports);
+       si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
 
        sctx->b.b.create_blend_state = si_create_blend_state;
        sctx->b.b.bind_blend_state = si_bind_blend_state;
@@ -2994,13 +3052,12 @@ void si_init_state_functions(struct si_context *sctx)
        sctx->b.b.set_clip_state = si_set_clip_state;
        sctx->b.b.set_scissor_states = si_set_scissor_states;
        sctx->b.b.set_viewport_states = si_set_viewport_states;
-       sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref;
+       sctx->b.b.set_stencil_ref = si_set_stencil_ref;
 
        sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
        sctx->b.b.get_sample_position = cayman_get_sample_position;
 
        sctx->b.b.create_sampler_state = si_create_sampler_state;
-       sctx->b.b.bind_sampler_states = si_bind_sampler_states;
        sctx->b.b.delete_sampler_state = si_delete_sampler_state;
 
        sctx->b.b.create_sampler_view = si_create_sampler_view;
@@ -3165,12 +3222,17 @@ static void si_init_config(struct si_context *sctx)
        unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
        unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
        unsigned raster_config, raster_config_1;
+       uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
        struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+       int i;
 
        if (pm4 == NULL)
                return;
 
-       si_cmd_context_control(pm4);
+       si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
+       si_pm4_cmd_add(pm4, 0x80000000);
+       si_pm4_cmd_add(pm4, 0x80000000);
+       si_pm4_cmd_end(pm4, false);
 
        si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
        si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
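si_cmd_context_control() is gone and the CONTEXT_CONTROL packet is now built inline above. The two 0x80000000 dwords are taken verbatim from the patch; reading them as bit 31 being the enable bit of the packet's load-control and shadow-enable words is an assumption based on the PM4 packet layout, not something this diff states.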
@@ -3181,7 +3243,6 @@ static void si_init_config(struct si_context *sctx)
        si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
 
        si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
-       si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0);
        si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
 
        si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
@@ -3196,6 +3257,11 @@ static void si_init_config(struct si_context *sctx)
 
        si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
 
+       for (i = 0; i < 16; i++) {
+               si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
+               si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
+       }
+
        switch (sctx->screen->b.family) {
        case CHIP_TAHITI:
        case CHIP_PITCAIRN:
@@ -3282,8 +3348,6 @@ static void si_init_config(struct si_context *sctx)
        si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
        /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
        si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
-       si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0);
-       si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, fui(1.0));
        si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
        si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
        si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
@@ -3323,5 +3387,12 @@ static void si_init_config(struct si_context *sctx)
                si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
        }
 
+       si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
+       if (sctx->b.chip_class >= CIK)
+               si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
+       si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
+                     RADEON_PRIO_SHADER_DATA);
+
+       si_pm4_upload_indirect_buffer(sctx, pm4);
        sctx->init_config = pm4;
 }
index 118c562..3fc0799 100644
@@ -31,6 +31,7 @@
 #include "radeon/r600_pipe_common.h"
 
 #define SI_NUM_SHADERS (PIPE_SHADER_TESS_EVAL+1)
+#define SI_MAX_ATTRIBS 16
 
 struct si_screen;
 struct si_shader;
@@ -39,25 +40,13 @@ struct si_state_blend {
        struct si_pm4_state     pm4;
        uint32_t                cb_target_mask;
        bool                    alpha_to_one;
-};
-
-struct si_state_sample_mask {
-       struct si_pm4_state     pm4;
-       uint16_t                sample_mask;
-};
-
-struct si_state_scissor {
-       struct si_pm4_state             pm4;
-       struct pipe_scissor_state       scissor;
-};
-
-struct si_state_viewport {
-       struct si_pm4_state             pm4;
-       struct pipe_viewport_state      viewport;
+       bool                    dual_src_blend;
 };
 
 struct si_state_rasterizer {
        struct si_pm4_state     pm4;
+       /* poly offset states for 16-bit, 24-bit, and 32-bit zbuffers */
+       struct si_pm4_state     pm4_poly_offset[3];
        bool                    flatshade;
        bool                    two_side;
        bool                    multisample_enable;
@@ -66,56 +55,80 @@ struct si_state_rasterizer {
        unsigned                pa_sc_line_stipple;
        unsigned                pa_cl_clip_cntl;
        unsigned                clip_plane_enable;
-       float                   offset_units;
-       float                   offset_scale;
        bool                    poly_stipple_enable;
        bool                    line_smooth;
        bool                    poly_smooth;
+       bool                    uses_poly_offset;
 };
 
-struct si_state_dsa {
-       struct si_pm4_state     pm4;
-       unsigned                alpha_func;
+struct si_dsa_stencil_ref_part {
        uint8_t                 valuemask[2];
        uint8_t                 writemask[2];
 };
 
+struct si_state_dsa {
+       struct si_pm4_state             pm4;
+       unsigned                        alpha_func;
+       struct si_dsa_stencil_ref_part  stencil_ref;
+};
+
+struct si_stencil_ref {
+       struct r600_atom                atom;
+       struct pipe_stencil_ref         state;
+       struct si_dsa_stencil_ref_part  dsa_part;
+};
+
 struct si_vertex_element
 {
        unsigned                        count;
-       uint32_t                        rsrc_word3[PIPE_MAX_ATTRIBS];
-       uint32_t                        format_size[PIPE_MAX_ATTRIBS];
-       struct pipe_vertex_element      elements[PIPE_MAX_ATTRIBS];
+       uint32_t                        rsrc_word3[SI_MAX_ATTRIBS];
+       uint32_t                        format_size[SI_MAX_ATTRIBS];
+       struct pipe_vertex_element      elements[SI_MAX_ATTRIBS];
 };
 
 union si_state {
        struct {
                struct si_state_blend           *blend;
-               struct si_pm4_state             *blend_color;
-               struct si_pm4_state             *clip;
-               struct si_state_sample_mask     *sample_mask;
-               struct si_state_scissor         *scissor[16];
-               struct si_state_viewport        *viewport[16];
                struct si_state_rasterizer      *rasterizer;
                struct si_state_dsa             *dsa;
-               struct si_pm4_state             *fb_rs;
-               struct si_pm4_state             *fb_blend;
-               struct si_pm4_state             *dsa_stencil_ref;
-               struct si_pm4_state             *ta_bordercolor_base;
+               struct si_pm4_state             *poly_offset;
                struct si_pm4_state             *ls;
                struct si_pm4_state             *hs;
                struct si_pm4_state             *es;
                struct si_pm4_state             *gs;
-               struct si_pm4_state             *gs_rings;
-               struct si_pm4_state             *tf_ring;
                struct si_pm4_state             *vgt_shader_config;
                struct si_pm4_state             *vs;
                struct si_pm4_state             *ps;
-               struct si_pm4_state             *spi;
        } named;
        struct si_pm4_state     *array[0];
 };
 
+union si_state_atoms {
+       struct {
+               /* The order matters. */
+               struct r600_atom *cache_flush;
+               struct r600_atom *streamout_begin;
+               struct r600_atom *streamout_enable; /* must be after streamout_begin */
+               struct r600_atom *framebuffer;
+               struct r600_atom *msaa_sample_locs;
+               struct r600_atom *db_render_state;
+               struct r600_atom *msaa_config;
+               struct r600_atom *sample_mask;
+               struct r600_atom *cb_target_mask;
+               struct r600_atom *blend_color;
+               struct r600_atom *clip_regs;
+               struct r600_atom *clip_state;
+               struct r600_atom *shader_userdata;
+               struct r600_atom *scissors;
+               struct r600_atom *viewports;
+               struct r600_atom *stencil_ref;
+               struct r600_atom *spi_map;
+       } s;
+       struct r600_atom *array[0];
+};
+
+#define SI_NUM_ATOMS (sizeof(union si_state_atoms)/sizeof(struct r600_atom*))
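Since array[0] overlays the named struct, SI_NUM_ATOMS evaluates to the number of r600_atom pointers declared above — 17 at this point — so the dirty-atom bitmask consumed by the u_bit_scan() loop in si_draw_vbo() fits in a plain unsigned.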
+
 struct si_shader_data {
        struct r600_atom        atom;
        uint32_t                sh_base[SI_NUM_SHADERS];
@@ -155,7 +168,7 @@ struct si_shader_data {
 #define SI_SO_BUF_OFFSET       SI_NUM_RING_BUFFERS
 #define SI_NUM_RW_BUFFERS      (SI_SO_BUF_OFFSET + 4)
 
-#define SI_NUM_VERTEX_BUFFERS  16
+#define SI_NUM_VERTEX_BUFFERS  SI_MAX_ATTRIBS
 
 
 /* This represents descriptors in memory, such as buffer resources,
@@ -222,19 +235,7 @@ struct si_buffer_resources {
                                  si_pm4_block_idx(member)); \
        } while(0)
 
-#define si_pm4_set_state(sctx, member, value) \
-       do { \
-               if ((sctx)->queued.named.member != (value)) { \
-                       si_pm4_free_state(sctx, \
-                               (struct si_pm4_state *)(sctx)->queued.named.member, \
-                               si_pm4_block_idx(member)); \
-                       (sctx)->queued.named.member = (value); \
-               } \
-       } while(0)
-
 /* si_descriptors.c */
-void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
-                               unsigned start, unsigned count, void **states);
 void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
                        struct pipe_resource *buffer,
                        unsigned stride, unsigned num_records,
@@ -247,10 +248,14 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx);
 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
                            const uint8_t *ptr, unsigned size, uint32_t *const_offset);
 void si_shader_change_notify(struct si_context *sctx);
+void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom);
 
 /* si_state.c */
 struct si_shader_selector;
 
+void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
+                 struct r600_atom **list_elem,
+                 void (*emit_func)(struct si_context *ctx, struct r600_atom *state));
 boolean si_is_format_supported(struct pipe_screen *screen,
                                enum pipe_format format,
                                enum pipe_texture_target target,
@@ -272,18 +277,12 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
                              unsigned force_level);
 
 /* si_state_shader.c */
-void si_update_shaders(struct si_context *sctx);
+bool si_update_shaders(struct si_context *sctx);
 void si_init_shader_functions(struct si_context *sctx);
 
 /* si_state_draw.c */
-extern const struct r600_atom si_atom_cache_flush;
-extern const struct r600_atom si_atom_msaa_sample_locs;
-extern const struct r600_atom si_atom_msaa_config;
-void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *atom);
+void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom);
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo);
 void si_trace_emit(struct si_context *sctx);
 
-/* si_commands.c */
-void si_cmd_context_control(struct si_pm4_state *pm4);
-
 #endif
index fd2feca..6d8e0e5 100644
@@ -176,8 +176,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
        /* Due to a hw bug, RSRC2_LS must be written twice with another
         * LS register written in between. */
        if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
-               si_write_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
-       si_write_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+               radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
+       radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
        radeon_emit(cs, ls->current->ls_rsrc1);
        radeon_emit(cs, ls_rsrc2);
 
@@ -199,19 +199,19 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
                          ((perpatch_output_offset / 16) << 16);
 
        /* Set them for LS. */
-       si_write_sh_reg(cs,
+       radeon_set_sh_reg(cs,
                R_00B530_SPI_SHADER_USER_DATA_LS_0 + SI_SGPR_LS_OUT_LAYOUT * 4,
                tcs_in_layout);
 
        /* Set them for TCS. */
-       si_write_sh_reg_seq(cs,
+       radeon_set_sh_reg_seq(cs,
                R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OUT_OFFSETS * 4, 3);
        radeon_emit(cs, tcs_out_offsets);
        radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
        radeon_emit(cs, tcs_in_layout);
 
        /* Set them for TES. */
-       si_write_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OUT_OFFSETS * 4, 2);
+       radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OUT_OFFSETS * 4, 2);
        radeon_emit(cs, tcs_out_offsets);
        radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26));
 }
@@ -347,11 +347,11 @@ static void si_emit_scratch_reloc(struct si_context *sctx)
        if (!sctx->emit_scratch_reloc)
                return;
 
-       r600_write_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
+       radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
                               sctx->spi_tmpring_size);
 
        if (sctx->scratch_buffer) {
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      sctx->scratch_buffer, RADEON_USAGE_READWRITE,
                                      RADEON_PRIO_SHADER_RESOURCE_RW);
 
@@ -378,7 +378,7 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
            rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)
                return;
 
-       r600_write_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
+       radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
                rs->pa_sc_line_stipple |
                S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 :
                                         rast_prim == PIPE_PRIM_LINE_STRIP ? 2 : 0));
@@ -411,9 +411,9 @@ static void si_emit_draw_registers(struct si_context *sctx,
                        radeon_emit(cs, ia_multi_vgt_param); /* IA_MULTI_VGT_PARAM */
                        radeon_emit(cs, ls_hs_config); /* VGT_LS_HS_CONFIG */
                } else {
-                       r600_write_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
-                       r600_write_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
-                       r600_write_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
+                       radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
+                       radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
+                       radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
                }
                sctx->last_prim = prim;
                sctx->last_multi_vgt_param = ia_multi_vgt_param;
@@ -421,19 +421,19 @@ static void si_emit_draw_registers(struct si_context *sctx,
        }
 
        if (gs_out_prim != sctx->last_gs_out_prim) {
-               r600_write_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out_prim);
+               radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out_prim);
                sctx->last_gs_out_prim = gs_out_prim;
        }
 
        /* Primitive restart. */
        if (info->primitive_restart != sctx->last_primitive_restart_en) {
-               r600_write_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info->primitive_restart);
+               radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info->primitive_restart);
                sctx->last_primitive_restart_en = info->primitive_restart;
 
                if (info->primitive_restart &&
                    (info->restart_index != sctx->last_restart_index ||
                     sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN)) {
-                       r600_write_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
+                       radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
                                               info->restart_index);
                        sctx->last_restart_index = info->restart_index;
                }
@@ -453,7 +453,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
                uint64_t va = t->buf_filled_size->gpu_address +
                              t->buf_filled_size_offset;
 
-               r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
+               radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
                                       t->stride_in_dw);
 
                radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
@@ -465,7 +465,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
                radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
                radeon_emit(cs, 0); /* unused */
 
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      t->buf_filled_size, RADEON_USAGE_READ,
                                      RADEON_PRIO_MIN);
        }
@@ -508,7 +508,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
                    sctx->last_base_vertex == SI_BASE_VERTEX_UNKNOWN ||
                    info->start_instance != sctx->last_start_instance ||
                    sh_base_reg != sctx->last_sh_base_reg) {
-                       si_write_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 2);
+                       radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 2);
                        radeon_emit(cs, base_vertex);
                        radeon_emit(cs, info->start_instance);
 
@@ -519,7 +519,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
        } else {
                si_invalidate_draw_sh_constants(sctx);
 
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource *)info->indirect,
                                      RADEON_USAGE_READ, RADEON_PRIO_MIN);
        }
@@ -529,7 +529,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
                                          ib->index_size;
                uint64_t index_va = r600_resource(ib->buffer)->gpu_address + ib->offset;
 
-               r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource *)ib->buffer,
                                      RADEON_USAGE_READ, RADEON_PRIO_MIN);
 
@@ -595,8 +595,9 @@ static void si_emit_draw_packets(struct si_context *sctx,
 
 #define BOTH_ICACHE_KCACHE (SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_KCACHE)
 
-void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *atom)
+void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
 {
+       struct r600_common_context *sctx = &si_ctx->b;
        struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
        uint32_t cp_coher_cntl = 0;
        uint32_t compute =
@@ -706,8 +707,6 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
        sctx->flags = 0;
 }
 
-const struct r600_atom si_atom_cache_flush = { si_emit_cache_flush, 24 }; /* number of CS dwords */
-
 static void si_get_draw_start_count(struct si_context *sctx,
                                    const struct pipe_draw_info *info,
                                    unsigned *start, unsigned *count)
@@ -730,7 +729,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 {
        struct si_context *sctx = (struct si_context *)ctx;
        struct pipe_index_buffer ib = {};
-       unsigned i;
+       unsigned mask;
 
        if (!info->count && !info->indirect &&
            (info->indexed || !info->count_from_stream_output))
@@ -760,8 +759,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
        else
                sctx->current_rast_prim = info->mode;
 
-       si_update_shaders(sctx);
-       if (!si_upload_shader_descriptors(sctx))
+       if (!si_update_shaders(sctx) ||
+           !si_upload_shader_descriptors(sctx))
                return;
 
        if (info->indexed) {
@@ -783,6 +782,10 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
                        u_upload_alloc(sctx->b.uploader, start_offset, count * 2,
                                       &out_offset, &out_buffer, &ptr);
+                       if (!out_buffer) {
+                               pipe_resource_reference(&ib.buffer, NULL);
+                               return;
+                       }
 
                        util_shorten_ubyte_elts_to_userptr(&sctx->b.b, &ib, 0,
                                                           ib.offset + start_offset,
@@ -803,6 +806,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                        u_upload_data(sctx->b.uploader, start_offset, count * ib.index_size,
                                      (char*)ib.user_buffer + start_offset,
                                      &ib.offset, &ib.buffer);
+                       if (!ib.buffer)
+                               return;
                        /* info->start will be added by the drawing code */
                        ib.offset -= start_offset;
                }
@@ -819,15 +824,16 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
        if (sctx->b.flags)
                si_mark_atom_dirty(sctx, sctx->atoms.s.cache_flush);
 
-       si_need_cs_space(sctx, 0, TRUE);
+       si_need_cs_space(sctx);
 
        /* Emit states. */
-       for (i = 0; i < SI_NUM_ATOMS(sctx); i++) {
-               if (sctx->atoms.array[i]->dirty) {
-                       sctx->atoms.array[i]->emit(&sctx->b, sctx->atoms.array[i]);
-                       sctx->atoms.array[i]->dirty = false;
-               }
+       mask = sctx->dirty_atoms;
+       while (mask) {
+               struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
+
+               atom->emit(&sctx->b, atom);
        }
+       sctx->dirty_atoms = 0;
 
        si_pm4_emit_dirty(sctx);
        si_emit_scratch_reloc(sctx);
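The loop above replaces a linear scan of every atom's dirty flag with a single bitmask walk. A sketch of the marking side this implies, assuming si_init_atom() assigns each atom a bit index (the id field is an assumption; only dirty_atoms itself appears in this diff):

static inline void si_mark_atom_dirty(struct si_context *sctx,
				      struct r600_atom *atom)
{
	sctx->dirty_atoms |= 1u << atom->id;	/* cleared wholesale after emit */
}

Because u_bit_scan() returns and clears the lowest set bit, atoms are emitted in the declaration order of union si_state_atoms, which is why its header says "The order matters."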
@@ -876,7 +882,7 @@ void si_trace_emit(struct si_context *sctx)
        struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 
        sctx->trace_id++;
-       r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
+       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
                              RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
        radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
        radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
index a09f588..b5e14ea 100644
@@ -28,6 +28,7 @@
 #include "si_pipe.h"
 #include "si_shader.h"
 #include "sid.h"
+#include "radeon/r600_cs.h"
 
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_ureg.h"
@@ -665,8 +666,16 @@ static void *si_create_shader_state(struct pipe_context *ctx,
        struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
        int i;
 
+       if (!sel)
+               return NULL;
+
        sel->type = pipe_shader_type;
        sel->tokens = tgsi_dup_tokens(state->tokens);
+       if (!sel->tokens) {
+               FREE(sel);
+               return NULL;
+       }
+
        sel->so = state->stream_output;
        tgsi_scan_shader(state->tokens, &sel->info);
        p_atomic_inc(&sscreen->b.num_shaders_created);
@@ -679,6 +688,8 @@ static void *si_create_shader_state(struct pipe_context *ctx,
                        sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
                sel->gs_num_invocations =
                        sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
+               sel->gsvs_itemsize = sel->info.num_outputs * 16 *
+                                    sel->gs_max_out_vertices;
 
                for (i = 0; i < sel->info.num_inputs; i++) {
                        unsigned name = sel->info.input_semantic_name[i];
@@ -713,10 +724,24 @@ static void *si_create_shader_state(struct pipe_context *ctx,
                        }
                }
                break;
+       case PIPE_SHADER_FRAGMENT:
+               for (i = 0; i < sel->info.num_outputs; i++) {
+                       unsigned name = sel->info.output_semantic_name[i];
+                       unsigned index = sel->info.output_semantic_index[i];
+
+                       if (name == TGSI_SEMANTIC_COLOR)
+                               sel->ps_colors_written |= 1 << index;
+               }
+               break;
        }
 
        if (sscreen->b.debug_flags & DBG_PRECOMPILE)
-               si_shader_select(ctx, sel);
+               if (si_shader_select(ctx, sel)) {
+                       fprintf(stderr, "radeonsi: can't create a shader\n");
+                       tgsi_free_tokens(sel->tokens);
+                       FREE(sel);
+                       return NULL;
+               }
 
        return sel;
 }
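gsvs_itemsize is now precomputed per selector: each GS output takes one vec4 (16 bytes) per emitted vertex. For example, a geometry shader with 8 outputs and gs_max_out_vertices = 6 needs 8 * 16 * 6 = 768 bytes of GSVS ring space per thread, and si_update_gs_rings() below can skip the ring rebind whenever this value is unchanged.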
@@ -751,6 +776,25 @@ static void *si_create_tes_state(struct pipe_context *ctx,
        return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_EVAL);
 }
 
+/**
+ * Normally, we only emit 1 viewport and 1 scissor if no shader is using
+ * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
+ * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
+ * called to emit the rest.
+ */
+static void si_update_viewports_and_scissors(struct si_context *sctx)
+{
+       struct tgsi_shader_info *info = si_get_vs_info(sctx);
+
+       if (!info || !info->writes_viewport_index)
+               return;
+
+       if (sctx->scissors.dirty_mask)
+           si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+       if (sctx->viewports.dirty_mask)
+           si_mark_atom_dirty(sctx, &sctx->viewports.atom);
+}
+
 static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
@@ -761,6 +805,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
 
        sctx->vs_shader = sel;
        si_mark_atom_dirty(sctx, &sctx->clip_regs);
+       si_update_viewports_and_scissors(sctx);
 }
 
 static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
@@ -778,6 +823,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
 
        if (enable_changed)
                si_shader_change_notify(sctx);
+       si_update_viewports_and_scissors(sctx);
 }
 
 static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
@@ -812,6 +858,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
                si_shader_change_notify(sctx);
                sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
        }
+       si_update_viewports_and_scissors(sctx);
 }
 
 static void si_make_dummy_ps(struct si_context *sctx)
@@ -840,6 +887,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
        }
 
        sctx->ps_shader = sel;
+       si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
 }
 
 static void si_delete_shader_selector(struct pipe_context *ctx,
@@ -946,14 +994,19 @@ static void si_delete_tes_shader(struct pipe_context *ctx, void *state)
        si_delete_shader_selector(ctx, sel);
 }
 
-static void si_update_spi_map(struct si_context *sctx)
+static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
 {
+       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
        struct si_shader *ps = sctx->ps_shader->current;
        struct si_shader *vs = si_get_vs_state(sctx);
        struct tgsi_shader_info *psinfo = &ps->selector->info;
        struct tgsi_shader_info *vsinfo = &vs->selector->info;
-       struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-       unsigned i, j, tmp;
+       unsigned i, j, tmp, num_written = 0;
+
+       if (!ps->nparam)
+               return;
+
+       radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam);
 
        for (i = 0; i < psinfo->num_inputs; i++) {
                unsigned name = psinfo->input_semantic_name[i];
@@ -997,9 +1050,9 @@ bcolor:
                        tmp = S_028644_OFFSET(0x20);
                }
 
-               si_pm4_set_reg(pm4,
-                              R_028644_SPI_PS_INPUT_CNTL_0 + param_offset * 4,
-                              tmp);
+               assert(param_offset == num_written);
+               radeon_emit(cs, tmp);
+               num_written++;
 
                if (name == TGSI_SEMANTIC_COLOR &&
                    ps->key.ps.color_two_side) {
@@ -1008,8 +1061,7 @@ bcolor:
                        goto bcolor;
                }
        }
-
-       si_pm4_set_state(sctx, spi, pm4);
+       assert(ps->nparam == num_written);
 }
 
 /* Initialize state related to ESGS / GSVS ring buffers */
@@ -1018,15 +1070,21 @@ static void si_init_gs_rings(struct si_context *sctx)
        unsigned esgs_ring_size = 128 * 1024;
        unsigned gsvs_ring_size = 60 * 1024 * 1024;
 
-       assert(!sctx->gs_rings);
-       sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);
+       assert(!sctx->esgs_ring && !sctx->gsvs_ring);
 
        sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
                                       PIPE_USAGE_DEFAULT, esgs_ring_size);
+       if (!sctx->esgs_ring)
+               return;
 
        sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
                                             PIPE_USAGE_DEFAULT, gsvs_ring_size);
+       if (!sctx->gsvs_ring) {
+               pipe_resource_reference(&sctx->esgs_ring, NULL);
+               return;
+       }
 
+       /* Append these registers to the init config state. */
        if (sctx->b.chip_class >= CIK) {
                if (sctx->b.chip_class >= VI) {
                        /* The maximum sizes are 63.999 MB on VI, because
@@ -1034,17 +1092,24 @@ static void si_init_gs_rings(struct si_context *sctx)
                        assert(esgs_ring_size / 256 < (1 << 18));
                        assert(gsvs_ring_size / 256 < (1 << 18));
                }
-               si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE,
+               si_pm4_set_reg(sctx->init_config, R_030900_VGT_ESGS_RING_SIZE,
                               esgs_ring_size / 256);
-               si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE,
+               si_pm4_set_reg(sctx->init_config, R_030904_VGT_GSVS_RING_SIZE,
                               gsvs_ring_size / 256);
        } else {
-               si_pm4_set_reg(sctx->gs_rings, R_0088C8_VGT_ESGS_RING_SIZE,
+               si_pm4_set_reg(sctx->init_config, R_0088C8_VGT_ESGS_RING_SIZE,
                               esgs_ring_size / 256);
-               si_pm4_set_reg(sctx->gs_rings, R_0088CC_VGT_GSVS_RING_SIZE,
+               si_pm4_set_reg(sctx->init_config, R_0088CC_VGT_GSVS_RING_SIZE,
                               gsvs_ring_size / 256);
        }
 
+       /* Flush the context to re-emit the init_config state.
+        * This is done only once in the lifetime of a context.
+        */
+       si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
+       sctx->b.initial_gfx_cs_size = 0; /* force flush */
+       si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
+
        si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
                           sctx->esgs_ring, 0, esgs_ring_size,
                           true, true, 4, 64, 0);
@@ -1058,11 +1123,14 @@ static void si_init_gs_rings(struct si_context *sctx)
 
 static void si_update_gs_rings(struct si_context *sctx)
 {
-       unsigned gs_vert_itemsize = sctx->gs_shader->info.num_outputs * 16;
-       unsigned gs_max_vert_out = sctx->gs_shader->gs_max_out_vertices;
-       unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+       unsigned gsvs_itemsize = sctx->gs_shader->gsvs_itemsize;
        uint64_t offset;
 
+       if (gsvs_itemsize == sctx->last_gsvs_itemsize)
+               return;
+
+       sctx->last_gsvs_itemsize = gsvs_itemsize;
+
        si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
                           sctx->gsvs_ring, gsvs_itemsize,
                           64, true, true, 4, 16, 0);
@@ -1081,17 +1149,19 @@ static void si_update_gs_rings(struct si_context *sctx)
        si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_3,
                           sctx->gsvs_ring, gsvs_itemsize,
                           64, true, true, 4, 16, offset);
-
 }
+
 /**
- * @returns 1 if \p sel has been updated to use a new scratch buffer and 0
- *          otherwise.
+ * @returns 1 if \p sel has been updated to use a new scratch buffer
+ *          0 if not
+ *          < 0 if there was a failure
  */
-static unsigned si_update_scratch_buffer(struct si_context *sctx,
+static int si_update_scratch_buffer(struct si_context *sctx,
                                    struct si_shader_selector *sel)
 {
        struct si_shader *shader;
        uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
+       int r;
 
        if (!sel)
                return 0;
@@ -1112,7 +1182,9 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx,
        si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
 
        /* Replace the shader bo with a new bo that has the relocs applied. */
-       si_shader_binary_upload(sctx->screen, shader);
+       r = si_shader_binary_upload(sctx->screen, shader);
+       if (r)
+               return r;
 
        /* Update the shader state to use the new shader bo. */
        si_shader_init_pm4_state(shader);
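
si_update_scratch_buffer now returns an int with three outcomes rather than a boolean: negative when re-uploading the shader binary fails, 1 when the shader was patched and its pm4 state must be rebound, 0 when no update was needed. A hedged sketch of the resulting caller pattern, with placeholder names standing in for the driver's functions:

#include <stdbool.h>
#include <stdio.h>

/* Placeholder for the real update: <0 failure, 1 rebind needed, 0 no-op. */
static int update_scratch(int shader_id)
{
   if (shader_id < 0)
      return -1;          /* e.g. binary upload failed */
   return shader_id % 2;  /* pretend odd ids needed patching */
}

static bool update_all(void)
{
   int ids[] = { 1, 2, 3 };
   for (unsigned i = 0; i < 3; i++) {
      int r = update_scratch(ids[i]);
      if (r < 0)
         return false;    /* propagate failure to the caller */
      if (r == 1)
         printf("rebinding shader %d\n", ids[i]); /* si_pm4_bind_state(...) */
   }
   return true;
}

int main(void) { return update_all() ? 0 : 1; }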
@@ -1151,7 +1223,7 @@ static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
        return bytes;
 }
 
-static void si_update_spi_tmpring_size(struct si_context *sctx)
+static bool si_update_spi_tmpring_size(struct si_context *sctx)
 {
        unsigned current_scratch_buffer_size =
                si_get_current_scratch_buffer_size(sctx);
@@ -1159,6 +1231,7 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
                si_get_max_scratch_bytes_per_wave(sctx);
        unsigned scratch_needed_size = scratch_bytes_per_wave *
                sctx->scratch_waves;
+       int r;
 
        if (scratch_needed_size > 0) {
 
@@ -1171,6 +1244,9 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
                        sctx->scratch_buffer =
                                        si_resource_create_custom(&sctx->screen->b.b,
                                        PIPE_USAGE_DEFAULT, scratch_needed_size);
+                       if (!sctx->scratch_buffer)
+                               return false;
+                       sctx->emit_scratch_reloc = true;
                }
 
                /* Update the shaders, so they are using the latest scratch.  The
@@ -1178,31 +1254,57 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
                 * last used, so we still need to try to update them, even if
                 * they require scratch buffers smaller than the current size.
                 */
-               if (si_update_scratch_buffer(sctx, sctx->ps_shader))
+               r = si_update_scratch_buffer(sctx, sctx->ps_shader);
+               if (r < 0)
+                       return false;
+               if (r == 1)
                        si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
-               if (si_update_scratch_buffer(sctx, sctx->gs_shader))
+
+               r = si_update_scratch_buffer(sctx, sctx->gs_shader);
+               if (r < 0)
+                       return false;
+               if (r == 1)
                        si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
-               if (si_update_scratch_buffer(sctx, sctx->tcs_shader))
+
+               r = si_update_scratch_buffer(sctx, sctx->tcs_shader);
+               if (r < 0)
+                       return false;
+               if (r == 1)
                        si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
 
                /* VS can be bound as LS, ES, or VS. */
                if (sctx->tes_shader) {
-                       if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+                       r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+                       if (r < 0)
+                               return false;
+                       if (r == 1)
                                si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
                } else if (sctx->gs_shader) {
-                       if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+                       r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+                       if (r < 0)
+                               return false;
+                       if (r == 1)
                                si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
                } else {
-                       if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+                       r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+                       if (r < 0)
+                               return false;
+                       if (r == 1)
                                si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
                }
 
                /* TES can be bound as ES or VS. */
                if (sctx->gs_shader) {
-                       if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+                       r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+                       if (r < 0)
+                               return false;
+                       if (r == 1)
                                si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
                } else {
-                       if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+                       r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+                       if (r < 0)
+                               return false;
+                       if (r == 1)
                                si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
                }
        }
@@ -1213,40 +1315,44 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
 
        sctx->spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
                                S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
+       return true;
 }
 
 static void si_init_tess_factor_ring(struct si_context *sctx)
 {
-       assert(!sctx->tf_state);
-       sctx->tf_state = CALLOC_STRUCT(si_pm4_state);
+       assert(!sctx->tf_ring);
 
        sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
                                           PIPE_USAGE_DEFAULT,
                                           32768 * sctx->screen->b.info.max_se);
-       sctx->b.clear_buffer(&sctx->b.b, sctx->tf_ring, 0,
-                            sctx->tf_ring->width0, fui(0), false);
+       if (!sctx->tf_ring)
+               return;
+
        assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
 
+       /* Append these registers to the init config state. */
        if (sctx->b.chip_class >= CIK) {
-               si_pm4_set_reg(sctx->tf_state, R_030938_VGT_TF_RING_SIZE,
+               si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
                               S_030938_SIZE(sctx->tf_ring->width0 / 4));
-               si_pm4_set_reg(sctx->tf_state, R_030940_VGT_TF_MEMORY_BASE,
+               si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
                               r600_resource(sctx->tf_ring)->gpu_address >> 8);
        } else {
-               si_pm4_set_reg(sctx->tf_state, R_008988_VGT_TF_RING_SIZE,
+               si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE,
                               S_008988_SIZE(sctx->tf_ring->width0 / 4));
-               si_pm4_set_reg(sctx->tf_state, R_0089B8_VGT_TF_MEMORY_BASE,
+               si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE,
                               r600_resource(sctx->tf_ring)->gpu_address >> 8);
        }
-       si_pm4_add_bo(sctx->tf_state, r600_resource(sctx->tf_ring),
-                     RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
-       si_pm4_bind_state(sctx, tf_ring, sctx->tf_state);
+
+       /* Flush the context to re-emit the init_config state.
+        * This is done only once in the lifetime of a context.
+        */
+       si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
+       sctx->b.initial_gfx_cs_size = 0; /* force flush */
+       si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
 
        si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_TESS_CTRL,
                           SI_RING_TESS_FACTOR, sctx->tf_ring, 0,
                           sctx->tf_ring->width0, false, false, 0, 0, 0);
-
-       sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
 }
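
For reference, the spi_tmpring_size value computed in si_update_spi_tmpring_size above packs the wave count together with the per-wave scratch size in 1 KiB units. A small model of that packing follows; the field widths and shifts below only approximate the S_0286E8_* macros and should be checked against sid.h:

#include <stdint.h>
#include <stdio.h>

/* Assumed field layout: WAVES in the low bits, WAVESIZE above them. */
#define TMPRING_WAVES(x)    ((uint32_t)(x) & 0xFFF)
#define TMPRING_WAVESIZE(x) (((uint32_t)(x) & 0x1FFF) << 12)

int main(void)
{
   const unsigned scratch_waves = 32;
   const unsigned scratch_bytes_per_wave = 16384;   /* 16 KiB */
   const uint32_t v = TMPRING_WAVES(scratch_waves) |
                      TMPRING_WAVESIZE(scratch_bytes_per_wave >> 10);

   /* 16384 >> 10 == 16, so WAVESIZE is expressed in 1 KiB granules. */
   printf("SPI_TMPRING_SIZE = 0x%08x\n", v);
   return 0;
}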
 
 /**
@@ -1280,7 +1386,6 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)
 
        sctx->fixed_func_tcs_shader =
                ureg_create_shader_and_destroy(ureg, &sctx->b.b);
-       assert(sctx->fixed_func_tcs_shader);
 }
 
 static void si_update_vgt_shader_config(struct si_context *sctx)
@@ -1328,32 +1433,49 @@ static void si_update_so(struct si_context *sctx, struct si_shader_selector *sha
        sctx->b.streamout.stride_in_dw = shader->so.stride;
 }
 
-void si_update_shaders(struct si_context *sctx)
+bool si_update_shaders(struct si_context *sctx)
 {
        struct pipe_context *ctx = (struct pipe_context*)sctx;
        struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+       int r;
 
        /* Update stages before GS. */
        if (sctx->tes_shader) {
-               if (!sctx->tf_state)
+               if (!sctx->tf_ring) {
                        si_init_tess_factor_ring(sctx);
+                       if (!sctx->tf_ring)
+                               return false;
+               }
 
                /* VS as LS */
-               si_shader_select(ctx, sctx->vs_shader);
+               r = si_shader_select(ctx, sctx->vs_shader);
+               if (r)
+                       return false;
                si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
 
                if (sctx->tcs_shader) {
-                       si_shader_select(ctx, sctx->tcs_shader);
+                       r = si_shader_select(ctx, sctx->tcs_shader);
+                       if (r)
+                               return false;
                        si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
                } else {
-                       if (!sctx->fixed_func_tcs_shader)
+                       if (!sctx->fixed_func_tcs_shader) {
                                si_generate_fixed_func_tcs(sctx);
-                       si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+                               if (!sctx->fixed_func_tcs_shader)
+                                       return false;
+                       }
+
+                       r = si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+                       if (r)
+                               return false;
                        si_pm4_bind_state(sctx, hs,
                                          sctx->fixed_func_tcs_shader->current->pm4);
                }
 
-               si_shader_select(ctx, sctx->tes_shader);
+               r = si_shader_select(ctx, sctx->tes_shader);
+               if (r)
+                       return false;
+
                if (sctx->gs_shader) {
                        /* TES as ES */
                        si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
@@ -1364,50 +1486,45 @@ void si_update_shaders(struct si_context *sctx)
                }
        } else if (sctx->gs_shader) {
                /* VS as ES */
-               si_shader_select(ctx, sctx->vs_shader);
+               r = si_shader_select(ctx, sctx->vs_shader);
+               if (r)
+                       return false;
                si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
        } else {
                /* VS as VS */
-               si_shader_select(ctx, sctx->vs_shader);
+               r = si_shader_select(ctx, sctx->vs_shader);
+               if (r)
+                       return false;
                si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
                si_update_so(sctx, sctx->vs_shader);
        }
 
        /* Update GS. */
        if (sctx->gs_shader) {
-               si_shader_select(ctx, sctx->gs_shader);
+               r = si_shader_select(ctx, sctx->gs_shader);
+               if (r)
+                       return false;
                si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
                si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
                si_update_so(sctx, sctx->gs_shader);
 
-               if (!sctx->gs_rings)
+               if (!sctx->gsvs_ring) {
                        si_init_gs_rings(sctx);
-
-               if (sctx->emitted.named.gs_rings != sctx->gs_rings)
-                       sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
-               si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);
+                       if (!sctx->gsvs_ring)
+                               return false;
+               }
 
                si_update_gs_rings(sctx);
        } else {
-               si_pm4_bind_state(sctx, gs_rings, NULL);
                si_pm4_bind_state(sctx, gs, NULL);
                si_pm4_bind_state(sctx, es, NULL);
        }
 
        si_update_vgt_shader_config(sctx);
 
-       si_shader_select(ctx, sctx->ps_shader);
-
-       if (!sctx->ps_shader->current) {
-               struct si_shader_selector *sel;
-
-               /* use a dummy shader if compiling the shader (variant) failed */
-               si_make_dummy_ps(sctx);
-               sel = sctx->dummy_pixel_shader;
-               si_shader_select(ctx, sel);
-               sctx->ps_shader->current = sel->current;
-       }
-
+       r = si_shader_select(ctx, sctx->ps_shader);
+       if (r)
+               return false;
        si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
 
        if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
@@ -1415,12 +1532,17 @@ void si_update_shaders(struct si_context *sctx)
            sctx->flatshade != rs->flatshade) {
                sctx->sprite_coord_enable = rs->sprite_coord_enable;
                sctx->flatshade = rs->flatshade;
-               si_update_spi_map(sctx);
+               si_mark_atom_dirty(sctx, &sctx->spi_map);
        }
 
-       if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
-           si_pm4_state_changed(sctx, gs)) {
-               si_update_spi_tmpring_size(sctx);
+       if (si_pm4_state_changed(sctx, ls) ||
+           si_pm4_state_changed(sctx, hs) ||
+           si_pm4_state_changed(sctx, es) ||
+           si_pm4_state_changed(sctx, gs) ||
+           si_pm4_state_changed(sctx, vs) ||
+           si_pm4_state_changed(sctx, ps)) {
+               if (!si_update_spi_tmpring_size(sctx))
+                       return false;
        }
 
        if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
@@ -1435,10 +1557,13 @@ void si_update_shaders(struct si_context *sctx)
                if (sctx->b.chip_class == SI)
                        si_mark_atom_dirty(sctx, &sctx->db_render_state);
        }
+       return true;
 }
 
 void si_init_shader_functions(struct si_context *sctx)
 {
+       si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
+
        sctx->b.b.create_vs_state = si_create_vs_state;
        sctx->b.b.create_tcs_state = si_create_tcs_state;
        sctx->b.b.create_tes_state = si_create_tes_state;
index cd6be73..4bb2457 100644
@@ -94,7 +94,7 @@
 #define PKT3_DRAW_INDEX_IMMD                   0x2E /* not on CIK */
 #define PKT3_NUM_INSTANCES                     0x2F
 #define PKT3_DRAW_INDEX_MULTI_AUTO             0x30
-#define PKT3_INDIRECT_BUFFER                   0x32
+#define PKT3_INDIRECT_BUFFER_SI                0x32 /* not on CIK */
 #define PKT3_STRMOUT_BUFFER_UPDATE             0x34
 #define PKT3_DRAW_INDEX_OFFSET_2               0x35
 #define PKT3_DRAW_PREAMBLE                     0x36 /* new on CIK */
 #define PKT3_WAIT_REG_MEM                      0x3C
 #define                WAIT_REG_MEM_EQUAL              3
 #define PKT3_MEM_WRITE                         0x3D /* not on CIK */
+#define PKT3_INDIRECT_BUFFER_CIK               0x3F /* new on CIK */
 #define PKT3_COPY_DATA                        0x40
 #define                COPY_DATA_SRC_SEL(x)            ((x) & 0xf)
 #define                        COPY_DATA_REG           0
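
The rename above makes the split explicit: SI parts use opcode 0x32 for INDIRECT_BUFFER while CIK parts use 0x3F. A hedged sketch of picking the opcode by generation; the helper and the enum numbering are illustrative, not code from the driver:

#include <stdint.h>
#include <stdio.h>

#define PKT3_INDIRECT_BUFFER_SI  0x32 /* not on CIK */
#define PKT3_INDIRECT_BUFFER_CIK 0x3F /* new on CIK */

enum chip_class { SI = 0, CIK, VI }; /* illustrative ordering */

/* Pick the indirect-buffer opcode for the current generation. */
static uint32_t indirect_buffer_opcode(enum chip_class chip)
{
   return chip >= CIK ? PKT3_INDIRECT_BUFFER_CIK : PKT3_INDIRECT_BUFFER_SI;
}

int main(void)
{
   printf("SI: 0x%02x, CIK: 0x%02x\n",
          indirect_buffer_opcode(SI), indirect_buffer_opcode(CIK));
   return 0;
}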
index 0bfd9c3..d8606f3 100644
@@ -193,9 +193,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
       return 4;
    case PIPE_CAP_TEXTURE_GATHER_SM5:
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
       return 1;
    case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
-   case PIPE_CAP_TEXTURE_QUERY_LOD:
    case PIPE_CAP_SAMPLE_SHADING:
    case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
       return 0;
@@ -246,6 +246,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
    }
    /* should only get here on unhandled cases */
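
The softpipe hunk above moves PIPE_CAP_TEXTURE_QUERY_LOD into the group returning 1 and adds PIPE_CAP_TGSI_TXQS to the zero group. A toy model of this switch-table style of cap reporting, with an invented enum standing in for the real pipe_cap:

#include <stdio.h>

/* Illustrative cap enum mirroring the switch-table style above. */
enum toy_cap { CAP_TEXTURE_QUERY_LOD, CAP_SAMPLE_SHADING, CAP_TGSI_TXQS };

static int toy_get_param(enum toy_cap cap)
{
   switch (cap) {
   case CAP_TEXTURE_QUERY_LOD:
      return 1;  /* now advertised, matching the hunk above */
   case CAP_SAMPLE_SHADING:
   case CAP_TGSI_TXQS:
      return 0;  /* unsupported caps are grouped and fall through to 0 */
   }
   return 0;     /* should only get here on unhandled cases */
}

int main(void)
{
   printf("LODQ supported: %d\n", toy_get_param(CAP_TEXTURE_QUERY_LOD));
   return 0;
}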
index 565fca6..8a09350 100644
@@ -135,7 +135,7 @@ wrap_nearest_repeat(float s, unsigned size, int offset, int *icoord)
 {
    /* s limited to [0,1) */
    /* i limited to [0,size-1] */
-   int i = util_ifloor(s * size);
+   const int i = util_ifloor(s * size);
    *icoord = repeat(i + offset, size);
 }
 
@@ -280,7 +280,7 @@ static void
 wrap_linear_repeat(float s, unsigned size, int offset,
                    int *icoord0, int *icoord1, float *w)
 {
-   float u = s * size - 0.5F;
+   const float u = s * size - 0.5F;
    *icoord0 = repeat(util_ifloor(u) + offset, size);
    *icoord1 = repeat(*icoord0 + 1, size);
    *w = frac(u);
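
The wrap helpers above share one pattern: scale the coordinate, subtract the half-texel, floor to get the first texel, and keep the fraction as the lerp weight. A self-contained worked example of the wrap_linear_repeat case (offset omitted; repeat() below is a floored-modulo stand-in for the driver's version):

#include <math.h>
#include <stdio.h>

/* Floored-modulo wrap; equivalent in effect to the driver's repeat(). */
static int repeat(int coord, unsigned size)
{
   int m = coord % (int)size;
   return m < 0 ? m + (int)size : m;
}

/* Mirrors wrap_linear_repeat() above: texel pair plus lerp weight. */
static void linear_repeat(float s, unsigned size,
                          int *i0, int *i1, float *w)
{
   const float u = s * size - 0.5f;
   *i0 = repeat((int)floorf(u), size);
   *i1 = repeat(*i0 + 1, size);
   *w = u - floorf(u);
}

int main(void)
{
   int i0, i1; float w;
   linear_repeat(0.3f, 8, &i0, &i1, &w);
   printf("s=0.30: texels %d,%d w=%.2f\n", i0, i1, w); /* 1,2 w=0.90 */
   linear_repeat(0.02f, 8, &i0, &i1, &w);
   printf("s=0.02: texels %d,%d w=%.2f\n", i0, i1, w); /* 7,0 w=0.66 */
   return 0;
}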
@@ -291,9 +291,8 @@ static void
 wrap_linear_clamp(float s, unsigned size, int offset,
                   int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s * size + offset, 0.0F, (float)size);
+   const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
 
-   u = u - 0.5f;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    *w = frac(u);
@@ -304,8 +303,7 @@ static void
 wrap_linear_clamp_to_edge(float s, unsigned size, int offset,
                           int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s * size + offset, 0.0F, (float)size);
-   u = u - 0.5f;
+   const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    if (*icoord0 < 0)
@@ -322,8 +320,7 @@ wrap_linear_clamp_to_border(float s, unsigned size, int offset,
 {
    const float min = -0.5F;
    const float max = (float)size + 0.5F;
-   float u = CLAMP(s * size + offset, min, max);
-   u = u - 0.5f;
+   const float u = CLAMP(s * size + offset, min, max) - 0.5f;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    *w = frac(u);
@@ -391,12 +388,8 @@ wrap_linear_mirror_clamp_to_border(float s, unsigned size, int offset,
 {
    const float min = -0.5F;
    const float max = size + 0.5F;
-   float u = fabsf(s * size + offset);
-   if (u <= min)
-      u = min;
-   else if (u >= max)
-      u = max;
-   u -= 0.5F;
+   const float t = fabsf(s * size + offset);
+   const float u = CLAMP(t, min, max) - 0.5F;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    *w = frac(u);
@@ -409,7 +402,7 @@ wrap_linear_mirror_clamp_to_border(float s, unsigned size, int offset,
 static void
 wrap_nearest_unorm_clamp(float s, unsigned size, int offset, int *icoord)
 {
-   int i = util_ifloor(s);
+   const int i = util_ifloor(s);
    *icoord = CLAMP(i + offset, 0, (int) size-1);
 }
 
@@ -442,7 +435,7 @@ wrap_linear_unorm_clamp(float s, unsigned size, int offset,
                         int *icoord0, int *icoord1, float *w)
 {
    /* Not exactly what the spec says, but it matches NVIDIA output */
-   float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
+   const float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    *w = frac(u);
@@ -456,8 +449,7 @@ static void
 wrap_linear_unorm_clamp_to_border(float s, unsigned size, int offset,
                                   int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F);
-   u -= 0.5F;
+   const float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F) - 0.5F;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    if (*icoord1 > (int) size - 1)
@@ -473,8 +465,7 @@ static void
 wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
                                 int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F);
-   u -= 0.5F;
+   const float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F) - 0.5F;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    if (*icoord1 > (int) size - 1)
@@ -489,7 +480,7 @@ wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
 static inline int
 coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
 {
-   int c = util_ifloor(coord + 0.5F);
+   const int c = util_ifloor(coord + 0.5F);
    return CLAMP(c, (int)first_layer, (int)last_layer);
 }
 
@@ -505,9 +496,9 @@ compute_lambda_1d(const struct sp_sampler_view *sview,
                   const float p[TGSI_QUAD_SIZE])
 {
    const struct pipe_resource *texture = sview->base.texture;
-   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
-   float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
-   float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
+   const float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
+   const float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
+   const float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 
    return util_fast_log2(rho);
 }
@@ -520,13 +511,13 @@ compute_lambda_2d(const struct sp_sampler_view *sview,
                   const float p[TGSI_QUAD_SIZE])
 {
    const struct pipe_resource *texture = sview->base.texture;
-   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
-   float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
-   float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
-   float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
-   float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
-   float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
-   float rho  = MAX2(maxx, maxy);
+   const float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
+   const float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
+   const float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
+   const float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
+   const float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
+   const float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
+   const float rho  = MAX2(maxx, maxy);
 
    return util_fast_log2(rho);
 }
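
compute_lambda_2d above estimates the LOD as log2 of the largest per-axis texel footprint of the quad. A self-contained numeric check, with log2f standing in for util_fast_log2():

#include <math.h>
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* Mirrors compute_lambda_2d(): footprint per axis, then log2 of the max. */
static float lambda_2d(float dsdx, float dsdy, float dtdx, float dtdy,
                       unsigned width, unsigned height)
{
   const float maxx = MAX2(fabsf(dsdx), fabsf(dsdy)) * width;
   const float maxy = MAX2(fabsf(dtdx), fabsf(dtdy)) * height;
   return log2f(MAX2(maxx, maxy));
}

int main(void)
{
   /* A quad stepping 1/128 in s across a 256-texel-wide level 0
    * covers 2 texels per pixel, so the LOD comes out as 1. */
   printf("lambda = %.2f\n", lambda_2d(1.0f / 128.0f, 0, 0, 0, 256, 256));
   return 0;
}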
@@ -539,19 +530,16 @@ compute_lambda_3d(const struct sp_sampler_view *sview,
                   const float p[TGSI_QUAD_SIZE])
 {
    const struct pipe_resource *texture = sview->base.texture;
-   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
-   float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
-   float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
-   float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
-   float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
-   float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
-   float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
-   float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
-   float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, sview->base.u.tex.first_level);
-   float rho;
-
-   rho = MAX2(maxx, maxy);
-   rho = MAX2(rho, maxz);
+   const float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
+   const float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
+   const float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
+   const float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
+   const float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
+   const float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
+   const float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
+   const float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
+   const float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, sview->base.u.tex.first_level);
+   const float rho = MAX3(maxx, maxy, maxz);
 
    return util_fast_log2(rho);
 }
@@ -609,7 +597,7 @@ get_texel_2d(const struct sp_sampler_view *sp_sview,
              union tex_tile_address addr, int x, int y)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   unsigned level = addr.bits.level;
+   const unsigned level = addr.bits.level;
 
    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
        y < 0 || y >= (int) u_minify(texture->height0, level)) {
@@ -852,7 +840,7 @@ get_texel_3d(const struct sp_sampler_view *sp_sview,
              union tex_tile_address addr, int x, int y, int z)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   unsigned level = addr.bits.level;
+   const unsigned level = addr.bits.level;
 
    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
        y < 0 || y >= (int) u_minify(texture->height0, level) ||
@@ -872,7 +860,7 @@ get_texel_1d_array(const struct sp_sampler_view *sp_sview,
                    union tex_tile_address addr, int x, int y)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   unsigned level = addr.bits.level;
+   const unsigned level = addr.bits.level;
 
    if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
       return sp_samp->base.border_color.f;
@@ -890,7 +878,7 @@ get_texel_2d_array(const struct sp_sampler_view *sp_sview,
                    union tex_tile_address addr, int x, int y, int layer)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   unsigned level = addr.bits.level;
+   const unsigned level = addr.bits.level;
 
    assert(layer < (int) texture->array_size);
    assert(layer >= 0);
@@ -911,7 +899,7 @@ get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
                         float *corner, int layer, unsigned face)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   unsigned level = addr.bits.level;
+   const unsigned level = addr.bits.level;
    int new_x, new_y, max_x;
 
    max_x = (int) u_minify(texture->width0, level);
@@ -966,7 +954,7 @@ get_texel_cube_array(const struct sp_sampler_view *sp_sview,
                      union tex_tile_address addr, int x, int y, int layer)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   unsigned level = addr.bits.level;
+   const unsigned level = addr.bits.level;
 
    assert(layer < (int) texture->array_size);
    assert(layer >= 0);
@@ -1017,29 +1005,29 @@ print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZ
 /* Some image-filter fastpaths:
  */
 static inline void
-img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
-                                struct sp_sampler *sp_samp,
+img_filter_2d_linear_repeat_POT(const struct sp_sampler_view *sp_sview,
+                                const struct sp_sampler *sp_samp,
                                 const struct img_filter_args *args,
                                 float *rgba)
 {
-   unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
-   unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
-   int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
-   int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
+   const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+   const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
+   const int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
+   const int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
    union tex_tile_address addr;
    int c;
 
-   float u = (args->s * xpot - 0.5F) + args->offset[0];
-   float v = (args->t * ypot - 0.5F) + args->offset[1];
+   const float u = (args->s * xpot - 0.5F) + args->offset[0];
+   const float v = (args->t * ypot - 0.5F) + args->offset[1];
 
-   int uflr = util_ifloor(u);
-   int vflr = util_ifloor(v);
+   const int uflr = util_ifloor(u);
+   const int vflr = util_ifloor(v);
 
-   float xw = u - (float)uflr;
-   float yw = v - (float)vflr;
+   const float xw = u - (float)uflr;
+   const float yw = v - (float)vflr;
 
-   int x0 = uflr & (xpot - 1);
-   int y0 = vflr & (ypot - 1);
+   const int x0 = uflr & (xpot - 1);
+   const int y0 = vflr & (ypot - 1);
 
    const float *tx[4];
       
@@ -1052,8 +1040,8 @@ img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
       get_texel_quad_2d_no_border_single_tile(sp_sview, addr, x0, y0, tx);
    }
    else {
-      unsigned x1 = (x0 + 1) & (xpot - 1);
-      unsigned y1 = (y0 + 1) & (ypot - 1);
+      const unsigned x1 = (x0 + 1) & (xpot - 1);
+      const unsigned y1 = (y0 + 1) & (ypot - 1);
       get_texel_quad_2d_no_border(sp_sview, addr, x0, y0, x1, y1, tx);
    }
 
@@ -1071,25 +1059,25 @@ img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
 
 
 static inline void
-img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
-                                 struct sp_sampler *sp_samp,
+img_filter_2d_nearest_repeat_POT(const struct sp_sampler_view *sp_sview,
+                                 const struct sp_sampler *sp_samp,
                                  const struct img_filter_args *args,
                                  float rgba[TGSI_QUAD_SIZE])
 {
-   unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
-   unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
+   const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+   const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
    const float *out;
    union tex_tile_address addr;
    int c;
 
-   float u = args->s * xpot + args->offset[0];
-   float v = args->t * ypot + args->offset[1];
+   const float u = args->s * xpot + args->offset[0];
+   const float v = args->t * ypot + args->offset[1];
 
-   int uflr = util_ifloor(u);
-   int vflr = util_ifloor(v);
+   const int uflr = util_ifloor(u);
+   const int vflr = util_ifloor(v);
 
-   int x0 = uflr & (xpot - 1);
-   int y0 = vflr & (ypot - 1);
+   const int x0 = uflr & (xpot - 1);
+   const int y0 = vflr & (ypot - 1);
 
    addr.value = 0;
    addr.bits.level = args->level;
@@ -1105,18 +1093,18 @@ img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
 
 
 static inline void
-img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
-                                struct sp_sampler *sp_samp,
+img_filter_2d_nearest_clamp_POT(const struct sp_sampler_view *sp_sview,
+                                const struct sp_sampler *sp_samp,
                                 const struct img_filter_args *args,
                                 float rgba[TGSI_QUAD_SIZE])
 {
-   unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
-   unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
+   const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+   const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
    union tex_tile_address addr;
    int c;
 
-   float u = args->s * xpot + args->offset[0];
-   float v = args->t * ypot + args->offset[1];
+   const float u = args->s * xpot + args->offset[0];
+   const float v = args->t * ypot + args->offset[1];
 
    int x0, y0;
    const float *out;
@@ -1147,20 +1135,18 @@ img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
-                      struct sp_sampler *sp_samp,
+img_filter_1d_nearest(const struct sp_sampler_view *sp_sview,
+                      const struct sp_sampler *sp_samp,
                       const struct img_filter_args *args,
                       float rgba[TGSI_QUAD_SIZE])
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width;
+   const int width = u_minify(texture->width0, args->level);
    int x;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-
    assert(width > 0);
 
    addr.value = 0;
@@ -1179,28 +1165,26 @@ img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
-                            struct sp_sampler *sp_samp,
+img_filter_1d_array_nearest(const struct sp_sampler_view *sp_sview,
+                            const struct sp_sampler *sp_samp,
                             const struct img_filter_args *args,
                             float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width;
-   int x, layer;
+   const int width = u_minify(texture->width0, args->level);
+   const int layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
+                                    sp_sview->base.u.tex.last_layer);
+   int x;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-
    assert(width > 0);
 
    addr.value = 0;
    addr.bits.level = args->level;
 
    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
-   layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
-                          sp_sview->base.u.tex.last_layer);
 
    out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1213,21 +1197,19 @@ img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
-                      struct sp_sampler *sp_samp,
+img_filter_2d_nearest(const struct sp_sampler_view *sp_sview,
+                      const struct sp_sampler *sp_samp,
                       const struct img_filter_args *args,
                       float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
    int x, y;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
  
@@ -1248,21 +1230,21 @@ img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
-                            struct sp_sampler *sp_samp,
+img_filter_2d_array_nearest(const struct sp_sampler_view *sp_sview,
+                            const struct sp_sampler *sp_samp,
                             const struct img_filter_args *args,
                             float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
-   int x, y, layer;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
+                                    sp_sview->base.u.tex.last_layer);
+   int x, y;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
  
@@ -1271,8 +1253,6 @@ img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
 
    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
    sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
-   layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
-                          sp_sview->base.u.tex.last_layer);
 
    out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1285,21 +1265,20 @@ img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
-                        struct sp_sampler *sp_samp,
+img_filter_cube_nearest(const struct sp_sampler_view *sp_sview,
+                        const struct sp_sampler *sp_samp,
                         const struct img_filter_args *args,
                         float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
-   int x, y, layerface;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int layerface = args->face_id + sp_sview->base.u.tex.first_layer;
+   int x, y;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
  
@@ -1319,7 +1298,6 @@ img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
       sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
    }
 
-   layerface = args->face_id + sp_sview->base.u.tex.first_layer;
    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
       rgba[TGSI_NUM_CHANNELS*c] = out[c];
@@ -1330,21 +1308,23 @@ img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
 }
 
 static void
-img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
-                              struct sp_sampler *sp_samp,
+img_filter_cube_array_nearest(const struct sp_sampler_view *sp_sview,
+                              const struct sp_sampler *sp_samp,
                               const struct img_filter_args *args,
                               float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
-   int x, y, layerface;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int layerface =
+      coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
+                     sp_sview->base.u.tex.first_layer,
+                     sp_sview->base.u.tex.last_layer - 5) + args->face_id;
+   int x, y;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
  
@@ -1353,9 +1333,6 @@ img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
 
    sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
    sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
-   layerface = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
-                              sp_sview->base.u.tex.first_layer,
-                              sp_sview->base.u.tex.last_layer - 5) + args->face_id;
 
    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1367,22 +1344,20 @@ img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
 }
 
 static void
-img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
-                      struct sp_sampler *sp_samp,
+img_filter_3d_nearest(const struct sp_sampler_view *sp_sview,
+                      const struct sp_sampler *sp_samp,
                       const struct img_filter_args *args,
                       float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height, depth;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int depth = u_minify(texture->depth0, args->level);
    int x, y, z;
    union tex_tile_address addr;
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-   depth = u_minify(texture->depth0, args->level);
-
    assert(width > 0);
    assert(height > 0);
    assert(depth > 0);
@@ -1401,21 +1376,19 @@ img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_1d_linear(struct sp_sampler_view *sp_sview,
-                     struct sp_sampler *sp_samp,
+img_filter_1d_linear(const struct sp_sampler_view *sp_sview,
+                     const struct sp_sampler *sp_samp,
                      const struct img_filter_args *args,
                      float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width;
+   const int width = u_minify(texture->width0, args->level);
    int x0, x1;
    float xw; /* weights */
    union tex_tile_address addr;
    const float *tx0, *tx1;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-
    assert(width > 0);
 
    addr.value = 0;
@@ -1433,29 +1406,27 @@ img_filter_1d_linear(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_1d_array_linear(struct sp_sampler_view *sp_sview,
-                           struct sp_sampler *sp_samp,
+img_filter_1d_array_linear(const struct sp_sampler_view *sp_sview,
+                           const struct sp_sampler *sp_samp,
                            const struct img_filter_args *args,
                            float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width;
-   int x0, x1, layer;
+   const int width = u_minify(texture->width0, args->level);
+   const int layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
+                                    sp_sview->base.u.tex.last_layer);
+   int x0, x1;
    float xw; /* weights */
    union tex_tile_address addr;
    const float *tx0, *tx1;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-
    assert(width > 0);
 
    addr.value = 0;
    addr.bits.level = args->level;
 
    sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
-   layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
-                          sp_sview->base.u.tex.last_layer);
 
    tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer);
    tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, layer);
@@ -1533,22 +1504,20 @@ get_gather_value(const struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_2d_linear(struct sp_sampler_view *sp_sview,
-                     struct sp_sampler *sp_samp,
+img_filter_2d_linear(const struct sp_sampler_view *sp_sview,
+                     const struct sp_sampler *sp_samp,
                      const struct img_filter_args *args,
                      float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
    int x0, y0, x1, y1;
    float xw, yw; /* weights */
    union tex_tile_address addr;
    const float *tx[4];
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
 
@@ -1579,22 +1548,22 @@ img_filter_2d_linear(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
-                           struct sp_sampler *sp_samp,
+img_filter_2d_array_linear(const struct sp_sampler_view *sp_sview,
+                           const struct sp_sampler *sp_samp,
                            const struct img_filter_args *args,
                            float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
-   int x0, y0, x1, y1, layer;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
+                                    sp_sview->base.u.tex.last_layer);
+   int x0, y0, x1, y1;
    float xw, yw; /* weights */
    union tex_tile_address addr;
    const float *tx[4];
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
 
@@ -1603,8 +1572,6 @@ img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
 
    sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
    sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
-   layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
-                          sp_sview->base.u.tex.last_layer);
 
    tx[0] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
    tx[1] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
@@ -1627,14 +1594,16 @@ img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_cube_linear(struct sp_sampler_view *sp_sview,
-                       struct sp_sampler *sp_samp,
+img_filter_cube_linear(const struct sp_sampler_view *sp_sview,
+                       const struct sp_sampler *sp_samp,
                        const struct img_filter_args *args,
                        float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
-   int x0, y0, x1, y1, layer;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int layer = sp_sview->base.u.tex.first_layer;
+   int x0, y0, x1, y1;
    float xw, yw; /* weights */
    union tex_tile_address addr;
    const float *tx[4];
@@ -1642,9 +1611,6 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview,
          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
 
@@ -1665,8 +1631,6 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview,
       sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
    }
 
-   layer = sp_sview->base.u.tex.first_layer;
-
    if (sp_samp->base.seamless_cube_map) {
       tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
       tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
@@ -1695,14 +1659,19 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview,
 
 
 static void
-img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
-                             struct sp_sampler *sp_samp,
+img_filter_cube_array_linear(const struct sp_sampler_view *sp_sview,
+                             const struct sp_sampler *sp_samp,
                              const struct img_filter_args *args,
                              float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height;
-   int x0, y0, x1, y1, layer;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int layer =
+      coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
+                     sp_sview->base.u.tex.first_layer,
+                     sp_sview->base.u.tex.last_layer - 5);
+   int x0, y0, x1, y1;
    float xw, yw; /* weights */
    union tex_tile_address addr;
    const float *tx[4];
@@ -1710,9 +1679,6 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-
    assert(width > 0);
    assert(height > 0);
 
@@ -1733,10 +1699,6 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
       sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
    }
 
-   layer = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
-                          sp_sview->base.u.tex.first_layer,
-                          sp_sview->base.u.tex.last_layer - 5);
-
    if (sp_samp->base.seamless_cube_map) {
       tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
       tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
@@ -1764,23 +1726,21 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
 }
 
 static void
-img_filter_3d_linear(struct sp_sampler_view *sp_sview,
-                     struct sp_sampler *sp_samp,
+img_filter_3d_linear(const struct sp_sampler_view *sp_sview,
+                     const struct sp_sampler *sp_samp,
                      const struct img_filter_args *args,
                      float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
-   int width, height, depth;
+   const int width = u_minify(texture->width0, args->level);
+   const int height = u_minify(texture->height0, args->level);
+   const int depth = u_minify(texture->depth0, args->level);
    int x0, x1, y0, y1, z0, z1;
    float xw, yw, zw; /* interpolation weights */
    union tex_tile_address addr;
    const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
    int c;
 
-   width = u_minify(texture->width0, args->level);
-   height = u_minify(texture->height0, args->level);
-   depth = u_minify(texture->depth0, args->level);
-
    addr.value = 0;
    addr.bits.level = args->level;
 
@@ -1826,24 +1786,24 @@ compute_lod(const struct pipe_sampler_state *sampler,
             const float lod_in[TGSI_QUAD_SIZE],
             float lod[TGSI_QUAD_SIZE])
 {
-   float min_lod = sampler->min_lod;
-   float max_lod = sampler->max_lod;
+   const float min_lod = sampler->min_lod;
+   const float max_lod = sampler->max_lod;
    uint i;
 
    switch (control) {
-   case tgsi_sampler_lod_none:
-   case tgsi_sampler_lod_zero:
+   case TGSI_SAMPLER_LOD_NONE:
+   case TGSI_SAMPLER_LOD_ZERO:
    /* XXX FIXME */
-   case tgsi_sampler_derivs_explicit:
+   case TGSI_SAMPLER_DERIVS_EXPLICIT:
       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(biased_lambda, min_lod, max_lod);
       break;
-   case tgsi_sampler_lod_bias:
+   case TGSI_SAMPLER_LOD_BIAS:
       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
          lod[i] = biased_lambda + lod_in[i];
          lod[i] = CLAMP(lod[i], min_lod, max_lod);
       }
       break;
-   case tgsi_sampler_lod_explicit:
+   case TGSI_SAMPLER_LOD_EXPLICIT:
       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
          lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
       }
@@ -1855,50 +1815,47 @@ compute_lod(const struct pipe_sampler_state *sampler,
 }
 
 
-/* Calculate level of detail for every fragment.
+/* Calculate level of detail for every fragment. The computed value is not
+ * clamped to min_lod and max_lod.
  * \param lod_in per-fragment lod_bias or explicit_lod.
  * \param lod results per-fragment lod.
  */
 static inline void
-compute_lambda_lod(struct sp_sampler_view *sp_sview,
-                   struct sp_sampler *sp_samp,
-                   const float s[TGSI_QUAD_SIZE],
-                   const float t[TGSI_QUAD_SIZE],
-                   const float p[TGSI_QUAD_SIZE],
-                   const float lod_in[TGSI_QUAD_SIZE],
-                   enum tgsi_sampler_control control,
-                   float lod[TGSI_QUAD_SIZE])
+compute_lambda_lod_unclamped(const struct sp_sampler_view *sp_sview,
+                             const struct sp_sampler *sp_samp,
+                             const float s[TGSI_QUAD_SIZE],
+                             const float t[TGSI_QUAD_SIZE],
+                             const float p[TGSI_QUAD_SIZE],
+                             const float lod_in[TGSI_QUAD_SIZE],
+                             enum tgsi_sampler_control control,
+                             float lod[TGSI_QUAD_SIZE])
 {
    const struct pipe_sampler_state *sampler = &sp_samp->base;
-   float lod_bias = sampler->lod_bias;
-   float min_lod = sampler->min_lod;
-   float max_lod = sampler->max_lod;
+   const float lod_bias = sampler->lod_bias;
    float lambda;
    uint i;
 
    switch (control) {
-   case tgsi_sampler_lod_none:
+   case TGSI_SAMPLER_LOD_NONE:
       /* XXX FIXME */
-   case tgsi_sampler_derivs_explicit:
+   case TGSI_SAMPLER_DERIVS_EXPLICIT:
       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
-      lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod);
+      lod[0] = lod[1] = lod[2] = lod[3] = lambda;
       break;
-   case tgsi_sampler_lod_bias:
+   case TGSI_SAMPLER_LOD_BIAS:
       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
          lod[i] = lambda + lod_in[i];
-         lod[i] = CLAMP(lod[i], min_lod, max_lod);
       }
       break;
-   case tgsi_sampler_lod_explicit:
+   case TGSI_SAMPLER_LOD_EXPLICIT:
       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
-         lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
+         lod[i] = lod_in[i] + lod_bias;
       }
       break;
-   case tgsi_sampler_lod_zero:
-   case tgsi_sampler_gather:
-      /* this is all static state in the sampler really need clamp here? */
-      lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
+   case TGSI_SAMPLER_LOD_ZERO:
+   case TGSI_SAMPLER_GATHER:
+      lod[0] = lod[1] = lod[2] = lod[3] = lod_bias;
       break;
    default:
       assert(0);
@@ -1906,6 +1863,32 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
    }
 }
 
+/* Calculate level of detail for every fragment.
+ * \param lod_in per-fragment lod_bias or explicit_lod.
+ * \param lod results per-fragment lod.
+ */
+static inline void
+compute_lambda_lod(const struct sp_sampler_view *sp_sview,
+                   const struct sp_sampler *sp_samp,
+                   const float s[TGSI_QUAD_SIZE],
+                   const float t[TGSI_QUAD_SIZE],
+                   const float p[TGSI_QUAD_SIZE],
+                   const float lod_in[TGSI_QUAD_SIZE],
+                   enum tgsi_sampler_control control,
+                   float lod[TGSI_QUAD_SIZE])
+{
+   const struct pipe_sampler_state *sampler = &sp_samp->base;
+   const float min_lod = sampler->min_lod;
+   const float max_lod = sampler->max_lod;
+   int i;
+
+   compute_lambda_lod_unclamped(sp_sview, sp_samp,
+                                s, t, p, lod_in, control, lod);
+   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+      lod[i] = CLAMP(lod[i], min_lod, max_lod);
+   }
+}
+
 static inline unsigned
 get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
 {
@@ -1913,9 +1896,46 @@ get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
    return (*(unsigned int *)lod_in) & 0x3;
 }
 
+/**
+ * Clamps the given lod to both the lod limits and the mip level limits;
+ * the latter clamp makes the resulting lod relative to the first (base) level.
+ */
 static void
-mip_filter_linear(struct sp_sampler_view *sp_sview,
-                  struct sp_sampler *sp_samp,
+clamp_lod(const struct sp_sampler_view *sp_sview,
+          const struct sp_sampler *sp_samp,
+          const float lod[TGSI_QUAD_SIZE],
+          float clamped[TGSI_QUAD_SIZE])
+{
+   const float min_lod = sp_samp->base.min_lod;
+   const float max_lod = sp_samp->base.max_lod;
+   const float min_level = sp_sview->base.u.tex.first_level;
+   const float max_level = sp_sview->base.u.tex.last_level;
+   int i;
+
+   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+      float cl = lod[i];
+
+      cl = CLAMP(cl, min_lod, max_lod);
+      cl = CLAMP(cl, 0, max_level - min_level);
+      clamped[i] = cl;
+   }
+}
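
A quick worked example of the double clamp, with made-up sampler and view
state (min_lod = 0.0, max_lod = 10.0, first_level = 2, last_level = 5):

   /*
    *   lod in    after lod clamp    after level clamp (relative to base)
    *    -1.2           0.0               0.0
    *     1.4           1.4               1.4
    *     7.3           7.3               3.0   (= last_level - first_level)
    */
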
+
+/**
+ * Get mip level relative to base level for linear mip filter
+ */
+static void
+mip_rel_level_linear(const struct sp_sampler_view *sp_sview,
+                     const struct sp_sampler *sp_samp,
+                     const float lod[TGSI_QUAD_SIZE],
+                     float level[TGSI_QUAD_SIZE])
+{
+   clamp_lod(sp_sview, sp_samp, lod, level);
+}
+
+static void
+mip_filter_linear(const struct sp_sampler_view *sp_sview,
+                  const struct sp_sampler *sp_samp,
                   img_filter_func min_filter,
                   img_filter_func mag_filter,
                   const float s[TGSI_QUAD_SIZE],
@@ -1934,16 +1954,16 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
 
    args.offset = filt_args->offset;
-   args.gather_only = filt_args->control == tgsi_sampler_gather;
+   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
    args.gather_comp = get_gather_component(lod_in);
 
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-      int level0 = psview->u.tex.first_level + (int)lod[j];
+      const int level0 = psview->u.tex.first_level + (int)lod[j];
 
       args.s = s[j];
       args.t = t[j];
       args.p = p[j];
-      args.face_id = sp_sview->faces[j];
+      args.face_id = filt_args->faces[j];
 
       if (lod[j] < 0.0) {
          args.level = psview->u.tex.first_level;
@@ -1976,13 +1996,32 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
 
 
 /**
+ * Get mip level relative to base level for nearest mip filter
+ */
+static void
+mip_rel_level_nearest(const struct sp_sampler_view *sp_sview,
+                      const struct sp_sampler *sp_samp,
+                      const float lod[TGSI_QUAD_SIZE],
+                      float level[TGSI_QUAD_SIZE])
+{
+   int j;
+
+   clamp_lod(sp_sview, sp_samp, lod, level);
+   for (j = 0; j < TGSI_QUAD_SIZE; j++)
+      /* TODO: It should rather be:
+       * level[j] = ceil(level[j] + 0.5F) - 1.0F;
+       */
+      level[j] = (int)(level[j] + 0.5F);
+}
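
For what it's worth, the two rounding rules in the TODO above diverge only
at exact half-integer lods (clamp_lod() guarantees level >= 0 here, so the
truncation never rounds toward zero from below):

   /*
    *    lod     (int)(lod + 0.5F)     ceilf(lod + 0.5F) - 1.0F
    *    0.4            0                       0
    *    0.5            1                       0
    *    1.5            2                       1
    */

i.e. the current code rounds halves up, while the TODO variant rounds them
down, which appears to be the rounding the GL spec's nearest-mip rule asks for.
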
+
+/**
  * Compute nearest mipmap level from texcoords.
  * Then sample the texture level for four elements of a quad.
  * \param c0  the LOD bias factors, or absolute LODs (depending on control)
  */
 static void
-mip_filter_nearest(struct sp_sampler_view *sp_sview,
-                   struct sp_sampler *sp_samp,
+mip_filter_nearest(const struct sp_sampler_view *sp_sview,
+                   const struct sp_sampler *sp_samp,
                    img_filter_func min_filter,
                    img_filter_func mag_filter,
                    const float s[TGSI_QUAD_SIZE],
@@ -1999,7 +2038,7 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
    struct img_filter_args args;
 
    args.offset = filt_args->offset;
-   args.gather_only = filt_args->control == tgsi_sampler_gather;
+   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
    args.gather_comp = get_gather_component(lod_in);
 
    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
@@ -2008,13 +2047,13 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
       args.s = s[j];
       args.t = t[j];
       args.p = p[j];
-      args.face_id = sp_sview->faces[j];
+      args.face_id = filt_args->faces[j];
 
       if (lod[j] < 0.0) {
          args.level = psview->u.tex.first_level;
          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
       } else {
-         int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
+         const int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
          args.level = MIN2(level, (int)psview->u.tex.last_level);
          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
       }
@@ -2026,9 +2065,25 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
 }
 
 
+/**
+ * Get mip level relative to base level for none mip filter
+ */
+static void
+mip_rel_level_none(const struct sp_sampler_view *sp_sview,
+                   const struct sp_sampler *sp_samp,
+                   const float lod[TGSI_QUAD_SIZE],
+                   float level[TGSI_QUAD_SIZE])
+{
+   int j;
+
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      level[j] = 0;
+   }
+}
+
 static void
-mip_filter_none(struct sp_sampler_view *sp_sview,
-                struct sp_sampler *sp_samp,
+mip_filter_none(const struct sp_sampler_view *sp_sview,
+                const struct sp_sampler *sp_samp,
                 img_filter_func min_filter,
                 img_filter_func mag_filter,
                 const float s[TGSI_QUAD_SIZE],
@@ -2045,7 +2100,7 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
 
    args.level = sp_sview->base.u.tex.first_level;
    args.offset = filt_args->offset;
-   args.gather_only = filt_args->control == tgsi_sampler_gather;
+   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
 
    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
 
@@ -2053,7 +2108,7 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
       args.s = s[j];
       args.t = t[j];
       args.p = p[j];
-      args.face_id = sp_sview->faces[j];
+      args.face_id = filt_args->faces[j];
       if (lod[j] < 0.0) {
          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
       }
@@ -2064,9 +2119,21 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
 }
 
 
+/**
+ * Get mip level relative to base level for none mip filter
+ * (no filter select variant)
+ */
 static void
-mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
-                                 struct sp_sampler *sp_samp,
+mip_rel_level_none_no_filter_select(const struct sp_sampler_view *sp_sview,
+                                    const struct sp_sampler *sp_samp,
+                                    const float lod[TGSI_QUAD_SIZE],
+                                    float level[TGSI_QUAD_SIZE])
+{
+   mip_rel_level_none(sp_sview, sp_samp, lod, level);
+}
+
+static void
+mip_filter_none_no_filter_select(const struct sp_sampler_view *sp_sview,
+                                 const struct sp_sampler *sp_samp,
                                  img_filter_func min_filter,
                                  img_filter_func mag_filter,
                                  const float s[TGSI_QUAD_SIZE],
@@ -2081,12 +2148,12 @@ mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
    struct img_filter_args args;
    args.level = sp_sview->base.u.tex.first_level;
    args.offset = filt_args->offset;
-   args.gather_only = filt_args->control == tgsi_sampler_gather;
+   args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
       args.s = s[j];
       args.t = t[j];
       args.p = p[j];
-      args.face_id = sp_sview->faces[j];
+      args.face_id = filt_args->faces[j];
       mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
    }
 }
@@ -2095,7 +2162,7 @@ mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
 /* For anisotropic filtering */
 #define WEIGHT_LUT_SIZE 1024
 
-static float *weightLut = NULL;
+static const float *weightLut = NULL;
 
 /**
  * Creates the look-up table used to speed-up EWA sampling
@@ -2105,14 +2172,15 @@ create_filter_table(void)
 {
    unsigned i;
    if (!weightLut) {
-      weightLut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
+      float *lut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
 
       for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
-         float alpha = 2;
-         float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
-         float weight = (float) exp(-alpha * r2);
-         weightLut[i] = weight;
+         const float alpha = 2;
+         const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
+         const float weight = (float) exp(-alpha * r2);
+         lut[i] = weight;
       }
+      weightLut = lut;
    }
 }
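
The table stores w(i) = exp(-2 * i / (WEIGHT_LUT_SIZE - 1)), i.e. a Gaussian
with alpha = 2 over r^2 in [0, 1]; img_filter_2d_ewa() below pre-scales its
ellipse function q by (WEIGHT_LUT_SIZE - 1) / F so that q = F lands on the
last entry. Note the lazy init above still publishes weightLut without any
synchronization; a minimal once-guarded sketch, assuming the c11/threads.h
wrapper already shipped in the tree (illustrative only, not the patch):

   #include "c11/threads.h"

   static once_flag weight_lut_once = ONCE_FLAG_INIT;

   static void
   init_weight_lut(void)
   {
      float *lut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
      unsigned i;

      for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
         const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
         lut[i] = (float) exp(-2.0 * r2);
      }
      weightLut = lut;
   }

   static void
   create_filter_table(void)
   {
      call_once(&weight_lut_once, init_weight_lut);
   }
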
 
@@ -2125,13 +2193,14 @@ create_filter_table(void)
  * "Fundamentals of Texture Mapping and Image Warping" (1989)
  */
 static void
-img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
-                  struct sp_sampler *sp_samp,
+img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
+                  const struct sp_sampler *sp_samp,
                   img_filter_func min_filter,
                   img_filter_func mag_filter,
                   const float s[TGSI_QUAD_SIZE],
                   const float t[TGSI_QUAD_SIZE],
                   const float p[TGSI_QUAD_SIZE],
+                  const uint faces[TGSI_QUAD_SIZE],
                   unsigned level,
                   const float dudx, const float dvdx,
                   const float dudy, const float dvdy,
@@ -2140,15 +2209,15 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
    const struct pipe_resource *texture = sp_sview->base.texture;
 
    // ??? Won't the image filters blow up if level is negative?
-   unsigned level0 = level > 0 ? level : 0;
-   float scaling = 1.0f / (1 << level0);
-   int width = u_minify(texture->width0, level0);
-   int height = u_minify(texture->height0, level0);
+   const unsigned level0 = level > 0 ? level : 0;
+   const float scaling = 1.0f / (1 << level0);
+   const int width = u_minify(texture->width0, level0);
+   const int height = u_minify(texture->height0, level0);
    struct img_filter_args args;
-   float ux = dudx * scaling;
-   float vx = dvdx * scaling;
-   float uy = dudy * scaling;
-   float vy = dvdy * scaling;
+   const float ux = dudx * scaling;
+   const float vx = dvdx * scaling;
+   const float uy = dudy * scaling;
+   const float vy = dvdy * scaling;
 
    /* compute ellipse coefficients to bound the region: 
     * A*x*x + B*x*y + C*y*y = F.
@@ -2162,29 +2231,15 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
    /* assert(F > 0.0); */
 
    /* Compute the ellipse's (u,v) bounding box in texture space */
-   float d = -B*B+4.0f*C*A;
-   float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox with   */
-   float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
+   const float d = -B*B+4.0f*C*A;
+   const float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox width  */
+   const float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
 
    float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
    float s_buffer[TGSI_QUAD_SIZE];
    float t_buffer[TGSI_QUAD_SIZE];
    float weight_buffer[TGSI_QUAD_SIZE];
-   unsigned buffer_next;
    int j;
-   float den; /* = 0.0F; */
-   float ddq;
-   float U; /* = u0 - tex_u; */
-   int v;
-
-   /* Scale ellipse formula to directly index the Filter Lookup Table.
-    * i.e. scale so that F = WEIGHT_LUT_SIZE-1
-    */
-   double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
-   A *= formScale;
-   B *= formScale;
-   C *= formScale;
-   /* F *= formScale; */ /* no need to scale F as we don't use it below here */
 
    /* For each quad, the du and dx values are the same and so the ellipse is
     * also the same. Note that texel/image access can only be performed using
@@ -2193,7 +2248,16 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
     * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
     * full, then the pixel values are read from the image.
     */
-   ddq = 2 * A;
+   const float ddq = 2 * A;
+
+   /* Scale ellipse formula to directly index the Filter Lookup Table.
+    * i.e. scale so that F = WEIGHT_LUT_SIZE-1
+    */
+   const double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
+   A *= formScale;
+   B *= formScale;
+   C *= formScale;
+   /* F *= formScale; */ /* no need to scale F as we don't use it below here */
 
    args.level = level;
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
@@ -2201,22 +2265,23 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
        * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
        * value, q, is less than F, we're inside the ellipse
        */
-      float tex_u = -0.5F + s[j] * texture->width0 * scaling;
-      float tex_v = -0.5F + t[j] * texture->height0 * scaling;
+      const float tex_u = -0.5F + s[j] * texture->width0 * scaling;
+      const float tex_v = -0.5F + t[j] * texture->height0 * scaling;
 
-      int u0 = (int) floorf(tex_u - box_u);
-      int u1 = (int) ceilf(tex_u + box_u);
-      int v0 = (int) floorf(tex_v - box_v);
-      int v1 = (int) ceilf(tex_v + box_v);
+      const int u0 = (int) floorf(tex_u - box_u);
+      const int u1 = (int) ceilf(tex_u + box_u);
+      const int v0 = (int) floorf(tex_v - box_v);
+      const int v1 = (int) ceilf(tex_v + box_v);
+      const float U = u0 - tex_u;
 
       float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
-      buffer_next = 0;
-      den = 0;
-      args.face_id = sp_sview->faces[j];
+      unsigned buffer_next = 0;
+      float den = 0;
+      int v;
+      args.face_id = faces[j];
 
-      U = u0 - tex_u;
       for (v = v0; v <= v1; ++v) {
-         float V = v - tex_v;
+         const float V = v - tex_v;
          float dq = A * (2 * U + 1) + B * V;
          float q = (C * V + B * U) * V + A * U * U;
 
@@ -2230,7 +2295,7 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
                 * should not happen, though
                 */
                const int qClamped = q >= 0.0F ? q : 0;
-               float weight = weightLut[qClamped];
+               const float weight = weightLut[qClamped];
 
                weight_buffer[buffer_next] = weight;
                s_buffer[buffer_next] = u / ((float) width);
@@ -2317,11 +2382,23 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
 
 
 /**
+ * Get mip level relative to base level for linear mip filter
+ */
+static void
+mip_rel_level_linear_aniso(const struct sp_sampler_view *sp_sview,
+                           const struct sp_sampler *sp_samp,
+                           const float lod[TGSI_QUAD_SIZE],
+                           float level[TGSI_QUAD_SIZE])
+{
+   mip_rel_level_linear(sp_sview, sp_samp, lod, level);
+}
+
+/**
  * Sample 2D texture using an anisotropic filter.
  */
 static void
-mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
-                        struct sp_sampler *sp_samp,
+mip_filter_linear_aniso(const struct sp_sampler_view *sp_sview,
+                        const struct sp_sampler *sp_samp,
                         img_filter_func min_filter,
                         img_filter_func mag_filter,
                         const float s[TGSI_QUAD_SIZE],
@@ -2338,23 +2415,23 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
    float lambda;
    float lod[TGSI_QUAD_SIZE];
 
-   float s_to_u = u_minify(texture->width0, psview->u.tex.first_level);
-   float t_to_v = u_minify(texture->height0, psview->u.tex.first_level);
-   float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
-   float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
-   float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
-   float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
+   const float s_to_u = u_minify(texture->width0, psview->u.tex.first_level);
+   const float t_to_v = u_minify(texture->height0, psview->u.tex.first_level);
+   const float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
+   const float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
+   const float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
+   const float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
    struct img_filter_args args;
 
-   if (filt_args->control == tgsi_sampler_lod_bias ||
-       filt_args->control == tgsi_sampler_lod_none ||
+   if (filt_args->control == TGSI_SAMPLER_LOD_BIAS ||
+       filt_args->control == TGSI_SAMPLER_LOD_NONE ||
        /* XXX FIXME */
-       filt_args->control == tgsi_sampler_derivs_explicit) {
+       filt_args->control == TGSI_SAMPLER_DERIVS_EXPLICIT) {
       /* note: instead of working with Px and Py, we will use the 
        * squared length instead, to avoid sqrt.
        */
-      float Px2 = dudx * dudx + dvdx * dvdx;
-      float Py2 = dudy * dudy + dvdy * dvdy;
+      const float Px2 = dudx * dudx + dvdx * dvdx;
+      const float Py2 = dudy * dudy + dvdy * dvdy;
 
       float Pmax2;
       float Pmin2;
@@ -2389,8 +2466,8 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
       compute_lod(&sp_samp->base, filt_args->control, lambda, lod_in, lod);
    }
    else {
-      assert(filt_args->control == tgsi_sampler_lod_explicit ||
-             filt_args->control == tgsi_sampler_lod_zero);
+      assert(filt_args->control == TGSI_SAMPLER_LOD_EXPLICIT ||
+             filt_args->control == TGSI_SAMPLER_LOD_ZERO);
       compute_lod(&sp_samp->base, filt_args->control, sp_samp->base.lod_bias, lod_in, lod);
    }
    
@@ -2409,7 +2486,7 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
          args.t = t[j];
          args.p = p[j];
          args.level = psview->u.tex.last_level;
-         args.face_id = sp_sview->faces[j];
+         args.face_id = filt_args->faces[j];
          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
       }
    }
@@ -2418,7 +2495,7 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
        * seem to be worth the extra running time.
        */
       img_filter_2d_ewa(sp_sview, sp_samp, min_filter, mag_filter,
-                        s, t, p, level0,
+                        s, t, p, filt_args->faces, level0,
                         dudx, dvdx, dudy, dvdy, rgba);
    }
 
@@ -2427,6 +2504,18 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
    }
 }
 
+/**
+ * Get mip level relative to base level for linear mip filter
+ */
+static void
+mip_rel_level_linear_2d_linear_repeat_POT(
+   const struct sp_sampler_view *sp_sview,
+   const struct sp_sampler *sp_samp,
+   const float lod[TGSI_QUAD_SIZE],
+   float level[TGSI_QUAD_SIZE])
+{
+   mip_rel_level_linear(sp_sview, sp_samp, lod, level);
+}
 
 /**
  * Specialized version of mip_filter_linear with hard-wired calls to
@@ -2434,8 +2523,8 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
  */
 static void
 mip_filter_linear_2d_linear_repeat_POT(
-   struct sp_sampler_view *sp_sview,
-   struct sp_sampler *sp_samp,
+   const struct sp_sampler_view *sp_sview,
+   const struct sp_sampler *sp_samp,
    img_filter_func min_filter,
    img_filter_func mag_filter,
    const float s[TGSI_QUAD_SIZE],
@@ -2453,16 +2542,16 @@ mip_filter_linear_2d_linear_repeat_POT(
    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
 
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-      int level0 = psview->u.tex.first_level + (int)lod[j];
+      const int level0 = psview->u.tex.first_level + (int)lod[j];
       struct img_filter_args args;
       /* Catches both negative and large values of level0:
        */
       args.s = s[j];
       args.t = t[j];
       args.p = p[j];
-      args.face_id = sp_sview->faces[j];
+      args.face_id = filt_args->faces[j];
       args.offset = filt_args->offset;
-      args.gather_only = filt_args->control == tgsi_sampler_gather;
+      args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
       if ((unsigned)level0 >= psview->u.tex.last_level) {
          if (level0 < 0)
             args.level = psview->u.tex.first_level;
@@ -2473,7 +2562,7 @@ mip_filter_linear_2d_linear_repeat_POT(
 
       }
       else {
-         float levelBlend = frac(lod[j]);
+         const float levelBlend = frac(lod[j]);
          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
          int c;
 
@@ -2492,13 +2581,42 @@ mip_filter_linear_2d_linear_repeat_POT(
    }
 }
 
+static const struct sp_filter_funcs funcs_linear = {
+   mip_rel_level_linear,
+   mip_filter_linear
+};
+
+static const struct sp_filter_funcs funcs_nearest = {
+   mip_rel_level_nearest,
+   mip_filter_nearest
+};
+
+static const struct sp_filter_funcs funcs_none = {
+   mip_rel_level_none,
+   mip_filter_none
+};
+
+static const struct sp_filter_funcs funcs_none_no_filter_select = {
+   mip_rel_level_none_no_filter_select,
+   mip_filter_none_no_filter_select
+};
+
+static const struct sp_filter_funcs funcs_linear_aniso = {
+   mip_rel_level_linear_aniso,
+   mip_filter_linear_aniso
+};
+
+static const struct sp_filter_funcs funcs_linear_2d_linear_repeat_POT = {
+   mip_rel_level_linear_2d_linear_repeat_POT,
+   mip_filter_linear_2d_linear_repeat_POT
+};
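
Each table pairs a mip filter with its matching lod-to-level mapping, so
both consumers can dispatch through a single pointer. A condensed sketch of
the two paths wired up further down (min_f/mag_f stand in for the img
filters returned by get_filters()):

   const struct sp_filter_funcs *f = sp_samp->filter_funcs;

   /* lod query (LODQ) path: */
   f->relative_level(sp_sview, sp_samp, lod, level);

   /* sampling path: */
   f->filter(sp_sview, sp_samp, min_f, mag_f,
             s, t, p, c0, lod, filt_args, rgba);
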
 
 /**
  * Do shadow/depth comparisons.
  */
 static void
-sample_compare(struct sp_sampler_view *sp_sview,
-               struct sp_sampler *sp_samp,
+sample_compare(const struct sp_sampler_view *sp_sview,
+               const struct sp_sampler *sp_samp,
                const float s[TGSI_QUAD_SIZE],
                const float t[TGSI_QUAD_SIZE],
                const float p[TGSI_QUAD_SIZE],
@@ -2511,9 +2629,14 @@ sample_compare(struct sp_sampler_view *sp_sview,
    int j, v;
    int k[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
    float pc[4];
-   const struct util_format_description *format_desc;
-   unsigned chan_type;
-   bool is_gather = (control == tgsi_sampler_gather);
+   const struct util_format_description *format_desc =
+      util_format_description(sp_sview->base.format);
+   /* not entirely sure we couldn't end up with non-valid swizzle here */
+   const unsigned chan_type =
+      format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
+      format_desc->channel[format_desc->swizzle[0]].type :
+      UTIL_FORMAT_TYPE_FLOAT;
+   const bool is_gather = (control == TGSI_SAMPLER_GATHER);
 
    /**
     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
@@ -2540,11 +2663,6 @@ sample_compare(struct sp_sampler_view *sp_sview,
       pc[3] = p[3];
    }
 
-   format_desc = util_format_description(sp_sview->base.format);
-   /* not entirely sure we couldn't end up with non-valid swizzle here */
-   chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
-                  format_desc->channel[format_desc->swizzle[0]].type :
-                  UTIL_FORMAT_TYPE_FLOAT;
    if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
       /*
        * clamping is a result of conversion to texture format, hence
@@ -2883,10 +3001,47 @@ get_img_filter(const struct sp_sampler_view *sp_sview,
    }
 }
 
+/**
+ * Get the mip filter funcs and, optionally, the img min and mag filters.
+ * Note that the two img filter function pointers must be either both
+ * non-NULL or both NULL.
+ */
+static void
+get_filters(const struct sp_sampler_view *sp_sview,
+            const struct sp_sampler *sp_samp,
+            const enum tgsi_sampler_control control,
+            const struct sp_filter_funcs **funcs,
+            img_filter_func *min,
+            img_filter_func *mag)
+{
+   assert(funcs);
+   if (control == TGSI_SAMPLER_GATHER) {
+      *funcs = &funcs_nearest;
+      if (min) {
+         *min = get_img_filter(sp_sview, &sp_samp->base,
+                               PIPE_TEX_FILTER_LINEAR, true);
+      }
+   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
+      *funcs = &funcs_linear_2d_linear_repeat_POT;
+   } else {
+      *funcs = sp_samp->filter_funcs;
+      if (min) {
+         assert(mag);
+         *min = get_img_filter(sp_sview, &sp_samp->base,
+                               sp_samp->min_img_filter, false);
+         if (sp_samp->min_mag_equal) {
+            *mag = *min;
+         } else {
+            *mag = get_img_filter(sp_sview, &sp_samp->base,
+                                  sp_samp->base.mag_img_filter, false);
+         }
+      }
+   }
+}
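
The all-or-nothing contract on the img filter pointers shows up at the two
call sites: the sampling path just below requests all three outputs, while
sp_tgsi_query_lod() further down only needs the relative_level hook:

   get_filters(sp_sview, sp_samp, filt_args->control,
               &funcs, &min_img_filter, &mag_img_filter);   /* sampling */
   get_filters(sp_sview, sp_samp, control, &funcs, NULL, NULL); /* lod query */
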
 
 static void
-sample_mip(struct sp_sampler_view *sp_sview,
-           struct sp_sampler *sp_samp,
+sample_mip(const struct sp_sampler_view *sp_sview,
+           const struct sp_sampler *sp_samp,
            const float s[TGSI_QUAD_SIZE],
            const float t[TGSI_QUAD_SIZE],
            const float p[TGSI_QUAD_SIZE],
@@ -2895,35 +3050,22 @@ sample_mip(struct sp_sampler_view *sp_sview,
            const struct filter_args *filt_args,
            float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
-   mip_filter_func mip_filter;
+   const struct sp_filter_funcs *funcs = NULL;
    img_filter_func min_img_filter = NULL;
    img_filter_func mag_img_filter = NULL;
 
-   if (filt_args->control == tgsi_sampler_gather) {
-      mip_filter = mip_filter_nearest;
-      min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true);
-   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
-      mip_filter = mip_filter_linear_2d_linear_repeat_POT;
-   }
-   else {
-      mip_filter = sp_samp->mip_filter;
-      min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false);
-      if (sp_samp->min_mag_equal) {
-         mag_img_filter = min_img_filter;
-      }
-      else {
-         mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false);
-      }
-   }
+   get_filters(sp_sview, sp_samp, filt_args->control,
+               &funcs, &min_img_filter, &mag_img_filter);
 
-   mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
-              s, t, p, c0, lod, filt_args, rgba);
+   funcs->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
+                 s, t, p, c0, lod, filt_args, rgba);
 
    if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
-      sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba);
+      sample_compare(sp_sview, sp_samp, s, t, p, c0,
+                     lod, filt_args->control, rgba);
    }
 
-   if (sp_sview->need_swizzle && filt_args->control != tgsi_sampler_gather) {
+   if (sp_sview->need_swizzle && filt_args->control != TGSI_SAMPLER_GATHER) {
       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
       do_swizzling(&sp_sview->base, rgba_temp, rgba);
@@ -2933,27 +3075,23 @@ sample_mip(struct sp_sampler_view *sp_sview,
 
 
 /**
- * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
- * Put face info into the sampler faces[] array.
+ * This function uses cube texture coordinates to choose a face of a cube and
+ * computes the 2D cube face coordinates. Puts face info into the caller's
+ * faces[] array.
  */
 static void
-sample_cube(struct sp_sampler_view *sp_sview,
-            struct sp_sampler *sp_samp,
-            const float s[TGSI_QUAD_SIZE],
-            const float t[TGSI_QUAD_SIZE],
-            const float p[TGSI_QUAD_SIZE],
-            const float c0[TGSI_QUAD_SIZE],
-            const float c1[TGSI_QUAD_SIZE],
-            const struct filter_args *filt_args,
-            float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+convert_cube(const struct sp_sampler_view *sp_sview,
+             const struct sp_sampler *sp_samp,
+             const float s[TGSI_QUAD_SIZE],
+             const float t[TGSI_QUAD_SIZE],
+             const float p[TGSI_QUAD_SIZE],
+             const float c0[TGSI_QUAD_SIZE],
+             float ssss[TGSI_QUAD_SIZE],
+             float tttt[TGSI_QUAD_SIZE],
+             float pppp[TGSI_QUAD_SIZE],
+             uint faces[TGSI_QUAD_SIZE])
 {
    unsigned j;
-   float ssss[4], tttt[4];
-
-   /* Not actually used, but the intermediate steps that do the
-    * dereferencing don't know it.
-    */
-   static float pppp[4] = { 0, 0, 0, 0 };
 
    pppp[0] = c0[0];
    pppp[1] = c0[1];
@@ -2991,43 +3129,45 @@ sample_cube(struct sp_sampler_view *sp_sview,
       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
 
       if (arx >= ary && arx >= arz) {
-         float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
-         uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
+         const float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
+         const uint face = (rx >= 0.0F) ?
+            PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
             const float ima = -0.5F / fabsf(s[j]);
             ssss[j] = sign *  p[j] * ima + 0.5F;
             tttt[j] =         t[j] * ima + 0.5F;
-            sp_sview->faces[j] = face;
+            faces[j] = face;
          }
       }
       else if (ary >= arx && ary >= arz) {
-         float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
-         uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
+         const float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
+         const uint face = (ry >= 0.0F) ?
+            PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
             const float ima = -0.5F / fabsf(t[j]);
             ssss[j] =        -s[j] * ima + 0.5F;
             tttt[j] = sign * -p[j] * ima + 0.5F;
-            sp_sview->faces[j] = face;
+            faces[j] = face;
          }
       }
       else {
-         float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
-         uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
+         const float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
+         const uint face = (rz >= 0.0F) ?
+            PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
             const float ima = -0.5F / fabsf(p[j]);
             ssss[j] = sign * -s[j] * ima + 0.5F;
             tttt[j] =         t[j] * ima + 0.5F;
-            sp_sview->faces[j] = face;
+            faces[j] = face;
          }
       }
    }
-
-   sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, filt_args, rgba);
 }
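
A toy scalar version of the face-selection rule, illustrative only (the
real code above picks one face per quad and then projects all four sets of
coordinates onto it):

   static uint
   pick_cube_face(float rx, float ry, float rz)
   {
      const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);

      if (arx >= ary && arx >= arz)
         return rx >= 0.0F ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
      else if (ary >= arx && ary >= arz)
         return ry >= 0.0F ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
      else
         return rz >= 0.0F ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
   }

e.g. a direction of (0.9, 0.2, -0.3) has |x| as its major axis with a
positive sign, so it selects PIPE_TEX_FACE_POS_X.
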
 
 
 static void
-sp_get_dims(struct sp_sampler_view *sp_sview, int level,
+sp_get_dims(const struct sp_sampler_view *sp_sview,
+            int level,
             int dims[4])
 {
    const struct pipe_sampler_view *view = &sp_sview->base;
@@ -3085,7 +3225,7 @@ sp_get_dims(struct sp_sampler_view *sp_sview, int level,
  * coords to the texture image size.
  */
 static void
-sp_get_texels(struct sp_sampler_view *sp_sview,
+sp_get_texels(const struct sp_sampler_view *sp_sview,
               const int v_i[TGSI_QUAD_SIZE],
               const int v_j[TGSI_QUAD_SIZE],
               const int v_k[TGSI_QUAD_SIZE],
@@ -3097,24 +3237,24 @@ sp_get_texels(struct sp_sampler_view *sp_sview,
    const struct pipe_resource *texture = sp_sview->base.texture;
    int j, c;
    const float *tx;
-   int width, height, depth;
-
-   addr.value = 0;
    /* TODO write a better test for LOD */
-   addr.bits.level = sp_sview->base.target == PIPE_BUFFER ? 0 :
-                        CLAMP(lod[0] + sp_sview->base.u.tex.first_level, 
-                              sp_sview->base.u.tex.first_level,
-                              sp_sview->base.u.tex.last_level);
+   const unsigned level =
+      sp_sview->base.target == PIPE_BUFFER ? 0 :
+      CLAMP(lod[0] + sp_sview->base.u.tex.first_level,
+            sp_sview->base.u.tex.first_level,
+            sp_sview->base.u.tex.last_level);
+   const int width = u_minify(texture->width0, level);
+   const int height = u_minify(texture->height0, level);
+   const int depth = u_minify(texture->depth0, level);
 
-   width = u_minify(texture->width0, addr.bits.level);
-   height = u_minify(texture->height0, addr.bits.level);
-   depth = u_minify(texture->depth0, addr.bits.level);
+   addr.value = 0;
+   addr.bits.level = level;
 
    switch (sp_sview->base.target) {
    case PIPE_BUFFER:
    case PIPE_TEXTURE_1D:
       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+         const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
          tx = get_texel_2d_no_border(sp_sview, addr, x, 0);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
@@ -3123,9 +3263,9 @@ sp_get_texels(struct sp_sampler_view *sp_sview,
       break;
    case PIPE_TEXTURE_1D_ARRAY:
       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
-         int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer,
-                       sp_sview->base.u.tex.last_layer);
+         const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+         const int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer,
+                             sp_sview->base.u.tex.last_layer);
          tx = get_texel_2d_no_border(sp_sview, addr, x, y);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
@@ -3135,8 +3275,8 @@ sp_get_texels(struct sp_sampler_view *sp_sview,
    case PIPE_TEXTURE_2D:
    case PIPE_TEXTURE_RECT:
       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
-         int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
+         const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+         const int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
          tx = get_texel_2d_no_border(sp_sview, addr, x, y);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
@@ -3145,10 +3285,10 @@ sp_get_texels(struct sp_sampler_view *sp_sview,
       break;
    case PIPE_TEXTURE_2D_ARRAY:
       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
-         int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
-         int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer,
-                           sp_sview->base.u.tex.last_layer);
+         const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+         const int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
+         const int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer,
+                                 sp_sview->base.u.tex.last_layer);
          tx = get_texel_3d_no_border(sp_sview, addr, x, y, layer);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
@@ -3216,13 +3356,13 @@ softpipe_create_sampler_state(struct pipe_context *pipe,
    switch (sampler->min_mip_filter) {
    case PIPE_TEX_MIPFILTER_NONE:
       if (sampler->min_img_filter == sampler->mag_img_filter)
-         samp->mip_filter = mip_filter_none_no_filter_select;
+         samp->filter_funcs = &funcs_none_no_filter_select;
       else
-         samp->mip_filter = mip_filter_none;
+         samp->filter_funcs = &funcs_none;
       break;
 
    case PIPE_TEX_MIPFILTER_NEAREST:
-      samp->mip_filter = mip_filter_nearest;
+      samp->filter_funcs = &funcs_nearest;
       break;
 
    case PIPE_TEX_MIPFILTER_LINEAR:
@@ -3234,11 +3374,11 @@ softpipe_create_sampler_state(struct pipe_context *pipe,
           sampler->max_anisotropy <= 1) {
          samp->min_mag_equal_repeat_linear = TRUE;
       }
-      samp->mip_filter = mip_filter_linear;
+      samp->filter_funcs = &funcs_linear;
 
       /* Anisotropic filtering extension. */
       if (sampler->max_anisotropy > 1) {
-         samp->mip_filter = mip_filter_linear_aniso;
+         samp->filter_funcs = &funcs_linear_aniso;
 
          /* Override min_img_filter:
           * min_img_filter needs to be set to NEAREST since we need to access
@@ -3297,7 +3437,7 @@ softpipe_create_sampler_view(struct pipe_context *pipe,
                              const struct pipe_sampler_view *templ)
 {
    struct sp_sampler_view *sview = CALLOC_STRUCT(sp_sampler_view);
-   struct softpipe_resource *spr = (struct softpipe_resource *)resource;
+   const struct softpipe_resource *spr = (struct softpipe_resource *)resource;
 
    if (sview) {
       struct pipe_sampler_view *view = &sview->base;
@@ -3341,12 +3481,8 @@ softpipe_create_sampler_view(struct pipe_context *pipe,
          sview->need_swizzle = TRUE;
       }
 
-      if (view->target == PIPE_TEXTURE_CUBE ||
-          view->target == PIPE_TEXTURE_CUBE_ARRAY)
-         sview->get_samples = sample_cube;
-      else {
-         sview->get_samples = sample_mip;
-      }
+      sview->need_cube_convert = (view->target == PIPE_TEXTURE_CUBE ||
+                                  view->target == PIPE_TEXTURE_CUBE_ARRAY);
       sview->pot2d = spr->pot &&
                      (view->target == PIPE_TEXTURE_2D ||
                       view->target == PIPE_TEXTURE_RECT);
@@ -3359,12 +3495,20 @@ softpipe_create_sampler_view(struct pipe_context *pipe,
 }
 
 
+static inline const struct sp_tgsi_sampler *
+sp_tgsi_sampler_cast_c(const struct tgsi_sampler *sampler)
+{
+   return (const struct sp_tgsi_sampler *)sampler;
+}
+
+
 static void
 sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
                  const unsigned sview_index,
                  int level, int dims[4])
 {
-   struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+   const struct sp_tgsi_sampler *sp_samp =
+      sp_tgsi_sampler_cast_c(tgsi_sampler);
 
    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
    /* always have a view here but texture is NULL if no sampler view was set. */
@@ -3390,13 +3534,20 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
                     enum tgsi_sampler_control control,
                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
-   struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+   const struct sp_tgsi_sampler *sp_tgsi_samp =
+      sp_tgsi_sampler_cast_c(tgsi_sampler);
+   const struct sp_sampler_view *sp_sview;
+   const struct sp_sampler *sp_samp;
    struct filter_args filt_args;
+
    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
    assert(sampler_index < PIPE_MAX_SAMPLERS);
-   assert(sp_samp->sp_sampler[sampler_index]);
+   assert(sp_tgsi_samp->sp_sampler[sampler_index]);
+
+   sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
+   sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
    /* always have a view here but texture is NULL if no sampler view was set. */
-   if (!sp_samp->sp_sview[sview_index].base.texture) {
+   if (!sp_sview->base.texture) {
       int i, j;
       for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
@@ -3408,11 +3559,79 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
 
    filt_args.control = control;
    filt_args.offset = offset;
-   sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
-                                              sp_samp->sp_sampler[sampler_index],
-                                              s, t, p, c0, lod, &filt_args, rgba);
+
+   if (sp_sview->need_cube_convert) {
+      float cs[TGSI_QUAD_SIZE];
+      float ct[TGSI_QUAD_SIZE];
+      float cp[TGSI_QUAD_SIZE];
+      uint faces[TGSI_QUAD_SIZE];
+
+      convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, faces);
+
+      filt_args.faces = faces;
+      sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba);
+   } else {
+      static const uint zero_faces[TGSI_QUAD_SIZE] = {0, 0, 0, 0};
+
+      filt_args.faces = zero_faces;
+      sample_mip(sp_sview, sp_samp, s, t, p, c0, lod, &filt_args, rgba);
+   }
 }
 
+static void
+sp_tgsi_query_lod(const struct tgsi_sampler *tgsi_sampler,
+                  const unsigned sview_index,
+                  const unsigned sampler_index,
+                  const float s[TGSI_QUAD_SIZE],
+                  const float t[TGSI_QUAD_SIZE],
+                  const float p[TGSI_QUAD_SIZE],
+                  const float c0[TGSI_QUAD_SIZE],
+                  const enum tgsi_sampler_control control,
+                  float mipmap[TGSI_QUAD_SIZE],
+                  float lod[TGSI_QUAD_SIZE])
+{
+   static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 };
+
+   const struct sp_tgsi_sampler *sp_tgsi_samp =
+      sp_tgsi_sampler_cast_c(tgsi_sampler);
+   const struct sp_sampler_view *sp_sview;
+   const struct sp_sampler *sp_samp;
+   const struct sp_filter_funcs *funcs;
+   int i;
+
+   assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+   assert(sampler_index < PIPE_MAX_SAMPLERS);
+   assert(sp_tgsi_samp->sp_sampler[sampler_index]);
+
+   sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
+   sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
+   /* always have a view here but texture is NULL if no sampler view was
+    * set. */
+   if (!sp_sview->base.texture) {
+      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+         mipmap[i] = 0.0f;
+         lod[i] = 0.0f;
+      }
+      return;
+   }
+
+   if (sp_sview->need_cube_convert) {
+      float cs[TGSI_QUAD_SIZE];
+      float ct[TGSI_QUAD_SIZE];
+      float cp[TGSI_QUAD_SIZE];
+      uint unused_faces[TGSI_QUAD_SIZE];
+
+      convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, unused_faces);
+      compute_lambda_lod_unclamped(sp_sview, sp_samp,
+                                   cs, ct, cp, lod_in, control, lod);
+   } else {
+      compute_lambda_lod_unclamped(sp_sview, sp_samp,
+                                   s, t, p, lod_in, control, lod);
+   }
+
+   get_filters(sp_sview, sp_samp, control, &funcs, NULL, NULL);
+   funcs->relative_level(sp_sview, sp_samp, lod, mipmap);
+}
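
A sketch of a caller, presumably the interpreter's LODQ handling (the
sampler/unit names here are hypothetical): mipmap[] comes back as the
clamped mip level relative to the base level, lod[] as the raw biased
lambda before any clamping.

   float mipmap[TGSI_QUAD_SIZE], lod[TGSI_QUAD_SIZE];

   sampler->query_lod(sampler, unit, unit, s, t, p, c0,
                      TGSI_SAMPLER_LOD_NONE, mipmap, lod);
   /* dst.x <- mipmap[i], dst.y <- lod[i] */
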
 
 static void
 sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
@@ -3422,7 +3641,8 @@ sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
                   const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
-   struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+   const struct sp_tgsi_sampler *sp_samp =
+      sp_tgsi_sampler_cast_c(tgsi_sampler);
 
    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
    /* always have a view here but texture is NULL if no sampler view was set. */
@@ -3449,7 +3669,7 @@ sp_create_tgsi_sampler(void)
    samp->base.get_dims = sp_tgsi_get_dims;
    samp->base.get_samples = sp_tgsi_get_samples;
    samp->base.get_texel = sp_tgsi_get_texel;
+   samp->base.query_lod = sp_tgsi_query_lod;
 
    return samp;
 }
-
index 7d1aafc..d591487 100644 (file)
@@ -64,18 +64,19 @@ struct img_filter_args {
    int gather_comp;
 };
 
-typedef void (*img_filter_func)(struct sp_sampler_view *sp_sview,
-                                struct sp_sampler *sp_samp,
+typedef void (*img_filter_func)(const struct sp_sampler_view *sp_sview,
+                                const struct sp_sampler *sp_samp,
                                 const struct img_filter_args *args,
                                 float *rgba);
 
 struct filter_args {
    enum tgsi_sampler_control control;
    const int8_t *offset;
+   const uint *faces;
 };
 
-typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview,
-                                struct sp_sampler *sp_samp,
+typedef void (*mip_filter_func)(const struct sp_sampler_view *sp_sview,
+                                const struct sp_sampler *sp_samp,
                                 img_filter_func min_filter,
                                 img_filter_func mag_filter,
                                 const float s[TGSI_QUAD_SIZE],
@@ -87,16 +88,10 @@ typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview,
                                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
 
 
-typedef void (*filter_func)(struct sp_sampler_view *sp_sview,
-                            struct sp_sampler *sp_samp,
-                            const float s[TGSI_QUAD_SIZE],
-                            const float t[TGSI_QUAD_SIZE],
-                            const float p[TGSI_QUAD_SIZE],
-                            const float c0[TGSI_QUAD_SIZE],
-                            const float lod[TGSI_QUAD_SIZE],
-                            const struct filter_args *args,
-                            float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
-
+typedef void (*mip_level_func)(const struct sp_sampler_view *sp_sview,
+                               const struct sp_sampler *sp_samp,
+                               const float lod[TGSI_QUAD_SIZE],
+                               float level[TGSI_QUAD_SIZE]);
 
 typedef void (*fetch_func)(struct sp_sampler_view *sp_sview,
                            const int i[TGSI_QUAD_SIZE],
@@ -116,11 +111,7 @@ struct sp_sampler_view
 
    boolean need_swizzle;
    boolean pot2d;
-
-   filter_func get_samples;
-
-   /* this is just abusing the sampler_view object as local storage */
-   unsigned faces[TGSI_QUAD_SIZE];
+   boolean need_cube_convert;
 
    /* these are different per shader type */
    struct softpipe_tex_tile_cache *cache;
@@ -128,6 +119,10 @@ struct sp_sampler_view
 
 };
 
+struct sp_filter_funcs {
+   mip_level_func relative_level;
+   mip_filter_func filter;
+};
 
 struct sp_sampler {
    struct pipe_sampler_state base;
@@ -144,7 +139,7 @@ struct sp_sampler {
    wrap_linear_func linear_texcoord_t;
    wrap_linear_func linear_texcoord_p;
 
-   mip_filter_func mip_filter;
+   const struct sp_filter_funcs *filter_funcs;
 };
 
 
index 276e6a8..5c022f4 100644 (file)
@@ -1,6 +1,7 @@
 C_SOURCES := \
        svga_cmd.c \
        svga_cmd.h \
+       svga_cmd_vgpu10.c \
        svga_context.c \
        svga_context.h \
        svga_debug.h \
@@ -12,6 +13,8 @@ C_SOURCES := \
        svga_format.c \
        svga_format.h \
        svga_hw_reg.h \
+       svga_link.c \
+       svga_link.h \
        svga_pipe_blend.c \
        svga_pipe_blit.c \
        svga_pipe_clear.c \
@@ -20,10 +23,12 @@ C_SOURCES := \
        svga_pipe_draw.c \
        svga_pipe_flush.c \
        svga_pipe_fs.c \
+       svga_pipe_gs.c \
        svga_pipe_misc.c \
        svga_pipe_query.c \
        svga_pipe_rasterizer.c \
        svga_pipe_sampler.c \
+       svga_pipe_streamout.c \
        svga_pipe_vertex.c \
        svga_pipe_vs.c \
        svga_public.h \
@@ -44,15 +49,19 @@ C_SOURCES := \
        svga_shader.c \
        svga_shader.h \
        svga_state.c \
+       svga_state.h \
        svga_state_constants.c \
        svga_state_framebuffer.c \
        svga_state_fs.c \
-       svga_state.h \
+       svga_state_gs.c \
        svga_state_need_swtnl.c \
        svga_state_rss.c \
+       svga_state_sampler.c \
+       svga_state_tgsi_transform.c \
        svga_state_tss.c \
        svga_state_vdecl.c \
        svga_state_vs.c \
+       svga_streamout.h \
        svga_surface.c \
        svga_surface.h \
        svga_swtnl_backend.c \
@@ -65,6 +74,7 @@ C_SOURCES := \
        svga_tgsi_emit.h \
        svga_tgsi.h \
        svga_tgsi_insn.c \
+       svga_tgsi_vgpu10.c \
        svga_winsys.h \
        \
        svgadump/svga_dump.c \
@@ -80,6 +90,7 @@ SVGA_H_FILES := \
        include/svga3d_caps.h \
        include/svga3d_cmd.h \
        include/svga3d_devcaps.h \
+       include/svga3d_dx.h \
        include/svga3d_limits.h \
        include/svga3d_reg.h \
        include/svga3d_shaderdefs.h \
@@ -89,5 +100,6 @@ SVGA_H_FILES := \
        include/svga_overlay.h \
        include/svga_reg.h \
        include/svga_types.h \
+       include/VGPU10ShaderTokens.h \
        include/vmware_pack_begin.h \
        include/vmware_pack_end.h
diff --git a/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h b/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h
new file mode 100644 (file)
index 0000000..4440235
--- /dev/null
@@ -0,0 +1,489 @@
+/**********************************************************
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * VGPU10ShaderTokens.h --
+ *
+ *    VGPU10 shader token definitions.
+ *
+ */
+
+#ifndef VGPU10SHADERTOKENS_H
+#define VGPU10SHADERTOKENS_H
+
+/* Shader limits */
+#define VGPU10_MAX_VS_INPUTS 16
+#define VGPU10_MAX_VS_OUTPUTS 16
+#define VGPU10_MAX_GS_INPUTS 16
+#define VGPU10_MAX_GS_OUTPUTS 32
+#define VGPU10_MAX_FS_INPUTS 32
+#define VGPU10_MAX_FS_OUTPUTS 8
+#define VGPU10_MAX_TEMPS 4096
+#define VGPU10_MAX_CONSTANT_BUFFERS 14
+#define VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT 4096
+#define VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT 4096
+#define VGPU10_MAX_SAMPLERS 16
+#define VGPU10_MAX_RESOURCES 128
+#define VGPU10_MIN_TEXEL_FETCH_OFFSET -8
+#define VGPU10_MAX_TEXEL_FETCH_OFFSET 7
+
+typedef enum {
+   VGPU10_PIXEL_SHADER = 0,
+   VGPU10_VERTEX_SHADER = 1,
+   VGPU10_GEOMETRY_SHADER = 2
+} VGPU10_PROGRAM_TYPE;
+
+typedef union {
+   struct {
+      unsigned int minorVersion  : 4;
+      unsigned int majorVersion  : 4;
+      unsigned int               : 8;
+      unsigned int programType   : 16; /* VGPU10_PROGRAM_TYPE */
+   };
+   uint32 value;
+} VGPU10ProgramToken;
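
A hypothetical decode of a shader blob's leading token, not part of the
header itself ("tokens" is assumed to point at the start of the blob):

   VGPU10ProgramToken header;

   header.value = tokens[0];
   if (header.programType == VGPU10_GEOMETRY_SHADER &&
       header.majorVersion == 4 && header.minorVersion == 0) {
      /* a Shader Model 4.0 geometry shader */
   }
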
+
+
+typedef enum {
+   VGPU10_OPCODE_ADD                               = 0,
+   VGPU10_OPCODE_AND                               = 1,
+   VGPU10_OPCODE_BREAK                             = 2,
+   VGPU10_OPCODE_BREAKC                            = 3,
+   VGPU10_OPCODE_CALL                              = 4,
+   VGPU10_OPCODE_CALLC                             = 5,
+   VGPU10_OPCODE_CASE                              = 6,
+   VGPU10_OPCODE_CONTINUE                          = 7,
+   VGPU10_OPCODE_CONTINUEC                         = 8,
+   VGPU10_OPCODE_CUT                               = 9,
+   VGPU10_OPCODE_DEFAULT                           = 10,
+   VGPU10_OPCODE_DERIV_RTX                         = 11,
+   VGPU10_OPCODE_DERIV_RTY                         = 12,
+   VGPU10_OPCODE_DISCARD                           = 13,
+   VGPU10_OPCODE_DIV                               = 14,
+   VGPU10_OPCODE_DP2                               = 15,
+   VGPU10_OPCODE_DP3                               = 16,
+   VGPU10_OPCODE_DP4                               = 17,
+   VGPU10_OPCODE_ELSE                              = 18,
+   VGPU10_OPCODE_EMIT                              = 19,
+   VGPU10_OPCODE_EMITTHENCUT                       = 20,
+   VGPU10_OPCODE_ENDIF                             = 21,
+   VGPU10_OPCODE_ENDLOOP                           = 22,
+   VGPU10_OPCODE_ENDSWITCH                         = 23,
+   VGPU10_OPCODE_EQ                                = 24,
+   VGPU10_OPCODE_EXP                               = 25,
+   VGPU10_OPCODE_FRC                               = 26,
+   VGPU10_OPCODE_FTOI                              = 27,
+   VGPU10_OPCODE_FTOU                              = 28,
+   VGPU10_OPCODE_GE                                = 29,
+   VGPU10_OPCODE_IADD                              = 30,
+   VGPU10_OPCODE_IF                                = 31,
+   VGPU10_OPCODE_IEQ                               = 32,
+   VGPU10_OPCODE_IGE                               = 33,
+   VGPU10_OPCODE_ILT                               = 34,
+   VGPU10_OPCODE_IMAD                              = 35,
+   VGPU10_OPCODE_IMAX                              = 36,
+   VGPU10_OPCODE_IMIN                              = 37,
+   VGPU10_OPCODE_IMUL                              = 38,
+   VGPU10_OPCODE_INE                               = 39,
+   VGPU10_OPCODE_INEG                              = 40,
+   VGPU10_OPCODE_ISHL                              = 41,
+   VGPU10_OPCODE_ISHR                              = 42,
+   VGPU10_OPCODE_ITOF                              = 43,
+   VGPU10_OPCODE_LABEL                             = 44,
+   VGPU10_OPCODE_LD                                = 45,
+   VGPU10_OPCODE_LD_MS                             = 46,
+   VGPU10_OPCODE_LOG                               = 47,
+   VGPU10_OPCODE_LOOP                              = 48,
+   VGPU10_OPCODE_LT                                = 49,
+   VGPU10_OPCODE_MAD                               = 50,
+   VGPU10_OPCODE_MIN                               = 51,
+   VGPU10_OPCODE_MAX                               = 52,
+   VGPU10_OPCODE_CUSTOMDATA                        = 53,
+   VGPU10_OPCODE_MOV                               = 54,
+   VGPU10_OPCODE_MOVC                              = 55,
+   VGPU10_OPCODE_MUL                               = 56,
+   VGPU10_OPCODE_NE                                = 57,
+   VGPU10_OPCODE_NOP                               = 58,
+   VGPU10_OPCODE_NOT                               = 59,
+   VGPU10_OPCODE_OR                                = 60,
+   VGPU10_OPCODE_RESINFO                           = 61,
+   VGPU10_OPCODE_RET                               = 62,
+   VGPU10_OPCODE_RETC                              = 63,
+   VGPU10_OPCODE_ROUND_NE                          = 64,
+   VGPU10_OPCODE_ROUND_NI                          = 65,
+   VGPU10_OPCODE_ROUND_PI                          = 66,
+   VGPU10_OPCODE_ROUND_Z                           = 67,
+   VGPU10_OPCODE_RSQ                               = 68,
+   VGPU10_OPCODE_SAMPLE                            = 69,
+   VGPU10_OPCODE_SAMPLE_C                          = 70,
+   VGPU10_OPCODE_SAMPLE_C_LZ                       = 71,
+   VGPU10_OPCODE_SAMPLE_L                          = 72,
+   VGPU10_OPCODE_SAMPLE_D                          = 73,
+   VGPU10_OPCODE_SAMPLE_B                          = 74,
+   VGPU10_OPCODE_SQRT                              = 75,
+   VGPU10_OPCODE_SWITCH                            = 76,
+   VGPU10_OPCODE_SINCOS                            = 77,
+   VGPU10_OPCODE_UDIV                              = 78,
+   VGPU10_OPCODE_ULT                               = 79,
+   VGPU10_OPCODE_UGE                               = 80,
+   VGPU10_OPCODE_UMUL                              = 81,
+   VGPU10_OPCODE_UMAD                              = 82,
+   VGPU10_OPCODE_UMAX                              = 83,
+   VGPU10_OPCODE_UMIN                              = 84,
+   VGPU10_OPCODE_USHR                              = 85,
+   VGPU10_OPCODE_UTOF                              = 86,
+   VGPU10_OPCODE_XOR                               = 87,
+   VGPU10_OPCODE_DCL_RESOURCE                      = 88,
+   VGPU10_OPCODE_DCL_CONSTANT_BUFFER               = 89,
+   VGPU10_OPCODE_DCL_SAMPLER                       = 90,
+   VGPU10_OPCODE_DCL_INDEX_RANGE                   = 91,
+   VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY  = 92,
+   VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE            = 93,
+   VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT       = 94,
+   VGPU10_OPCODE_DCL_INPUT                         = 95,
+   VGPU10_OPCODE_DCL_INPUT_SGV                     = 96,
+   VGPU10_OPCODE_DCL_INPUT_SIV                     = 97,
+   VGPU10_OPCODE_DCL_INPUT_PS                      = 98,
+   VGPU10_OPCODE_DCL_INPUT_PS_SGV                  = 99,
+   VGPU10_OPCODE_DCL_INPUT_PS_SIV                  = 100,
+   VGPU10_OPCODE_DCL_OUTPUT                        = 101,
+   VGPU10_OPCODE_DCL_OUTPUT_SGV                    = 102,
+   VGPU10_OPCODE_DCL_OUTPUT_SIV                    = 103,
+   VGPU10_OPCODE_DCL_TEMPS                         = 104,
+   VGPU10_OPCODE_DCL_INDEXABLE_TEMP                = 105,
+   VGPU10_OPCODE_DCL_GLOBAL_FLAGS                  = 106,
+   VGPU10_OPCODE_IDIV                              = 107,
+   VGPU10_NUM_OPCODES                  /* Should be the last entry. */
+} VGPU10_OPCODE_TYPE;
+
+typedef enum {
+   VGPU10_INTERPOLATION_UNDEFINED = 0,
+   VGPU10_INTERPOLATION_CONSTANT = 1,
+   VGPU10_INTERPOLATION_LINEAR = 2,
+   VGPU10_INTERPOLATION_LINEAR_CENTROID = 3,
+   VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE = 4,
+   VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5,
+   VGPU10_INTERPOLATION_LINEAR_SAMPLE = 6,                  /* DX10.1 */
+   VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7     /* DX10.1 */
+} VGPU10_INTERPOLATION_MODE;
+
+typedef enum {
+   VGPU10_RESOURCE_DIMENSION_UNKNOWN = 0,
+   VGPU10_RESOURCE_DIMENSION_BUFFER = 1,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE1D = 2,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE2D = 3,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS = 4,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE3D = 5,
+   VGPU10_RESOURCE_DIMENSION_TEXTURECUBE = 6,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY = 7,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY = 8,
+   VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9,
+   VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10
+} VGPU10_RESOURCE_DIMENSION;
+
+typedef enum {
+   VGPU10_SAMPLER_MODE_DEFAULT = 0,
+   VGPU10_SAMPLER_MODE_COMPARISON = 1,
+   VGPU10_SAMPLER_MODE_MONO = 2
+} VGPU10_SAMPLER_MODE;
+
+typedef enum {
+   VGPU10_INSTRUCTION_TEST_ZERO     = 0,
+   VGPU10_INSTRUCTION_TEST_NONZERO  = 1
+} VGPU10_INSTRUCTION_TEST_BOOLEAN;
+
+typedef enum {
+   VGPU10_CB_IMMEDIATE_INDEXED   = 0,
+   VGPU10_CB_DYNAMIC_INDEXED     = 1
+} VGPU10_CB_ACCESS_PATTERN;
+
+typedef enum {
+   VGPU10_PRIMITIVE_UNDEFINED    = 0,
+   VGPU10_PRIMITIVE_POINT        = 1,
+   VGPU10_PRIMITIVE_LINE         = 2,
+   VGPU10_PRIMITIVE_TRIANGLE     = 3,
+   VGPU10_PRIMITIVE_LINE_ADJ     = 6,
+   VGPU10_PRIMITIVE_TRIANGLE_ADJ = 7
+} VGPU10_PRIMITIVE;
+
+typedef enum {
+   VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED          = 0,
+   VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST          = 1,
+   VGPU10_PRIMITIVE_TOPOLOGY_LINELIST           = 2,
+   VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP          = 3,
+   VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST       = 4,
+   VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP      = 5,
+   VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ       = 10,
+   VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ      = 11,
+   VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ   = 12,
+   VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ  = 13
+} VGPU10_PRIMITIVE_TOPOLOGY;
+
+typedef enum {
+   VGPU10_CUSTOMDATA_COMMENT                       = 0,
+   VGPU10_CUSTOMDATA_DEBUGINFO                     = 1,
+   VGPU10_CUSTOMDATA_OPAQUE                        = 2,
+   VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER = 3
+} VGPU10_CUSTOMDATA_CLASS;
+
+typedef enum {
+   VGPU10_RESINFO_RETURN_FLOAT      = 0,
+   VGPU10_RESINFO_RETURN_RCPFLOAT   = 1,
+   VGPU10_RESINFO_RETURN_UINT       = 2
+} VGPU10_RESINFO_RETURN_TYPE;
+
+typedef union {
+   struct {
+      unsigned int opcodeType          : 11; /* VGPU10_OPCODE_TYPE */
+      unsigned int interpolationMode   : 4;  /* VGPU10_INTERPOLATION_MODE */
+      unsigned int                     : 3;
+      unsigned int testBoolean         : 1;  /* VGPU10_INSTRUCTION_TEST_BOOLEAN */
+      unsigned int                     : 5;
+      unsigned int instructionLength   : 7;
+      unsigned int extended            : 1;
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int resourceDimension   : 5;  /* VGPU10_RESOURCE_DIMENSION */
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int samplerMode         : 4;  /* VGPU10_SAMPLER_MODE */
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int accessPattern       : 1;  /* VGPU10_CB_ACCESS_PATTERN */
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int primitive           : 6;  /* VGPU10_PRIMITIVE */
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int primitiveTopology   : 6;  /* VGPU10_PRIMITIVE_TOPOLOGY */
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int customDataClass     : 21; /* VGPU10_CUSTOMDATA_CLASS */
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int resinfoReturnType   : 2;  /* VGPU10_RESINFO_RETURN_TYPE */
+      unsigned int saturate            : 1;
+   };
+   struct {
+      unsigned int                     : 11;
+      unsigned int refactoringAllowed  : 1;
+   };
+   uint32 value;
+} VGPU10OpcodeToken0;
+
+
+typedef enum {
+   VGPU10_EXTENDED_OPCODE_EMPTY = 0,
+   VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS
+} VGPU10_EXTENDED_OPCODE_TYPE;
+
+typedef union {
+   struct {
+      unsigned int opcodeType : 6;  /* VGPU10_EXTENDED_OPCODE_TYPE */
+      unsigned int            : 3;
+      unsigned int offsetU    : 4;  /* Two's complement. */
+      unsigned int offsetV    : 4;  /* Two's complement. */
+      unsigned int offsetW    : 4;  /* Two's complement. */
+      unsigned int            : 10;
+      unsigned int extended   : 1;
+   };
+   uint32 value;
+} VGPU10OpcodeToken1;
+
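A short editorial sketch (not part of the commit) of packing this extension token: the 4-bit offsets are two's complement, so the legal range is -8..7 and, for example, -1 packs as 0xF.

static uint32
make_sample_controls_token(int u, int v, int w)
{
   VGPU10OpcodeToken1 token;

   token.value = 0;
   token.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
   token.offsetU = u & 0xf;   /* truncate to 4-bit two's complement */
   token.offsetV = v & 0xf;
   token.offsetW = w & 0xf;
   return token.value;
}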
+
+typedef enum {
+   VGPU10_OPERAND_0_COMPONENT = 0,
+   VGPU10_OPERAND_1_COMPONENT = 1,
+   VGPU10_OPERAND_4_COMPONENT = 2,
+   VGPU10_OPERAND_N_COMPONENT = 3   /* Unused for now. */
+} VGPU10_OPERAND_NUM_COMPONENTS;
+
+typedef enum {
+   VGPU10_OPERAND_4_COMPONENT_MASK_MODE = 0,
+   VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE = 1,
+   VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE = 2
+} VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE;
+
+#define VGPU10_OPERAND_4_COMPONENT_MASK_X    0x1
+#define VGPU10_OPERAND_4_COMPONENT_MASK_Y    0x2
+#define VGPU10_OPERAND_4_COMPONENT_MASK_Z    0x4
+#define VGPU10_OPERAND_4_COMPONENT_MASK_W    0x8
+
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XY   (VGPU10_OPERAND_4_COMPONENT_MASK_X   | VGPU10_OPERAND_4_COMPONENT_MASK_Y)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XZ   (VGPU10_OPERAND_4_COMPONENT_MASK_X   | VGPU10_OPERAND_4_COMPONENT_MASK_Z)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XW   (VGPU10_OPERAND_4_COMPONENT_MASK_X   | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_YZ   (VGPU10_OPERAND_4_COMPONENT_MASK_Y   | VGPU10_OPERAND_4_COMPONENT_MASK_Z)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_YW   (VGPU10_OPERAND_4_COMPONENT_MASK_Y   | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_ZW   (VGPU10_OPERAND_4_COMPONENT_MASK_Z   | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XYZ  (VGPU10_OPERAND_4_COMPONENT_MASK_XY  | VGPU10_OPERAND_4_COMPONENT_MASK_Z)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XYW  (VGPU10_OPERAND_4_COMPONENT_MASK_XY  | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XZW  (VGPU10_OPERAND_4_COMPONENT_MASK_XZ  | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_YZW  (VGPU10_OPERAND_4_COMPONENT_MASK_YZ  | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XYZW (VGPU10_OPERAND_4_COMPONENT_MASK_XYZ | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_ALL  VGPU10_OPERAND_4_COMPONENT_MASK_XYZW
+
+#define VGPU10_REGISTER_INDEX_FROM_SEMANTIC  0xffffffff
+
+typedef enum {
+   VGPU10_COMPONENT_X = 0,
+   VGPU10_COMPONENT_Y = 1,
+   VGPU10_COMPONENT_Z = 2,
+   VGPU10_COMPONENT_W = 3
+} VGPU10_COMPONENT_NAME;
+
+typedef enum {
+   VGPU10_OPERAND_TYPE_TEMP = 0,
+   VGPU10_OPERAND_TYPE_INPUT = 1,
+   VGPU10_OPERAND_TYPE_OUTPUT = 2,
+   VGPU10_OPERAND_TYPE_INDEXABLE_TEMP = 3,
+   VGPU10_OPERAND_TYPE_IMMEDIATE32 = 4,
+   VGPU10_OPERAND_TYPE_IMMEDIATE64 = 5,
+   VGPU10_OPERAND_TYPE_SAMPLER = 6,
+   VGPU10_OPERAND_TYPE_RESOURCE = 7,
+   VGPU10_OPERAND_TYPE_CONSTANT_BUFFER = 8,
+   VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER = 9,
+   VGPU10_OPERAND_TYPE_LABEL = 10,
+   VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID = 11,
+   VGPU10_OPERAND_TYPE_OUTPUT_DEPTH = 12,
+   VGPU10_OPERAND_TYPE_NULL = 13,
+   VGPU10_OPERAND_TYPE_RASTERIZER = 14,            /* DX10.1 */
+   VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15   /* DX10.1 */
+} VGPU10_OPERAND_TYPE;
+
+typedef enum {
+   VGPU10_OPERAND_INDEX_0D = 0,
+   VGPU10_OPERAND_INDEX_1D = 1,
+   VGPU10_OPERAND_INDEX_2D = 2,
+   VGPU10_OPERAND_INDEX_3D = 3
+} VGPU10_OPERAND_INDEX_DIMENSION;
+
+typedef enum {
+   VGPU10_OPERAND_INDEX_IMMEDIATE32 = 0,
+   VGPU10_OPERAND_INDEX_IMMEDIATE64 = 1,
+   VGPU10_OPERAND_INDEX_RELATIVE = 2,
+   VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3,
+   VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4
+} VGPU10_OPERAND_INDEX_REPRESENTATION;
+
+typedef union {
+   struct {
+      unsigned int numComponents          : 2;  /* VGPU10_OPERAND_NUM_COMPONENTS */
+      unsigned int selectionMode          : 2;  /* VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE */
+      unsigned int mask                   : 4;  /* D3D10_SB_OPERAND_4_COMPONENT_MASK_* */
+      unsigned int                        : 4;
+      unsigned int operandType            : 8;  /* VGPU10_OPERAND_TYPE */
+      unsigned int indexDimension         : 2;  /* VGPU10_OPERAND_INDEX_DIMENSION */
+      unsigned int index0Representation   : 3;  /* VGPU10_OPERAND_INDEX_REPRESENTATION */
+      unsigned int index1Representation   : 3;  /* VGPU10_OPERAND_INDEX_REPRESENTATION */
+      unsigned int                        : 3;
+      unsigned int extended               : 1;
+   };
+   struct {
+      unsigned int                        : 4;
+      unsigned int swizzleX               : 2;  /* VGPU10_COMPONENT_NAME */
+      unsigned int swizzleY               : 2;  /* VGPU10_COMPONENT_NAME */
+      unsigned int swizzleZ               : 2;  /* VGPU10_COMPONENT_NAME */
+      unsigned int swizzleW               : 2;  /* VGPU10_COMPONENT_NAME */
+   };
+   struct {
+      unsigned int                        : 4;
+      unsigned int selectMask             : 2;  /* VGPU10_COMPONENT_NAME */
+   };
+   uint32 value;
+} VGPU10OperandToken0;
+
+
+typedef enum {
+   VGPU10_EXTENDED_OPERAND_EMPTY = 0,
+   VGPU10_EXTENDED_OPERAND_MODIFIER = 1
+} VGPU10_EXTENDED_OPERAND_TYPE;
+
+typedef enum {
+   VGPU10_OPERAND_MODIFIER_NONE = 0,
+   VGPU10_OPERAND_MODIFIER_NEG = 1,
+   VGPU10_OPERAND_MODIFIER_ABS = 2,
+   VGPU10_OPERAND_MODIFIER_ABSNEG = 3
+} VGPU10_OPERAND_MODIFIER;
+
+typedef union {
+   struct {
+      unsigned int extendedOperandType : 6;  /* VGPU10_EXTENDED_OPERAND_TYPE */
+      unsigned int operandModifier     : 8;  /* VGPU10_OPERAND_MODIFIER */
+      unsigned int                     : 17;
+      unsigned int extended            : 1;
+   };
+   uint32 value;
+} VGPU10OperandToken1;
+
+
+typedef enum {
+   VGPU10_RETURN_TYPE_UNORM = 1,
+   VGPU10_RETURN_TYPE_SNORM = 2,
+   VGPU10_RETURN_TYPE_SINT = 3,
+   VGPU10_RETURN_TYPE_UINT = 4,
+   VGPU10_RETURN_TYPE_FLOAT = 5,
+   VGPU10_RETURN_TYPE_MIXED = 6
+} VGPU10_RESOURCE_RETURN_TYPE;
+
+typedef union {
+   struct {
+      unsigned int component0 : 4;  /* VGPU10_RESOURCE_RETURN_TYPE */
+      unsigned int component1 : 4;  /* VGPU10_RESOURCE_RETURN_TYPE */
+      unsigned int component2 : 4;  /* VGPU10_RESOURCE_RETURN_TYPE */
+      unsigned int component3 : 4;  /* VGPU10_RESOURCE_RETURN_TYPE */
+   };
+   uint32 value;
+} VGPU10ResourceReturnTypeToken;
+
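For instance, a resource whose four components all return float would pack as (editorial sketch, not part of the commit):

   VGPU10ResourceReturnTypeToken ret;
   ret.value = 0;
   ret.component0 = ret.component1 = VGPU10_RETURN_TYPE_FLOAT;
   ret.component2 = ret.component3 = VGPU10_RETURN_TYPE_FLOAT;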
+
+typedef enum {
+   VGPU10_NAME_UNDEFINED = 0,
+   VGPU10_NAME_POSITION = 1,
+   VGPU10_NAME_CLIP_DISTANCE = 2,
+   VGPU10_NAME_CULL_DISTANCE = 3,
+   VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX = 4,
+   VGPU10_NAME_VIEWPORT_ARRAY_INDEX = 5,
+   VGPU10_NAME_VERTEX_ID = 6,
+   VGPU10_NAME_PRIMITIVE_ID = 7,
+   VGPU10_NAME_INSTANCE_ID = 8,
+   VGPU10_NAME_IS_FRONT_FACE = 9,
+   VGPU10_NAME_SAMPLE_INDEX = 10
+} VGPU10_SYSTEM_NAME;
+
+typedef union {
+   struct {
+      unsigned int name : 16; /* VGPU10_SYSTEM_NAME */
+   };
+   uint32 value;
+} VGPU10NameToken;
+
+#endif
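
To make the token layout concrete, here is an editorial sketch (not part of the commit) of how the unions above compose into an instruction stream. "MOV r0, r1" encodes as five 32-bit tokens: one opcode token, then a description token plus an immediate index token for each operand. It assumes a C11 compiler (anonymous struct members) and the headers' uint32 type.

static unsigned
encode_mov_r0_r1(uint32 tokens[5])
{
   VGPU10OpcodeToken0 op;
   VGPU10OperandToken0 dst, src;

   op.value = 0;
   op.opcodeType = VGPU10_OPCODE_MOV;
   op.instructionLength = 5;               /* total tokens, this one included */

   dst.value = 0;
   dst.numComponents = VGPU10_OPERAND_4_COMPONENT;
   dst.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
   dst.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
   dst.operandType = VGPU10_OPERAND_TYPE_TEMP;
   dst.indexDimension = VGPU10_OPERAND_INDEX_1D;
   dst.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   src.value = 0;
   src.numComponents = VGPU10_OPERAND_4_COMPONENT;
   src.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   src.swizzleX = VGPU10_COMPONENT_X;      /* identity swizzle .xyzw */
   src.swizzleY = VGPU10_COMPONENT_Y;
   src.swizzleZ = VGPU10_COMPONENT_Z;
   src.swizzleW = VGPU10_COMPONENT_W;
   src.operandType = VGPU10_OPERAND_TYPE_TEMP;
   src.indexDimension = VGPU10_OPERAND_INDEX_1D;
   src.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   tokens[0] = op.value;
   tokens[1] = dst.value;
   tokens[2] = 0;                          /* r0 */
   tokens[3] = src.value;
   tokens[4] = 1;                          /* r1 */
   return 5;
}
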
index c6c8e36..01c8ba7 100644
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2007-2014 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -111,4 +111,4 @@ SVGA3dCapsRecord;
 typedef uint32 SVGA3dCapPair[2];
 
 
-#endif // _SVGA3D_CAPS_H_
+#endif
index 8953bf0..c843417 100644
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+ * Copyright 1998-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
  * and up.
  */
 
-#define SVGA_3D_CMD_LEGACY_BASE                                1000
-#define SVGA_3D_CMD_BASE                                       1040
-
-#define SVGA_3D_CMD_SURFACE_DEFINE                             1040
-#define SVGA_3D_CMD_SURFACE_DESTROY                            1041
-#define SVGA_3D_CMD_SURFACE_COPY                               1042
-#define SVGA_3D_CMD_SURFACE_STRETCHBLT                         1043
-#define SVGA_3D_CMD_SURFACE_DMA                                1044
-#define SVGA_3D_CMD_CONTEXT_DEFINE                             1045
-#define SVGA_3D_CMD_CONTEXT_DESTROY                            1046
-#define SVGA_3D_CMD_SETTRANSFORM                               1047
-#define SVGA_3D_CMD_SETZRANGE                                  1048
-#define SVGA_3D_CMD_SETRENDERSTATE                             1049
-#define SVGA_3D_CMD_SETRENDERTARGET                            1050
-#define SVGA_3D_CMD_SETTEXTURESTATE                            1051
-#define SVGA_3D_CMD_SETMATERIAL                                1052
-#define SVGA_3D_CMD_SETLIGHTDATA                               1053
-#define SVGA_3D_CMD_SETLIGHTENABLED                            1054
-#define SVGA_3D_CMD_SETVIEWPORT                                1055
-#define SVGA_3D_CMD_SETCLIPPLANE                               1056
-#define SVGA_3D_CMD_CLEAR                                      1057
-#define SVGA_3D_CMD_PRESENT                                    1058
-#define SVGA_3D_CMD_SHADER_DEFINE                              1059
-#define SVGA_3D_CMD_SHADER_DESTROY                             1060
-#define SVGA_3D_CMD_SET_SHADER                                 1061
-#define SVGA_3D_CMD_SET_SHADER_CONST                           1062
-#define SVGA_3D_CMD_DRAW_PRIMITIVES                            1063
-#define SVGA_3D_CMD_SETSCISSORRECT                             1064
-#define SVGA_3D_CMD_BEGIN_QUERY                                1065
-#define SVGA_3D_CMD_END_QUERY                                  1066
-#define SVGA_3D_CMD_WAIT_FOR_QUERY                             1067
-#define SVGA_3D_CMD_PRESENT_READBACK                           1068
-#define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN                     1069
-#define SVGA_3D_CMD_SURFACE_DEFINE_V2                          1070
-#define SVGA_3D_CMD_GENERATE_MIPMAPS                           1071
-#define SVGA_3D_CMD_VIDEO_CREATE_DECODER                       1072
-#define SVGA_3D_CMD_VIDEO_DESTROY_DECODER                      1073
-#define SVGA_3D_CMD_VIDEO_CREATE_PROCESSOR                     1074
-#define SVGA_3D_CMD_VIDEO_DESTROY_PROCESSOR                    1075
-#define SVGA_3D_CMD_VIDEO_DECODE_START_FRAME                   1076
-#define SVGA_3D_CMD_VIDEO_DECODE_RENDER                        1077
-#define SVGA_3D_CMD_VIDEO_DECODE_END_FRAME                     1078
-#define SVGA_3D_CMD_VIDEO_PROCESS_FRAME                        1079
-#define SVGA_3D_CMD_ACTIVATE_SURFACE                           1080
-#define SVGA_3D_CMD_DEACTIVATE_SURFACE                         1081
-#define SVGA_3D_CMD_SCREEN_DMA                                 1082
-#define SVGA_3D_CMD_SET_UNITY_SURFACE_COOKIE                   1083
-#define SVGA_3D_CMD_OPEN_CONTEXT_SURFACE                       1084
-
-#define SVGA_3D_CMD_LOGICOPS_BITBLT                            1085
-#define SVGA_3D_CMD_LOGICOPS_TRANSBLT                          1086
-#define SVGA_3D_CMD_LOGICOPS_STRETCHBLT                        1087
-#define SVGA_3D_CMD_LOGICOPS_COLORFILL                         1088
-#define SVGA_3D_CMD_LOGICOPS_ALPHABLEND                        1089
-#define SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND                    1090
-
-#define SVGA_3D_CMD_SET_OTABLE_BASE                            1091
-#define SVGA_3D_CMD_READBACK_OTABLE                            1092
-
-#define SVGA_3D_CMD_DEFINE_GB_MOB                              1093
-#define SVGA_3D_CMD_DESTROY_GB_MOB                             1094
-#define SVGA_3D_CMD_REDEFINE_GB_MOB                            1095
-#define SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING                      1096
-
-#define SVGA_3D_CMD_DEFINE_GB_SURFACE                          1097
-#define SVGA_3D_CMD_DESTROY_GB_SURFACE                         1098
-#define SVGA_3D_CMD_BIND_GB_SURFACE                            1099
-#define SVGA_3D_CMD_COND_BIND_GB_SURFACE                       1100
-#define SVGA_3D_CMD_UPDATE_GB_IMAGE                            1101
-#define SVGA_3D_CMD_UPDATE_GB_SURFACE                          1102
-#define SVGA_3D_CMD_READBACK_GB_IMAGE                          1103
-#define SVGA_3D_CMD_READBACK_GB_SURFACE                        1104
-#define SVGA_3D_CMD_INVALIDATE_GB_IMAGE                        1105
-#define SVGA_3D_CMD_INVALIDATE_GB_SURFACE                      1106
-
-#define SVGA_3D_CMD_DEFINE_GB_CONTEXT                          1107
-#define SVGA_3D_CMD_DESTROY_GB_CONTEXT                         1108
-#define SVGA_3D_CMD_BIND_GB_CONTEXT                            1109
-#define SVGA_3D_CMD_READBACK_GB_CONTEXT                        1110
-#define SVGA_3D_CMD_INVALIDATE_GB_CONTEXT                      1111
-
-#define SVGA_3D_CMD_DEFINE_GB_SHADER                           1112
-#define SVGA_3D_CMD_DESTROY_GB_SHADER                          1113
-#define SVGA_3D_CMD_BIND_GB_SHADER                             1114
-
-#define SVGA_3D_CMD_BIND_SHADERCONSTS                          1115
-
-#define SVGA_3D_CMD_BEGIN_GB_QUERY                             1116
-#define SVGA_3D_CMD_END_GB_QUERY                               1117
-#define SVGA_3D_CMD_WAIT_FOR_GB_QUERY                          1118
-
-#define SVGA_3D_CMD_NOP                                        1119
-
-#define SVGA_3D_CMD_ENABLE_GART                                1120
-#define SVGA_3D_CMD_DISABLE_GART                               1121
-#define SVGA_3D_CMD_MAP_MOB_INTO_GART                          1122
-#define SVGA_3D_CMD_UNMAP_GART_RANGE                           1123
-
-#define SVGA_3D_CMD_DEFINE_GB_SCREENTARGET                     1124
-#define SVGA_3D_CMD_DESTROY_GB_SCREENTARGET                    1125
-#define SVGA_3D_CMD_BIND_GB_SCREENTARGET                       1126
-#define SVGA_3D_CMD_UPDATE_GB_SCREENTARGET                     1127
-
-#define SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL                  1128
-#define SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL                1129
-
-#define SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE                 1130
-
-#define SVGA_3D_CMD_GB_SCREEN_DMA                              1131
-#define SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH                 1132
-#define SVGA_3D_CMD_GB_MOB_FENCE                               1133
-#define SVGA_3D_CMD_DEFINE_GB_SURFACE_V2                       1134
-#define SVGA_3D_CMD_DEFINE_GB_MOB64                            1135
-#define SVGA_3D_CMD_REDEFINE_GB_MOB64                          1136
-#define SVGA_3D_CMD_NOP_ERROR                                  1137
-
-#define SVGA_3D_CMD_RESERVED1                                  1138
-#define SVGA_3D_CMD_RESERVED2                                  1139
-#define SVGA_3D_CMD_RESERVED3                                  1140
-#define SVGA_3D_CMD_RESERVED4                                  1141
-#define SVGA_3D_CMD_RESERVED5                                  1142
-
-#define SVGA_3D_CMD_MAX                                        1203
-#define SVGA_3D_CMD_FUTURE_MAX                                 3000
+typedef enum {
+   SVGA_3D_CMD_LEGACY_BASE                                = 1000,
+   SVGA_3D_CMD_BASE                                       = 1040,
+
+   SVGA_3D_CMD_SURFACE_DEFINE                             = 1040,
+   SVGA_3D_CMD_SURFACE_DESTROY                            = 1041,
+   SVGA_3D_CMD_SURFACE_COPY                               = 1042,
+   SVGA_3D_CMD_SURFACE_STRETCHBLT                         = 1043,
+   SVGA_3D_CMD_SURFACE_DMA                                = 1044,
+   SVGA_3D_CMD_CONTEXT_DEFINE                             = 1045,
+   SVGA_3D_CMD_CONTEXT_DESTROY                            = 1046,
+   SVGA_3D_CMD_SETTRANSFORM                               = 1047,
+   SVGA_3D_CMD_SETZRANGE                                  = 1048,
+   SVGA_3D_CMD_SETRENDERSTATE                             = 1049,
+   SVGA_3D_CMD_SETRENDERTARGET                            = 1050,
+   SVGA_3D_CMD_SETTEXTURESTATE                            = 1051,
+   SVGA_3D_CMD_SETMATERIAL                                = 1052,
+   SVGA_3D_CMD_SETLIGHTDATA                               = 1053,
+   SVGA_3D_CMD_SETLIGHTENABLED                            = 1054,
+   SVGA_3D_CMD_SETVIEWPORT                                = 1055,
+   SVGA_3D_CMD_SETCLIPPLANE                               = 1056,
+   SVGA_3D_CMD_CLEAR                                      = 1057,
+   SVGA_3D_CMD_PRESENT                                    = 1058,
+   SVGA_3D_CMD_SHADER_DEFINE                              = 1059,
+   SVGA_3D_CMD_SHADER_DESTROY                             = 1060,
+   SVGA_3D_CMD_SET_SHADER                                 = 1061,
+   SVGA_3D_CMD_SET_SHADER_CONST                           = 1062,
+   SVGA_3D_CMD_DRAW_PRIMITIVES                            = 1063,
+   SVGA_3D_CMD_SETSCISSORRECT                             = 1064,
+   SVGA_3D_CMD_BEGIN_QUERY                                = 1065,
+   SVGA_3D_CMD_END_QUERY                                  = 1066,
+   SVGA_3D_CMD_WAIT_FOR_QUERY                             = 1067,
+   SVGA_3D_CMD_PRESENT_READBACK                           = 1068,
+   SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN                     = 1069,
+   SVGA_3D_CMD_SURFACE_DEFINE_V2                          = 1070,
+   SVGA_3D_CMD_GENERATE_MIPMAPS                           = 1071,
+   SVGA_3D_CMD_VIDEO_CREATE_DECODER                       = 1072,
+   SVGA_3D_CMD_VIDEO_DESTROY_DECODER                      = 1073,
+   SVGA_3D_CMD_VIDEO_CREATE_PROCESSOR                     = 1074,
+   SVGA_3D_CMD_VIDEO_DESTROY_PROCESSOR                    = 1075,
+   SVGA_3D_CMD_VIDEO_DECODE_START_FRAME                   = 1076,
+   SVGA_3D_CMD_VIDEO_DECODE_RENDER                        = 1077,
+   SVGA_3D_CMD_VIDEO_DECODE_END_FRAME                     = 1078,
+   SVGA_3D_CMD_VIDEO_PROCESS_FRAME                        = 1079,
+   SVGA_3D_CMD_ACTIVATE_SURFACE                           = 1080,
+   SVGA_3D_CMD_DEACTIVATE_SURFACE                         = 1081,
+   SVGA_3D_CMD_SCREEN_DMA                                 = 1082,
+   SVGA_3D_CMD_DEAD1                                      = 1083,
+   SVGA_3D_CMD_DEAD2                                      = 1084,
+
+   SVGA_3D_CMD_LOGICOPS_BITBLT                            = 1085,
+   SVGA_3D_CMD_LOGICOPS_TRANSBLT                          = 1086,
+   SVGA_3D_CMD_LOGICOPS_STRETCHBLT                        = 1087,
+   SVGA_3D_CMD_LOGICOPS_COLORFILL                         = 1088,
+   SVGA_3D_CMD_LOGICOPS_ALPHABLEND                        = 1089,
+   SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND                    = 1090,
+
+   SVGA_3D_CMD_SET_OTABLE_BASE                            = 1091,
+   SVGA_3D_CMD_READBACK_OTABLE                            = 1092,
+
+   SVGA_3D_CMD_DEFINE_GB_MOB                              = 1093,
+   SVGA_3D_CMD_DESTROY_GB_MOB                             = 1094,
+   SVGA_3D_CMD_DEAD3                                      = 1095,
+   SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING                      = 1096,
+
+   SVGA_3D_CMD_DEFINE_GB_SURFACE                          = 1097,
+   SVGA_3D_CMD_DESTROY_GB_SURFACE                         = 1098,
+   SVGA_3D_CMD_BIND_GB_SURFACE                            = 1099,
+   SVGA_3D_CMD_COND_BIND_GB_SURFACE                       = 1100,
+   SVGA_3D_CMD_UPDATE_GB_IMAGE                            = 1101,
+   SVGA_3D_CMD_UPDATE_GB_SURFACE                          = 1102,
+   SVGA_3D_CMD_READBACK_GB_IMAGE                          = 1103,
+   SVGA_3D_CMD_READBACK_GB_SURFACE                        = 1104,
+   SVGA_3D_CMD_INVALIDATE_GB_IMAGE                        = 1105,
+   SVGA_3D_CMD_INVALIDATE_GB_SURFACE                      = 1106,
+
+   SVGA_3D_CMD_DEFINE_GB_CONTEXT                          = 1107,
+   SVGA_3D_CMD_DESTROY_GB_CONTEXT                         = 1108,
+   SVGA_3D_CMD_BIND_GB_CONTEXT                            = 1109,
+   SVGA_3D_CMD_READBACK_GB_CONTEXT                        = 1110,
+   SVGA_3D_CMD_INVALIDATE_GB_CONTEXT                      = 1111,
+
+   SVGA_3D_CMD_DEFINE_GB_SHADER                           = 1112,
+   SVGA_3D_CMD_DESTROY_GB_SHADER                          = 1113,
+   SVGA_3D_CMD_BIND_GB_SHADER                             = 1114,
+
+   SVGA_3D_CMD_SET_OTABLE_BASE64                          = 1115,
+
+   SVGA_3D_CMD_BEGIN_GB_QUERY                             = 1116,
+   SVGA_3D_CMD_END_GB_QUERY                               = 1117,
+   SVGA_3D_CMD_WAIT_FOR_GB_QUERY                          = 1118,
+
+   SVGA_3D_CMD_NOP                                        = 1119,
+
+   SVGA_3D_CMD_ENABLE_GART                                = 1120,
+   SVGA_3D_CMD_DISABLE_GART                               = 1121,
+   SVGA_3D_CMD_MAP_MOB_INTO_GART                          = 1122,
+   SVGA_3D_CMD_UNMAP_GART_RANGE                           = 1123,
+
+   SVGA_3D_CMD_DEFINE_GB_SCREENTARGET                     = 1124,
+   SVGA_3D_CMD_DESTROY_GB_SCREENTARGET                    = 1125,
+   SVGA_3D_CMD_BIND_GB_SCREENTARGET                       = 1126,
+   SVGA_3D_CMD_UPDATE_GB_SCREENTARGET                     = 1127,
+
+   SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL                  = 1128,
+   SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL                = 1129,
+
+   SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE                 = 1130,
+
+   SVGA_3D_CMD_GB_SCREEN_DMA                              = 1131,
+   SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH                 = 1132,
+   SVGA_3D_CMD_GB_MOB_FENCE                               = 1133,
+   SVGA_3D_CMD_DEFINE_GB_SURFACE_V2                       = 1134,
+   SVGA_3D_CMD_DEFINE_GB_MOB64                            = 1135,
+   SVGA_3D_CMD_REDEFINE_GB_MOB64                          = 1136,
+   SVGA_3D_CMD_NOP_ERROR                                  = 1137,
+
+   SVGA_3D_CMD_SET_VERTEX_STREAMS                         = 1138,
+   SVGA_3D_CMD_SET_VERTEX_DECLS                           = 1139,
+   SVGA_3D_CMD_SET_VERTEX_DIVISORS                        = 1140,
+   SVGA_3D_CMD_DRAW                                       = 1141,
+   SVGA_3D_CMD_DRAW_INDEXED                               = 1142,
+
+   /*
+    * DX10 Commands
+    */
+   SVGA_3D_CMD_DX_MIN                                     = 1143,
+   SVGA_3D_CMD_DX_DEFINE_CONTEXT                          = 1143,
+   SVGA_3D_CMD_DX_DESTROY_CONTEXT                         = 1144,
+   SVGA_3D_CMD_DX_BIND_CONTEXT                            = 1145,
+   SVGA_3D_CMD_DX_READBACK_CONTEXT                        = 1146,
+   SVGA_3D_CMD_DX_INVALIDATE_CONTEXT                      = 1147,
+   SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER              = 1148,
+   SVGA_3D_CMD_DX_SET_SHADER_RESOURCES                    = 1149,
+   SVGA_3D_CMD_DX_SET_SHADER                              = 1150,
+   SVGA_3D_CMD_DX_SET_SAMPLERS                            = 1151,
+   SVGA_3D_CMD_DX_DRAW                                    = 1152,
+   SVGA_3D_CMD_DX_DRAW_INDEXED                            = 1153,
+   SVGA_3D_CMD_DX_DRAW_INSTANCED                          = 1154,
+   SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED                  = 1155,
+   SVGA_3D_CMD_DX_DRAW_AUTO                               = 1156,
+   SVGA_3D_CMD_DX_SET_INPUT_LAYOUT                        = 1157,
+   SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS                      = 1158,
+   SVGA_3D_CMD_DX_SET_INDEX_BUFFER                        = 1159,
+   SVGA_3D_CMD_DX_SET_TOPOLOGY                            = 1160,
+   SVGA_3D_CMD_DX_SET_RENDERTARGETS                       = 1161,
+   SVGA_3D_CMD_DX_SET_BLEND_STATE                         = 1162,
+   SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE                  = 1163,
+   SVGA_3D_CMD_DX_SET_RASTERIZER_STATE                    = 1164,
+   SVGA_3D_CMD_DX_DEFINE_QUERY                            = 1165,
+   SVGA_3D_CMD_DX_DESTROY_QUERY                           = 1166,
+   SVGA_3D_CMD_DX_BIND_QUERY                              = 1167,
+   SVGA_3D_CMD_DX_SET_QUERY_OFFSET                        = 1168,
+   SVGA_3D_CMD_DX_BEGIN_QUERY                             = 1169,
+   SVGA_3D_CMD_DX_END_QUERY                               = 1170,
+   SVGA_3D_CMD_DX_READBACK_QUERY                          = 1171,
+   SVGA_3D_CMD_DX_SET_PREDICATION                         = 1172,
+   SVGA_3D_CMD_DX_SET_SOTARGETS                           = 1173,
+   SVGA_3D_CMD_DX_SET_VIEWPORTS                           = 1174,
+   SVGA_3D_CMD_DX_SET_SCISSORRECTS                        = 1175,
+   SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW                 = 1176,
+   SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW                 = 1177,
+   SVGA_3D_CMD_DX_PRED_COPY_REGION                        = 1178,
+   SVGA_3D_CMD_DX_PRED_COPY                               = 1179,
+   SVGA_3D_CMD_DX_STRETCHBLT                              = 1180,
+   SVGA_3D_CMD_DX_GENMIPS                                 = 1181,
+   SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE                      = 1182,
+   SVGA_3D_CMD_DX_READBACK_SUBRESOURCE                    = 1183,
+   SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE                  = 1184,
+   SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW              = 1185,
+   SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW             = 1186,
+   SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW                = 1187,
+   SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW               = 1188,
+   SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW                = 1189,
+   SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW               = 1190,
+   SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT                    = 1191,
+   SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT                   = 1192,
+   SVGA_3D_CMD_DX_DEFINE_BLEND_STATE                      = 1193,
+   SVGA_3D_CMD_DX_DESTROY_BLEND_STATE                     = 1194,
+   SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE               = 1195,
+   SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE              = 1196,
+   SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE                 = 1197,
+   SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE                = 1198,
+   SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE                    = 1199,
+   SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE                   = 1200,
+   SVGA_3D_CMD_DX_DEFINE_SHADER                           = 1201,
+   SVGA_3D_CMD_DX_DESTROY_SHADER                          = 1202,
+   SVGA_3D_CMD_DX_BIND_SHADER                             = 1203,
+   SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT                     = 1204,
+   SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT                    = 1205,
+   SVGA_3D_CMD_DX_SET_STREAMOUTPUT                        = 1206,
+   SVGA_3D_CMD_DX_SET_COTABLE                             = 1207,
+   SVGA_3D_CMD_DX_READBACK_COTABLE                        = 1208,
+   SVGA_3D_CMD_DX_BUFFER_COPY                             = 1209,
+   SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER                    = 1210,
+   SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK               = 1211,
+   SVGA_3D_CMD_DX_MOVE_QUERY                              = 1212,
+   SVGA_3D_CMD_DX_BIND_ALL_QUERY                          = 1213,
+   SVGA_3D_CMD_DX_READBACK_ALL_QUERY                      = 1214,
+   SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER               = 1215,
+   SVGA_3D_CMD_DX_MOB_FENCE_64                            = 1216,
+   SVGA_3D_CMD_DX_BIND_ALL_SHADER                         = 1217,
+   SVGA_3D_CMD_DX_HINT                                    = 1218,
+   SVGA_3D_CMD_DX_BUFFER_UPDATE                           = 1219,
+   SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET           = 1220,
+   SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET           = 1221,
+   SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET           = 1222,
+
+   /*
+    * Reserve some IDs to be used for the DX11 shader types.
+    */
+   SVGA_3D_CMD_DX_RESERVED1                               = 1223,
+   SVGA_3D_CMD_DX_RESERVED2                               = 1224,
+   SVGA_3D_CMD_DX_RESERVED3                               = 1225,
+
+   SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER                    = 1226,
+
+   SVGA_3D_CMD_DX_MAX                                     = 1227,
+   SVGA_3D_CMD_MAX                                        = 1227,
+   SVGA_3D_CMD_FUTURE_MAX                                 = 3000
+} SVGAFifo3dCmdId;
 
 /*
  * FIFO command format definitions:
@@ -194,54 +291,6 @@ struct {
 #include "vmware_pack_end.h"
 SVGA3dCmdHeader;
 
-typedef enum {
-   SVGA3D_SURFACE_CUBEMAP               = (1 << 0),
-
-   /*
-    * HINT flags are not enforced by the device but are useful for
-    * performance.
-    */
-   SVGA3D_SURFACE_HINT_STATIC           = (1 << 1),
-   SVGA3D_SURFACE_HINT_DYNAMIC          = (1 << 2),
-   SVGA3D_SURFACE_HINT_INDEXBUFFER      = (1 << 3),
-   SVGA3D_SURFACE_HINT_VERTEXBUFFER     = (1 << 4),
-   SVGA3D_SURFACE_HINT_TEXTURE          = (1 << 5),
-   SVGA3D_SURFACE_HINT_RENDERTARGET     = (1 << 6),
-   SVGA3D_SURFACE_HINT_DEPTHSTENCIL     = (1 << 7),
-   SVGA3D_SURFACE_HINT_WRITEONLY        = (1 << 8),
-   SVGA3D_SURFACE_MASKABLE_ANTIALIAS    = (1 << 9),
-   SVGA3D_SURFACE_AUTOGENMIPMAPS        = (1 << 10),
-   SVGA3D_SURFACE_DECODE_RENDERTARGET   = (1 << 11),
-
-   /*
-    * Is this surface using a base-level pitch for it's mob backing?
-    *
-    * This flag is not intended to be set by guest-drivers, but is instead
-    * set by the device when the surface is bound to a mob with a specified
-    * pitch.
-    */
-   SVGA3D_SURFACE_MOB_PITCH             = (1 << 12),
-
-   SVGA3D_SURFACE_INACTIVE              = (1 << 13),
-   SVGA3D_SURFACE_HINT_RT_LOCKABLE      = (1 << 14),
-   SVGA3D_SURFACE_VOLUME                = (1 << 15),
-
-   /*
-    * Required to be set on a surface to bind it to a screen target.
-    */
-   SVGA3D_SURFACE_SCREENTARGET          = (1 << 16),
-
-   SVGA3D_SURFACE_RESERVED1             = (1 << 17),
-   SVGA3D_SURFACE_1D                    = (1 << 18),
-   SVGA3D_SURFACE_ARRAY                 = (1 << 19),
-
-} SVGA3dSurfaceFlags;
-
-#define SVGA3D_SURFACE_HB_DISALLOWED_MASK (SVGA3D_SURFACE_SCREENTARGET | \
-                                           SVGA3D_SURFACE_MOB_PITCH    | \
-                                           SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \
-                                           SVGA3D_SURFACE_BIND_STREAM_OUTPUT)
-
 typedef
 #include "vmware_pack_begin.h"
 struct {
@@ -669,6 +718,128 @@ SVGA3dCmdDrawPrimitives;      /* SVGA_3D_CMD_DRAWPRIMITIVES */
 typedef
 #include "vmware_pack_begin.h"
 struct {
+   uint32 cid;
+
+   uint32 primitiveCount;        /* How many primitives to render. */
+   uint32 startVertexLocation;   /* Which vertex to start rendering at. */
+
+   uint8 primitiveType;          /* SVGA3dPrimitiveType */
+   uint8 padding[3];
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDraw;
+
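An editorial sketch of how a command like this reaches the device: every FIFO 3D command is an SVGA3dCmdHeader (the SVGAFifo3dCmdId plus the body size in bytes) immediately followed by the body struct. fifo_reserve()/fifo_commit() are hypothetical stand-ins for the real FIFO reservation code, and the id/size header layout is assumed from context.

#include <string.h>

void *fifo_reserve(size_t bytes);   /* hypothetical */
void fifo_commit(size_t bytes);     /* hypothetical */

static void
emit_draw(uint32 cid, uint8 primitiveType,
          uint32 primitiveCount, uint32 startVertexLocation)
{
   SVGA3dCmdHeader *header =
      fifo_reserve(sizeof *header + sizeof(SVGA3dCmdDraw));
   SVGA3dCmdDraw *cmd = (SVGA3dCmdDraw *)(header + 1);

   header->id = SVGA_3D_CMD_DRAW;
   header->size = sizeof *cmd;            /* body size; header not counted */
   cmd->cid = cid;
   cmd->primitiveCount = primitiveCount;
   cmd->startVertexLocation = startVertexLocation;
   cmd->primitiveType = primitiveType;
   memset(cmd->padding, 0, sizeof cmd->padding);

   fifo_commit(sizeof *header + sizeof *cmd);
}
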
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+
+   uint8 primitiveType;       /* SVGA3dPrimitiveType */
+
+   uint32 indexBufferSid;     /* Valid index buffer sid. */
+   uint32 indexBufferOffset;  /* Byte offset into the index buffer, almost */
+                              /* always 0 for DX9 guests, non-zero for OpenGL */
+                              /* guests.  We can't represent non-multiple of */
+                              /* stride offsets in D3D9Renderer... */
+   uint8 indexBufferStride;   /* Allowable values = 1, 2, or 4 */
+
+   int32 baseVertexLocation;  /* Bias applied to the index when selecting a */
+                              /* vertex from the streams, may be negative */
+
+   uint32 primitiveCount;     /* How many primitives to render */
+   uint32 pad0;
+   uint16 pad1;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDrawIndexed;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   /*
+    * Describe a vertex array's data type, and define how it is to be
+    * used by the fixed function pipeline or the vertex shader. It
+    * isn't useful to have two VertexDecls with the same
+    * VertexArrayIdentity in one draw call.
+    */
+   uint16 streamOffset;
+   uint8 stream;
+   uint8 type;          /* SVGA3dDeclType */
+   uint8 method;        /* SVGA3dDeclMethod */
+   uint8 usage;         /* SVGA3dDeclUsage */
+   uint8 usageIndex;
+   uint8 padding;
+
+}
+#include "vmware_pack_end.h"
+SVGA3dVertexElement;
+
+/*
+ * Should the vertex element respect the stream value?  The high bit of the
+ * stream should be set to indicate that the stream should be respected.  If
+ * the high bit is not set, the stream will be ignored and replaced by the index
+ * of the position of the currently considered vertex element.
+ *
+ * All guests should set this bit and correctly specify the stream going
+ * forward.
+ */
+#define SVGA3D_VERTEX_ELEMENT_RESPECT_STREAM (1 << 7)
+
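For example (editorial sketch), a guest that wants element data taken from stream 2 and honored as such would write:

   SVGA3dVertexElement elem;
   memset(&elem, 0, sizeof elem);                 /* needs <string.h> */
   elem.stream = SVGA3D_VERTEX_ELEMENT_RESPECT_STREAM | 2;
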
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+
+   uint32 numElements;
+
+   /*
+    * Followed by numElements SVGA3dVertexElement structures.
+    *
+    * If numElements < SVGA3D_MAX_VERTEX_ARRAYS, the remaining elements
+    * are cleared and will not be used by following draws.
+    */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetVertexDecls;
+
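Variable-length commands like this one carry their array in-line after the fixed part, and the header's size must cover both. An editorial sketch, reusing the hypothetical fifo_reserve()/fifo_commit() helpers from the SVGA3dCmdDraw sketch above:

static void
emit_set_vertex_decls(uint32 cid,
                      const SVGA3dVertexElement *elems, uint32 numElements)
{
   size_t bodySize = sizeof(SVGA3dCmdSetVertexDecls)
                   + numElements * sizeof(SVGA3dVertexElement);
   SVGA3dCmdHeader *header = fifo_reserve(sizeof *header + bodySize);
   SVGA3dCmdSetVertexDecls *cmd = (SVGA3dCmdSetVertexDecls *)(header + 1);

   header->id = SVGA_3D_CMD_SET_VERTEX_DECLS;
   header->size = (uint32)bodySize;
   cmd->cid = cid;
   cmd->numElements = numElements;
   memcpy(cmd + 1, elems, numElements * sizeof *elems);  /* in-line array */
   fifo_commit(sizeof *header + bodySize);
}
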
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 sid;
+   uint32 stride;
+   uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGA3dVertexStream;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+
+   uint32 numStreams;
+   /*
+    * Followed by numStreams SVGA3dVertexStream structures.
+    *
+    * If numStreams < SVGA3D_MAX_VERTEX_ARRAYS, the remaining streams
+    * are cleared and will not be used by following draws.
+    */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetVertexStreams;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+   uint32 numDivisors;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetVertexDivisors;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
    uint32                   stage;
    SVGA3dTextureStateName   name;
    union {
@@ -989,38 +1160,6 @@ struct SVGA3dCmdScreenDMA {
 SVGA3dCmdScreenDMA;        /* SVGA_3D_CMD_SCREEN_DMA */
 
 /*
- * Set Unity Surface Cookie
- *
- * Associates the supplied cookie with the surface id for use with
- * Unity.  This cookie is a hint from guest to host, there is no way
- * for the guest to readback the cookie and the host is free to drop
- * the cookie association at will.  The default value for the cookie
- * on all surfaces is 0.
- */
-
-typedef
-#include "vmware_pack_begin.h"
-struct SVGA3dCmdSetUnitySurfaceCookie {
-   uint32 sid;
-   uint64 cookie;
-}
-#include "vmware_pack_end.h"
-SVGA3dCmdSetUnitySurfaceCookie;   /* SVGA_3D_CMD_SET_UNITY_SURFACE_COOKIE */
-
-/*
- * Open a context-specific surface in a non-context-specific manner.
- */
-
-typedef
-#include "vmware_pack_begin.h"
-struct SVGA3dCmdOpenContextSurface {
-   uint32 sid;
-}
-#include "vmware_pack_end.h"
-SVGA3dCmdOpenContextSurface;   /* SVGA_3D_CMD_OPEN_CONTEXT_SURFACE */
-
-
-/*
  * Logic ops
  */
 
@@ -1139,8 +1278,8 @@ struct SVGA3dCmdLogicOpsClearTypeBlend {
    uint32 gamma;
    uint32 color;
    uint32 color2;
-   int alphaOffsetX;
-   int alphaOffsetY;
+   int32 alphaOffsetX;
+   int32 alphaOffsetY;
    /* Followed by variable number of SVGA3dBox structures */
 }
 #include "vmware_pack_end.h"
@@ -1151,12 +1290,80 @@ SVGA3dCmdLogicOpsClearTypeBlend;   /* SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND */
  * Guest-backed objects definitions.
  */
 
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGAMobFormat ptDepth;
+   uint32 sizeInBytes;
+   PPN64 base;
+}
+#include "vmware_pack_end.h"
+SVGAOTableMobEntry;
+#define SVGA3D_OTABLE_MOB_ENTRY_SIZE (sizeof(SVGAOTableMobEntry))
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dSurfaceFormat format;
+   SVGA3dSurfaceFlags surfaceFlags;
+   uint32 numMipLevels;
+   uint32 multisampleCount;
+   SVGA3dTextureFilter autogenFilter;
+   SVGA3dSize size;
+   SVGAMobId mobid;
+   uint32 arraySize;
+   uint32 mobPitch;
+   uint32 pad[5];
+}
+#include "vmware_pack_end.h"
+SVGAOTableSurfaceEntry;
+#define SVGA3D_OTABLE_SURFACE_ENTRY_SIZE (sizeof(SVGAOTableSurfaceEntry))
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGAOTableContextEntry;
+#define SVGA3D_OTABLE_CONTEXT_ENTRY_SIZE (sizeof(SVGAOTableContextEntry))
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dShaderType type;
+   uint32 sizeInBytes;
+   uint32 offsetInBytes;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGAOTableShaderEntry;
+#define SVGA3D_OTABLE_SHADER_ENTRY_SIZE (sizeof(SVGAOTableShaderEntry))
+
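The *_ENTRY_SIZE macros let the guest size each object table before handing its base to the device with SVGA_3D_CMD_SET_OTABLE_BASE. A small editorial sketch (page size assumed to be 4096 here):

static uint32
otable_size_in_bytes(uint32 numEntries, uint32 entrySize)
{
   uint32 bytes = numEntries * entrySize;
   return (bytes + 4095u) & ~4095u;       /* round up to whole pages */
}

/* e.g. otable_size_in_bytes(maxMobs, SVGA3D_OTABLE_MOB_ENTRY_SIZE) */
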
 #define SVGA_STFLAG_PRIMARY (1 << 0)
 typedef uint32 SVGAScreenTargetFlags;
 
 typedef
 #include "vmware_pack_begin.h"
 struct {
+   SVGA3dSurfaceImageId image;
+   uint32 width;
+   uint32 height;
+   int32 xRoot;
+   int32 yRoot;
+   SVGAScreenTargetFlags flags;
+   uint32 dpi;
+   uint32 pad[7];
+}
+#include "vmware_pack_end.h"
+SVGAOTableScreenTargetEntry;
+#define SVGA3D_OTABLE_SCREEN_TARGET_ENTRY_SIZE \
+       (sizeof(SVGAOTableScreenTargetEntry))
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
    float value[4];
 }
 #include "vmware_pack_end.h"
@@ -1178,6 +1385,209 @@ struct {
 #include "vmware_pack_end.h"
 SVGA3dShaderConstBool;
 
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint16 streamOffset;
+   uint8 stream;
+   uint8 type;
+   uint8 methodUsage;
+   uint8 usageIndex;
+}
+#include "vmware_pack_end.h"
+SVGAGBVertexElement;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 sid;
+   uint16 stride;
+   uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGAGBVertexStream;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dRect viewport;
+   SVGA3dRect scissorRect;
+   SVGA3dZRange zRange;
+
+   SVGA3dSurfaceImageId renderTargets[SVGA3D_RT_MAX];
+   SVGAGBVertexElement decl1[4];
+
+   uint32 renderStates[SVGA3D_RS_MAX];
+   SVGAGBVertexElement decl2[18];
+   uint32 pad0[2];
+
+   struct {
+      SVGA3dFace face;
+      SVGA3dMaterial material;
+   } material;
+
+   float clipPlanes[SVGA3D_NUM_CLIPPLANES][4];
+   float matrices[SVGA3D_TRANSFORM_MAX][16];
+
+   SVGA3dBool lightEnabled[SVGA3D_NUM_LIGHTS];
+   SVGA3dLightData lightData[SVGA3D_NUM_LIGHTS];
+
+   /*
+    * Shaders currently bound
+    */
+   uint32 shaders[SVGA3D_NUM_SHADERTYPE_PREDX];
+   SVGAGBVertexElement decl3[10];
+   uint32 pad1[3];
+
+   uint32 occQueryActive;
+   uint32 occQueryValue;
+
+   /*
+    * Int/Bool Shader constants
+    */
+   SVGA3dShaderConstInt pShaderIValues[SVGA3D_CONSTINTREG_MAX];
+   SVGA3dShaderConstInt vShaderIValues[SVGA3D_CONSTINTREG_MAX];
+   uint16 pShaderBValues;
+   uint16 vShaderBValues;
+
+   SVGAGBVertexStream streams[SVGA3D_MAX_VERTEX_ARRAYS];
+   SVGA3dVertexDivisor divisors[SVGA3D_MAX_VERTEX_ARRAYS];
+   uint32 numVertexDecls;
+   uint32 numVertexStreams;
+   uint32 numVertexDivisors;
+   uint32 pad2[30];
+
+   /*
+    * Texture Stages
+    *
+    * SVGA3D_TS_INVALID through SVGA3D_TS_CONSTANT are in the
+    * textureStages array.
+    * SVGA3D_TS_COLOR_KEY is in tsColorKey.
+    */
+   uint32 tsColorKey[SVGA3D_NUM_TEXTURE_UNITS];
+   uint32 textureStages[SVGA3D_NUM_TEXTURE_UNITS][SVGA3D_TS_CONSTANT + 1];
+   uint32 tsColorKeyEnable[SVGA3D_NUM_TEXTURE_UNITS];
+
+   /*
+    * Float Shader constants.
+    */
+   SVGA3dShaderConstFloat pShaderFValues[SVGA3D_CONSTREG_MAX];
+   SVGA3dShaderConstFloat vShaderFValues[SVGA3D_CONSTREG_MAX];
+}
+#include "vmware_pack_end.h"
+SVGAGBContextData;
+#define SVGA3D_CONTEXT_DATA_SIZE (sizeof(SVGAGBContextData))
+
+/*
+ * SVGA3dCmdSetOTableBase --
+ *
+ * This command allows the guest to specify the base PPN of the
+ * specified object table.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGAOTableType type;
+   PPN baseAddress;
+   uint32 sizeInBytes;
+   uint32 validSizeInBytes;
+   SVGAMobFormat ptDepth;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetOTableBase;  /* SVGA_3D_CMD_SET_OTABLE_BASE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGAOTableType type;
+   PPN64 baseAddress;
+   uint32 sizeInBytes;
+   uint32 validSizeInBytes;
+   SVGAMobFormat ptDepth;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetOTableBase64;  /* SVGA_3D_CMD_SET_OTABLE_BASE64 */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGAOTableType type;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdReadbackOTable;  /* SVGA_3D_CMD_READBACK_OTABLE */
+
+/*
+ * Define a memory object (Mob) in the OTable.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDefineGBMob {
+   SVGAMobId mobid;
+   SVGAMobFormat ptDepth;
+   PPN base;
+   uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDefineGBMob;   /* SVGA_3D_CMD_DEFINE_GB_MOB */
+
+
+/*
+ * Destroys an object in the OTable.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDestroyGBMob {
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDestroyGBMob;   /* SVGA_3D_CMD_DESTROY_GB_MOB */
+
+
+/*
+ * Define a memory object (Mob) in the OTable with a PPN64 base.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDefineGBMob64 {
+   SVGAMobId mobid;
+   SVGAMobFormat ptDepth;
+   PPN64 base;
+   uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDefineGBMob64;   /* SVGA_3D_CMD_DEFINE_GB_MOB64 */
+
+/*
+ * Redefine an object in the OTable with PPN64 base.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdRedefineGBMob64 {
+   SVGAMobId mobid;
+   SVGAMobFormat ptDepth;
+   PPN64 base;
+   uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdRedefineGBMob64;   /* SVGA_3D_CMD_REDEFINE_GB_MOB64 */
+
+/*
+ * Notification that the page tables have been modified.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdUpdateGBMobMapping {
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdUpdateGBMobMapping;   /* SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING */
+
 /*
  * Define a guest-backed surface.
  */
@@ -1243,7 +1653,7 @@ SVGA3dCmdBindGBSurfaceWithPitch;   /* SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH */
 
 typedef
 #include "vmware_pack_begin.h"
-struct{
+struct SVGA3dCmdCondBindGBSurface {
    uint32 sid;
    SVGAMobId testMobid;
    SVGAMobId mobid;
@@ -1477,18 +1887,6 @@ struct SVGA3dCmdDestroyGBShader {
 #include "vmware_pack_end.h"
 SVGA3dCmdDestroyGBShader;   /* SVGA_3D_CMD_DESTROY_GB_SHADER */
 
-
-typedef
-#include "vmware_pack_begin.h"
-struct SVGA3dCmdBindGBShaderConsts {
-   uint32 cid;
-   SVGA3dShaderType shaderType;
-   SVGA3dShaderConstType shaderConstType;
-   uint32 sid;
-}
-#include "vmware_pack_end.h"
-SVGA3dCmdBindGBShaderConsts;   /* SVGA_3D_CMD_BIND_SHADERCONSTS */
-
 typedef
 #include "vmware_pack_begin.h"
 struct {
@@ -1553,7 +1951,7 @@ typedef
 #include "vmware_pack_begin.h"
 struct {
    SVGAMobId mobid;
-   uint32 fbOffset;
+   uint32 mustBeZero;
    uint32 initialized;
 }
 #include "vmware_pack_end.h"
@@ -1649,6 +2047,6 @@ struct {
    uint32 mobOffset;
 }
 #include "vmware_pack_end.h"
-SVGA3dCmdGBMobFence;  /* SVGA_3D_CMD_GB_MOB_FENCE*/
+SVGA3dCmdGBMobFence;  /* SVGA_3D_CMD_GB_MOB_FENCE */
 
-#endif // _SVGA3D_CMD_H_
+#endif /* _SVGA3D_CMD_H_ */
index 915f3c7..ade210b 100644
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+ * Copyright 1998-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -223,9 +223,230 @@ typedef enum {
     */
    SVGA3D_DEVCAP_TS_COLOR_KEY                      = 93, /* boolean */
 
+   /*
+    * Deprecated.
+    */
+   SVGA3D_DEVCAP_DEAD2                             = 94,
+
+   /*
+    * Does the device support the DX commands?
+    */
+   SVGA3D_DEVCAP_DX                                = 95,
+
+   /*
+    * What is the maximum size of a texture array?
+    *
+    * (Even if this cap is zero, cubemaps are still allowed.)
+    */
+   SVGA3D_DEVCAP_MAX_TEXTURE_ARRAY_SIZE            = 96,
+
+   /*
+    * What is the maximum number of vertex buffers that can
+    * be used in the DXContext inputAssembly?
+    */
+   SVGA3D_DEVCAP_DX_MAX_VERTEXBUFFERS              = 97,
+
+   /*
+    * What is the maximum number of constant buffers
+    * that can be expected to work correctly with a
+    * DX context?
+    */
+   SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS           = 98,
+
+   /*
+    * Does the device support provoking vertex control?
+    * If zero, the first vertex will always be the provoking vertex.
+    */
+   SVGA3D_DEVCAP_DX_PROVOKING_VERTEX               = 99,
+
+   SVGA3D_DEVCAP_DXFMT_X8R8G8B8                    = 100,
+   SVGA3D_DEVCAP_DXFMT_A8R8G8B8                    = 101,
+   SVGA3D_DEVCAP_DXFMT_R5G6B5                      = 102,
+   SVGA3D_DEVCAP_DXFMT_X1R5G5B5                    = 103,
+   SVGA3D_DEVCAP_DXFMT_A1R5G5B5                    = 104,
+   SVGA3D_DEVCAP_DXFMT_A4R4G4B4                    = 105,
+   SVGA3D_DEVCAP_DXFMT_Z_D32                       = 106,
+   SVGA3D_DEVCAP_DXFMT_Z_D16                       = 107,
+   SVGA3D_DEVCAP_DXFMT_Z_D24S8                     = 108,
+   SVGA3D_DEVCAP_DXFMT_Z_D15S1                     = 109,
+   SVGA3D_DEVCAP_DXFMT_LUMINANCE8                  = 110,
+   SVGA3D_DEVCAP_DXFMT_LUMINANCE4_ALPHA4           = 111,
+   SVGA3D_DEVCAP_DXFMT_LUMINANCE16                 = 112,
+   SVGA3D_DEVCAP_DXFMT_LUMINANCE8_ALPHA8           = 113,
+   SVGA3D_DEVCAP_DXFMT_DXT1                        = 114,
+   SVGA3D_DEVCAP_DXFMT_DXT2                        = 115,
+   SVGA3D_DEVCAP_DXFMT_DXT3                        = 116,
+   SVGA3D_DEVCAP_DXFMT_DXT4                        = 117,
+   SVGA3D_DEVCAP_DXFMT_DXT5                        = 118,
+   SVGA3D_DEVCAP_DXFMT_BUMPU8V8                    = 119,
+   SVGA3D_DEVCAP_DXFMT_BUMPL6V5U5                  = 120,
+   SVGA3D_DEVCAP_DXFMT_BUMPX8L8V8U8                = 121,
+   SVGA3D_DEVCAP_DXFMT_FORMAT_DEAD1                = 122,
+   SVGA3D_DEVCAP_DXFMT_ARGB_S10E5                  = 123,
+   SVGA3D_DEVCAP_DXFMT_ARGB_S23E8                  = 124,
+   SVGA3D_DEVCAP_DXFMT_A2R10G10B10                 = 125,
+   SVGA3D_DEVCAP_DXFMT_V8U8                        = 126,
+   SVGA3D_DEVCAP_DXFMT_Q8W8V8U8                    = 127,
+   SVGA3D_DEVCAP_DXFMT_CxV8U8                      = 128,
+   SVGA3D_DEVCAP_DXFMT_X8L8V8U8                    = 129,
+   SVGA3D_DEVCAP_DXFMT_A2W10V10U10                 = 130,
+   SVGA3D_DEVCAP_DXFMT_ALPHA8                      = 131,
+   SVGA3D_DEVCAP_DXFMT_R_S10E5                     = 132,
+   SVGA3D_DEVCAP_DXFMT_R_S23E8                     = 133,
+   SVGA3D_DEVCAP_DXFMT_RG_S10E5                    = 134,
+   SVGA3D_DEVCAP_DXFMT_RG_S23E8                    = 135,
+   SVGA3D_DEVCAP_DXFMT_BUFFER                      = 136,
+   SVGA3D_DEVCAP_DXFMT_Z_D24X8                     = 137,
+   SVGA3D_DEVCAP_DXFMT_V16U16                      = 138,
+   SVGA3D_DEVCAP_DXFMT_G16R16                      = 139,
+   SVGA3D_DEVCAP_DXFMT_A16B16G16R16                = 140,
+   SVGA3D_DEVCAP_DXFMT_UYVY                        = 141,
+   SVGA3D_DEVCAP_DXFMT_YUY2                        = 142,
+   SVGA3D_DEVCAP_DXFMT_NV12                        = 143,
+   SVGA3D_DEVCAP_DXFMT_AYUV                        = 144,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS       = 145,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT           = 146,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT           = 147,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32_TYPELESS          = 148,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32_FLOAT             = 149,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32_UINT              = 150,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32_SINT              = 151,
+   SVGA3D_DEVCAP_DXFMT_R16G16B16A16_TYPELESS       = 152,
+   SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UINT           = 153,
+   SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SNORM          = 154,
+   SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SINT           = 155,
+   SVGA3D_DEVCAP_DXFMT_R32G32_TYPELESS             = 156,
+   SVGA3D_DEVCAP_DXFMT_R32G32_UINT                 = 157,
+   SVGA3D_DEVCAP_DXFMT_R32G32_SINT                 = 158,
+   SVGA3D_DEVCAP_DXFMT_R32G8X24_TYPELESS           = 159,
+   SVGA3D_DEVCAP_DXFMT_D32_FLOAT_S8X24_UINT        = 160,
+   SVGA3D_DEVCAP_DXFMT_R32_FLOAT_X8X24_TYPELESS    = 161,
+   SVGA3D_DEVCAP_DXFMT_X32_TYPELESS_G8X24_UINT     = 162,
+   SVGA3D_DEVCAP_DXFMT_R10G10B10A2_TYPELESS        = 163,
+   SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UINT            = 164,
+   SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT             = 165,
+   SVGA3D_DEVCAP_DXFMT_R8G8B8A8_TYPELESS           = 166,
+   SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM              = 167,
+   SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM_SRGB         = 168,
+   SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UINT               = 169,
+   SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SINT               = 170,
+   SVGA3D_DEVCAP_DXFMT_R16G16_TYPELESS             = 171,
+   SVGA3D_DEVCAP_DXFMT_R16G16_UINT                 = 172,
+   SVGA3D_DEVCAP_DXFMT_R16G16_SINT                 = 173,
+   SVGA3D_DEVCAP_DXFMT_R32_TYPELESS                = 174,
+   SVGA3D_DEVCAP_DXFMT_D32_FLOAT                   = 175,
+   SVGA3D_DEVCAP_DXFMT_R32_UINT                    = 176,
+   SVGA3D_DEVCAP_DXFMT_R32_SINT                    = 177,
+   SVGA3D_DEVCAP_DXFMT_R24G8_TYPELESS              = 178,
+   SVGA3D_DEVCAP_DXFMT_D24_UNORM_S8_UINT           = 179,
+   SVGA3D_DEVCAP_DXFMT_R24_UNORM_X8_TYPELESS       = 180,
+   SVGA3D_DEVCAP_DXFMT_X24_TYPELESS_G8_UINT        = 181,
+   SVGA3D_DEVCAP_DXFMT_R8G8_TYPELESS               = 182,
+   SVGA3D_DEVCAP_DXFMT_R8G8_UNORM                  = 183,
+   SVGA3D_DEVCAP_DXFMT_R8G8_UINT                   = 184,
+   SVGA3D_DEVCAP_DXFMT_R8G8_SINT                   = 185,
+   SVGA3D_DEVCAP_DXFMT_R16_TYPELESS                = 186,
+   SVGA3D_DEVCAP_DXFMT_R16_UNORM                   = 187,
+   SVGA3D_DEVCAP_DXFMT_R16_UINT                    = 188,
+   SVGA3D_DEVCAP_DXFMT_R16_SNORM                   = 189,
+   SVGA3D_DEVCAP_DXFMT_R16_SINT                    = 190,
+   SVGA3D_DEVCAP_DXFMT_R8_TYPELESS                 = 191,
+   SVGA3D_DEVCAP_DXFMT_R8_UNORM                    = 192,
+   SVGA3D_DEVCAP_DXFMT_R8_UINT                     = 193,
+   SVGA3D_DEVCAP_DXFMT_R8_SNORM                    = 194,
+   SVGA3D_DEVCAP_DXFMT_R8_SINT                     = 195,
+   SVGA3D_DEVCAP_DXFMT_P8                          = 196,
+   SVGA3D_DEVCAP_DXFMT_R9G9B9E5_SHAREDEXP          = 197,
+   SVGA3D_DEVCAP_DXFMT_R8G8_B8G8_UNORM             = 198,
+   SVGA3D_DEVCAP_DXFMT_G8R8_G8B8_UNORM             = 199,
+   SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS                = 200,
+   SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB              = 201,
+   SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS                = 202,
+   SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB              = 203,
+   SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS                = 204,
+   SVGA3D_DEVCAP_DXFMT_BC3_UNORM_SRGB              = 205,
+   SVGA3D_DEVCAP_DXFMT_BC4_TYPELESS                = 206,
+   SVGA3D_DEVCAP_DXFMT_ATI1                        = 207,
+   SVGA3D_DEVCAP_DXFMT_BC4_SNORM                   = 208,
+   SVGA3D_DEVCAP_DXFMT_BC5_TYPELESS                = 209,
+   SVGA3D_DEVCAP_DXFMT_ATI2                        = 210,
+   SVGA3D_DEVCAP_DXFMT_BC5_SNORM                   = 211,
+   SVGA3D_DEVCAP_DXFMT_R10G10B10_XR_BIAS_A2_UNORM  = 212,
+   SVGA3D_DEVCAP_DXFMT_B8G8R8A8_TYPELESS           = 213,
+   SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM_SRGB         = 214,
+   SVGA3D_DEVCAP_DXFMT_B8G8R8X8_TYPELESS           = 215,
+   SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM_SRGB         = 216,
+   SVGA3D_DEVCAP_DXFMT_Z_DF16                      = 217,
+   SVGA3D_DEVCAP_DXFMT_Z_DF24                      = 218,
+   SVGA3D_DEVCAP_DXFMT_Z_D24S8_INT                 = 219,
+   SVGA3D_DEVCAP_DXFMT_YV12                        = 220,
+   SVGA3D_DEVCAP_DXFMT_R32G32B32A32_FLOAT          = 221,
+   SVGA3D_DEVCAP_DXFMT_R16G16B16A16_FLOAT          = 222,
+   SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UNORM          = 223,
+   SVGA3D_DEVCAP_DXFMT_R32G32_FLOAT                = 224,
+   SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UNORM           = 225,
+   SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SNORM              = 226,
+   SVGA3D_DEVCAP_DXFMT_R16G16_FLOAT                = 227,
+   SVGA3D_DEVCAP_DXFMT_R16G16_UNORM                = 228,
+   SVGA3D_DEVCAP_DXFMT_R16G16_SNORM                = 229,
+   SVGA3D_DEVCAP_DXFMT_R32_FLOAT                   = 230,
+   SVGA3D_DEVCAP_DXFMT_R8G8_SNORM                  = 231,
+   SVGA3D_DEVCAP_DXFMT_R16_FLOAT                   = 232,
+   SVGA3D_DEVCAP_DXFMT_D16_UNORM                   = 233,
+   SVGA3D_DEVCAP_DXFMT_A8_UNORM                    = 234,
+   SVGA3D_DEVCAP_DXFMT_BC1_UNORM                   = 235,
+   SVGA3D_DEVCAP_DXFMT_BC2_UNORM                   = 236,
+   SVGA3D_DEVCAP_DXFMT_BC3_UNORM                   = 237,
+   SVGA3D_DEVCAP_DXFMT_B5G6R5_UNORM                = 238,
+   SVGA3D_DEVCAP_DXFMT_B5G5R5A1_UNORM              = 239,
+   SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM              = 240,
+   SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM              = 241,
+   SVGA3D_DEVCAP_DXFMT_BC4_UNORM                   = 242,
+   SVGA3D_DEVCAP_DXFMT_BC5_UNORM                   = 243,
+
    SVGA3D_DEVCAP_MAX                       /* This must be the last index. */
 } SVGA3dDevCapIndex;
 
+/*
+ * Bit definitions for DXFMT devcaps
+ *
+ * SUPPORTED: Can the format be defined?
+ * SHADER_SAMPLE: Can the format be sampled from a shader?
+ * COLOR_RENDERTARGET: Can the format be a color render target?
+ * DEPTH_RENDERTARGET: Can the format be a depth render target?
+ * BLENDABLE: Is the format blendable?
+ * MIPS: Does the format support mip levels?
+ * ARRAY: Does the format support texture arrays?
+ * VOLUME: Does the format support volume (3D) textures?
+ * DX_VERTEX_BUFFER: Can the format be used as a DX vertex buffer?
+ * MULTISAMPLE_2: Does the format support 2x multisample?
+ * MULTISAMPLE_4: Does the format support 4x multisample?
+ * MULTISAMPLE_8: Does the format support 8x multisample?
+ */
+#define SVGA3D_DXFMT_SUPPORTED                (1 <<  0)
+#define SVGA3D_DXFMT_SHADER_SAMPLE            (1 <<  1)
+#define SVGA3D_DXFMT_COLOR_RENDERTARGET       (1 <<  2)
+#define SVGA3D_DXFMT_DEPTH_RENDERTARGET       (1 <<  3)
+#define SVGA3D_DXFMT_BLENDABLE                (1 <<  4)
+#define SVGA3D_DXFMT_MIPS                     (1 <<  5)
+#define SVGA3D_DXFMT_ARRAY                    (1 <<  6)
+#define SVGA3D_DXFMT_VOLUME                   (1 <<  7)
+#define SVGA3D_DXFMT_DX_VERTEX_BUFFER         (1 <<  8)
+#define SVGADX_DXFMT_MULTISAMPLE_2            (1 <<  9)
+#define SVGADX_DXFMT_MULTISAMPLE_4            (1 << 10)
+#define SVGADX_DXFMT_MULTISAMPLE_8            (1 << 11)
+#define SVGADX_DXFMT_MAX                      (1 << 12)
+
+/*
+ * Convenience mask for any multisample capability.
+ *
+ * The multisample bits imply both load and render capability.
+ */
+#define SVGA3D_DXFMT_MULTISAMPLE ( \
+           SVGADX_DXFMT_MULTISAMPLE_2 | \
+           SVGADX_DXFMT_MULTISAMPLE_4 | \
+           SVGADX_DXFMT_MULTISAMPLE_8 )
+
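
A minimal sketch of how a driver might test these bits, assuming a
hypothetical query_devcap() helper that returns the 32-bit devcap result
for a given index (the real query mechanism is host-specific):

/* Hypothetical helper; not part of this header. */
extern uint32 query_devcap(uint32 index);

static Bool
format_is_msaa_rendertarget(void)
{
   uint32 caps = query_devcap(SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM);

   /* The format must be defined, renderable, and support some MSAA mode. */
   return (caps & SVGA3D_DXFMT_SUPPORTED) &&
          (caps & SVGA3D_DXFMT_COLOR_RENDERTARGET) &&
          (caps & SVGA3D_DXFMT_MULTISAMPLE) != 0;
}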
 typedef union {
    Bool   b;
    uint32 u;
@@ -233,4 +454,4 @@ typedef union {
    float  f;
 } SVGA3dDevCapResult;
 
-#endif // _SVGA3D_DEVCAPS_H_
+#endif /* _SVGA3D_DEVCAPS_H_ */
diff --git a/src/gallium/drivers/svga/include/svga3d_dx.h b/src/gallium/drivers/svga/include/svga3d_dx.h
new file mode 100644 (file)
index 0000000..fce2b04
--- /dev/null
@@ -0,0 +1,1521 @@
+/**********************************************************
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_dx.h --
+ *
+ *       SVGA 3d hardware definitions for DX10 support.
+ */
+
+#ifndef _SVGA3D_DX_H_
+#define _SVGA3D_DX_H_
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_USERLEVEL
+#define INCLUDE_ALLOW_VMCORE
+#include "includeCheck.h"
+
+#include "svga3d_limits.h"
+
+#define SVGA3D_INPUT_MIN               0
+#define SVGA3D_INPUT_PER_VERTEX_DATA   0
+#define SVGA3D_INPUT_PER_INSTANCE_DATA 1
+#define SVGA3D_INPUT_MAX               2
+typedef uint32 SVGA3dInputClassification;
+
+#define SVGA3D_RESOURCE_TYPE_MIN      1
+#define SVGA3D_RESOURCE_BUFFER        1
+#define SVGA3D_RESOURCE_TEXTURE1D     2
+#define SVGA3D_RESOURCE_TEXTURE2D     3
+#define SVGA3D_RESOURCE_TEXTURE3D     4
+#define SVGA3D_RESOURCE_TEXTURECUBE   5
+#define SVGA3D_RESOURCE_TYPE_DX10_MAX 6
+#define SVGA3D_RESOURCE_BUFFEREX      6
+#define SVGA3D_RESOURCE_TYPE_MAX      7
+typedef uint32 SVGA3dResourceType;
+
+#define SVGA3D_DEPTH_WRITE_MASK_ZERO   0
+#define SVGA3D_DEPTH_WRITE_MASK_ALL    1
+typedef uint8 SVGA3dDepthWriteMask;
+
+#define SVGA3D_FILTER_MIP_LINEAR  (1 << 0)
+#define SVGA3D_FILTER_MAG_LINEAR  (1 << 2)
+#define SVGA3D_FILTER_MIN_LINEAR  (1 << 4)
+#define SVGA3D_FILTER_ANISOTROPIC (1 << 6)
+#define SVGA3D_FILTER_COMPARE     (1 << 7)
+typedef uint32 SVGA3dFilter;
+
+#define SVGA3D_CULL_INVALID 0
+#define SVGA3D_CULL_MIN     1
+#define SVGA3D_CULL_NONE    1
+#define SVGA3D_CULL_FRONT   2
+#define SVGA3D_CULL_BACK    3
+#define SVGA3D_CULL_MAX     4
+typedef uint8 SVGA3dCullMode;
+
+#define SVGA3D_COMPARISON_INVALID         0
+#define SVGA3D_COMPARISON_MIN             1
+#define SVGA3D_COMPARISON_NEVER           1
+#define SVGA3D_COMPARISON_LESS            2
+#define SVGA3D_COMPARISON_EQUAL           3
+#define SVGA3D_COMPARISON_LESS_EQUAL      4
+#define SVGA3D_COMPARISON_GREATER         5
+#define SVGA3D_COMPARISON_NOT_EQUAL       6
+#define SVGA3D_COMPARISON_GREATER_EQUAL   7
+#define SVGA3D_COMPARISON_ALWAYS          8
+#define SVGA3D_COMPARISON_MAX             9
+typedef uint8 SVGA3dComparisonFunc;
+
+#define SVGA3D_DX_MAX_VERTEXBUFFERS 32
+#define SVGA3D_DX_MAX_VERTEXINPUTREGISTERS 16
+#define SVGA3D_DX_MAX_SOTARGETS 4
+#define SVGA3D_DX_MAX_SRVIEWS 128
+#define SVGA3D_DX_MAX_CONSTBUFFERS 16
+#define SVGA3D_DX_MAX_SAMPLERS 16
+
+/* Id limits */
+static const uint32 SVGA3dBlendObjectCountPerContext = 4096;
+static const uint32 SVGA3dDepthStencilObjectCountPerContext = 4096;
+
+typedef uint32 SVGA3dSurfaceId;
+typedef uint32 SVGA3dShaderResourceViewId;
+typedef uint32 SVGA3dRenderTargetViewId;
+typedef uint32 SVGA3dDepthStencilViewId;
+
+typedef uint32 SVGA3dShaderId;
+typedef uint32 SVGA3dElementLayoutId;
+typedef uint32 SVGA3dSamplerId;
+typedef uint32 SVGA3dBlendStateId;
+typedef uint32 SVGA3dDepthStencilStateId;
+typedef uint32 SVGA3dRasterizerStateId;
+typedef uint32 SVGA3dQueryId;
+typedef uint32 SVGA3dStreamOutputId;
+
+typedef union {
+   struct {
+      float r;
+      float g;
+      float b;
+      float a;
+   };
+
+   float value[4];
+} SVGA3dRGBAFloat;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 cid;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGAOTableDXContextEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineContext {
+   uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineContext;   /* SVGA_3D_CMD_DX_DEFINE_CONTEXT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyContext {
+   uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyContext;   /* SVGA_3D_CMD_DX_DESTROY_CONTEXT */
+
+/*
+ * Bind a DX context.
+ *
+ * validContents should be set to 0 for new contexts,
+ * and 1 if this is an old context which is getting paged
+ * back on to the device.
+ *
+ * For new contexts, it is recommended that the driver
+ * issue commands to initialize all interesting state
+ * prior to rendering.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindContext {
+   uint32 cid;
+   SVGAMobId mobid;
+   uint32 validContents;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindContext;   /* SVGA_3D_CMD_DX_BIND_CONTEXT */
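
To make the validContents rule above concrete, a sketch of filling the
command body (cid, mobid and is_new_context are hypothetical driver state;
command submission itself is elided):

SVGA3dCmdDXBindContext body;

body.cid = cid;
body.mobid = mobid;
/* 0 for a freshly defined context, 1 when paging an old one back in. */
body.validContents = is_new_context ? 0 : 1;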
+
+/*
+ * Readback a DX context.
+ * (Request that the device flush the contents back into guest memory.)
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackContext {
+   uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackContext;   /* SVGA_3D_CMD_DX_READBACK_CONTEXT */
+
+/*
+ * Invalidate a guest-backed context.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXInvalidateContext {
+   uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXInvalidateContext;   /* SVGA_3D_CMD_DX_INVALIDATE_CONTEXT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dReplyFormatData {
+   uint32 formatSupport;
+   uint32 msaa2xQualityLevels:5;
+   uint32 msaa4xQualityLevels:5;
+   uint32 msaa8xQualityLevels:5;
+   uint32 msaa16xQualityLevels:5;
+   uint32 msaa32xQualityLevels:5;
+   uint32 pad:7;
+}
+#include "vmware_pack_end.h"
+SVGA3dReplyFormatData;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetSingleConstantBuffer {
+   uint32 slot;
+   SVGA3dShaderType type;
+   SVGA3dSurfaceId sid;
+   uint32 offsetInBytes;
+   uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetSingleConstantBuffer;
+/* SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetShaderResources {
+   uint32 startView;
+   SVGA3dShaderType type;
+
+   /*
+    * Followed by a variable number of SVGA3dShaderResourceViewId's.
+    */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetShaderResources; /* SVGA_3D_CMD_DX_SET_SHADER_RESOURCES */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetShader {
+   SVGA3dShaderId shaderId;
+   SVGA3dShaderType type;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetShader; /* SVGA_3D_CMD_DX_SET_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetSamplers {
+   uint32 startSampler;
+   SVGA3dShaderType type;
+
+   /*
+    * Followed by a variable number of SVGA3dSamplerId's.
+    */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetSamplers; /* SVGA_3D_CMD_DX_SET_SAMPLERS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDraw {
+   uint32 vertexCount;
+   uint32 startVertexLocation;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDraw; /* SVGA_3D_CMD_DX_DRAW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDrawIndexed {
+   uint32 indexCount;
+   uint32 startIndexLocation;
+   int32  baseVertexLocation;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDrawIndexed; /* SVGA_3D_CMD_DX_DRAW_INDEXED */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDrawInstanced {
+   uint32 vertexCountPerInstance;
+   uint32 instanceCount;
+   uint32 startVertexLocation;
+   uint32 startInstanceLocation;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDrawInstanced; /* SVGA_3D_CMD_DX_DRAW_INSTANCED */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDrawIndexedInstanced {
+   uint32 indexCountPerInstance;
+   uint32 instanceCount;
+   uint32 startIndexLocation;
+   int32  baseVertexLocation;
+   uint32 startInstanceLocation;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDrawIndexedInstanced; /* SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDrawAuto {
+   uint32 pad0;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDrawAuto; /* SVGA_3D_CMD_DX_DRAW_AUTO */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetInputLayout {
+   SVGA3dElementLayoutId elementLayoutId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetInputLayout; /* SVGA_3D_CMD_DX_SET_INPUT_LAYOUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dVertexBuffer {
+   SVGA3dSurfaceId sid;
+   uint32 stride;
+   uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGA3dVertexBuffer;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetVertexBuffers {
+   uint32 startBuffer;
+   /* Followed by a variable number of SVGA3dVertexBuffer's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetVertexBuffers; /* SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetIndexBuffer {
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetIndexBuffer; /* SVGA_3D_CMD_DX_SET_INDEX_BUFFER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetTopology {
+   SVGA3dPrimitiveType topology;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetTopology; /* SVGA_3D_CMD_DX_SET_TOPOLOGY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetRenderTargets {
+   SVGA3dDepthStencilViewId depthStencilViewId;
+   /* Followed by a variable number of SVGA3dRenderTargetViewId's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetRenderTargets; /* SVGA_3D_CMD_DX_SET_RENDERTARGETS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetBlendState {
+   SVGA3dBlendStateId blendId;
+   float blendFactor[4];
+   uint32 sampleMask;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetBlendState; /* SVGA_3D_CMD_DX_SET_BLEND_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetDepthStencilState {
+   SVGA3dDepthStencilStateId depthStencilId;
+   uint32 stencilRef;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetDepthStencilState; /* SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetRasterizerState {
+   SVGA3dRasterizerStateId rasterizerId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetRasterizerState; /* SVGA_3D_CMD_DX_SET_RASTERIZER_STATE */
+
+#define SVGA3D_DXQUERY_FLAG_PREDICATEHINT (1 << 0)
+typedef uint32 SVGA3dDXQueryFlags;
+
+/*
+ * The SVGADXQueryDeviceState and SVGADXQueryDeviceBits are used by the device
+ * to track query state transitions, but are not intended to be used by the
+ * driver.
+ */
+#define SVGADX_QDSTATE_INVALID   ((uint8)-1) /* Query has no state */
+#define SVGADX_QDSTATE_MIN       0
+#define SVGADX_QDSTATE_IDLE      0   /* Query hasn't started yet */
+#define SVGADX_QDSTATE_ACTIVE    1   /* Query is actively gathering data */
+#define SVGADX_QDSTATE_PENDING   2   /* Query is waiting for results */
+#define SVGADX_QDSTATE_FINISHED  3   /* Query has completed */
+#define SVGADX_QDSTATE_MAX       4
+typedef uint8 SVGADXQueryDeviceState;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dQueryTypeUint8 type;
+   uint16 pad0;
+   SVGADXQueryDeviceState state;
+   SVGA3dDXQueryFlags flags;
+   SVGAMobId mobid;
+   uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXQueryEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineQuery {
+   SVGA3dQueryId queryId;
+   SVGA3dQueryType type;
+   SVGA3dDXQueryFlags flags;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineQuery; /* SVGA_3D_CMD_DX_DEFINE_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyQuery {
+   SVGA3dQueryId queryId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyQuery; /* SVGA_3D_CMD_DX_DESTROY_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindQuery {
+   SVGA3dQueryId queryId;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindQuery; /* SVGA_3D_CMD_DX_BIND_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetQueryOffset {
+   SVGA3dQueryId queryId;
+   uint32 mobOffset;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetQueryOffset; /* SVGA_3D_CMD_DX_SET_QUERY_OFFSET */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBeginQuery {
+   SVGA3dQueryId queryId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBeginQuery; /* SVGA_3D_CMD_DX_QUERY_BEGIN */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXEndQuery {
+   SVGA3dQueryId queryId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXEndQuery; /* SVGA_3D_CMD_DX_QUERY_END */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackQuery {
+   SVGA3dQueryId queryId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackQuery; /* SVGA_3D_CMD_DX_READBACK_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXMoveQuery {
+   SVGA3dQueryId queryId;
+   SVGAMobId mobid;
+   uint32 mobOffset;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXMoveQuery; /* SVGA_3D_CMD_DX_MOVE_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindAllQuery {
+   uint32 cid;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindAllQuery; /* SVGA_3D_CMD_DX_BIND_ALL_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackAllQuery {
+   uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackAllQuery; /* SVGA_3D_CMD_DX_READBACK_ALL_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetPredication {
+   SVGA3dQueryId queryId;
+   uint32 predicateValue;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetPredication; /* SVGA_3D_CMD_DX_SET_PREDICATION */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dDXSOState {
+   uint32 offset;       /* Starting offset */
+   uint32 intOffset;    /* Internal offset */
+   uint32 vertexCount;  /* Vertices written */
+   uint32 sizeInBytes;  /* Max bytes to write */
+}
+#include "vmware_pack_end.h"
+SVGA3dDXSOState;
+
+/* Set the offset field to this value to append SO values to the buffer */
+#define SVGA3D_DX_SO_OFFSET_APPEND ((uint32) ~0u)
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dSoTarget {
+   SVGA3dSurfaceId sid;
+   uint32 offset;
+   uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dSoTarget;
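
For illustration, a stream-output target bound so that new output appends
after whatever was previously written, per SVGA3D_DX_SO_OFFSET_APPEND above
(buffer_sid and buffer_size are hypothetical):

SVGA3dSoTarget target;

target.sid = buffer_sid;
target.offset = SVGA3D_DX_SO_OFFSET_APPEND;  /* continue after prior output */
target.sizeInBytes = buffer_size;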
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetSOTargets {
+   uint32 pad0;
+   /* Followed by a variable number of SVGA3dSoTarget's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetSOTargets; /* SVGA_3D_CMD_DX_SET_SOTARGETS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dViewport
+{
+   float x;
+   float y;
+   float width;
+   float height;
+   float minDepth;
+   float maxDepth;
+}
+#include "vmware_pack_end.h"
+SVGA3dViewport;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetViewports {
+   uint32 pad0;
+   /* Followed by a variable number of SVGA3dViewport's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetViewports; /* SVGA_3D_CMD_DX_SET_VIEWPORTS */
+
+#define SVGA3D_DX_MAX_VIEWPORTS  16
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetScissorRects {
+   uint32 pad0;
+   /* Followed by a variable number of SVGASignedRect's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetScissorRects; /* SVGA_3D_CMD_DX_SET_SCISSORRECTS */
+
+#define SVGA3D_DX_MAX_SCISSORRECTS  16
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXClearRenderTargetView {
+   SVGA3dRenderTargetViewId renderTargetViewId;
+   SVGA3dRGBAFloat rgba;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXClearRenderTargetView; /* SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXClearDepthStencilView {
+   uint16 flags;
+   uint16 stencil;
+   SVGA3dDepthStencilViewId depthStencilViewId;
+   float depth;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXClearDepthStencilView; /* SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXPredCopyRegion {
+   SVGA3dSurfaceId dstSid;
+   uint32 dstSubResource;
+   SVGA3dSurfaceId srcSid;
+   uint32 srcSubResource;
+   SVGA3dCopyBox box;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXPredCopyRegion;
+/* SVGA_3D_CMD_DX_PRED_COPY_REGION */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXPredCopy {
+   SVGA3dSurfaceId dstSid;
+   SVGA3dSurfaceId srcSid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXPredCopy; /* SVGA_3D_CMD_DX_PRED_COPY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBufferCopy {
+   SVGA3dSurfaceId dest;
+   SVGA3dSurfaceId src;
+   uint32 destX;
+   uint32 srcX;
+   uint32 width;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBufferCopy;
+/* SVGA_3D_CMD_DX_BUFFER_COPY */
+
+typedef uint32 SVGA3dDXStretchBltMode;
+#define SVGADX_STRETCHBLT_LINEAR         (1 << 0)
+#define SVGADX_STRETCHBLT_FORCE_SRC_SRGB (1 << 1)
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXStretchBlt {
+   SVGA3dSurfaceId srcSid;
+   uint32 srcSubResource;
+   SVGA3dSurfaceId dstSid;
+   uint32 destSubResource;
+   SVGA3dBox boxSrc;
+   SVGA3dBox boxDest;
+   SVGA3dDXStretchBltMode mode;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXStretchBlt; /* SVGA_3D_CMD_DX_STRETCHBLT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXGenMips {
+   SVGA3dShaderResourceViewId shaderResourceViewId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXGenMips; /* SVGA_3D_CMD_DX_GENMIPS */
+
+/*
+ * Defines a resource/DX surface.  Resources share the surfaceId namespace.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDefineGBSurface_v2 {
+   uint32 sid;
+   SVGA3dSurfaceFlags surfaceFlags;
+   SVGA3dSurfaceFormat format;
+   uint32 numMipLevels;
+   uint32 multisampleCount;
+   SVGA3dTextureFilter autogenFilter;
+   SVGA3dSize size;
+   uint32 arraySize;
+   uint32 pad;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDefineGBSurface_v2;   /* SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 */
+
+/*
+ * Update a sub-resource in a guest-backed resource.
+ * (Inform the device that the guest contents have been updated.)
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXUpdateSubResource {
+   SVGA3dSurfaceId sid;
+   uint32 subResource;
+   SVGA3dBox box;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXUpdateSubResource;   /* SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE */
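
An illustrative sketch (all values hypothetical) that marks a 64x64 region
of subresource 0 as updated, assuming SVGA3dBox carries an x/y/z origin and
w/h/d extent:

SVGA3dCmdDXUpdateSubResource body;

body.sid = sid;
body.subResource = 0;   /* mip 0 of array slice 0 */
body.box.x = 0;   body.box.y = 0;   body.box.z = 0;
body.box.w = 64;  body.box.h = 64;  body.box.d = 1;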
+
+/*
+ * Readback a subresource in a guest-backed resource.
+ * (Request the device to flush the dirty contents into the guest.)
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackSubResource {
+   SVGA3dSurfaceId sid;
+   uint32 subResource;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackSubResource;   /* SVGA_3D_CMD_DX_READBACK_SUBRESOURCE */
+
+/*
+ * Invalidate an image in a guest-backed surface.
+ * (Notify the device that the contents can be lost.)
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXInvalidateSubResource {
+   SVGA3dSurfaceId sid;
+   uint32 subResource;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXInvalidateSubResource;   /* SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE */
+
+
+/*
+ * Raw byte-wise transfer from a buffer surface into another surface
+ * of the requested box.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXTransferFromBuffer {
+   SVGA3dSurfaceId srcSid;
+   uint32 srcOffset;
+   uint32 srcPitch;
+   uint32 srcSlicePitch;
+   SVGA3dSurfaceId destSid;
+   uint32 destSubResource;
+   SVGA3dBox destBox;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXTransferFromBuffer;   /* SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER */
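
The source pitches describe the buffer layout in bytes. A sketch for a
tightly packed 2D source (width, height, bytes_per_pixel and the surface
ids are hypothetical):

SVGA3dCmdDXTransferFromBuffer body;

body.srcSid = buffer_sid;
body.srcOffset = 0;
body.srcPitch = width * bytes_per_pixel;      /* one row of texels */
body.srcSlicePitch = body.srcPitch * height;  /* one full 2D slice */
body.destSid = texture_sid;
body.destSubResource = 0;
body.destBox.x = 0;  body.destBox.y = 0;  body.destBox.z = 0;
body.destBox.w = width;  body.destBox.h = height;  body.destBox.d = 1;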
+
+
+/*
+ * Raw byte-wise transfer from a buffer surface into another surface
+ * of the requested box.  Supported if SVGA3D_DEVCAP_DXCONTEXT is set.
+ * The context is implied from the command buffer header.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXPredTransferFromBuffer {
+   SVGA3dSurfaceId srcSid;
+   uint32 srcOffset;
+   uint32 srcPitch;
+   uint32 srcSlicePitch;
+   SVGA3dSurfaceId destSid;
+   uint32 destSubResource;
+   SVGA3dBox destBox;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXPredTransferFromBuffer;
+/* SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER */
+
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSurfaceCopyAndReadback {
+   SVGA3dSurfaceId srcSid;
+   SVGA3dSurfaceId destSid;
+   SVGA3dCopyBox box;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSurfaceCopyAndReadback;
+/* SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXHint {
+   uint32 hintId;
+
+   /*
+    * Followed by variable sized data depending on the hintId.
+    */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXHint;
+/* SVGA_3D_CMD_DX_HINT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBufferUpdate {
+   SVGA3dSurfaceId sid;
+   uint32 x;
+   uint32 width;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBufferUpdate;
+/* SVGA_3D_CMD_DX_BUFFER_UPDATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetConstantBufferOffset {
+   uint32 slot;
+   uint32 offsetInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetConstantBufferOffset;
+
+typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetVSConstantBufferOffset;
+/* SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET */
+
+typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetPSConstantBufferOffset;
+/* SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET */
+
+typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetGSConstantBufferOffset;
+/* SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET */
+
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   union {
+      struct {
+         uint32 firstElement;
+         uint32 numElements;
+         uint32 pad0;
+         uint32 pad1;
+      } buffer;
+      struct {
+         uint32 mostDetailedMip;
+         uint32 firstArraySlice;
+         uint32 mipLevels;
+         uint32 arraySize;
+      } tex;
+      struct {
+         uint32 firstElement;
+         uint32 numElements;
+         uint32 flags;
+         uint32 pad0;
+      } bufferex;
+   };
+}
+#include "vmware_pack_end.h"
+SVGA3dShaderResourceViewDesc;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDimension;
+   SVGA3dShaderResourceViewDesc desc;
+   uint32 pad;
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXSRViewEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineShaderResourceView {
+   SVGA3dShaderResourceViewId shaderResourceViewId;
+
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDimension;
+
+   SVGA3dShaderResourceViewDesc desc;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineShaderResourceView;
+/* SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyShaderResourceView {
+   SVGA3dShaderResourceViewId shaderResourceViewId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyShaderResourceView;
+/* SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dRenderTargetViewDesc {
+   union {
+      struct {
+         uint32 firstElement;
+         uint32 numElements;
+      } buffer;
+      struct {
+         uint32 mipSlice;
+         uint32 firstArraySlice;
+         uint32 arraySize;
+      } tex;                    /* 1d, 2d, cube */
+      struct {
+         uint32 mipSlice;
+         uint32 firstW;
+         uint32 wSize;
+      } tex3D;
+   };
+}
+#include "vmware_pack_end.h"
+SVGA3dRenderTargetViewDesc;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDimension;
+   SVGA3dRenderTargetViewDesc desc;
+   uint32 pad[2];
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXRTViewEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineRenderTargetView {
+   SVGA3dRenderTargetViewId renderTargetViewId;
+
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDimension;
+
+   SVGA3dRenderTargetViewDesc desc;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineRenderTargetView;
+/* SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyRenderTargetView {
+   SVGA3dRenderTargetViewId renderTargetViewId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyRenderTargetView;
+/* SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW */
+
+#define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_DEPTH   0x01
+#define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_STENCIL 0x02
+#define SVGA3D_DXDSVIEW_CREATE_FLAG_MASK         0x03
+typedef uint8 SVGA3DCreateDSViewFlags;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDimension;
+   uint32 mipSlice;
+   uint32 firstArraySlice;
+   uint32 arraySize;
+   SVGA3DCreateDSViewFlags flags;
+   uint8 pad0;
+   uint16 pad1;
+   uint32 pad2;
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXDSViewEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineDepthStencilView {
+   SVGA3dDepthStencilViewId depthStencilViewId;
+
+   SVGA3dSurfaceId sid;
+   SVGA3dSurfaceFormat format;
+   SVGA3dResourceType resourceDimension;
+   uint32 mipSlice;
+   uint32 firstArraySlice;
+   uint32 arraySize;
+   SVGA3DCreateDSViewFlags flags;
+   uint8 pad0;
+   uint16 pad1;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineDepthStencilView;
+/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyDepthStencilView {
+   SVGA3dDepthStencilViewId depthStencilViewId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyDepthStencilView;
+/* SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dInputElementDesc {
+   uint32 inputSlot;
+   uint32 alignedByteOffset;
+   SVGA3dSurfaceFormat format;
+   SVGA3dInputClassification inputSlotClass;
+   uint32 instanceDataStepRate;
+   uint32 inputRegister;
+}
+#include "vmware_pack_end.h"
+SVGA3dInputElementDesc;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   /*
+    * XXX: How many of these can there be?
+    */
+   uint32 elid;
+   uint32 numDescs;
+   SVGA3dInputElementDesc desc[32];
+   uint32 pad[62];
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXElementLayoutEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineElementLayout {
+   SVGA3dElementLayoutId elementLayoutId;
+   /* Followed by a variable number of SVGA3dInputElementDesc's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineElementLayout;
+/* SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyElementLayout {
+   SVGA3dElementLayoutId elementLayoutId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyElementLayout;
+/* SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT */
+
+
+#define SVGA3D_DX_MAX_RENDER_TARGETS 8
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dDXBlendStatePerRT {
+   uint8 blendEnable;
+   uint8 srcBlend;
+   uint8 destBlend;
+   uint8 blendOp;
+   uint8 srcBlendAlpha;
+   uint8 destBlendAlpha;
+   uint8 blendOpAlpha;
+   uint8 renderTargetWriteMask;
+   uint8 logicOpEnable;
+   uint8 logicOp;
+   uint16 pad0;
+}
+#include "vmware_pack_end.h"
+SVGA3dDXBlendStatePerRT;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint8 alphaToCoverageEnable;
+   uint8 independentBlendEnable;
+   uint16 pad0;
+   SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS];
+   uint32 pad1[7];
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXBlendStateEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineBlendState {
+   SVGA3dBlendStateId blendId;
+   uint8 alphaToCoverageEnable;
+   uint8 independentBlendEnable;
+   uint16 pad0;
+   SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS];
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineBlendState; /* SVGA_3D_CMD_DX_DEFINE_BLEND_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyBlendState {
+   SVGA3dBlendStateId blendId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyBlendState; /* SVGA_3D_CMD_DX_DESTROY_BLEND_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint8 depthEnable;
+   SVGA3dDepthWriteMask depthWriteMask;
+   SVGA3dComparisonFunc depthFunc;
+   uint8 stencilEnable;
+   uint8 frontEnable;
+   uint8 backEnable;
+   uint8 stencilReadMask;
+   uint8 stencilWriteMask;
+
+   uint8 frontStencilFailOp;
+   uint8 frontStencilDepthFailOp;
+   uint8 frontStencilPassOp;
+   SVGA3dComparisonFunc frontStencilFunc;
+
+   uint8 backStencilFailOp;
+   uint8 backStencilDepthFailOp;
+   uint8 backStencilPassOp;
+   SVGA3dComparisonFunc backStencilFunc;
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXDepthStencilEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineDepthStencilState {
+   SVGA3dDepthStencilStateId depthStencilId;
+
+   uint8 depthEnable;
+   SVGA3dDepthWriteMask depthWriteMask;
+   SVGA3dComparisonFunc depthFunc;
+   uint8 stencilEnable;
+   uint8 frontEnable;
+   uint8 backEnable;
+   uint8 stencilReadMask;
+   uint8 stencilWriteMask;
+
+   uint8 frontStencilFailOp;
+   uint8 frontStencilDepthFailOp;
+   uint8 frontStencilPassOp;
+   SVGA3dComparisonFunc frontStencilFunc;
+
+   uint8 backStencilFailOp;
+   uint8 backStencilDepthFailOp;
+   uint8 backStencilPassOp;
+   SVGA3dComparisonFunc backStencilFunc;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineDepthStencilState;
+/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyDepthStencilState {
+   SVGA3dDepthStencilStateId depthStencilId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyDepthStencilState;
+/* SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint8 fillMode;
+   SVGA3dCullMode cullMode;
+   uint8 frontCounterClockwise;
+   uint8 provokingVertexLast;
+   int32 depthBias;
+   float depthBiasClamp;
+   float slopeScaledDepthBias;
+   uint8 depthClipEnable;
+   uint8 scissorEnable;
+   uint8 multisampleEnable;
+   uint8 antialiasedLineEnable;
+   float lineWidth;
+   uint8 lineStippleEnable;
+   uint8 lineStippleFactor;
+   uint16 lineStipplePattern;
+   uint32 forcedSampleCount;
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXRasterizerStateEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineRasterizerState {
+   SVGA3dRasterizerStateId rasterizerId;
+
+   uint8 fillMode;
+   SVGA3dCullMode cullMode;
+   uint8 frontCounterClockwise;
+   uint8 provokingVertexLast;
+   int32 depthBias;
+   float depthBiasClamp;
+   float slopeScaledDepthBias;
+   uint8 depthClipEnable;
+   uint8 scissorEnable;
+   uint8 multisampleEnable;
+   uint8 antialiasedLineEnable;
+   float lineWidth;
+   uint8 lineStippleEnable;
+   uint8 lineStippleFactor;
+   uint16 lineStipplePattern;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineRasterizerState;
+/* SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyRasterizerState {
+   SVGA3dRasterizerStateId rasterizerId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyRasterizerState;
+/* SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   SVGA3dFilter filter;
+   uint8 addressU;
+   uint8 addressV;
+   uint8 addressW;
+   uint8 pad0;
+   float mipLODBias;
+   uint8 maxAnisotropy;
+   SVGA3dComparisonFunc comparisonFunc;
+   uint16 pad1;
+   SVGA3dRGBAFloat borderColor;
+   float minLOD;
+   float maxLOD;
+   uint32 pad2[6];
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXSamplerEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineSamplerState {
+   SVGA3dSamplerId samplerId;
+   SVGA3dFilter filter;
+   uint8 addressU;
+   uint8 addressV;
+   uint8 addressW;
+   uint8 pad0;
+   float mipLODBias;
+   uint8 maxAnisotropy;
+   SVGA3dComparisonFunc comparisonFunc;
+   uint16 pad1;
+   SVGA3dRGBAFloat borderColor;
+   float minLOD;
+   float maxLOD;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineSamplerState; /* SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroySamplerState {
+   SVGA3dSamplerId samplerId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroySamplerState; /* SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineShader {
+   SVGA3dShaderId shaderId;
+   SVGA3dShaderType type;
+   uint32 sizeInBytes; /* Number of bytes of shader text. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineShader; /* SVGA_3D_CMD_DX_DEFINE_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGACOTableDXShaderEntry {
+   SVGA3dShaderType type;
+   uint32 sizeInBytes;
+   uint32 offsetInBytes;
+   SVGAMobId mobid;
+   uint32 pad[4];
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXShaderEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyShader {
+   SVGA3dShaderId shaderId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyShader; /* SVGA_3D_CMD_DX_DESTROY_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindShader {
+   uint32 cid;
+   uint32 shid;
+   SVGAMobId mobid;
+   uint32 offsetInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindShader;   /* SVGA_3D_CMD_DX_BIND_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindAllShader {
+   uint32 cid;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindAllShader;   /* SVGA_3D_CMD_DX_BIND_ALL_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXCondBindAllShader {
+   uint32 cid;
+   SVGAMobId testMobid;
+   SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXCondBindAllShader;   /* SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER */
+
+/*
+ * The maximum number of streamout decl's in each streamout entry.
+ */
+#define SVGA3D_MAX_STREAMOUT_DECLS 64
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dStreamOutputDeclarationEntry {
+   uint32 outputSlot;
+   uint32 registerIndex;
+   uint8  registerMask;
+   uint8  pad0;
+   uint16 pad1;
+   uint32 stream;
+}
+#include "vmware_pack_end.h"
+SVGA3dStreamOutputDeclarationEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGACOTableDXStreamOutputEntry {
+   uint32 numOutputStreamEntries;
+   SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS];
+   uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS];
+   uint32 rasterizedStream;
+   uint32 pad[250];
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXStreamOutputEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineStreamOutput {
+   SVGA3dStreamOutputId soid;
+   uint32 numOutputStreamEntries;
+   SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS];
+   uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS];
+   uint32 rasterizedStream;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineStreamOutput; /* SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyStreamOutput {
+   SVGA3dStreamOutputId soid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyStreamOutput; /* SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetStreamOutput {
+   SVGA3dStreamOutputId soid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetStreamOutput; /* SVGA_3D_CMD_DX_SET_STREAMOUTPUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint64 value;
+   uint32 mobId;
+   uint32 mobOffset;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXMobFence64;  /* SVGA_3D_CMD_DX_MOB_FENCE_64 */
+
+/*
+ * SVGA3dCmdSetCOTable --
+ *
+ * This command allows the guest to bind a mob to a context-object table.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetCOTable {
+   uint32 cid;
+   uint32 mobid;
+   SVGACOTableType type;
+   uint32 validSizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetCOTable; /* SVGA_3D_CMD_DX_SET_COTABLE */
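
A sketch of binding a mob as the render-target-view table (cid, mobid and
num_entries are hypothetical, and SVGA_COTABLE_RTVIEW is assumed to be a
member of SVGACOTableType from svga3d_types.h):

SVGA3dCmdDXSetCOTable body;

body.cid = cid;
body.mobid = mobid;
body.type = SVGA_COTABLE_RTVIEW;
body.validSizeInBytes = num_entries * sizeof(SVGACOTableDXRTViewEntry);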
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackCOTable {
+   uint32 cid;
+   SVGACOTableType type;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackCOTable; /* SVGA_3D_CMD_DX_READBACK_COTABLE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCOTableData {
+   uint32 mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCOTableData;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dBufferBinding {
+   uint32 bufferId;
+   uint32 stride;
+   uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGA3dBufferBinding;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dConstantBufferBinding {
+   uint32 sid;
+   uint32 offsetInBytes;
+   uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dConstantBufferBinding;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGADXInputAssemblyMobFormat {
+   uint32 layoutId;
+   SVGA3dBufferBinding vertexBuffers[SVGA3D_DX_MAX_VERTEXBUFFERS];
+   uint32 indexBufferSid;
+   uint32 pad;
+   uint32 indexBufferOffset;
+   uint32 indexBufferFormat;
+   uint32 topology;
+}
+#include "vmware_pack_end.h"
+SVGADXInputAssemblyMobFormat;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGADXContextMobFormat {
+   SVGADXInputAssemblyMobFormat inputAssembly;
+
+   struct {
+      uint32 blendStateId;
+      uint32 blendFactor[4];
+      uint32 sampleMask;
+      uint32 depthStencilStateId;
+      uint32 stencilRef;
+      uint32 rasterizerStateId;
+      uint32 depthStencilViewId;
+      uint32 renderTargetViewIds[SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS];
+      uint32 unorderedAccessViewIds[SVGA3D_MAX_UAVIEWS];
+   } renderState;
+
+   struct {
+      uint32 targets[SVGA3D_DX_MAX_SOTARGETS];
+      uint32 soid;
+   } streamOut;
+   uint32 pad0[11];
+
+   uint8 numViewports;
+   uint8 numScissorRects;
+   uint16 pad1[1];
+
+   uint32 pad2[3];
+
+   SVGA3dViewport viewports[SVGA3D_DX_MAX_VIEWPORTS];
+   uint32 pad3[32];
+
+   SVGASignedRect scissorRects[SVGA3D_DX_MAX_SCISSORRECTS];
+   uint32 pad4[64];
+
+   struct {
+      uint32 queryID;
+      uint32 value;
+   } predication;
+   uint32 pad5[2];
+
+   struct {
+      uint32 shaderId;
+      SVGA3dConstantBufferBinding constantBuffers[SVGA3D_DX_MAX_CONSTBUFFERS];
+      uint32 shaderResources[SVGA3D_DX_MAX_SRVIEWS];
+      uint32 samplers[SVGA3D_DX_MAX_SAMPLERS];
+   } shaderState[SVGA3D_NUM_SHADERTYPE];
+   uint32 pad6[26];
+
+   SVGA3dQueryId queryID[SVGA3D_MAX_QUERY];
+
+   SVGA3dCOTableData cotables[SVGA_COTABLE_MAX];
+   uint32 pad7[380];
+}
+#include "vmware_pack_end.h"
+SVGADXContextMobFormat;
+
+#endif /* _SVGA3D_DX_H_ */
index 367e8cf..a1c3687 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2007-2014 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -41,6 +41,7 @@
 #define SVGA3D_NUM_CLIPPLANES                   6
 #define SVGA3D_MAX_RENDER_TARGETS               8
 #define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS  (SVGA3D_MAX_RENDER_TARGETS)
+#define SVGA3D_MAX_UAVIEWS                      8
 #define SVGA3D_MAX_CONTEXT_IDS                  256
 #define SVGA3D_MAX_SURFACE_IDS                  (32 * 1024)
 
@@ -56,9 +57,6 @@
 
 #define SVGA3D_NUM_TEXTURE_UNITS                32
 #define SVGA3D_NUM_LIGHTS                       8
-#define SVGA3D_MAX_VIDEODECODERS                8
-#define SVGA3D_MAX_VIDEOPROCESSORS              8
-#define SVGA3D_MAX_VIDEODECODER_FRAMES          400
 
 /*
  * Maximum size in dwords of shader text the SVGA device will allow.
@@ -98,4 +96,4 @@
  */
 #define SVGA3D_MAX_DRAW_PRIMITIVE_RANGES 32
 
-#endif // _SVGA3D_LIMITS_H_
+#endif /* _SVGA3D_LIMITS_H_ */
index 01705f3..b44ce64 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+ * Copyright 1998-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -43,6 +43,7 @@
 #include "svga3d_types.h"
 #include "svga3d_limits.h"
 #include "svga3d_cmd.h"
+#include "svga3d_dx.h"
 #include "svga3d_devcaps.h"
 
 
index ce5475b..efa358b 100644 (file)
@@ -1,27 +1,29 @@
-/**********************************************************
- * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+/**************************************************************************
  *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
+ * Copyright © 1998-2015 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
  *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
  *
- **********************************************************/
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
 
 /*
  * svga3d_surfacedefs.h --
  */
 
 enum svga3d_block_desc {
-       SVGA3DBLOCKDESC_NONE        = 0,         /* No channels are active */
-       SVGA3DBLOCKDESC_BLUE        = 1 << 0,    /* Block with red channel
-                                                   data */
-       SVGA3DBLOCKDESC_U           = 1 << 0,    /* Block with bump U channel
-                                                   data */
-       SVGA3DBLOCKDESC_UV_VIDEO    = 1 << 7,    /* Block with alternating video
-                                                   U and V */
-       SVGA3DBLOCKDESC_GREEN       = 1 << 1,    /* Block with green channel
-                                                   data */
-       SVGA3DBLOCKDESC_V           = 1 << 1,    /* Block with bump V channel
-                                                   data */
-       SVGA3DBLOCKDESC_STENCIL     = 1 << 1,    /* Block with a stencil
-                                                   channel */
-       SVGA3DBLOCKDESC_RED         = 1 << 2,    /* Block with blue channel
-                                                   data */
-       SVGA3DBLOCKDESC_W           = 1 << 2,    /* Block with bump W channel
-                                                   data */
-       SVGA3DBLOCKDESC_LUMINANCE   = 1 << 2,    /* Block with luminance channel
-                                                   data */
-       SVGA3DBLOCKDESC_Y           = 1 << 2,    /* Block with video luminance
-                                                   data */
-       SVGA3DBLOCKDESC_DEPTH       = 1 << 2,    /* Block with depth channel */
-       SVGA3DBLOCKDESC_ALPHA       = 1 << 3,    /* Block with an alpha
-                                                   channel */
-       SVGA3DBLOCKDESC_Q           = 1 << 3,    /* Block with bump Q channel
-                                                   data */
-       SVGA3DBLOCKDESC_BUFFER      = 1 << 4,    /* Block stores 1 byte of
-                                                   data */
-       SVGA3DBLOCKDESC_COMPRESSED  = 1 << 5,    /* Block stores n bytes of
-                                                   data depending on the
-                                                   compression method used */
-       SVGA3DBLOCKDESC_IEEE_FP     = 1 << 6,    /* Block stores data in an IEEE
-                                                   floating point
-                                                   representation in
-                                                   all channels */
-       SVGA3DBLOCKDESC_PLANAR_YUV  = 1 << 8,    /* Three separate blocks store
-                                                   data. */
-       SVGA3DBLOCKDESC_U_VIDEO     = 1 << 9,    /* Block with U video data */
-       SVGA3DBLOCKDESC_V_VIDEO     = 1 << 10,   /* Block with V video data */
-       SVGA3DBLOCKDESC_EXP         = 1 << 11,   /* Shared exponent */
-       SVGA3DBLOCKDESC_SRGB        = 1 << 12,   /* Data is in sRGB format */
-       SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13,   /* 2 planes of Y, UV,
-                                                   e.g., NV12. */
-       SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14,   /* 3 planes of separate
-                                                   Y, U, V, e.g., YV12. */
-
-       SVGA3DBLOCKDESC_RG         = SVGA3DBLOCKDESC_RED |
-       SVGA3DBLOCKDESC_GREEN,
-       SVGA3DBLOCKDESC_RGB        = SVGA3DBLOCKDESC_RG |
-       SVGA3DBLOCKDESC_BLUE,
-       SVGA3DBLOCKDESC_RGB_SRGB   = SVGA3DBLOCKDESC_RGB |
-       SVGA3DBLOCKDESC_SRGB,
-       SVGA3DBLOCKDESC_RGBA       = SVGA3DBLOCKDESC_RGB |
-       SVGA3DBLOCKDESC_ALPHA,
-       SVGA3DBLOCKDESC_RGBA_SRGB  = SVGA3DBLOCKDESC_RGBA |
-       SVGA3DBLOCKDESC_SRGB,
-       SVGA3DBLOCKDESC_UV         = SVGA3DBLOCKDESC_U |
-       SVGA3DBLOCKDESC_V,
-       SVGA3DBLOCKDESC_UVL        = SVGA3DBLOCKDESC_UV |
-       SVGA3DBLOCKDESC_LUMINANCE,
-       SVGA3DBLOCKDESC_UVW        = SVGA3DBLOCKDESC_UV |
-       SVGA3DBLOCKDESC_W,
-       SVGA3DBLOCKDESC_UVWA       = SVGA3DBLOCKDESC_UVW |
-       SVGA3DBLOCKDESC_ALPHA,
-       SVGA3DBLOCKDESC_UVWQ       = SVGA3DBLOCKDESC_U |
-       SVGA3DBLOCKDESC_V |
-       SVGA3DBLOCKDESC_W |
-       SVGA3DBLOCKDESC_Q,
-       SVGA3DBLOCKDESC_LA         = SVGA3DBLOCKDESC_LUMINANCE |
-       SVGA3DBLOCKDESC_ALPHA,
-       SVGA3DBLOCKDESC_R_FP       = SVGA3DBLOCKDESC_RED |
-       SVGA3DBLOCKDESC_IEEE_FP,
-       SVGA3DBLOCKDESC_RG_FP      = SVGA3DBLOCKDESC_R_FP |
-       SVGA3DBLOCKDESC_GREEN,
-       SVGA3DBLOCKDESC_RGB_FP     = SVGA3DBLOCKDESC_RG_FP |
-       SVGA3DBLOCKDESC_BLUE,
-       SVGA3DBLOCKDESC_RGBA_FP    = SVGA3DBLOCKDESC_RGB_FP |
-       SVGA3DBLOCKDESC_ALPHA,
-       SVGA3DBLOCKDESC_DS         = SVGA3DBLOCKDESC_DEPTH |
-       SVGA3DBLOCKDESC_STENCIL,
-       SVGA3DBLOCKDESC_YUV        = SVGA3DBLOCKDESC_UV_VIDEO |
-       SVGA3DBLOCKDESC_Y,
-       SVGA3DBLOCKDESC_AYUV       = SVGA3DBLOCKDESC_ALPHA |
-       SVGA3DBLOCKDESC_Y |
-       SVGA3DBLOCKDESC_U_VIDEO |
-       SVGA3DBLOCKDESC_V_VIDEO,
-       SVGA3DBLOCKDESC_RGBE       = SVGA3DBLOCKDESC_RGB |
-       SVGA3DBLOCKDESC_EXP,
-       SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED |
-       SVGA3DBLOCKDESC_SRGB,
-       SVGA3DBLOCKDESC_NV12       = SVGA3DBLOCKDESC_PLANAR_YUV |
-       SVGA3DBLOCKDESC_2PLANAR_YUV,
-       SVGA3DBLOCKDESC_YV12       = SVGA3DBLOCKDESC_PLANAR_YUV |
-       SVGA3DBLOCKDESC_3PLANAR_YUV,
-};
 
-/*
- * SVGA3dSurfaceDesc describes the actual pixel data.
- *
- * This structure provides the following information:
- *    1. Block description.
- *    2. Dimensions of a block in the surface.
- *    3. Size of block in bytes.
- *    4. Bit depth of the pixel data.
- *    5. Channel bit depths and masks (if applicable).
- */
-#define SVGA3D_CHANNEL_DEF(type)               \
-       struct {                                \
-               union {                         \
-                       type blue;              \
-                       type u;                 \
-                       type uv_video;          \
-                       type u_video;           \
-               };                              \
-               union {                         \
-                       type green;             \
-                       type v;                 \
-                       type stencil;           \
-                       type v_video;           \
-               };                              \
-               union {                         \
-                       type red;               \
-                       type w;                 \
-                       type luminance;         \
-                       type y;                 \
-                       type depth;             \
-                       type data;              \
-               };                              \
-               union {                         \
-                       type alpha;             \
-                       type q;                 \
-                       type exp;               \
-               };                              \
-       }
-
-struct svga3d_surface_desc {
-       enum svga3d_block_desc block_desc;
-       SVGA3dSize block_size;
-       uint32 bytes_per_block;
-       uint32 pitch_bytes_per_block;
-
-       struct {
-               uint32 total;
-               SVGA3D_CHANNEL_DEF(uint8);
-       } bit_depth;
-
-       struct {
-               SVGA3D_CHANNEL_DEF(uint8);
-       } bit_offset;
+   SVGA3DBLOCKDESC_NONE        = 0,         /* No channels are active */
+   SVGA3DBLOCKDESC_BLUE        = 1 << 0,    /* Block with blue channel data */
+   SVGA3DBLOCKDESC_U           = 1 << 0,    /* Block with bump U channel data */
+   SVGA3DBLOCKDESC_GREEN       = 1 << 1,    /* Block with green channel data */
+   SVGA3DBLOCKDESC_V           = 1 << 1,    /* Block with bump V channel data */
+   SVGA3DBLOCKDESC_RED         = 1 << 2,    /* Block with red channel data */
+   SVGA3DBLOCKDESC_W           = 1 << 2,    /* Block with bump W channel data */
+   SVGA3DBLOCKDESC_LUMINANCE   = 1 << 2,    /* Block with luminance channel data */
+   SVGA3DBLOCKDESC_Y           = 1 << 2,    /* Block with video luminance data */
+   SVGA3DBLOCKDESC_ALPHA       = 1 << 3,    /* Block with an alpha channel */
+   SVGA3DBLOCKDESC_Q           = 1 << 3,    /* Block with bump Q channel data */
+   SVGA3DBLOCKDESC_BUFFER      = 1 << 4,    /* Block stores 1 byte of data */
+   SVGA3DBLOCKDESC_COMPRESSED  = 1 << 5,    /* Block stores n bytes of data depending
+                                               on the compression method used */
+   SVGA3DBLOCKDESC_IEEE_FP     = 1 << 6,    /* Block stores data in an IEEE floating point
+                                               representation in all channels */
+   SVGA3DBLOCKDESC_UV_VIDEO    = 1 << 7,    /* Block with alternating video U and V */
+   SVGA3DBLOCKDESC_PLANAR_YUV  = 1 << 8,    /* Three separate blocks store data. */
+   SVGA3DBLOCKDESC_U_VIDEO     = 1 << 9,    /* Block with U video data */
+   SVGA3DBLOCKDESC_V_VIDEO     = 1 << 10,   /* Block with V video data */
+   SVGA3DBLOCKDESC_EXP         = 1 << 11,   /* Shared exponent */
+   SVGA3DBLOCKDESC_SRGB        = 1 << 12,   /* Data is in sRGB format */
+   SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13,   /* 2 planes of Y, UV, e.g., NV12. */
+   SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14,   /* 3 planes of separate Y, U, V, e.g., YV12. */
+   SVGA3DBLOCKDESC_DEPTH       = 1 << 15,   /* Block with depth channel */
+   SVGA3DBLOCKDESC_STENCIL     = 1 << 16,   /* Block with a stencil channel */
+
+   SVGA3DBLOCKDESC_RG         = SVGA3DBLOCKDESC_RED |
+                                SVGA3DBLOCKDESC_GREEN,
+   SVGA3DBLOCKDESC_RGB        = SVGA3DBLOCKDESC_RG |
+                                SVGA3DBLOCKDESC_BLUE,
+   SVGA3DBLOCKDESC_RGB_SRGB   = SVGA3DBLOCKDESC_RGB |
+                                SVGA3DBLOCKDESC_SRGB,
+   SVGA3DBLOCKDESC_RGBA       = SVGA3DBLOCKDESC_RGB |
+                                SVGA3DBLOCKDESC_ALPHA,
+   SVGA3DBLOCKDESC_RGBA_SRGB  = SVGA3DBLOCKDESC_RGBA |
+                                SVGA3DBLOCKDESC_SRGB,
+   SVGA3DBLOCKDESC_UV         = SVGA3DBLOCKDESC_U |
+                                SVGA3DBLOCKDESC_V,
+   SVGA3DBLOCKDESC_UVL        = SVGA3DBLOCKDESC_UV |
+                                SVGA3DBLOCKDESC_LUMINANCE,
+   SVGA3DBLOCKDESC_UVW        = SVGA3DBLOCKDESC_UV |
+                                SVGA3DBLOCKDESC_W,
+   SVGA3DBLOCKDESC_UVWA       = SVGA3DBLOCKDESC_UVW |
+                                SVGA3DBLOCKDESC_ALPHA,
+   SVGA3DBLOCKDESC_UVWQ       = SVGA3DBLOCKDESC_U |
+                                SVGA3DBLOCKDESC_V |
+                                SVGA3DBLOCKDESC_W |
+                                SVGA3DBLOCKDESC_Q,
+   SVGA3DBLOCKDESC_LA         = SVGA3DBLOCKDESC_LUMINANCE |
+                                SVGA3DBLOCKDESC_ALPHA,
+   SVGA3DBLOCKDESC_R_FP       = SVGA3DBLOCKDESC_RED |
+                                SVGA3DBLOCKDESC_IEEE_FP,
+   SVGA3DBLOCKDESC_RG_FP      = SVGA3DBLOCKDESC_R_FP |
+                                SVGA3DBLOCKDESC_GREEN,
+   SVGA3DBLOCKDESC_RGB_FP     = SVGA3DBLOCKDESC_RG_FP |
+                                SVGA3DBLOCKDESC_BLUE,
+   SVGA3DBLOCKDESC_RGBA_FP    = SVGA3DBLOCKDESC_RGB_FP |
+                                SVGA3DBLOCKDESC_ALPHA,
+   SVGA3DBLOCKDESC_DS         = SVGA3DBLOCKDESC_DEPTH |
+                                SVGA3DBLOCKDESC_STENCIL,
+   SVGA3DBLOCKDESC_YUV        = SVGA3DBLOCKDESC_UV_VIDEO |
+                                SVGA3DBLOCKDESC_Y,
+   SVGA3DBLOCKDESC_AYUV       = SVGA3DBLOCKDESC_ALPHA |
+                                SVGA3DBLOCKDESC_Y |
+                                SVGA3DBLOCKDESC_U_VIDEO |
+                                SVGA3DBLOCKDESC_V_VIDEO,
+   SVGA3DBLOCKDESC_RGBE       = SVGA3DBLOCKDESC_RGB |
+                                SVGA3DBLOCKDESC_EXP,
+   SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED |
+                                     SVGA3DBLOCKDESC_SRGB,
+   SVGA3DBLOCKDESC_NV12       = SVGA3DBLOCKDESC_PLANAR_YUV |
+                                SVGA3DBLOCKDESC_2PLANAR_YUV,
+   SVGA3DBLOCKDESC_YV12       = SVGA3DBLOCKDESC_PLANAR_YUV |
+                                SVGA3DBLOCKDESC_3PLANAR_YUV,
 };
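The composite values above are plain bitwise ORs of the single-channel bits, so a caller can test for a channel combination with one mask operation. A minimal sketch of such a test, using only names defined in this header (the helper itself is hypothetical and not part of this patch):

static inline int
block_desc_has_depth_stencil(enum svga3d_block_desc desc)
{
   /* True only when both the DEPTH and STENCIL bits are present. */
   return (desc & SVGA3DBLOCKDESC_DS) == SVGA3DBLOCKDESC_DS;
}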
 
-static const struct svga3d_surface_desc svga3d_surface_descs[] = {
-       {SVGA3DBLOCKDESC_NONE,
-        {1, 1, 1},  0, 0, {0, {{0}, {0}, {0}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_FORMAT_INVALID */
-
-       {SVGA3DBLOCKDESC_RGB,
-        {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
-        {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_X8R8G8B8 */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-        {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_A8R8G8B8 */
-
-       {SVGA3DBLOCKDESC_RGB,
-        {1, 1, 1},  2, 2, {16, {{5}, {6}, {5}, {0} } },
-        {{{0}, {5}, {11}, {0} } } },    /* SVGA3D_R5G6B5 */
-
-       {SVGA3DBLOCKDESC_RGB,
-        {1, 1, 1},  2, 2, {15, {{5}, {5}, {5}, {0} } },
-        {{{0}, {5}, {10}, {0} } } },    /* SVGA3D_X1R5G5B5 */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  2, 2, {16, {{5}, {5}, {5}, {1} } },
-        {{{0}, {5}, {10}, {15} } } },   /* SVGA3D_A1R5G5B5 */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  2, 2, {16, {{4}, {4}, {4}, {4} } },
-        {{{0}, {4}, {8}, {12} } } },    /* SVGA3D_A4R4G4B4 */
-
-       {SVGA3DBLOCKDESC_DEPTH,
-        {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_Z_D32 */
-
-       {SVGA3DBLOCKDESC_DEPTH,
-        {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_Z_D16 */
-
-       {SVGA3DBLOCKDESC_DS,
-        {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
-        {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_D24S8 */
-
-       {SVGA3DBLOCKDESC_DS,
-        {1, 1, 1},  2, 2, {16, {{0}, {1}, {15}, {0} } },
-        {{{0}, {15}, {0}, {0} } } },    /* SVGA3D_Z_D15S1 */
-
-       {SVGA3DBLOCKDESC_LUMINANCE,
-        {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_LUMINANCE8 */
-
-       {SVGA3DBLOCKDESC_LA,
-        {1, 1, 1},  1, 1, {8, {{0}, {0}, {4}, {4} } },
-        {{{0}, {0}, {0}, {4} } } },     /* SVGA3D_LUMINANCE4_ALPHA4 */
-
-       {SVGA3DBLOCKDESC_LUMINANCE,
-        {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_LUMINANCE16 */
-
-       {SVGA3DBLOCKDESC_LA,
-        {1, 1, 1},  2, 2, {16, {{0}, {0}, {8}, {8} } },
-        {{{0}, {0}, {0}, {8} } } },     /* SVGA3D_LUMINANCE8_ALPHA8 */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT1 */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT2 */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT3 */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT4 */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT5 */
-
-       {SVGA3DBLOCKDESC_UV,
-        {1, 1, 1},  2, 2, {16, {{0}, {0}, {8}, {8} } },
-        {{{0}, {0}, {0}, {8} } } },     /* SVGA3D_BUMPU8V8 */
-
-       {SVGA3DBLOCKDESC_UVL,
-        {1, 1, 1},  2, 2, {16, {{5}, {5}, {6}, {0} } },
-        {{{11}, {6}, {0}, {0} } } },    /* SVGA3D_BUMPL6V5U5 */
-
-       {SVGA3DBLOCKDESC_UVL,
-        {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {0} } },
-        {{{16}, {8}, {0}, {0} } } },    /* SVGA3D_BUMPX8L8V8U8 */
-
-       {SVGA3DBLOCKDESC_UVL,
-        {1, 1, 1},  3, 3, {24, {{8}, {8}, {8}, {0} } },
-        {{{16}, {8}, {0}, {0} } } },    /* SVGA3D_BUMPL8V8U8 */
-
-       {SVGA3DBLOCKDESC_RGBA_FP,
-        {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
-        {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_ARGB_S10E5 */
-
-       {SVGA3DBLOCKDESC_RGBA_FP,
-        {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
-        {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_ARGB_S23E8 */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
-        {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_A2R10G10B10 */
-
-       {SVGA3DBLOCKDESC_UV,
-        {1, 1, 1},  2, 2, {16, {{8}, {8}, {0}, {0} } },
-        {{{8}, {0}, {0}, {0} } } },     /* SVGA3D_V8U8 */
-
-       {SVGA3DBLOCKDESC_UVWQ,
-        {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-        {{{24}, {16}, {8}, {0} } } },   /* SVGA3D_Q8W8V8U8 */
-
-       {SVGA3DBLOCKDESC_UV,
-        {1, 1, 1},  2, 2, {16, {{8}, {8}, {0}, {0} } },
-        {{{8}, {0}, {0}, {0} } } },     /* SVGA3D_CxV8U8 */
-
-       {SVGA3DBLOCKDESC_UVL,
-        {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
-        {{{16}, {8}, {0}, {0} } } },    /* SVGA3D_X8L8V8U8 */
-
-       {SVGA3DBLOCKDESC_UVWA,
-        {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
-        {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_A2W10V10U10 */
-
-       {SVGA3DBLOCKDESC_ALPHA,
-        {1, 1, 1},  1, 1, {8, {{0}, {0}, {0}, {8} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_ALPHA8 */
-
-       {SVGA3DBLOCKDESC_R_FP,
-        {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R_S10E5 */
-
-       {SVGA3DBLOCKDESC_R_FP,
-        {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R_S23E8 */
-
-       {SVGA3DBLOCKDESC_RG_FP,
-        {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
-        {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_RG_S10E5 */
-
-       {SVGA3DBLOCKDESC_RG_FP,
-        {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
-        {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_RG_S23E8 */
-
-       {SVGA3DBLOCKDESC_BUFFER,
-        {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BUFFER */
-
-       {SVGA3DBLOCKDESC_DEPTH,
-        {1, 1, 1},  4, 4, {32, {{0}, {0}, {24}, {0} } },
-        {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_D24X8 */
-
-       {SVGA3DBLOCKDESC_UV,
-        {1, 1, 1},  4, 4, {32, {{16}, {16}, {0}, {0} } },
-        {{{16}, {0}, {0}, {0} } } },    /* SVGA3D_V16U16 */
-
-       {SVGA3DBLOCKDESC_RG,
-        {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
-        {{{0}, {0}, {16}, {0} } } },    /* SVGA3D_G16R16 */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
-        {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_A16B16G16R16 */
-
-       {SVGA3DBLOCKDESC_YUV,
-        {1, 1, 1},  2, 2, {16, {{8}, {0}, {8}, {0} } },
-        {{{0}, {0}, {8}, {0} } } },     /* SVGA3D_UYVY */
-
-       {SVGA3DBLOCKDESC_YUV,
-        {1, 1, 1},  2, 2, {16, {{8}, {0}, {8}, {0} } },
-        {{{8}, {0}, {0}, {0} } } },     /* SVGA3D_YUY2 */
-
-       {SVGA3DBLOCKDESC_NV12,
-        {2, 2, 1},  6, 2, {48, {{0}, {0}, {48}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_NV12 */
-
-       {SVGA3DBLOCKDESC_AYUV,
-        {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-        {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_AYUV */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
-        {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_R32G32B32A32_TYPELESS */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
-        {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_R32G32B32A32_UINT */
-
-       {SVGA3DBLOCKDESC_UVWQ,
-        {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
-        {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_R32G32B32A32_SINT */
-
-       {SVGA3DBLOCKDESC_RGB,
-        {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
-        {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_TYPELESS */
-
-       {SVGA3DBLOCKDESC_RGB_FP,
-        {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
-        {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_FLOAT */
-
-       {SVGA3DBLOCKDESC_RGB,
-        {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
-        {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_UINT */
-
-       {SVGA3DBLOCKDESC_UVW,
-        {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
-        {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_SINT */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
-        {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_TYPELESS */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
-        {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_UINT */
-
-       {SVGA3DBLOCKDESC_UVWQ,
-        {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
-        {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_SNORM */
-
-       {SVGA3DBLOCKDESC_UVWQ,
-        {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
-        {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_SINT */
-
-       {SVGA3DBLOCKDESC_RG,
-        {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
-        {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G32_TYPELESS */
-
-       {SVGA3DBLOCKDESC_RG,
-        {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
-        {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G32_UINT */
 
-       {SVGA3DBLOCKDESC_UV,
-        {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
-        {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G32_SINT */
+typedef struct SVGA3dChannelDef {
+   union {
+      uint8 blue;
+      uint8 u;
+      uint8 uv_video;
+      uint8 u_video;
+   };
+   union {
+      uint8 green;
+      uint8 v;
+      uint8 stencil;
+      uint8 v_video;
+   };
+   union {
+      uint8 red;
+      uint8 w;
+      uint8 luminance;
+      uint8 y;
+      uint8 depth;
+      uint8 data;
+   };
+   union {
+      uint8 alpha;
+      uint8 q;
+      uint8 exp;
+   };
+} SVGA3dChannelDef;
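Each union member aliases the same byte of storage, so one channel definition can be read under whichever name suits the format. An illustrative check, mirroring the {{0}, {8}, {24}, {0}} bit-depth entry that the table below uses for SVGA3D_Z_D24S8 (the function is a hypothetical example):

#include <assert.h>

static void channel_def_example(void)
{
   SVGA3dChannelDef bits = { {0}, {8}, {24}, {0} };
   assert(bits.stencil == 8);   /* second union: aliases green/v */
   assert(bits.depth == 24);    /* third union: aliases red/luminance/y */
}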
 
-       {SVGA3DBLOCKDESC_RG,
-        {1, 1, 1},  8, 8, {64, {{0}, {8}, {32}, {0} } },
-        {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G8X24_TYPELESS */
-
-       {SVGA3DBLOCKDESC_DS,
-        {1, 1, 1},  8, 8, {64, {{0}, {8}, {32}, {0} } },
-        {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_D32_FLOAT_S8X24_UINT */
-
-       {SVGA3DBLOCKDESC_R_FP,
-        {1, 1, 1},  8, 8, {64, {{0}, {0}, {32}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },    /* SVGA3D_R32_FLOAT_X8_X24_TYPELESS */
-
-       {SVGA3DBLOCKDESC_GREEN,
-        {1, 1, 1},  8, 8, {64, {{0}, {8}, {0}, {0} } },
-        {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_X32_TYPELESS_G8X24_UINT */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
-        {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_R10G10B10A2_TYPELESS */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
-        {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_R10G10B10A2_UINT */
-
-       {SVGA3DBLOCKDESC_RGB_FP,
-        {1, 1, 1},  4, 4, {32, {{10}, {11}, {11}, {0} } },
-        {{{0}, {10}, {21}, {0} } } },  /* SVGA3D_R11G11B10_FLOAT */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-        {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_TYPELESS */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-        {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_UNORM */
-
-       {SVGA3DBLOCKDESC_RGBA_SRGB,
-        {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-        {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_UNORM_SRGB */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-        {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_UINT */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-        {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_SINT */
-
-       {SVGA3DBLOCKDESC_RG,
-        {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
-        {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_R16G16_TYPELESS */
-
-       {SVGA3DBLOCKDESC_RG_FP,
-        {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
-        {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_R16G16_UINT */
-
-       {SVGA3DBLOCKDESC_UV,
-        {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
-        {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_R16G16_SINT */
-
-       {SVGA3DBLOCKDESC_RED,
-        {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R32_TYPELESS */
-
-       {SVGA3DBLOCKDESC_DEPTH,
-        {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_D32_FLOAT */
-
-       {SVGA3DBLOCKDESC_RED,
-        {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R32_UINT */
-
-       {SVGA3DBLOCKDESC_RED,
-        {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R32_SINT */
-
-       {SVGA3DBLOCKDESC_RG,
-        {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
-        {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_R24G8_TYPELESS */
-
-       {SVGA3DBLOCKDESC_DS,
-        {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
-        {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_D24_UNORM_S8_UINT */
-
-       {SVGA3DBLOCKDESC_RED,
-        {1, 1, 1},  4, 4, {32, {{0}, {0}, {24}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R24_UNORM_X8_TYPELESS */
-
-       {SVGA3DBLOCKDESC_GREEN,
-        {1, 1, 1},  4, 4, {32, {{0}, {8}, {0}, {0} } },
-        {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_X24_TYPELESS_G8_UINT */
-
-       {SVGA3DBLOCKDESC_RG,
-        {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
-        {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_TYPELESS */
-
-       {SVGA3DBLOCKDESC_RG,
-        {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
-        {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_UNORM */
-
-       {SVGA3DBLOCKDESC_RG,
-        {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
-        {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_UINT */
-
-       {SVGA3DBLOCKDESC_UV,
-        {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
-        {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_SINT */
-
-       {SVGA3DBLOCKDESC_RED,
-        {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_TYPELESS */
-
-       {SVGA3DBLOCKDESC_RED,
-        {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_UNORM */
-
-       {SVGA3DBLOCKDESC_RED,
-        {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_UINT */
-
-       {SVGA3DBLOCKDESC_U,
-        {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_SNORM */
-
-       {SVGA3DBLOCKDESC_U,
-        {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_SINT */
-
-       {SVGA3DBLOCKDESC_RED,
-        {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_TYPELESS */
-
-       {SVGA3DBLOCKDESC_RED,
-        {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_UNORM */
-
-       {SVGA3DBLOCKDESC_RED,
-        {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_UINT */
-
-       {SVGA3DBLOCKDESC_U,
-        {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_SNORM */
-
-       {SVGA3DBLOCKDESC_U,
-        {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_SINT */
-
-       {SVGA3DBLOCKDESC_RED,
-        {8, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R1_UNORM */
-
-       {SVGA3DBLOCKDESC_RGBE,
-        {1, 1, 1},  4, 4, {32, {{9}, {9}, {9}, {5} } },
-        {{{18}, {9}, {0}, {27} } } },   /* SVGA3D_R9G9B9E5_SHAREDEXP */
-
-       {SVGA3DBLOCKDESC_RG,
-        {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
-        {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_B8G8_UNORM */
-
-       {SVGA3DBLOCKDESC_RG,
-        {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
-        {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_G8R8_G8B8_UNORM */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC1_TYPELESS */
-
-       {SVGA3DBLOCKDESC_COMPRESSED_SRGB,
-        {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC1_UNORM_SRGB */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC2_TYPELESS */
-
-       {SVGA3DBLOCKDESC_COMPRESSED_SRGB,
-        {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC2_UNORM_SRGB */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC3_TYPELESS */
-
-       {SVGA3DBLOCKDESC_COMPRESSED_SRGB,
-        {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC3_UNORM_SRGB */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC4_TYPELESS */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC4_UNORM */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC4_SNORM */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC5_TYPELESS */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC5_UNORM */
-
-       {SVGA3DBLOCKDESC_COMPRESSED,
-        {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC5_SNORM */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
-        {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_R10G10B10_XR_BIAS_A2_UNORM */
-
-       {SVGA3DBLOCKDESC_RGBA,
-        {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-        {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8A8_TYPELESS */
-
-       {SVGA3DBLOCKDESC_RGBA_SRGB,
-        {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
-        {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8A8_UNORM_SRGB */
-
-       {SVGA3DBLOCKDESC_RGB,
-        {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
-        {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8X8_TYPELESS */
-
-       {SVGA3DBLOCKDESC_RGB_SRGB,
-        {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
-        {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8X8_UNORM_SRGB */
-
-       {SVGA3DBLOCKDESC_DEPTH,
-        {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_Z_DF16 */
+struct svga3d_surface_desc {
+   SVGA3dSurfaceFormat format;
+   enum svga3d_block_desc block_desc;
 
-       {SVGA3DBLOCKDESC_DS,
-        {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
-        {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_DF24 */
+   SVGA3dSize block_size;
+   uint32 bytes_per_block;
+   uint32 pitch_bytes_per_block;
 
-       {SVGA3DBLOCKDESC_DS,
-        {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
-        {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_D24S8_INT */
+   uint32 totalBitDepth;
+   SVGA3dChannelDef bitDepth;
+   SVGA3dChannelDef bitOffset;
+};
 
-       {SVGA3DBLOCKDESC_YV12,
-        {2, 2, 1},  6, 2, {48, {{0}, {0}, {48}, {0} } },
-        {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_YV12 */
+static const struct svga3d_surface_desc svga3d_surface_descs[] = {
+   {SVGA3D_FORMAT_INVALID, SVGA3DBLOCKDESC_NONE,
+      {1, 1, 1},  0, 0,
+      0, {{0}, {0}, {0}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_X8R8G8B8, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  4, 4,
+      24, {{8}, {8}, {8}, {0}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_A8R8G8B8, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_R5G6B5, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  2, 2,
+      16, {{5}, {6}, {5}, {0}},
+      {{0}, {5}, {11}, {0}}},
+
+   {SVGA3D_X1R5G5B5, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  2, 2,
+      15, {{5}, {5}, {5}, {0}},
+      {{0}, {5}, {10}, {0}}},
+
+   {SVGA3D_A1R5G5B5, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  2, 2,
+      16, {{5}, {5}, {5}, {1}},
+      {{0}, {5}, {10}, {15}}},
+
+   {SVGA3D_A4R4G4B4, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  2, 2,
+      16, {{4}, {4}, {4}, {4}},
+      {{0}, {4}, {8}, {12}}},
+
+   {SVGA3D_Z_D32, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_Z_D16, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_Z_D24S8, SVGA3DBLOCKDESC_DS,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {8}, {24}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_Z_D15S1, SVGA3DBLOCKDESC_DS,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {1}, {15}, {0}},
+      {{0}, {15}, {0}, {0}}},
+
+   {SVGA3D_LUMINANCE8, SVGA3DBLOCKDESC_LUMINANCE,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_LUMINANCE4_ALPHA4, SVGA3DBLOCKDESC_LA,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {4}, {4}},
+      {{0}, {0}, {0}, {4}}},
+
+   {SVGA3D_LUMINANCE16, SVGA3DBLOCKDESC_LUMINANCE,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_LUMINANCE8_ALPHA8, SVGA3DBLOCKDESC_LA,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {8}, {8}},
+      {{0}, {0}, {0}, {8}}},
+
+   {SVGA3D_DXT1, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_DXT2, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_DXT3, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_DXT4, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_DXT5, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BUMPU8V8, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {8}, {8}},
+      {{0}, {0}, {0}, {8}}},
+
+   {SVGA3D_BUMPL6V5U5, SVGA3DBLOCKDESC_UVL,
+      {1, 1, 1},  2, 2,
+      16, {{5}, {5}, {6}, {0}},
+      {{11}, {6}, {0}, {0}}},
+
+   {SVGA3D_BUMPX8L8V8U8, SVGA3DBLOCKDESC_UVL,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {0}},
+      {{16}, {8}, {0}, {0}}},
+
+   {SVGA3D_FORMAT_DEAD1, SVGA3DBLOCKDESC_UVL,
+      {0, 0, 0},  0, 0,
+      0, {{0}, {0}, {0}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_ARGB_S10E5, SVGA3DBLOCKDESC_RGBA_FP,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_ARGB_S23E8, SVGA3DBLOCKDESC_RGBA_FP,
+      {1, 1, 1},  16, 16,
+      128, {{32}, {32}, {32}, {32}},
+      {{64}, {32}, {0}, {96}}},
+
+   {SVGA3D_A2R10G10B10, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {10}, {10}, {2}},
+      {{0}, {10}, {20}, {30}}},
+
+   {SVGA3D_V8U8, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  2, 2,
+      16, {{8}, {8}, {0}, {0}},
+      {{8}, {0}, {0}, {0}}},
+
+   {SVGA3D_Q8W8V8U8, SVGA3DBLOCKDESC_UVWQ,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{24}, {16}, {8}, {0}}},
+
+   {SVGA3D_CxV8U8, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  2, 2,
+      16, {{8}, {8}, {0}, {0}},
+      {{8}, {0}, {0}, {0}}},
+
+   {SVGA3D_X8L8V8U8, SVGA3DBLOCKDESC_UVL,
+      {1, 1, 1},  4, 4,
+      24, {{8}, {8}, {8}, {0}},
+      {{16}, {8}, {0}, {0}}},
+
+   {SVGA3D_A2W10V10U10, SVGA3DBLOCKDESC_UVWA,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {10}, {10}, {2}},
+      {{0}, {10}, {20}, {30}}},
+
+   {SVGA3D_ALPHA8, SVGA3DBLOCKDESC_ALPHA,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {0}, {8}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R_S10E5, SVGA3DBLOCKDESC_R_FP,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R_S23E8, SVGA3DBLOCKDESC_R_FP,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_RG_S10E5, SVGA3DBLOCKDESC_RG_FP,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {16}, {0}, {0}}},
+
+   {SVGA3D_RG_S23E8, SVGA3DBLOCKDESC_RG_FP,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {32}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_BUFFER, SVGA3DBLOCKDESC_BUFFER,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_Z_D24X8, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {24}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_V16U16, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  4, 4,
+      32, {{16}, {16}, {0}, {0}},
+      {{16}, {0}, {0}, {0}}},
+
+   {SVGA3D_G16R16, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {0}, {16}, {0}}},
+
+   {SVGA3D_A16B16G16R16, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_UYVY, SVGA3DBLOCKDESC_YUV,
+      {1, 1, 1},  2, 2,
+      16, {{8}, {0}, {8}, {0}},
+      {{0}, {0}, {8}, {0}}},
+
+   {SVGA3D_YUY2, SVGA3DBLOCKDESC_YUV,
+      {1, 1, 1},  2, 2,
+      16, {{8}, {0}, {8}, {0}},
+      {{8}, {0}, {0}, {0}}},
+
+   {SVGA3D_NV12, SVGA3DBLOCKDESC_NV12,
+      {2, 2, 1},  6, 2,
+      48, {{0}, {0}, {48}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_AYUV, SVGA3DBLOCKDESC_AYUV,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_R32G32B32A32_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  16, 16,
+      128, {{32}, {32}, {32}, {32}},
+      {{64}, {32}, {0}, {96}}},
+
+   {SVGA3D_R32G32B32A32_UINT, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  16, 16,
+      128, {{32}, {32}, {32}, {32}},
+      {{64}, {32}, {0}, {96}}},
+
+   {SVGA3D_R32G32B32A32_SINT, SVGA3DBLOCKDESC_UVWQ,
+      {1, 1, 1},  16, 16,
+      128, {{32}, {32}, {32}, {32}},
+      {{64}, {32}, {0}, {96}}},
+
+   {SVGA3D_R32G32B32_TYPELESS, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  12, 12,
+      96, {{32}, {32}, {32}, {0}},
+      {{64}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32G32B32_FLOAT, SVGA3DBLOCKDESC_RGB_FP,
+      {1, 1, 1},  12, 12,
+      96, {{32}, {32}, {32}, {0}},
+      {{64}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32G32B32_UINT, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  12, 12,
+      96, {{32}, {32}, {32}, {0}},
+      {{64}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32G32B32_SINT, SVGA3DBLOCKDESC_UVW,
+      {1, 1, 1},  12, 12,
+      96, {{32}, {32}, {32}, {0}},
+      {{64}, {32}, {0}, {0}}},
+
+   {SVGA3D_R16G16B16A16_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_R16G16B16A16_UINT, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_R16G16B16A16_SNORM, SVGA3DBLOCKDESC_UVWQ,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_R16G16B16A16_SINT, SVGA3DBLOCKDESC_UVWQ,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_R32G32_TYPELESS, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {32}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32G32_UINT, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {32}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32G32_SINT, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {32}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32G8X24_TYPELESS, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {8}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_D32_FLOAT_S8X24_UINT, SVGA3DBLOCKDESC_DS,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {8}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_R32_FLOAT_X8X24_TYPELESS, SVGA3DBLOCKDESC_R_FP,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_X32_TYPELESS_G8X24_UINT, SVGA3DBLOCKDESC_GREEN,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {8}, {0}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_R10G10B10A2_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {10}, {10}, {2}},
+      {{0}, {10}, {20}, {30}}},
+
+   {SVGA3D_R10G10B10A2_UINT, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {10}, {10}, {2}},
+      {{0}, {10}, {20}, {30}}},
+
+   {SVGA3D_R11G11B10_FLOAT, SVGA3DBLOCKDESC_RGB_FP,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {11}, {11}, {0}},
+      {{0}, {10}, {21}, {0}}},
+
+   {SVGA3D_R8G8B8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{16}, {8}, {0}, {24}}},
+
+   {SVGA3D_R8G8B8A8_UNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{16}, {8}, {0}, {24}}},
+
+   {SVGA3D_R8G8B8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{16}, {8}, {0}, {24}}},
+
+   {SVGA3D_R8G8B8A8_UINT, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{16}, {8}, {0}, {24}}},
+
+   {SVGA3D_R8G8B8A8_SINT, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{16}, {8}, {0}, {24}}},
+
+   {SVGA3D_R16G16_TYPELESS, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {16}, {0}, {0}}},
+
+   {SVGA3D_R16G16_UINT, SVGA3DBLOCKDESC_RG_FP,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {16}, {0}, {0}}},
+
+   {SVGA3D_R16G16_SINT, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {16}, {0}, {0}}},
+
+   {SVGA3D_R32_TYPELESS, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_D32_FLOAT, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R32_UINT, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R32_SINT, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R24G8_TYPELESS, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {8}, {24}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_D24_UNORM_S8_UINT, SVGA3DBLOCKDESC_DS,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {8}, {24}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_R24_UNORM_X8_TYPELESS, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {24}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_X24_TYPELESS_G8_UINT, SVGA3DBLOCKDESC_GREEN,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {8}, {0}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_R8G8_TYPELESS, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {8}, {8}, {0}},
+      {{0}, {8}, {0}, {0}}},
+
+   {SVGA3D_R8G8_UNORM, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {8}, {8}, {0}},
+      {{0}, {8}, {0}, {0}}},
+
+   {SVGA3D_R8G8_UINT, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {8}, {8}, {0}},
+      {{0}, {8}, {0}, {0}}},
+
+   {SVGA3D_R8G8_SINT, SVGA3DBLOCKDESC_UV,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {8}, {8}, {0}},
+      {{0}, {8}, {0}, {0}}},
+
+   {SVGA3D_R16_TYPELESS, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R16_UNORM, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R16_UINT, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R16_SNORM, SVGA3DBLOCKDESC_U,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R16_SINT, SVGA3DBLOCKDESC_U,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R8_TYPELESS, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R8_UNORM, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R8_UINT, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R8_SNORM, SVGA3DBLOCKDESC_U,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R8_SINT, SVGA3DBLOCKDESC_U,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_P8, SVGA3DBLOCKDESC_RED,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {8}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R9G9B9E5_SHAREDEXP, SVGA3DBLOCKDESC_RGBE,
+      {1, 1, 1},  4, 4,
+      32, {{9}, {9}, {9}, {5}},
+      {{18}, {9}, {0}, {27}}},
+
+   {SVGA3D_R8G8_B8G8_UNORM, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {8}, {8}, {0}},
+      {{0}, {8}, {0}, {0}}},
+
+   {SVGA3D_G8R8_G8B8_UNORM, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {8}, {8}, {0}},
+      {{0}, {8}, {0}, {0}}},
+
+   {SVGA3D_BC1_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC1_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC2_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC2_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC3_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC3_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC4_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_ATI1, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC4_SNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC5_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_ATI2, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC5_SNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {10}, {10}, {2}},
+      {{0}, {10}, {20}, {30}}},
+
+   {SVGA3D_B8G8R8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_B8G8R8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_B8G8R8X8_TYPELESS, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  4, 4,
+      24, {{8}, {8}, {8}, {0}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_B8G8R8X8_UNORM_SRGB, SVGA3DBLOCKDESC_RGB_SRGB,
+      {1, 1, 1},  4, 4,
+      24, {{8}, {8}, {8}, {0}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_Z_DF16, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_Z_DF24, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {8}, {24}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_Z_D24S8_INT, SVGA3DBLOCKDESC_DS,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {8}, {24}, {0}},
+      {{0}, {24}, {0}, {0}}},
+
+   {SVGA3D_YV12, SVGA3DBLOCKDESC_YV12,
+      {2, 2, 1},  6, 2,
+      48, {{0}, {0}, {48}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R32G32B32A32_FLOAT, SVGA3DBLOCKDESC_RGBA_FP,
+      {1, 1, 1},  16, 16,
+      128, {{32}, {32}, {32}, {32}},
+      {{64}, {32}, {0}, {96}}},
+
+   {SVGA3D_R16G16B16A16_FLOAT, SVGA3DBLOCKDESC_RGBA_FP,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_R16G16B16A16_UNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  8, 8,
+      64, {{16}, {16}, {16}, {16}},
+      {{32}, {16}, {0}, {48}}},
+
+   {SVGA3D_R32G32_FLOAT, SVGA3DBLOCKDESC_RG_FP,
+      {1, 1, 1},  8, 8,
+      64, {{0}, {32}, {32}, {0}},
+      {{0}, {32}, {0}, {0}}},
+
+   {SVGA3D_R10G10B10A2_UNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{10}, {10}, {10}, {2}},
+      {{0}, {10}, {20}, {30}}},
+
+   {SVGA3D_R8G8B8A8_SNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{24}, {16}, {8}, {0}}},
+
+   {SVGA3D_R16G16_FLOAT, SVGA3DBLOCKDESC_RG_FP,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {16}, {0}, {0}}},
+
+   {SVGA3D_R16G16_UNORM, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {16}, {16}, {0}},
+      {{0}, {0}, {16}, {0}}},
+
+   {SVGA3D_R16G16_SNORM, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  4, 4,
+      32, {{16}, {16}, {0}, {0}},
+      {{16}, {0}, {0}, {0}}},
+
+   {SVGA3D_R32_FLOAT, SVGA3DBLOCKDESC_R_FP,
+      {1, 1, 1},  4, 4,
+      32, {{0}, {0}, {32}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_R8G8_SNORM, SVGA3DBLOCKDESC_RG,
+      {1, 1, 1},  2, 2,
+      16, {{8}, {8}, {0}, {0}},
+      {{8}, {0}, {0}, {0}}},
+
+   {SVGA3D_R16_FLOAT, SVGA3DBLOCKDESC_R_FP,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_D16_UNORM, SVGA3DBLOCKDESC_DEPTH,
+      {1, 1, 1},  2, 2,
+      16, {{0}, {0}, {16}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_A8_UNORM, SVGA3DBLOCKDESC_ALPHA,
+      {1, 1, 1},  1, 1,
+      8, {{0}, {0}, {0}, {8}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC1_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC2_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC3_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_B5G6R5_UNORM, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  2, 2,
+      16, {{5}, {6}, {5}, {0}},
+      {{0}, {5}, {11}, {0}}},
+
+   {SVGA3D_B5G5R5A1_UNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  2, 2,
+      16, {{5}, {5}, {5}, {1}},
+      {{0}, {5}, {10}, {15}}},
+
+   {SVGA3D_B8G8R8A8_UNORM, SVGA3DBLOCKDESC_RGBA,
+      {1, 1, 1},  4, 4,
+      32, {{8}, {8}, {8}, {8}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_B8G8R8X8_UNORM, SVGA3DBLOCKDESC_RGB,
+      {1, 1, 1},  4, 4,
+      24, {{8}, {8}, {8}, {0}},
+      {{0}, {8}, {16}, {24}}},
+
+   {SVGA3D_BC4_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  8, 8,
+      64, {{0}, {0}, {64}, {0}},
+      {{0}, {0}, {0}, {0}}},
+
+   {SVGA3D_BC5_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+      {4, 4, 1},  16, 16,
+      128, {{0}, {0}, {128}, {0}},
+      {{0}, {0}, {0}, {0}}},
 };
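Because every entry now records its own format tag, the table can be cross-checked against the lookup convention used by svga3dsurface_get_desc() below, which indexes it directly by format value. A sketch of such a self-check (the helper is hypothetical; it assumes the table stays format-indexed, as the entries above suggest):

#include <assert.h>

static void check_surface_desc_table(void)
{
   const unsigned n = sizeof(svga3d_surface_descs) /
                      sizeof(svga3d_surface_descs[0]);
   unsigned i;

   for (i = 0; i < n; i++)
      assert(svga3d_surface_descs[i].format == (SVGA3dSurfaceFormat) i);
}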
 
 
@@ -704,6 +912,16 @@ static inline uint32 clamped_umul32(uint32 a, uint32 b)
        return (tmp > (uint64_t) ((uint32) -1)) ? (uint32) -1 : tmp;
 }
 
+static inline uint32 clamped_uadd32(uint32 a, uint32 b)
+{
+       uint32 c = a + b;
+       if (c < a || c < b) {
+               return MAX_UINT32;
+       }
+       return c;
+}
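Like clamped_umul32() above it, clamped_uadd32() saturates instead of wrapping: an unsigned sum is smaller than either operand exactly when it overflowed, so the comparison detects wrap-around without widening to 64 bits. A quick illustration with hypothetical operands:

#include <assert.h>

static void clamped_uadd32_example(void)
{
   assert(clamped_uadd32(1, 2) == 3);
   /* 0xfffffff0 + 0x20 would wrap to 0x10; the clamp returns MAX_UINT32. */
   assert(clamped_uadd32(0xfffffff0u, 0x20u) == MAX_UINT32);
}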
+
+
 static inline const struct svga3d_surface_desc *
 svga3dsurface_get_desc(SVGA3dSurfaceFormat format)
 {
@@ -828,7 +1046,7 @@ static inline uint32
 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
                                SVGA3dSize baseLevelSize,
                                uint32 numMipLevels,
-                               uint32 face,
+                               uint32 layer,
                                uint32 mip)
 
 {
@@ -853,7 +1071,7 @@ svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
       }
    }
 
-   offset = mipChainBytes * face + mipChainBytesToLevel;
+   offset = mipChainBytes * layer + mipChainBytesToLevel;
 
    return offset;
 }
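The layout is layer-major: the whole mip chain of layer 0, then that of layer 1, and so on, so generalizing the face parameter to a layer index changes nothing for cube maps (face n is simply layer n). With illustrative numbers: if one mip chain occupies 0x1000 bytes of which mip 0 accounts for 0xc00, the image at layer 2, mip 1 starts at 2 * 0x1000 + 0xc00 = 0x2c00.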
@@ -863,7 +1081,7 @@ static inline uint32
 svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format,
                                  SVGA3dSize base_level_size,
                                  uint32 num_mip_levels,
-                                 bool cubemap)
+                                  uint32 num_layers)
 {
        const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format);
        uint64_t total_size = 0;
@@ -876,8 +1094,7 @@ svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format,
                                                                  &size, 0);
        }
 
-       if (cubemap)
-               total_size *= SVGA3D_MAX_SURFACE_FACES;
+       total_size *= num_layers;
 
        return (total_size > (uint64_t) MAX_UINT32) ? MAX_UINT32 : 
                                                       (uint32) total_size;
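With the bool replaced by a layer count, a cube map is requested as num_layers == SVGA3D_MAX_SURFACE_FACES (the same factor of six the old cubemap branch applied), a plain 2D or volume surface passes 1, and array surfaces pass their real layer count; the result still saturates at MAX_UINT32.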
index fc4a6b9..de711c3 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
  */
 
 #define SVGA3D_INVALID_ID         ((uint32)-1)
-#define SVGA3D_INVALID_CID        SVGA3D_INVALID_ID
-#define SVGA3D_INVALID_SID        SVGA3D_INVALID_ID
-#define SVGA3D_INVALID_SHID       SVGA3D_INVALID_ID
-
 
 typedef uint32 SVGA3dBool; /* 32-bit Bool definition */
 typedef uint32 SVGA3dColor; /* a, r, g, b */
@@ -116,13 +112,7 @@ SVGA3dPoint;
 
 /*
  * Surface formats.
- *
- * If you modify this list, be sure to keep GLUtil.c in sync. It
- * includes the internal format definition of each surface in
- * GLUtil_ConvertSurfaceFormat, and it contains a table of
- * human-readable names in GLUtil_GetFormatName.
  */
-
 typedef enum SVGA3dSurfaceFormat {
    SVGA3D_FORMAT_INVALID               = 0,
 
@@ -155,7 +145,7 @@ typedef enum SVGA3dSurfaceFormat {
    SVGA3D_BUMPU8V8                     = 20,
    SVGA3D_BUMPL6V5U5                   = 21,
    SVGA3D_BUMPX8L8V8U8                 = 22,
-   SVGA3D_BUMPL8V8U8                   = 23,
+   SVGA3D_FORMAT_DEAD1                 = 23,
 
    SVGA3D_ARGB_S10E5                   = 24,   /* 16-bit floating-point ARGB */
    SVGA3D_ARGB_S23E8                   = 25,   /* 32-bit floating-point ARGB */
@@ -271,7 +261,7 @@ typedef enum SVGA3dSurfaceFormat {
    SVGA3D_B8G8R8X8_TYPELESS            = 116,
    SVGA3D_B8G8R8X8_UNORM_SRGB          = 117,
 
-   /* Advanced D3D9 depth formats. */
+   /* Advanced depth formats. */
    SVGA3D_Z_DF16                       = 118,
    SVGA3D_Z_DF24                       = 119,
    SVGA3D_Z_D24S8_INT                  = 120,
@@ -306,13 +296,157 @@ typedef enum SVGA3dSurfaceFormat {
    SVGA3D_FORMAT_MAX
 } SVGA3dSurfaceFormat;
 
+typedef uint32 SVGA3dSurfaceFlags;
+#define SVGA3D_SURFACE_CUBEMAP                (1 << 0)
+
+/*
+ * HINT flags are not enforced by the device but are useful for
+ * performance.
+ */
+#define SVGA3D_SURFACE_HINT_STATIC            (1 << 1)
+#define SVGA3D_SURFACE_HINT_DYNAMIC           (1 << 2)
+#define SVGA3D_SURFACE_HINT_INDEXBUFFER       (1 << 3)
+#define SVGA3D_SURFACE_HINT_VERTEXBUFFER      (1 << 4)
+#define SVGA3D_SURFACE_HINT_TEXTURE           (1 << 5)
+#define SVGA3D_SURFACE_HINT_RENDERTARGET      (1 << 6)
+#define SVGA3D_SURFACE_HINT_DEPTHSTENCIL      (1 << 7)
+#define SVGA3D_SURFACE_HINT_WRITEONLY         (1 << 8)
+#define SVGA3D_SURFACE_MASKABLE_ANTIALIAS     (1 << 9)
+#define SVGA3D_SURFACE_AUTOGENMIPMAPS         (1 << 10)
+#define SVGA3D_SURFACE_DECODE_RENDERTARGET    (1 << 11)
+
+/*
+ * Is this surface using a base-level pitch for its mob backing?
+ *
+ * This flag is not intended to be set by guest-drivers, but is instead
+ * set by the device when the surface is bound to a mob with a specified
+ * pitch.
+ */
+#define SVGA3D_SURFACE_MOB_PITCH              (1 << 12)
+
+#define SVGA3D_SURFACE_INACTIVE               (1 << 13)
+#define SVGA3D_SURFACE_HINT_RT_LOCKABLE       (1 << 14)
+#define SVGA3D_SURFACE_VOLUME                 (1 << 15)
+
+/*
+ * Required to be set on a surface to bind it to a screen target.
+ */
+#define SVGA3D_SURFACE_SCREENTARGET           (1 << 16)
+
+/*
+ * Align images in the guest-backing mob to 16 bytes.
+ */
+#define SVGA3D_SURFACE_ALIGN16                (1 << 17)
+
+#define SVGA3D_SURFACE_1D                     (1 << 18)
+#define SVGA3D_SURFACE_ARRAY                  (1 << 19)
+
+/*
+ * Bind flags.
+ * These are enforced for any surface defined with DefineGBSurface_v2.
+ */
+#define SVGA3D_SURFACE_BIND_VERTEX_BUFFER     (1 << 20)
+#define SVGA3D_SURFACE_BIND_INDEX_BUFFER      (1 << 21)
+#define SVGA3D_SURFACE_BIND_CONSTANT_BUFFER   (1 << 22)
+#define SVGA3D_SURFACE_BIND_SHADER_RESOURCE   (1 << 23)
+#define SVGA3D_SURFACE_BIND_RENDER_TARGET     (1 << 24)
+#define SVGA3D_SURFACE_BIND_DEPTH_STENCIL     (1 << 25)
+#define SVGA3D_SURFACE_BIND_STREAM_OUTPUT     (1 << 26)
+
+/*
+ * The STAGING flags note that the surface will not be used directly by the
+ * drawing pipeline, i.e. that it will not be bound to any bind point.
+ * Staging surfaces may be used by copy operations to move data in and out
+ * of other surfaces.  No bind flags may be set on surfaces with this flag.
+ *
+ * The HINT_INDIRECT_UPDATE flag suggests that the surface will receive
+ * updates indirectly, i.e. the surface will not be updated directly, but
+ * will receive copies from staging surfaces.
+ */
+#define SVGA3D_SURFACE_STAGING_UPLOAD         (1 << 27)
+#define SVGA3D_SURFACE_STAGING_DOWNLOAD       (1 << 28)
+#define SVGA3D_SURFACE_HINT_INDIRECT_UPDATE   (1 << 29)
+
+/*
+ * Setting this flag allows this surface to be used with the
+ * SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER command.  It is only valid for
+ * buffer surfaces, and no bind flags are allowed to be set on surfaces
+ * with this flag.
+ */
+#define SVGA3D_SURFACE_TRANSFER_FROM_BUFFER   (1 << 30)
 
 /*
- * These are really the D3DFORMAT_OP defines from the wdk. We need
- * them so that we can query the host for what the supported surface
- * operations are (when we're using the D3D backend, in particular),
- * and so we can send those operations to the guest.
+ * Marker for the last defined bit in SVGA3dSurfaceFlags.
  */
+#define SVGA3D_SURFACE_FLAG_MAX               (1 << 31)
+
+#define SVGA3D_SURFACE_HB_DISALLOWED_MASK        \
+        (  SVGA3D_SURFACE_MOB_PITCH    |         \
+           SVGA3D_SURFACE_SCREENTARGET |         \
+           SVGA3D_SURFACE_ALIGN16 |              \
+           SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \
+           SVGA3D_SURFACE_BIND_STREAM_OUTPUT |   \
+           SVGA3D_SURFACE_STAGING_UPLOAD |       \
+           SVGA3D_SURFACE_STAGING_DOWNLOAD |     \
+           SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \
+           SVGA3D_SURFACE_TRANSFER_FROM_BUFFER   \
+        )
+
+#define SVGA3D_SURFACE_2D_DISALLOWED_MASK           \
+        (  SVGA3D_SURFACE_CUBEMAP |                 \
+           SVGA3D_SURFACE_MASKABLE_ANTIALIAS |      \
+           SVGA3D_SURFACE_AUTOGENMIPMAPS |          \
+           SVGA3D_SURFACE_DECODE_RENDERTARGET |     \
+           SVGA3D_SURFACE_VOLUME |                  \
+           SVGA3D_SURFACE_1D |                      \
+           SVGA3D_SURFACE_BIND_VERTEX_BUFFER |      \
+           SVGA3D_SURFACE_BIND_INDEX_BUFFER |       \
+           SVGA3D_SURFACE_BIND_CONSTANT_BUFFER |    \
+           SVGA3D_SURFACE_BIND_DEPTH_STENCIL |      \
+           SVGA3D_SURFACE_BIND_STREAM_OUTPUT |      \
+           SVGA3D_SURFACE_TRANSFER_FROM_BUFFER      \
+        )
+
+#define SVGA3D_SURFACE_SCREENTARGET_DISALLOWED_MASK \
+        (  SVGA3D_SURFACE_CUBEMAP |                 \
+           SVGA3D_SURFACE_AUTOGENMIPMAPS |          \
+           SVGA3D_SURFACE_DECODE_RENDERTARGET |     \
+           SVGA3D_SURFACE_VOLUME |                  \
+           SVGA3D_SURFACE_1D |                      \
+           SVGA3D_SURFACE_BIND_VERTEX_BUFFER |      \
+           SVGA3D_SURFACE_BIND_INDEX_BUFFER |       \
+           SVGA3D_SURFACE_BIND_CONSTANT_BUFFER |    \
+           SVGA3D_SURFACE_BIND_DEPTH_STENCIL |      \
+           SVGA3D_SURFACE_BIND_STREAM_OUTPUT |      \
+           SVGA3D_SURFACE_INACTIVE |                \
+           SVGA3D_SURFACE_STAGING_UPLOAD |          \
+           SVGA3D_SURFACE_STAGING_DOWNLOAD |        \
+           SVGA3D_SURFACE_HINT_INDIRECT_UPDATE |    \
+           SVGA3D_SURFACE_TRANSFER_FROM_BUFFER      \
+        )
+
+#define SVGA3D_SURFACE_DX_ONLY_MASK             \
+        (  SVGA3D_SURFACE_BIND_STREAM_OUTPUT |  \
+           SVGA3D_SURFACE_STAGING_UPLOAD |      \
+           SVGA3D_SURFACE_STAGING_DOWNLOAD |    \
+           SVGA3D_SURFACE_TRANSFER_FROM_BUFFER  \
+        )
+
+#define SVGA3D_SURFACE_STAGING_MASK             \
+        (  SVGA3D_SURFACE_STAGING_UPLOAD |      \
+           SVGA3D_SURFACE_STAGING_DOWNLOAD      \
+        )
+
+#define SVGA3D_SURFACE_BIND_MASK                  \
+        (  SVGA3D_SURFACE_BIND_VERTEX_BUFFER   |  \
+           SVGA3D_SURFACE_BIND_INDEX_BUFFER    |  \
+           SVGA3D_SURFACE_BIND_CONSTANT_BUFFER |  \
+           SVGA3D_SURFACE_BIND_SHADER_RESOURCE |  \
+           SVGA3D_SURFACE_BIND_RENDER_TARGET   |  \
+           SVGA3D_SURFACE_BIND_DEPTH_STENCIL   |  \
+           SVGA3D_SURFACE_BIND_STREAM_OUTPUT      \
+        )
+
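These masks let flag validation reduce to bitwise tests. A minimal sketch of one such check, enforcing the staging rule documented above (the helper is hypothetical, not part of this patch):

static inline int
surface_flags_compatible(SVGA3dSurfaceFlags flags)
{
   /* No bind flags may be set on a staging surface. */
   if ((flags & SVGA3D_SURFACE_STAGING_MASK) != 0 &&
       (flags & SVGA3D_SURFACE_BIND_MASK) != 0)
      return 0;
   return 1;
}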
 typedef enum {
    SVGA3DFORMAT_OP_TEXTURE                               = 0x00000001,
    SVGA3DFORMAT_OP_VOLUMETEXTURE                         = 0x00000002,
@@ -656,25 +790,27 @@ union {
 SVGA3dLinePattern;
 
 typedef enum {
-   SVGA3D_BLENDOP_INVALID            = 0,
-   SVGA3D_BLENDOP_MIN                = 1,
-   SVGA3D_BLENDOP_ZERO               = 1,
-   SVGA3D_BLENDOP_ONE                = 2,
-   SVGA3D_BLENDOP_SRCCOLOR           = 3,
-   SVGA3D_BLENDOP_INVSRCCOLOR        = 4,
-   SVGA3D_BLENDOP_SRCALPHA           = 5,
-   SVGA3D_BLENDOP_INVSRCALPHA        = 6,
-   SVGA3D_BLENDOP_DESTALPHA          = 7,
-   SVGA3D_BLENDOP_INVDESTALPHA       = 8,
-   SVGA3D_BLENDOP_DESTCOLOR          = 9,
-   SVGA3D_BLENDOP_INVDESTCOLOR       = 10,
-   SVGA3D_BLENDOP_SRCALPHASAT        = 11,
-   SVGA3D_BLENDOP_BLENDFACTOR        = 12,
-   SVGA3D_BLENDOP_INVBLENDFACTOR     = 13,
-   SVGA3D_BLENDOP_SRC1COLOR          = 14,
-   SVGA3D_BLENDOP_INVSRC1COLOR       = 15,
-   SVGA3D_BLENDOP_SRC1ALPHA          = 16,
-   SVGA3D_BLENDOP_INVSRC1ALPHA       = 17,
+   SVGA3D_BLENDOP_INVALID             = 0,
+   SVGA3D_BLENDOP_MIN                 = 1,
+   SVGA3D_BLENDOP_ZERO                = 1,
+   SVGA3D_BLENDOP_ONE                 = 2,
+   SVGA3D_BLENDOP_SRCCOLOR            = 3,
+   SVGA3D_BLENDOP_INVSRCCOLOR         = 4,
+   SVGA3D_BLENDOP_SRCALPHA            = 5,
+   SVGA3D_BLENDOP_INVSRCALPHA         = 6,
+   SVGA3D_BLENDOP_DESTALPHA           = 7,
+   SVGA3D_BLENDOP_INVDESTALPHA        = 8,
+   SVGA3D_BLENDOP_DESTCOLOR           = 9,
+   SVGA3D_BLENDOP_INVDESTCOLOR        = 10,
+   SVGA3D_BLENDOP_SRCALPHASAT         = 11,
+   SVGA3D_BLENDOP_BLENDFACTOR         = 12,
+   SVGA3D_BLENDOP_INVBLENDFACTOR      = 13,
+   SVGA3D_BLENDOP_SRC1COLOR           = 14,
+   SVGA3D_BLENDOP_INVSRC1COLOR        = 15,
+   SVGA3D_BLENDOP_SRC1ALPHA           = 16,
+   SVGA3D_BLENDOP_INVSRC1ALPHA        = 17,
+   SVGA3D_BLENDOP_BLENDFACTORALPHA    = 18,
+   SVGA3D_BLENDOP_INVBLENDFACTORALPHA = 19,
    SVGA3D_BLENDOP_MAX
 } SVGA3dBlendOp;
 
@@ -690,6 +826,27 @@ typedef enum {
 } SVGA3dBlendEquation;
 
 typedef enum {
+   SVGA3D_DX11_LOGICOP_MIN           = 0,
+   SVGA3D_DX11_LOGICOP_CLEAR         = 0,
+   SVGA3D_DX11_LOGICOP_SET           = 1,
+   SVGA3D_DX11_LOGICOP_COPY          = 2,
+   SVGA3D_DX11_LOGICOP_COPY_INVERTED = 3,
+   SVGA3D_DX11_LOGICOP_NOOP          = 4,
+   SVGA3D_DX11_LOGICOP_INVERT        = 5,
+   SVGA3D_DX11_LOGICOP_AND           = 6,
+   SVGA3D_DX11_LOGICOP_NAND          = 7,
+   SVGA3D_DX11_LOGICOP_OR            = 8,
+   SVGA3D_DX11_LOGICOP_NOR           = 9,
+   SVGA3D_DX11_LOGICOP_XOR           = 10,
+   SVGA3D_DX11_LOGICOP_EQUIV         = 11,
+   SVGA3D_DX11_LOGICOP_AND_REVERSE   = 12,
+   SVGA3D_DX11_LOGICOP_AND_INVERTED  = 13,
+   SVGA3D_DX11_LOGICOP_OR_REVERSE    = 14,
+   SVGA3D_DX11_LOGICOP_OR_INVERTED   = 15,
+   SVGA3D_DX11_LOGICOP_MAX
+} SVGA3dDX11LogicOp;
+
+typedef enum {
    SVGA3D_FRONTWINDING_INVALID = 0,
    SVGA3D_FRONTWINDING_CW      = 1,
    SVGA3D_FRONTWINDING_CCW     = 2,
@@ -952,10 +1109,10 @@ typedef enum {
    SVGA3D_TEX_FILTER_NEAREST        = 1,
    SVGA3D_TEX_FILTER_LINEAR         = 2,
    SVGA3D_TEX_FILTER_ANISOTROPIC    = 3,
-   SVGA3D_TEX_FILTER_FLATCUBIC      = 4, // Deprecated, not implemented
-   SVGA3D_TEX_FILTER_GAUSSIANCUBIC  = 5, // Deprecated, not implemented
-   SVGA3D_TEX_FILTER_PYRAMIDALQUAD  = 6, // Not currently implemented
-   SVGA3D_TEX_FILTER_GAUSSIANQUAD   = 7, // Not currently implemented
+   SVGA3D_TEX_FILTER_FLATCUBIC      = 4, /* Deprecated, not implemented */
+   SVGA3D_TEX_FILTER_GAUSSIANCUBIC  = 5, /* Deprecated, not implemented */
+   SVGA3D_TEX_FILTER_PYRAMIDALQUAD  = 6, /* Not currently implemented */
+   SVGA3D_TEX_FILTER_GAUSSIANQUAD   = 7, /* Not currently implemented */
    SVGA3D_TEX_FILTER_MAX
 } SVGA3dTextureFilter;
 
@@ -1013,19 +1170,19 @@ typedef enum {
 
 typedef enum {
    SVGA3D_DECLUSAGE_POSITION     = 0,
-   SVGA3D_DECLUSAGE_BLENDWEIGHT,       //  1
-   SVGA3D_DECLUSAGE_BLENDINDICES,      //  2
-   SVGA3D_DECLUSAGE_NORMAL,            //  3
-   SVGA3D_DECLUSAGE_PSIZE,             //  4
-   SVGA3D_DECLUSAGE_TEXCOORD,          //  5
-   SVGA3D_DECLUSAGE_TANGENT,           //  6
-   SVGA3D_DECLUSAGE_BINORMAL,          //  7
-   SVGA3D_DECLUSAGE_TESSFACTOR,        //  8
-   SVGA3D_DECLUSAGE_POSITIONT,         //  9
-   SVGA3D_DECLUSAGE_COLOR,             // 10
-   SVGA3D_DECLUSAGE_FOG,               // 11
-   SVGA3D_DECLUSAGE_DEPTH,             // 12
-   SVGA3D_DECLUSAGE_SAMPLE,            // 13
+   SVGA3D_DECLUSAGE_BLENDWEIGHT,
+   SVGA3D_DECLUSAGE_BLENDINDICES,
+   SVGA3D_DECLUSAGE_NORMAL,
+   SVGA3D_DECLUSAGE_PSIZE,
+   SVGA3D_DECLUSAGE_TEXCOORD,
+   SVGA3D_DECLUSAGE_TANGENT,
+   SVGA3D_DECLUSAGE_BINORMAL,
+   SVGA3D_DECLUSAGE_TESSFACTOR,
+   SVGA3D_DECLUSAGE_POSITIONT,
+   SVGA3D_DECLUSAGE_COLOR,
+   SVGA3D_DECLUSAGE_FOG,
+   SVGA3D_DECLUSAGE_DEPTH,
+   SVGA3D_DECLUSAGE_SAMPLE,
    SVGA3D_DECLUSAGE_MAX
 } SVGA3dDeclUsage;
 
@@ -1033,10 +1190,11 @@ typedef enum {
    SVGA3D_DECLMETHOD_DEFAULT     = 0,
    SVGA3D_DECLMETHOD_PARTIALU,
    SVGA3D_DECLMETHOD_PARTIALV,
-   SVGA3D_DECLMETHOD_CROSSUV,          // Normal
+   SVGA3D_DECLMETHOD_CROSSUV,          /* Normal */
    SVGA3D_DECLMETHOD_UV,
-   SVGA3D_DECLMETHOD_LOOKUP,           // Lookup a displacement map
-   SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map
+   SVGA3D_DECLMETHOD_LOOKUP,           /* Lookup a displacement map */
+   SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, /* Lookup a pre-sampled
+                                          displacement map */
 } SVGA3dDeclMethod;
 
 typedef enum {
@@ -1162,17 +1320,23 @@ typedef enum {
    SVGA3D_SHADERTYPE_MIN                        = 1,
    SVGA3D_SHADERTYPE_VS                         = 1,
    SVGA3D_SHADERTYPE_PS                         = 2,
-   SVGA3D_SHADERTYPE_MAX                        = 3,
    SVGA3D_SHADERTYPE_PREDX_MAX                  = 3,
    SVGA3D_SHADERTYPE_GS                         = 3,
-   SVGA3D_SHADERTYPE_DX_MAX                     = 4,
+   SVGA3D_SHADERTYPE_DX10_MAX                   = 4,
+   SVGA3D_SHADERTYPE_HS                         = 4,
+   SVGA3D_SHADERTYPE_DS                         = 5,
+   SVGA3D_SHADERTYPE_CS                         = 6,
+   SVGA3D_SHADERTYPE_MAX                        = 7
 } SVGA3dShaderType;
 
 #define SVGA3D_NUM_SHADERTYPE_PREDX \
    (SVGA3D_SHADERTYPE_PREDX_MAX - SVGA3D_SHADERTYPE_MIN)
 
-#define SVGA3D_NUM_SHADERTYPE_DX \
-   (SVGA3D_SHADERTYPE_DX_MAX - SVGA3D_SHADERTYPE_MIN)
+#define SVGA3D_NUM_SHADERTYPE_DX10 \
+   (SVGA3D_SHADERTYPE_DX10_MAX - SVGA3D_SHADERTYPE_MIN)
+
+#define SVGA3D_NUM_SHADERTYPE \
+   (SVGA3D_SHADERTYPE_MAX - SVGA3D_SHADERTYPE_MIN)
 
 typedef enum {
    SVGA3D_CONST_TYPE_MIN                        = 0,
@@ -1196,33 +1360,151 @@ typedef enum {
 } SVGA3dStretchBltMode;
 
 typedef enum {
-   SVGA3D_QUERYTYPE_INVALID                     = ((uint32)-1),
+   SVGA3D_QUERYTYPE_INVALID                     = ((uint8)-1),
    SVGA3D_QUERYTYPE_MIN                         = 0,
    SVGA3D_QUERYTYPE_OCCLUSION                   = 0,
-   SVGA3D_QUERYTYPE_EVENT                       = 1,
-   SVGA3D_QUERYTYPE_TIMESTAMP                   = 2,
-   SVGA3D_QUERYTYPE_TIMESTAMPDISJOINT           = 3,
-   SVGA3D_QUERYTYPE_PIPELINESTATS               = 4,
-   SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE          = 5,
-   SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS           = 6,
-   SVGA3D_QUERYTYPE_STREAMOVERFLOWPREDICATE     = 7,
-   SVGA3D_QUERYTYPE_OCCLUSION64                 = 8,
+   SVGA3D_QUERYTYPE_TIMESTAMP                   = 1,
+   SVGA3D_QUERYTYPE_TIMESTAMPDISJOINT           = 2,
+   SVGA3D_QUERYTYPE_PIPELINESTATS               = 3,
+   SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE          = 4,
+   SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS           = 5,
+   SVGA3D_QUERYTYPE_STREAMOVERFLOWPREDICATE     = 6,
+   SVGA3D_QUERYTYPE_OCCLUSION64                 = 7,
+   SVGA3D_QUERYTYPE_DX10_MAX                    = 8,
+   SVGA3D_QUERYTYPE_SOSTATS_STREAM0             = 8,
+   SVGA3D_QUERYTYPE_SOSTATS_STREAM1             = 9,
+   SVGA3D_QUERYTYPE_SOSTATS_STREAM2             = 10,
+   SVGA3D_QUERYTYPE_SOSTATS_STREAM3             = 11,
+   SVGA3D_QUERYTYPE_SOP_STREAM0                 = 12,
+   SVGA3D_QUERYTYPE_SOP_STREAM1                 = 13,
+   SVGA3D_QUERYTYPE_SOP_STREAM2                 = 14,
+   SVGA3D_QUERYTYPE_SOP_STREAM3                 = 15,
    SVGA3D_QUERYTYPE_MAX
 } SVGA3dQueryType;
 
+typedef uint8 SVGA3dQueryTypeUint8;
+
 #define SVGA3D_NUM_QUERYTYPE  (SVGA3D_QUERYTYPE_MAX - SVGA3D_QUERYTYPE_MIN)
 
 /*
  * This is the maximum number of queries per context that can be active
  * simultaneously between a beginQuery and endQuery.
  */
-#define SVGA3D_MAX_QUERY_PER_CONTEXT 64
+#define SVGA3D_MAX_QUERY 64
+
+/*
+ * Query result buffer formats
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 samplesRendered;
+}
+#include "vmware_pack_end.h"
+SVGADXOcclusionQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 passed;
+}
+#include "vmware_pack_end.h"
+SVGADXEventQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint64 timestamp;
+}
+#include "vmware_pack_end.h"
+SVGADXTimestampQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint64 realFrequency;
+   uint32 disjoint;
+}
+#include "vmware_pack_end.h"
+SVGADXTimestampDisjointQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint64 inputAssemblyVertices;
+   uint64 inputAssemblyPrimitives;
+   uint64 vertexShaderInvocations;
+   uint64 geometryShaderInvocations;
+   uint64 geometryShaderPrimitives;
+   uint64 clipperInvocations;
+   uint64 clipperPrimitives;
+   uint64 pixelShaderInvocations;
+   uint64 hullShaderInvocations;
+   uint64 domainShaderInvocations;
+   uint64 computeShaderInvocations;
+}
+#include "vmware_pack_end.h"
+SVGADXPipelineStatisticsQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 anySamplesRendered;
+}
+#include "vmware_pack_end.h"
+SVGADXOcclusionPredicateQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint64 numPrimitivesWritten;
+   uint64 numPrimitivesRequired;
+}
+#include "vmware_pack_end.h"
+SVGADXStreamOutStatisticsQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint32 overflowed;
+}
+#include "vmware_pack_end.h"
+SVGADXStreamOutPredicateQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+   uint64 samplesRendered;
+}
+#include "vmware_pack_end.h"
+SVGADXOcclusion64QueryResult;
+
+/*
+ * SVGADXQueryResultUnion is not intended for use in the protocol, but is
+ * very helpful when working with queries generically.
+ */
+typedef
+#include "vmware_pack_begin.h"
+union SVGADXQueryResultUnion {
+   SVGADXOcclusionQueryResult occ;
+   SVGADXEventQueryResult event;
+   SVGADXTimestampQueryResult ts;
+   SVGADXTimestampDisjointQueryResult tsDisjoint;
+   SVGADXPipelineStatisticsQueryResult pipelineStats;
+   SVGADXOcclusionPredicateQueryResult occPred;
+   SVGADXStreamOutStatisticsQueryResult soStats;
+   SVGADXStreamOutPredicateQueryResult soPred;
+   SVGADXOcclusion64QueryResult occ64;
+}
+#include "vmware_pack_end.h"
+SVGADXQueryResultUnion;
+
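
A minimal sketch of the generic query handling this union enables; the helper name is hypothetical, and only the two occlusion payloads are shown:

static uint64
svga_query_samples(SVGA3dQueryType type,            /* hypothetical helper */
                   const SVGADXQueryResultUnion *result)
{
   /* Select the payload that matches the query type. */
   switch (type) {
   case SVGA3D_QUERYTYPE_OCCLUSION:
      return result->occ.samplesRendered;    /* 32-bit counter */
   case SVGA3D_QUERYTYPE_OCCLUSION64:
      return result->occ64.samplesRendered;  /* 64-bit counter */
   default:
      return 0;  /* other query types carry different payloads */
   }
}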
 
 typedef enum {
-   SVGA3D_QUERYSTATE_PENDING     = 0,      /* Waiting on the host (set by guest) */
-   SVGA3D_QUERYSTATE_SUCCEEDED   = 1,      /* Completed successfully (set by host) */
-   SVGA3D_QUERYSTATE_FAILED      = 2,      /* Completed unsuccessfully (set by host) */
-   SVGA3D_QUERYSTATE_NEW         = 3,      /* Never submitted (For guest use only) */
+   SVGA3D_QUERYSTATE_PENDING     = 0,      /* Query is not finished yet */
+   SVGA3D_QUERYSTATE_SUCCEEDED   = 1,      /* Completed successfully */
+   SVGA3D_QUERYSTATE_FAILED      = 2,      /* Completed unsuccessfully */
+   SVGA3D_QUERYSTATE_NEW         = 3,      /* Never submitted (guest only) */
 } SVGA3dQueryState;
 
 typedef enum {
@@ -1249,9 +1531,9 @@ typedef
 struct {
    union {
       struct {
-         uint16  function;       // SVGA3dFogFunction
-         uint8   type;           // SVGA3dFogType
-         uint8   base;           // SVGA3dFogBase
+         uint16  function;       /* SVGA3dFogFunction */
+         uint8   type;           /* SVGA3dFogType */
+         uint8   base;           /* SVGA3dFogBase */
       };
       uint32     uintValue;
    };
@@ -1287,8 +1569,47 @@ SVGA3dSize;
 /*
  * Guest-backed objects definitions.
  */
+typedef enum {
+   SVGA_OTABLE_MOB             = 0,
+   SVGA_OTABLE_MIN             = 0,
+   SVGA_OTABLE_SURFACE         = 1,
+   SVGA_OTABLE_CONTEXT         = 2,
+   SVGA_OTABLE_SHADER          = 3,
+   SVGA_OTABLE_SCREENTARGET    = 4,
+
+   SVGA_OTABLE_DX9_MAX         = 5,
 
-typedef uint32 SVGAMobId;
+   SVGA_OTABLE_DXCONTEXT       = 5,
+   SVGA_OTABLE_MAX             = 6
+} SVGAOTableType;
+
+/*
+ * Deprecated.
+ */
+#define SVGA_OTABLE_COUNT 4
+
+typedef enum {
+   SVGA_COTABLE_MIN             = 0,
+   SVGA_COTABLE_RTVIEW          = 0,
+   SVGA_COTABLE_DSVIEW          = 1,
+   SVGA_COTABLE_SRVIEW          = 2,
+   SVGA_COTABLE_ELEMENTLAYOUT   = 3,
+   SVGA_COTABLE_BLENDSTATE      = 4,
+   SVGA_COTABLE_DEPTHSTENCIL    = 5,
+   SVGA_COTABLE_RASTERIZERSTATE = 6,
+   SVGA_COTABLE_SAMPLER         = 7,
+   SVGA_COTABLE_STREAMOUTPUT    = 8,
+   SVGA_COTABLE_DXQUERY         = 9,
+   SVGA_COTABLE_DXSHADER        = 10,
+   SVGA_COTABLE_DX10_MAX        = 11,
+   SVGA_COTABLE_UAVIEW          = 11,
+   SVGA_COTABLE_MAX
+} SVGACOTableType;
+
+/*
+ * The largest size (number of entries) allowed in a COTable.
+ */
+#define SVGA_COTABLE_MAX_IDS (MAX_UINT16 - 2)
 
 typedef enum SVGAMobFormat {
    SVGA3D_MOBFMT_INVALID     = SVGA3D_INVALID_ID,
@@ -1300,7 +1621,11 @@ typedef enum SVGAMobFormat {
    SVGA3D_MOBFMT_PTDEPTH64_0 = 4,
    SVGA3D_MOBFMT_PTDEPTH64_1 = 5,
    SVGA3D_MOBFMT_PTDEPTH64_2 = 6,
+   SVGA3D_MOBFMT_PREDX_MAX   = 7,
+   SVGA3D_MOBFMT_EMPTY       = 7,
    SVGA3D_MOBFMT_MAX,
 } SVGAMobFormat;
 
-#endif // _SVGA3D_TYPES_H_
+#define SVGA3D_MOB_EMPTY_BASE 1
+
+#endif /* _SVGA3D_TYPES_H_ */
index 9d44c47..884b1d1 100644
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2007-2014 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -75,7 +75,7 @@
  */
 
 #define SVGA_ESCAPE_VMWARE_HINT               0x00030000
-#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN    0x00030001  // Deprecated
+#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN    0x00030001  /* Deprecated */
 
 typedef
 struct {
index ccbf791..161c3de 100644
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2007-2014 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -38,9 +38,9 @@
  * Video formats we support
  */
 
-#define VMWARE_FOURCC_YV12 0x32315659 // 'Y' 'V' '1' '2'
-#define VMWARE_FOURCC_YUY2 0x32595559 // 'Y' 'U' 'Y' '2'
-#define VMWARE_FOURCC_UYVY 0x59565955 // 'U' 'Y' 'V' 'Y'
+#define VMWARE_FOURCC_YV12 0x32315659 /* 'Y' 'V' '1' '2' */
+#define VMWARE_FOURCC_YUY2 0x32595559 /* 'Y' 'U' 'Y' '2' */
+#define VMWARE_FOURCC_UYVY 0x59565955 /* 'U' 'Y' 'V' 'Y' */
 
 typedef enum {
    SVGA_OVERLAY_FORMAT_INVALID = 0,
@@ -68,7 +68,7 @@ struct SVGAEscapeVideoSetRegs {
       uint32 streamId;
    } header;
 
-   // May include zero or more items.
+   /* May include zero or more items. */
    struct {
       uint32 registerId;
       uint32 value;
@@ -134,12 +134,12 @@ struct {
  */
 
 static inline Bool
-VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
-                         uint32 *width,                     // IN / OUT
-                         uint32 *height,                    // IN / OUT
-                         uint32 *size,                      // OUT
-                         uint32 *pitches,                   // OUT (optional)
-                         uint32 *offsets)                   // OUT (optional)
+VMwareVideoGetAttributes(const SVGAOverlayFormat format,    /* IN */
+                         uint32 *width,                     /* IN / OUT */
+                         uint32 *height,                    /* IN / OUT */
+                         uint32 *size,                      /* OUT */
+                         uint32 *pitches,                   /* OUT (optional) */
+                         uint32 *offsets)                   /* OUT (optional) */
 {
     int tmp;
 
@@ -196,4 +196,4 @@ VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
     return TRUE;
 }
 
-#endif // _SVGA_OVERLAY_H_
+#endif /* _SVGA_OVERLAY_H_ */
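
A hypothetical call of the helper above; SVGA_OVERLAY_FORMAT_YV12 is assumed to be one of the enumerants elided from this hunk, and pitches/offsets may be passed as NULL per the OUT (optional) annotations:

uint32 width = 640, height = 480;   /* IN / OUT: may be adjusted */
uint32 size;                        /* OUT: total frame size in bytes */
uint32 pitches[3], offsets[3];      /* OUT (optional): per-plane layout */

if (VMwareVideoGetAttributes(SVGA_OVERLAY_FORMAT_YV12, &width, &height,
                             &size, pitches, offsets)) {
   /* size, pitches and offsets now describe the frame layout. */
}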
index e75b442..2661eef 100644
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+ * Copyright 1998-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -43,6 +43,8 @@ typedef enum {
    SVGA_REG_ENABLE_HIDE = (1 << 1),
 } SvgaRegEnable;
 
+typedef uint32 SVGAMobId;
+
 /*
  * Arbitrary and meaningless limits. Please ignore these when writing
  * new drivers.
@@ -490,7 +492,7 @@ typedef struct SVGAGMRImageFormat {
       struct {
          uint32 bitsPerPixel : 8;
          uint32 colorDepth   : 8;
-         uint32 reserved     : 16;  // Must be zero
+         uint32 reserved     : 16;  /* Must be zero */
       };
 
       uint32 value;
@@ -533,7 +535,7 @@ typedef struct SVGAColorBGRX {
          uint32 b : 8;
          uint32 g : 8;
          uint32 r : 8;
-         uint32 x : 8;  // Unused
+         uint32 x : 8;  /* Unused */
       };
 
       uint32 value;
@@ -605,24 +607,35 @@ struct {
  * SVGA_CAP_COMMAND_BUFFERS --
  *    Enable register based command buffer submission.
  *
+ * SVGA_CAP_DEAD1 --
+ *    This cap was incorrectly used by old drivers and should not be
+ *    reused.
+ *
+ * SVGA_CAP_CMD_BUFFERS_2 --
+ *    Enable support for the prepend command buffer submission
+ *    registers, SVGA_REG_CMD_PREPEND_LOW and
+ *    SVGA_REG_CMD_PREPEND_HIGH.
+ *
  * SVGA_CAP_GBOBJECTS --
  *    Enable guest-backed objects and surfaces.
  *
+ * SVGA_CAP_CMD_BUFFERS_3 --
+ *    Enable support for command buffers in a mob.
  */
 
 #define SVGA_CAP_NONE               0x00000000
 #define SVGA_CAP_RECT_COPY          0x00000002
 #define SVGA_CAP_CURSOR             0x00000020
-#define SVGA_CAP_CURSOR_BYPASS      0x00000040   // Legacy (Use Cursor Bypass 3 instead)
-#define SVGA_CAP_CURSOR_BYPASS_2    0x00000080   // Legacy (Use Cursor Bypass 3 instead)
+#define SVGA_CAP_CURSOR_BYPASS      0x00000040
+#define SVGA_CAP_CURSOR_BYPASS_2    0x00000080
 #define SVGA_CAP_8BIT_EMULATION     0x00000100
 #define SVGA_CAP_ALPHA_CURSOR       0x00000200
 #define SVGA_CAP_3D                 0x00004000
 #define SVGA_CAP_EXTENDED_FIFO      0x00008000
-#define SVGA_CAP_MULTIMON           0x00010000   // Legacy multi-monitor support
+#define SVGA_CAP_MULTIMON           0x00010000
 #define SVGA_CAP_PITCHLOCK          0x00020000
 #define SVGA_CAP_IRQMASK            0x00040000
-#define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000   // Legacy multi-monitor support
+#define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000
 #define SVGA_CAP_GMR                0x00100000
 #define SVGA_CAP_TRACES             0x00200000
 #define SVGA_CAP_GMR2               0x00400000
@@ -631,6 +644,9 @@ struct {
 #define SVGA_CAP_DEAD1              0x02000000
 #define SVGA_CAP_CMD_BUFFERS_2      0x04000000
 #define SVGA_CAP_GBOBJECTS          0x08000000
+#define SVGA_CAP_CMD_BUFFERS_3      0x10000000
+
+#define SVGA_CAP_CMD_RESERVED       0x80000000
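
A short guest-side sketch of how such capability bits are typically tested; reading the capability register is abstracted behind a hypothetical read_svga_register() helper, and SVGA_REG_CAPABILITIES is assumed to be defined elsewhere in this header:

uint32 caps = read_svga_register(SVGA_REG_CAPABILITIES);  /* hypothetical */

if (caps & SVGA_CAP_GBOBJECTS) {
   /* Guest-backed objects and surfaces may be used. */
}
if (caps & SVGA_CAP_CMD_BUFFERS_2) {
   /* The prepend registers SVGA_REG_CMD_PREPEND_LOW/HIGH are valid. */
}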
 
 
 /*
@@ -698,7 +714,7 @@ enum {
 
    SVGA_FIFO_CAPABILITIES = 4,
    SVGA_FIFO_FLAGS,
-   // Valid with SVGA_FIFO_CAP_FENCE:
+   /* Valid with SVGA_FIFO_CAP_FENCE: */
    SVGA_FIFO_FENCE,
 
    /*
@@ -710,20 +726,20 @@ enum {
     * These in block 3a, the VMX currently considers mandatory for the
     * extended FIFO.
     */
-   
-   // Valid if exists (i.e. if extended FIFO enabled):
+
+   /* Valid if exists (i.e. if extended FIFO enabled): */
    SVGA_FIFO_3D_HWVERSION,       /* See SVGA3dHardwareVersion in svga3d_reg.h */
-   // Valid with SVGA_FIFO_CAP_PITCHLOCK:
+   /* Valid with SVGA_FIFO_CAP_PITCHLOCK: */
    SVGA_FIFO_PITCHLOCK,
 
-   // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3:
+   /* Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: */
    SVGA_FIFO_CURSOR_ON,          /* Cursor bypass 3 show/hide register */
    SVGA_FIFO_CURSOR_X,           /* Cursor bypass 3 x register */
    SVGA_FIFO_CURSOR_Y,           /* Cursor bypass 3 y register */
    SVGA_FIFO_CURSOR_COUNT,       /* Incremented when any of the other 3 change */
    SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */
 
-   // Valid with SVGA_FIFO_CAP_RESERVE:
+   /* Valid with SVGA_FIFO_CAP_RESERVE: */
    SVGA_FIFO_RESERVED,           /* Bytes past NEXT_CMD with real contents */
 
    /*
@@ -789,7 +805,7 @@ enum {
     * sets SVGA_FIFO_MIN high enough to leave room for them.
     */
 
-   // Valid if register exists:
+   /* Valid if register exists: */
    SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */
    SVGA_FIFO_FENCE_GOAL,         /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */
    SVGA_FIFO_BUSY,               /* See "FIFO Synchronization Registers" */
@@ -1046,7 +1062,7 @@ enum {
 
 #define SVGA_FIFO_FLAG_NONE                 0
 #define SVGA_FIFO_FLAG_ACCELFRONT       (1<<0)
-#define SVGA_FIFO_FLAG_RESERVED        (1<<31) // Internal use only
+#define SVGA_FIFO_FLAG_RESERVED        (1<<31) /* Internal use only */
 
 /*
  * FIFO reservation sentinel value
@@ -1079,22 +1095,23 @@ enum {
    SVGA_VIDEO_DATA_OFFSET,
    SVGA_VIDEO_FORMAT,
    SVGA_VIDEO_COLORKEY,
-   SVGA_VIDEO_SIZE,          // Deprecated
+   SVGA_VIDEO_SIZE,          /* Deprecated */
    SVGA_VIDEO_WIDTH,
    SVGA_VIDEO_HEIGHT,
    SVGA_VIDEO_SRC_X,
    SVGA_VIDEO_SRC_Y,
    SVGA_VIDEO_SRC_WIDTH,
    SVGA_VIDEO_SRC_HEIGHT,
-   SVGA_VIDEO_DST_X,         // Signed int32
-   SVGA_VIDEO_DST_Y,         // Signed int32
+   SVGA_VIDEO_DST_X,         /* Signed int32 */
+   SVGA_VIDEO_DST_Y,         /* Signed int32 */
    SVGA_VIDEO_DST_WIDTH,
    SVGA_VIDEO_DST_HEIGHT,
    SVGA_VIDEO_PITCH_1,
    SVGA_VIDEO_PITCH_2,
    SVGA_VIDEO_PITCH_3,
-   SVGA_VIDEO_DATA_GMRID,    // Optional, defaults to SVGA_GMR_FRAMEBUFFER
-   SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID)
+   SVGA_VIDEO_DATA_GMRID,    /* Optional, defaults to SVGA_GMR_FRAMEBUFFER */
+   SVGA_VIDEO_DST_SCREEN_ID, /* Optional, defaults to virtual coords
+                                (SVGA_ID_INVALID) */
    SVGA_VIDEO_NUM_REGS
 };
 
@@ -1180,10 +1197,10 @@ typedef struct SVGADisplayTopology {
  *    value of zero means no cloning should happen.
  */
 
-#define SVGA_SCREEN_MUST_BE_SET     (1 << 0) // Must be set or results undefined
-#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET // Deprecated
-#define SVGA_SCREEN_IS_PRIMARY      (1 << 1) // Guest considers this screen to be 'primary'
-#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2)  // Guest is running a fullscreen app here
+#define SVGA_SCREEN_MUST_BE_SET     (1 << 0)
+#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET /* Deprecated */
+#define SVGA_SCREEN_IS_PRIMARY      (1 << 1)
+#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2)
 
 /*
  * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2.  When the screen is
@@ -1207,7 +1224,7 @@ typedef struct SVGADisplayTopology {
 
 typedef
 struct {
-   uint32 structSize;   // sizeof(SVGAScreenObject)
+   uint32 structSize;   /* sizeof(SVGAScreenObject) */
    uint32 id;
    uint32 flags;
    struct {
@@ -1224,6 +1241,13 @@ struct {
     * with SVGA_FIFO_CAP_SCREEN_OBJECT.
     */
    SVGAGuestImage backingStore;
+
+   /*
+    * The cloneCount field is treated as a hint from the guest that
+    * the user wants this display to be cloned, cloneCount times.
+    *
+    * A value of zero means no cloning should happen.
+    */
    uint32 cloneCount;
 } SVGAScreenObject;
 
@@ -1238,7 +1262,7 @@ struct {
  *  Note the holes in the command ID numbers: These commands have been
  *  deprecated, and the old IDs must not be reused.
  *
- *  Command IDs from 1000 to 1999 are reserved for use by the SVGA3D
+ *  Command IDs from 1000 to 2999 are reserved for use by the SVGA3D
  *  protocol.
  *
  *  Each command's parameters are described by the comments and
@@ -1267,6 +1291,8 @@ typedef enum {
    SVGA_CMD_REMAP_GMR2            = 42,
    SVGA_CMD_DEAD                  = 43,
    SVGA_CMD_DEAD_2                = 44,
+   SVGA_CMD_NOP                   = 45,
+   SVGA_CMD_NOP_ERROR             = 46,
    SVGA_CMD_MAX
 } SVGAFifoCmdId;
 
@@ -1372,13 +1398,13 @@ struct {
 
 typedef
 struct {
-   uint32 id;             // Reserved, must be zero.
+   uint32 id;             /* Reserved, must be zero. */
    uint32 hotspotX;
    uint32 hotspotY;
    uint32 width;
    uint32 height;
-   uint32 andMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
-   uint32 xorMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
+   uint32 andMaskDepth;   /* Value must be 1 or equal to BITS_PER_PIXEL */
+   uint32 xorMaskDepth;   /* Value must be 1 or equal to BITS_PER_PIXEL */
    /*
     * Followed by scanline data for AND mask, then XOR mask.
     * Each scanline is padded to a 32-bit boundary.
@@ -1401,7 +1427,7 @@ struct {
 
 typedef
 struct {
-   uint32 id;             // Reserved, must be zero.
+   uint32 id;             /* Reserved, must be zero. */
    uint32 hotspotX;
    uint32 hotspotY;
    uint32 width;
@@ -1449,12 +1475,12 @@ struct {
 
 typedef
 struct {
-   uint32 color;     // In the same format as the GFB
+   uint32 color;     /* In the same format as the GFB */
    uint32 x;
    uint32 y;
    uint32 width;
    uint32 height;
-   uint32 rop;       // Must be SVGA_ROP_COPY
+   uint32 rop;       /* Must be SVGA_ROP_COPY */
 } SVGAFifoCmdFrontRopFill;
 
 
@@ -1526,7 +1552,7 @@ struct {
 
 typedef
 struct {
-   SVGAScreenObject screen;   // Variable-length according to version
+   SVGAScreenObject screen;   /* Variable-length according to version */
 } SVGAFifoCmdDefineScreen;
 
 
@@ -1807,8 +1833,8 @@ typedef
 struct {
    uint32 gmrId;
    SVGARemapGMR2Flags flags;
-   uint32 offsetPages; // offset in pages to begin remap
-   uint32 numPages; // number of pages to remap
+   uint32 offsetPages; /* offset in pages to begin remap */
+   uint32 numPages; /* number of pages to remap */
    /*
     * Followed by additional data depending on SVGARemapGMR2Flags.
     *
@@ -1823,7 +1849,7 @@ struct {
 /*
  * Size of SVGA device memory such as frame buffer and FIFO.
  */
-#define SVGA_VRAM_MIN_SIZE             (4 * 640 * 480) // bytes
+#define SVGA_VRAM_MIN_SIZE             (4 * 640 * 480) /* bytes */
 #define SVGA_VRAM_MIN_SIZE_3D       (16 * 1024 * 1024)
 #define SVGA_VRAM_MAX_SIZE         (128 * 1024 * 1024)
 #define SVGA_MEMORY_SIZE_MAX      (1024 * 1024 * 1024)
@@ -1832,7 +1858,7 @@ struct {
 #define SVGA_GRAPHICS_MEMORY_KB_MAX       (2 * 1024 * 1024)
 #define SVGA_GRAPHICS_MEMORY_KB_DEFAULT   (256 * 1024)
 
-#define SVGA_VRAM_SIZE_W2K          (64 * 1024 * 1024) // 64 MB
+#define SVGA_VRAM_SIZE_W2K          (64 * 1024 * 1024) /* 64 MB */
 
 /*
  * To simplify autoDetect display configuration, support a minimum of
@@ -1848,7 +1874,7 @@ struct {
 #define SVGA_VRAM_SIZE               (4 * 1024 * 1024)
 #define SVGA_VRAM_SIZE_3D           (64 * 1024 * 1024)
 #define SVGA_FIFO_SIZE                    (256 * 1024)
-#define SVGA_FIFO_SIZE_3D                 (516 * 1024) // Bump to 516KB to workaround WDDM driver issue (see bug# 744318)
+#define SVGA_FIFO_SIZE_3D                 (516 * 1024)
 #define SVGA_MEMORY_SIZE_DEFAULT   (160 * 1024 * 1024)
 #define SVGA_AUTODETECT_DEFAULT                  FALSE
 #else
index b271832..d3cf52f 100644
@@ -66,7 +66,7 @@ surface_to_surfaceid(struct svga_winsys_context *swc, // IN
    if (surface) {
       struct svga_surface *s = svga_surface(surface);
       swc->surface_relocation(swc, &id->sid, NULL, s->handle, flags);
-      id->face = s->real_face; /* faces have the same order */
+      id->face = s->real_layer; /* faces have the same order */
       id->mipmap = s->real_level;
    }
    else {
@@ -460,7 +460,7 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
 
    swc->surface_relocation(swc, &cmd->host.sid, NULL,
                            texture->handle, surface_flags);
-   cmd->host.face = st->face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
+   cmd->host.face = st->slice; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
    cmd->host.mipmap = st->base.level;
 
    cmd->transfer = transfer;
@@ -842,6 +842,8 @@ SVGA3D_SetShader(struct svga_winsys_context *swc,
 {
    SVGA3dCmdSetShader *cmd;
 
+   assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_PS);
+
    cmd = SVGA3D_FIFOReserve(swc,
                             SVGA_3D_CMD_SET_SHADER, sizeof *cmd,
                             0);
@@ -1385,7 +1387,7 @@ SVGA3D_BeginGBQuery(struct svga_winsys_context *swc,
    if(!cmd)
       return PIPE_ERROR_OUT_OF_MEMORY;
 
-   swc->context_relocation(swc, &cmd->cid);
+   cmd->cid = swc->cid;
    cmd->type = type;
 
    swc->commit(swc);
@@ -1465,7 +1467,7 @@ SVGA3D_EndGBQuery(struct svga_winsys_context *swc,
    if(!cmd)
       return PIPE_ERROR_OUT_OF_MEMORY;
 
-   swc->context_relocation(swc, &cmd->cid);
+   cmd->cid = swc->cid;
    cmd->type = type;
 
    swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer,
@@ -1552,7 +1554,7 @@ SVGA3D_WaitForGBQuery(struct svga_winsys_context *swc,
    if(!cmd)
       return PIPE_ERROR_OUT_OF_MEMORY;
 
-   swc->context_relocation(swc, &cmd->cid);
+   cmd->cid = swc->cid;
    cmd->type = type;
 
    swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer,
@@ -1615,36 +1617,6 @@ SVGA3D_WaitForQuery(struct svga_winsys_context *swc,
 
 
 enum pipe_error
-SVGA3D_DefineGBShader(struct svga_winsys_context *swc,
-                      struct svga_winsys_gb_shader *gbshader,
-                      SVGA3dShaderType type,
-                      uint32 sizeInBytes)
-{
-   SVGA3dCmdDefineGBShader *cmd;
-
-   assert(sizeInBytes % 4 == 0);
-   assert(type == SVGA3D_SHADERTYPE_VS ||
-          type == SVGA3D_SHADERTYPE_PS);
-
-   cmd = SVGA3D_FIFOReserve(swc,
-                            SVGA_3D_CMD_DEFINE_GB_SHADER,
-                            sizeof *cmd,
-                            1); /* one relocation */
-
-   if (!cmd)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader);
-   cmd->type = type;
-   cmd->sizeInBytes = sizeInBytes;
-
-   swc->commit(swc);
-   
-   return PIPE_OK;
-}
-
-
-enum pipe_error
 SVGA3D_BindGBShader(struct svga_winsys_context *swc,
                     struct svga_winsys_gb_shader *gbshader)
 {
@@ -1658,7 +1630,7 @@ SVGA3D_BindGBShader(struct svga_winsys_context *swc,
       return PIPE_ERROR_OUT_OF_MEMORY;
 
    swc->shader_relocation(swc, &cmd->shid, &cmd->mobid,
-                         &cmd->offsetInBytes, gbshader);
+                         &cmd->offsetInBytes, gbshader, 0);
 
    swc->commit(swc);
 
@@ -1672,6 +1644,8 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc,
                    struct svga_winsys_gb_shader *gbshader)
 {
    SVGA3dCmdSetShader *cmd;
+
+   assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_PS);
    
    cmd = SVGA3D_FIFOReserve(swc,
                             SVGA_3D_CMD_SET_SHADER,
@@ -1680,10 +1654,10 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc,
    if (!cmd)
       return PIPE_ERROR_OUT_OF_MEMORY;
    
-   swc->context_relocation(swc, &cmd->cid);
+   cmd->cid = swc->cid;
    cmd->type = type;
    if (gbshader)
-      swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader);
+      swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader, 0);
    else
       cmd->shid = SVGA_ID_INVALID;
    swc->commit(swc);
@@ -1692,27 +1666,6 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc,
 }
 
 
-enum pipe_error
-SVGA3D_DestroyGBShader(struct svga_winsys_context *swc,
-                       struct svga_winsys_gb_shader *gbshader)
-{
-   SVGA3dCmdDestroyGBShader *cmd = 
-      SVGA3D_FIFOReserve(swc,
-                         SVGA_3D_CMD_DESTROY_GB_SHADER,
-                         sizeof *cmd,
-                         1); /* one relocation */
-
-   if (!cmd)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader);
-
-   swc->commit(swc);
-
-   return PIPE_OK;
-}
-
-
 /**
  * \param flags  mask of SVGA_RELOC_READ / _WRITE
  */
@@ -1738,89 +1691,6 @@ SVGA3D_BindGBSurface(struct svga_winsys_context *swc,
 }
 
 
-enum pipe_error
-SVGA3D_DefineGBContext(struct svga_winsys_context *swc)
-{
-   SVGA3dCmdDefineGBContext *cmd = 
-      SVGA3D_FIFOReserve(swc,
-                         SVGA_3D_CMD_DEFINE_GB_CONTEXT,
-                         sizeof *cmd,
-                         1);  /* one relocation */
-
-   if (!cmd)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->context_relocation(swc, &cmd->cid);
-
-   swc->commit(swc);
-
-   return PIPE_OK;
-}
-
-
-enum pipe_error
-SVGA3D_DestroyGBContext(struct svga_winsys_context *swc)
-{
-   SVGA3dCmdDestroyGBContext *cmd = 
-      SVGA3D_FIFOReserve(swc,
-                         SVGA_3D_CMD_DESTROY_GB_CONTEXT,
-                         sizeof *cmd,
-                         1);  /* one relocation */
-
-   if (!cmd)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->context_relocation(swc, &cmd->cid);
-
-   swc->commit(swc);
-
-   return PIPE_OK;
-}
-
-
-enum pipe_error
-SVGA3D_BindGBContext(struct svga_winsys_context *swc)
-{
-   SVGA3dCmdBindGBContext *cmd = 
-      SVGA3D_FIFOReserve(swc,
-                         SVGA_3D_CMD_BIND_GB_CONTEXT,
-                         sizeof *cmd,
-                         2);  /* two relocations */
-
-   if (!cmd)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->context_relocation(swc, &cmd->cid);
-   swc->context_relocation(swc, &cmd->mobid);
-   cmd->validContents = 0;  /* XXX pass as a parameter? */
-
-   swc->commit(swc);
-
-   return PIPE_OK;
-}
-
-
-enum pipe_error
-SVGA3D_InvalidateGBContext(struct svga_winsys_context *swc)
-{
-   SVGA3dCmdInvalidateGBContext *cmd =
-      SVGA3D_FIFOReserve(swc,
-                         SVGA_3D_CMD_INVALIDATE_GB_CONTEXT,
-                         sizeof *cmd,
-                         1);  /* one relocation */
-
-   if (!cmd)
-      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->context_relocation(swc, &cmd->cid);
-
-   swc->commit(swc);
-
-   return PIPE_OK;
-}
-
-
-
 /**
  * Update an image in a guest-backed surface.
  * (Inform the device that the guest-contents have been updated.)
index 6f658bf..271ee8e 100644
@@ -47,6 +47,7 @@ struct svga_winsys_context;
 struct svga_winsys_buffer;
 struct svga_winsys_surface;
 struct svga_winsys_gb_shader;
+struct svga_winsys_gb_query;
 
 
 /*
@@ -229,12 +230,6 @@ SVGA3D_SetShader(struct svga_winsys_context *swc,
  */
 
 enum pipe_error
-SVGA3D_DefineGBShader(struct svga_winsys_context *swc,
-                      struct svga_winsys_gb_shader *gbshader,
-                      SVGA3dShaderType type,
-                      uint32 sizeInBytes);
-
-enum pipe_error
 SVGA3D_BindGBShader(struct svga_winsys_context *swc,
                     struct svga_winsys_gb_shader *gbshader);
 
@@ -244,26 +239,10 @@ SVGA3D_SetGBShader(struct svga_winsys_context *swc,
                    struct svga_winsys_gb_shader *gbshader);
 
 enum pipe_error
-SVGA3D_DestroyGBShader(struct svga_winsys_context *swc,
-                       struct svga_winsys_gb_shader *gbshader);
-
-enum pipe_error
 SVGA3D_BindGBSurface(struct svga_winsys_context *swc,
                      struct svga_winsys_surface *surface);
 
 enum pipe_error
-SVGA3D_DefineGBContext(struct svga_winsys_context *swc);
-
-enum pipe_error
-SVGA3D_DestroyGBContext(struct svga_winsys_context *swc);
-
-enum pipe_error
-SVGA3D_BindGBContext(struct svga_winsys_context *swc);
-
-enum pipe_error
-SVGA3D_InvalidateGBContext(struct svga_winsys_context *swc);
-
-enum pipe_error
 SVGA3D_UpdateGBImage(struct svga_winsys_context *swc,
                      struct svga_winsys_surface *surface,
                      const SVGA3dBox *box,
@@ -327,4 +306,336 @@ SVGA3D_WaitForQuery(struct svga_winsys_context *swc,
                     SVGA3dQueryType type,
                     struct svga_winsys_buffer *buffer);
 
+
+
+/*
+ * VGPU10 commands
+ */
+
+enum pipe_error
+SVGA3D_vgpu10_PredCopyRegion(struct svga_winsys_context *swc,
+                             struct svga_winsys_surface *dstSurf,
+                             uint32 dstSubResource,
+                             struct svga_winsys_surface *srcSurf,
+                             uint32 srcSubResource,
+                             const SVGA3dCopyBox *box);
+
+enum pipe_error
+SVGA3D_vgpu10_PredCopy(struct svga_winsys_context *swc,
+                       struct svga_winsys_surface *dstSurf,
+                       struct svga_winsys_surface *srcSurf);
+
+enum pipe_error
+SVGA3D_vgpu10_SetViewports(struct svga_winsys_context *swc,
+                           unsigned count, const SVGA3dViewport *viewports);
+
+enum pipe_error
+SVGA3D_vgpu10_SetShader(struct svga_winsys_context *swc,
+                        SVGA3dShaderType type,
+                        struct svga_winsys_gb_shader *gbshader,
+                        SVGA3dShaderId shaderId);
+
+enum pipe_error
+SVGA3D_vgpu10_SetShaderResources(struct svga_winsys_context *swc,
+                                 SVGA3dShaderType type,
+                                 uint32 startView,
+                                 unsigned count,
+                                 const SVGA3dShaderResourceViewId ids[],
+                                 struct svga_winsys_surface **views);
+
+enum pipe_error
+SVGA3D_vgpu10_SetSamplers(struct svga_winsys_context *swc,
+                          unsigned count,
+                          uint32 startSampler,
+                          SVGA3dShaderType type,
+                          const SVGA3dSamplerId *samplerIds);
+
+enum pipe_error
+SVGA3D_vgpu10_SetRenderTargets(struct svga_winsys_context *swc,
+                               unsigned color_count,
+                               struct pipe_surface **color_surfs,
+                               struct pipe_surface *depth_stencil_surf);
+
+enum pipe_error
+SVGA3D_vgpu10_SetBlendState(struct svga_winsys_context *swc,
+                            SVGA3dBlendStateId blendId,
+                            const float *blendFactor, uint32 sampleMask);
+
+enum pipe_error
+SVGA3D_vgpu10_SetDepthStencilState(struct svga_winsys_context *swc,
+                                   SVGA3dDepthStencilStateId depthStencilId,
+                                   uint32 stencilRef);
+
+enum pipe_error
+SVGA3D_vgpu10_SetRasterizerState(struct svga_winsys_context *swc,
+                                 SVGA3dRasterizerStateId rasterizerId);
+
+enum pipe_error
+SVGA3D_vgpu10_SetPredication(struct svga_winsys_context *swc,
+                             SVGA3dQueryId queryId,
+                             uint32 predicateValue);
+
+enum pipe_error
+SVGA3D_vgpu10_SetSOTargets(struct svga_winsys_context *swc,
+                           unsigned count, const SVGA3dSoTarget *targets,
+                           struct svga_winsys_surface **surfaces);
+
+enum pipe_error
+SVGA3D_vgpu10_SetScissorRects(struct svga_winsys_context *swc,
+                              unsigned count,
+                              const SVGASignedRect *rects);
+
+enum pipe_error
+SVGA3D_vgpu10_SetStreamOutput(struct svga_winsys_context *swc,
+                              SVGA3dStreamOutputId soid);
+
+enum pipe_error
+SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc,
+                   uint32 vertexCount, uint32 startVertexLocation);
+
+enum pipe_error
+SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc,
+                          uint32 indexCount, uint32 startIndexLocation,
+                          int32 baseVertexLocation);
+
+enum pipe_error
+SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc,
+                            uint32 vertexCountPerInstance,
+                            uint32 instanceCount,
+                            uint32 startVertexLocation,
+                            uint32 startInstanceLocation);
+
+enum pipe_error
+SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc,
+                                   uint32 indexCountPerInstance,
+                                   uint32 instanceCount,
+                                   uint32 startIndexLocation,
+                                   int32  baseVertexLocation,
+                                   uint32 startInstanceLocation);
+
+enum pipe_error
+SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineQuery(struct svga_winsys_context *swc,
+                          SVGA3dQueryId queryId,
+                          SVGA3dQueryType type,
+                          SVGA3dDXQueryFlags flags);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyQuery(struct svga_winsys_context *swc,
+                           SVGA3dQueryId queryId);
+
+enum pipe_error
+SVGA3D_vgpu10_BindQuery(struct svga_winsys_context *swc,
+                        struct svga_winsys_gb_query *gbQuery,
+                        SVGA3dQueryId queryId);
+
+enum pipe_error
+SVGA3D_vgpu10_SetQueryOffset(struct svga_winsys_context *swc,
+                             SVGA3dQueryId queryId,
+                             uint32 mobOffset);
+
+enum pipe_error
+SVGA3D_vgpu10_BeginQuery(struct svga_winsys_context *swc,
+                         SVGA3dQueryId queryId);
+
+enum pipe_error
+SVGA3D_vgpu10_EndQuery(struct svga_winsys_context *swc,
+                       SVGA3dQueryId queryId);
+
+enum pipe_error
+SVGA3D_vgpu10_ClearRenderTargetView(struct svga_winsys_context *swc,
+                                    struct pipe_surface *color_surf,
+                                    const float *rgba);
+
+enum pipe_error
+SVGA3D_vgpu10_ClearDepthStencilView(struct svga_winsys_context *swc,
+                                    struct pipe_surface *ds_surf,
+                                    uint16 flags, uint16 stencil, float depth);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineShaderResourceView(struct svga_winsys_context *swc,
+                             SVGA3dShaderResourceViewId shaderResourceViewId,
+                             struct svga_winsys_surface *surf,
+                             SVGA3dSurfaceFormat format,
+                             SVGA3dResourceType resourceDimension,
+                             const SVGA3dShaderResourceViewDesc *desc);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyShaderResourceView(struct svga_winsys_context *swc,
+                            SVGA3dShaderResourceViewId shaderResourceViewId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineRenderTargetView(struct svga_winsys_context *swc,
+                                  SVGA3dRenderTargetViewId renderTargetViewId,
+                                  struct svga_winsys_surface *surface,
+                                  SVGA3dSurfaceFormat format,
+                                  SVGA3dResourceType resourceDimension,
+                                  const SVGA3dRenderTargetViewDesc *desc);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyRenderTargetView(struct svga_winsys_context *swc,
+                                SVGA3dRenderTargetViewId renderTargetViewId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineDepthStencilView(struct svga_winsys_context *swc,
+                                  SVGA3dDepthStencilViewId depthStencilViewId,
+                                  struct svga_winsys_surface *surface,
+                                  SVGA3dSurfaceFormat format,
+                                  SVGA3dResourceType resourceDimension,
+                                  const SVGA3dRenderTargetViewDesc *desc);
+
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyDepthStencilView(struct svga_winsys_context *swc,
+                                SVGA3dDepthStencilViewId depthStencilViewId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineElementLayout(struct svga_winsys_context *swc,
+                               unsigned count,
+                               SVGA3dElementLayoutId elementLayoutId,
+                               const SVGA3dInputElementDesc *elements);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyElementLayout(struct svga_winsys_context *swc,
+                                   SVGA3dElementLayoutId elementLayoutId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineBlendState(struct svga_winsys_context *swc,
+                               SVGA3dBlendStateId blendId,
+                               uint8 alphaToCoverageEnable,
+                               uint8 independentBlendEnable,
+                               const SVGA3dDXBlendStatePerRT *perRT);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyBlendState(struct svga_winsys_context *swc,
+                                SVGA3dBlendStateId blendId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineDepthStencilState(struct svga_winsys_context *swc,
+                                      SVGA3dDepthStencilStateId depthStencilId,
+                                      uint8 depthEnable,
+                                      SVGA3dDepthWriteMask depthWriteMask,
+                                      SVGA3dComparisonFunc depthFunc,
+                                      uint8 stencilEnable,
+                                      uint8 frontEnable,
+                                      uint8 backEnable,
+                                      uint8 stencilReadMask,
+                                      uint8 stencilWriteMask,
+                                      uint8 frontStencilFailOp,
+                                      uint8 frontStencilDepthFailOp,
+                                      uint8 frontStencilPassOp,
+                                      SVGA3dComparisonFunc frontStencilFunc,
+                                      uint8 backStencilFailOp,
+                                      uint8 backStencilDepthFailOp,
+                                      uint8 backStencilPassOp,
+                                      SVGA3dComparisonFunc backStencilFunc);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyDepthStencilState(struct svga_winsys_context *swc,
+                                       SVGA3dDepthStencilStateId depthStencilId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineRasterizerState(struct svga_winsys_context *swc,
+                                    SVGA3dRasterizerStateId rasterizerId,
+                                    uint8 fillMode,
+                                    SVGA3dCullMode cullMode,
+                                    uint8 frontCounterClockwise,
+                                    int32 depthBias,
+                                    float depthBiasClamp,
+                                    float slopeScaledDepthBias,
+                                    uint8 depthClipEnable,
+                                    uint8 scissorEnable,
+                                    uint8 multisampleEnable,
+                                    uint8 antialiasedLineEnable,
+                                    float lineWidth,
+                                    uint8 lineStippleEnable,
+                                    uint8 lineStippleFactor,
+                                    uint16 lineStipplePattern,
+                                    uint8 provokingVertexLast);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyRasterizerState(struct svga_winsys_context *swc,
+                                     SVGA3dRasterizerStateId rasterizerId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineSamplerState(struct svga_winsys_context *swc,
+                                 SVGA3dSamplerId samplerId,
+                                 SVGA3dFilter filter,
+                                 uint8 addressU,
+                                 uint8 addressV,
+                                 uint8 addressW,
+                                 float mipLODBias,
+                                 uint8 maxAnisotropy,
+                                 uint8 comparisonFunc,
+                                 SVGA3dRGBAFloat borderColor,
+                                 float minLOD,
+                                 float maxLOD);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroySamplerState(struct svga_winsys_context *swc,
+                                  SVGA3dSamplerId samplerId);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyShader(struct svga_winsys_context *swc,
+                            SVGA3dShaderId shaderId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineAndBindShader(struct svga_winsys_context *swc,
+                                  struct svga_winsys_gb_shader *gbshader,
+                                  SVGA3dShaderId shaderId,
+                                  SVGA3dShaderType type,
+                                  uint32 sizeInBytes);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineStreamOutput(struct svga_winsys_context *swc,
+      SVGA3dStreamOutputId soid,
+      uint32 numOutputStreamEntries,
+      uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS],
+      const SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS]);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyStreamOutput(struct svga_winsys_context *swc,
+                                  SVGA3dStreamOutputId soid);
+
+enum pipe_error
+SVGA3D_vgpu10_ReadbackSubResource(struct svga_winsys_context *swc,
+                                  struct svga_winsys_surface *surface,
+                                  unsigned subResource);
+
+enum pipe_error
+SVGA3D_vgpu10_SetInputLayout(struct svga_winsys_context *swc,
+                             SVGA3dElementLayoutId elementLayoutId);
+
+enum pipe_error
+SVGA3D_vgpu10_SetVertexBuffers(struct svga_winsys_context *swc,
+                               unsigned count,
+                               uint32 startBuffer,
+                               const SVGA3dVertexBuffer *bufferInfo,
+                               struct svga_winsys_surface **surfaces);
+
+enum pipe_error
+SVGA3D_vgpu10_SetTopology(struct svga_winsys_context *swc,
+                          SVGA3dPrimitiveType topology);
+
+enum pipe_error
+SVGA3D_vgpu10_SetIndexBuffer(struct svga_winsys_context *swc,
+                             struct svga_winsys_surface *indexes,
+                             SVGA3dSurfaceFormat format, uint32 offset);
+
+enum pipe_error
+SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc,
+                                      unsigned slot,
+                                      SVGA3dShaderType type,
+                                      struct svga_winsys_surface *surface,
+                                      uint32 offsetInBytes,
+                                      uint32 sizeInBytes);
+
+enum pipe_error
+SVGA3D_vgpu10_UpdateSubResource(struct svga_winsys_context *swc,
+                                struct svga_winsys_surface *surface,
+                                const SVGA3dBox *box,
+                                unsigned subResource);
+
 #endif /* __SVGA3D_H__ */
diff --git a/src/gallium/drivers/svga/svga_cmd_vgpu10.c b/src/gallium/drivers/svga/svga_cmd_vgpu10.c
new file mode 100644
index 0000000..596ba95
--- /dev/null
@@ -0,0 +1,1289 @@
+/**********************************************************
+ * Copyright 2008-2013 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file svga_cmd_vgpu10.c
+ *
+ * Command construction utility for the vgpu10 SVGA3D protocol.
+ *
+ * \author Mingcheng Chen
+ * \author Brian Paul
+ */
+
+
+#include "svga_winsys.h"
+#include "svga_resource_buffer.h"
+#include "svga_resource_texture.h"
+#include "svga_surface.h"
+#include "svga_cmd.h"
+
+
+/**
+ * Emit a surface relocation for RenderTargetViewId
+ */
+static void
+view_relocation(struct svga_winsys_context *swc, // IN
+                struct pipe_surface *surface,    // IN
+                SVGA3dRenderTargetViewId *id,    // OUT
+                unsigned flags)
+{
+   if (surface) {
+      struct svga_surface *s = svga_surface(surface);
+      assert(s->handle);
+      swc->surface_relocation(swc, id, NULL, s->handle, flags);
+   }
+   else {
+      swc->surface_relocation(swc, id, NULL, NULL, flags);
+   }
+}
+
+
+/**
+ * Emit a surface relocation for a ResourceId.
+ */
+static void
+surface_to_resourceid(struct svga_winsys_context *swc, // IN
+                      struct svga_winsys_surface *surface,    // IN
+                      SVGA3dSurfaceId *sid,            // OUT
+                      unsigned flags)                  // IN
+{
+   if (surface) {
+      swc->surface_relocation(swc, sid, NULL, surface, flags);
+   }
+   else {
+      swc->surface_relocation(swc, sid, NULL, NULL, flags);
+   }
+}
+
+
+#define SVGA3D_CREATE_COMMAND(CommandName, CommandCode) \
+SVGA3dCmdDX##CommandName *cmd; \
+{ \
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_##CommandCode, \
+                            sizeof(SVGA3dCmdDX##CommandName), 0); \
+   if (!cmd) \
+      return PIPE_ERROR_OUT_OF_MEMORY; \
+}
+
+#define SVGA3D_CREATE_CMD_COUNT(CommandName, CommandCode, ElementClassName) \
+SVGA3dCmdDX##CommandName *cmd; \
+{ \
+   assert(count > 0); \
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_##CommandCode, \
+                            sizeof(SVGA3dCmdDX##CommandName) + \
+                            count * sizeof(ElementClassName), 0); \
+   if (!cmd) \
+      return PIPE_ERROR_OUT_OF_MEMORY; \
+}
+
+#define SVGA3D_COPY_BASIC(VariableName) \
+{ \
+   cmd->VariableName = VariableName; \
+}
+
+#define SVGA3D_COPY_BASIC_2(VariableName1, VariableName2) \
+{ \
+   SVGA3D_COPY_BASIC(VariableName1); \
+   SVGA3D_COPY_BASIC(VariableName2); \
+}
+
+#define SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3) \
+{ \
+   SVGA3D_COPY_BASIC_2(VariableName1, VariableName2); \
+   SVGA3D_COPY_BASIC(VariableName3); \
+}
+
+#define SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \
+                            VariableName4) \
+{ \
+   SVGA3D_COPY_BASIC_2(VariableName1, VariableName2); \
+   SVGA3D_COPY_BASIC_2(VariableName3, VariableName4); \
+}
+
+#define SVGA3D_COPY_BASIC_5(VariableName1, VariableName2, VariableName3, \
+                            VariableName4, VariableName5) \
+{\
+   SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3); \
+   SVGA3D_COPY_BASIC_2(VariableName4, VariableName5); \
+}
+
+#define SVGA3D_COPY_BASIC_6(VariableName1, VariableName2, VariableName3, \
+                            VariableName4, VariableName5, VariableName6) \
+{\
+   SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3); \
+   SVGA3D_COPY_BASIC_3(VariableName4, VariableName5, VariableName6); \
+}
+
+#define SVGA3D_COPY_BASIC_7(VariableName1, VariableName2, VariableName3, \
+                            VariableName4, VariableName5, VariableName6, \
+                            VariableName7) \
+{\
+   SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \
+                       VariableName4); \
+   SVGA3D_COPY_BASIC_3(VariableName5, VariableName6, VariableName7); \
+}
+
+#define SVGA3D_COPY_BASIC_8(VariableName1, VariableName2, VariableName3, \
+                            VariableName4, VariableName5, VariableName6, \
+                            VariableName7, VariableName8) \
+{\
+   SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \
+                       VariableName4); \
+   SVGA3D_COPY_BASIC_4(VariableName5, VariableName6, VariableName7, \
+                       VariableName8); \
+}
+
+#define SVGA3D_COPY_BASIC_9(VariableName1, VariableName2, VariableName3, \
+                            VariableName4, VariableName5, VariableName6, \
+                            VariableName7, VariableName8, VariableName9) \
+{\
+   SVGA3D_COPY_BASIC_5(VariableName1, VariableName2, VariableName3, \
+                       VariableName4, VariableName5); \
+   SVGA3D_COPY_BASIC_4(VariableName6, VariableName7, VariableName8, \
+                       VariableName9); \
+}
+
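
For reference, a hand-written sketch of what the count variant expands to inside SVGA3D_vgpu10_SetViewports below, with CommandName = SetViewports, CommandCode = SET_VIEWPORTS and ElementClassName = SVGA3dViewport:

SVGA3dCmdDXSetViewports *cmd;
{
   assert(count > 0);
   /* Reserve space for the fixed command followed by 'count' elements. */
   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_VIEWPORTS,
                            sizeof(SVGA3dCmdDXSetViewports) +
                            count * sizeof(SVGA3dViewport), 0);
   if (!cmd)
      return PIPE_ERROR_OUT_OF_MEMORY;
}

Declaring cmd outside the braces makes it visible to the rest of the emitter after the macro, while the early return keeps allocation-failure handling uniform across all the command emitters that follow.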
+
+enum pipe_error
+SVGA3D_vgpu10_PredCopyRegion(struct svga_winsys_context *swc,
+                             struct svga_winsys_surface *dstSurf,
+                             uint32 dstSubResource,
+                             struct svga_winsys_surface *srcSurf,
+                             uint32 srcSubResource,
+                             const SVGA3dCopyBox *box)
+{
+   SVGA3dCmdDXPredCopyRegion *cmd =
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_DX_PRED_COPY_REGION,
+                         sizeof(SVGA3dCmdDXPredCopyRegion),
+                         2);  /* two relocations */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->dstSid, NULL, dstSurf, SVGA_RELOC_WRITE);
+   swc->surface_relocation(swc, &cmd->srcSid, NULL, srcSurf, SVGA_RELOC_READ);
+   cmd->dstSubResource = dstSubResource;
+   cmd->srcSubResource = srcSubResource;
+   cmd->box = *box;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_vgpu10_PredCopy(struct svga_winsys_context *swc,
+                       struct svga_winsys_surface *dstSurf,
+                       struct svga_winsys_surface *srcSurf)
+{
+   SVGA3dCmdDXPredCopy *cmd =
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_DX_PRED_COPY,
+                         sizeof(SVGA3dCmdDXPredCopy),
+                         2);  /* two relocations */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->dstSid, NULL, dstSurf, SVGA_RELOC_WRITE);
+   swc->surface_relocation(swc, &cmd->srcSid, NULL, srcSurf, SVGA_RELOC_READ);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetViewports(struct svga_winsys_context *swc,
+                           unsigned count,
+                           const SVGA3dViewport *viewports)
+{
+   SVGA3D_CREATE_CMD_COUNT(SetViewports, SET_VIEWPORTS, SVGA3dViewport);
+
+   memcpy(cmd + 1, viewports, count * sizeof(SVGA3dViewport));
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
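The memcpy(cmd + 1, ...) idiom above works because SVGA3D_FIFOReserve hands back one contiguous span: the fixed-size command struct followed immediately by room for the 'count' variable-length elements, so cmd + 1 is the first element slot. A hedged sketch of what SVGA3D_CREATE_CMD_COUNT presumably expands to (it is defined earlier in the file and relies on 'swc' and 'count' being in scope):

/* Hypothetical expansion -- treat as a sketch, not the real macro. */
#define SVGA3D_CREATE_CMD_COUNT(CommandName, CommandCode, ElementClassName) \
   SVGA3dCmdDX##CommandName *cmd; \
   { \
      cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_##CommandCode, \
                               sizeof(SVGA3dCmdDX##CommandName) + \
                               count * sizeof(ElementClassName), 0); \
      if (!cmd) \
         return PIPE_ERROR_OUT_OF_MEMORY; \
   }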
+
+enum pipe_error
+SVGA3D_vgpu10_SetShader(struct svga_winsys_context *swc,
+                        SVGA3dShaderType type,
+                        struct svga_winsys_gb_shader *gbshader,
+                        SVGA3dShaderId shaderId)
+{
+   SVGA3dCmdDXSetShader *cmd = SVGA3D_FIFOReserve(swc,
+                                                  SVGA_3D_CMD_DX_SET_SHADER,
+                                                  sizeof *cmd,
+                                                  1);  /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->shader_relocation(swc, &cmd->shaderId, NULL, NULL, gbshader, 0);
+
+   cmd->type = type;
+   cmd->shaderId = shaderId;
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_vgpu10_SetShaderResources(struct svga_winsys_context *swc,
+                                 SVGA3dShaderType type,
+                                 uint32 startView,
+                                 unsigned count,
+                                 const SVGA3dShaderResourceViewId ids[],
+                                 struct svga_winsys_surface **views)
+{
+   SVGA3dCmdDXSetShaderResources *cmd;
+   SVGA3dShaderResourceViewId *cmd_ids;
+   unsigned i;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_DX_SET_SHADER_RESOURCES,
+                            sizeof(SVGA3dCmdDXSetShaderResources) +
+                            count * sizeof(SVGA3dShaderResourceViewId),
+                            count); /* 'count' relocations */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+
+   cmd->type = type;
+   cmd->startView = startView;
+
+   cmd_ids = (SVGA3dShaderResourceViewId *) (cmd + 1);
+   for (i = 0; i < count; i++) {
+      swc->surface_relocation(swc, cmd_ids + i, NULL, views[i],
+                              SVGA_RELOC_READ);
+      cmd_ids[i] = ids[i];
+   }
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_vgpu10_SetSamplers(struct svga_winsys_context *swc,
+                          unsigned count,
+                          uint32 startSampler,
+                          SVGA3dShaderType type,
+                          const SVGA3dSamplerId *samplerIds)
+{
+   SVGA3D_CREATE_CMD_COUNT(SetSamplers, SET_SAMPLERS, SVGA3dSamplerId);
+
+   SVGA3D_COPY_BASIC_2(startSampler, type);
+   memcpy(cmd + 1, samplerIds, count * sizeof(SVGA3dSamplerId));
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_vgpu10_ClearRenderTargetView(struct svga_winsys_context *swc,
+                                    struct pipe_surface *color_surf,
+                                    const float *rgba)
+{
+   SVGA3dCmdDXClearRenderTargetView *cmd;
+   struct svga_surface *ss = svga_surface(color_surf);
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW,
+                            sizeof(SVGA3dCmdDXClearRenderTargetView),
+                            1); /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+
+   /* NOTE: The following is pretty tricky.  We need to emit a view/surface
+    * relocation and we have to provide a pointer to an ID which lies in
+    * the bounds of the command space which we just allocated.  However,
+    * we then need to overwrite it with the original RenderTargetViewId.
+    */
+   view_relocation(swc, color_surf, &cmd->renderTargetViewId,
+                   SVGA_RELOC_WRITE);
+   cmd->renderTargetViewId = ss->view_id;
+
+   COPY_4V(cmd->rgba.value, rgba);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
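view_relocation is a static helper defined earlier in the file. The NOTE above reads more easily once you know what it presumably does: emit a surface relocation record pointing at the ID slot inside the just-reserved command space, forwarding to swc->surface_relocation with the surface's backing handle (a NULL surface still emits a relocation so the reservation's relocation count stays honest). A sketch under those assumptions:

/* Hypothetical sketch of the helper defined above this hunk. */
static void
view_relocation(struct svga_winsys_context *swc,
                struct pipe_surface *surface,
                uint32 *id_location,     /* points into reserved cmd space */
                unsigned flags)
{
   if (surface) {
      struct svga_surface *s = svga_surface(surface);
      swc->surface_relocation(swc, id_location, NULL, s->handle, flags);
   }
   else {
      /* No surface: emit a null relocation to keep the count consistent. */
      swc->surface_relocation(swc, id_location, NULL, NULL, flags);
   }
}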
+
+enum pipe_error
+SVGA3D_vgpu10_SetRenderTargets(struct svga_winsys_context *swc,
+                               unsigned color_count,
+                               struct pipe_surface **color_surfs,
+                               struct pipe_surface *depth_stencil_surf)
+{
+   const unsigned surf_count = color_count + 1;
+   SVGA3dCmdDXSetRenderTargets *cmd;
+   SVGA3dRenderTargetViewId *ctarget;
+   struct svga_surface *ss;
+   unsigned i;
+
+   assert(surf_count > 0);
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_DX_SET_RENDERTARGETS,
+                            sizeof(SVGA3dCmdDXSetRenderTargets) +
+                            color_count * sizeof(SVGA3dRenderTargetViewId),
+                            surf_count); /* 'surf_count' relocations */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* NOTE: See earlier comment about the tricky handling of the ViewIds.
+    */
+
+   /* Depth / Stencil buffer */
+   if (depth_stencil_surf) {
+      ss = svga_surface(depth_stencil_surf);
+      view_relocation(swc, depth_stencil_surf, &cmd->depthStencilViewId,
+                      SVGA_RELOC_WRITE);
+      cmd->depthStencilViewId = ss->view_id;
+   }
+   else {
+      /* no depth/stencil buffer - still need a relocation */
+      view_relocation(swc, NULL, &cmd->depthStencilViewId,
+                      SVGA_RELOC_WRITE);
+      cmd->depthStencilViewId = SVGA3D_INVALID_ID;
+   }
+
+   /* Color buffers */
+   ctarget = (SVGA3dRenderTargetViewId *) &cmd[1];
+   for (i = 0; i < color_count; i++) {
+      if (color_surfs[i]) {
+         ss = svga_surface(color_surfs[i]);
+         view_relocation(swc, color_surfs[i], ctarget + i, SVGA_RELOC_WRITE);
+         ctarget[i] = ss->view_id;
+      }
+      else {
+         view_relocation(swc, NULL, ctarget + i, SVGA_RELOC_WRITE);
+         ctarget[i] = SVGA3D_INVALID_ID;
+      }
+   }
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_vgpu10_SetBlendState(struct svga_winsys_context *swc,
+                            SVGA3dBlendStateId blendId,
+                            const float *blendFactor,
+                            uint32 sampleMask)
+{
+   SVGA3D_CREATE_COMMAND(SetBlendState, SET_BLEND_STATE);
+
+   SVGA3D_COPY_BASIC_2(blendId, sampleMask);
+   memcpy(cmd->blendFactor, blendFactor, sizeof(float) * 4);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
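SVGA3D_CREATE_COMMAND is the fixed-size sibling of the count variant sketched earlier: no trailing array, no relocations. A sketch of its presumed shape (again, the real definition is earlier in the file):

/* Hypothetical expansion; declares 'cmd' for the statements after it. */
#define SVGA3D_CREATE_COMMAND(CommandName, CommandCode) \
   SVGA3dCmdDX##CommandName *cmd; \
   { \
      cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_##CommandCode, \
                               sizeof(SVGA3dCmdDX##CommandName), 0); \
      if (!cmd) \
         return PIPE_ERROR_OUT_OF_MEMORY; \
   }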
+enum pipe_error
+SVGA3D_vgpu10_SetDepthStencilState(struct svga_winsys_context *swc,
+                                   SVGA3dDepthStencilStateId depthStencilId,
+                                   uint32 stencilRef)
+{
+   SVGA3D_CREATE_COMMAND(SetDepthStencilState, SET_DEPTHSTENCIL_STATE);
+
+   SVGA3D_COPY_BASIC_2(depthStencilId, stencilRef);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetRasterizerState(struct svga_winsys_context *swc,
+                                 SVGA3dRasterizerStateId rasterizerId)
+{
+   SVGA3D_CREATE_COMMAND(SetRasterizerState, SET_RASTERIZER_STATE);
+
+   cmd->rasterizerId = rasterizerId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetPredication(struct svga_winsys_context *swc,
+                             SVGA3dQueryId queryId,
+                             uint32 predicateValue)
+{
+   SVGA3dCmdDXSetPredication *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_PREDICATION,
+                            sizeof *cmd, 0);
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->queryId = queryId;
+   cmd->predicateValue = predicateValue;
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetSOTargets(struct svga_winsys_context *swc,
+                           unsigned count,
+                           const SVGA3dSoTarget *targets,
+                           struct svga_winsys_surface **surfaces)
+{
+   SVGA3dCmdDXSetSOTargets *cmd;
+   SVGA3dSoTarget *sot;
+   unsigned i;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_DX_SET_SOTARGETS,
+                            sizeof(SVGA3dCmdDXSetSOTargets) +
+                            count * sizeof(SVGA3dSoTarget),
+                            count);
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->pad0 = 0;
+   sot = (SVGA3dSoTarget *)(cmd + 1);
+   for (i = 0; i < count; i++, sot++) {
+      if (surfaces[i]) {
+         sot->offset = targets[i].offset;
+         sot->sizeInBytes = targets[i].sizeInBytes;
+         swc->surface_relocation(swc, &sot->sid, NULL, surfaces[i],
+                                 SVGA_RELOC_WRITE);
+      }
+      else {
+         sot->offset = 0;
+         sot->sizeInBytes = ~0u;
+         swc->surface_relocation(swc, &sot->sid, NULL, NULL,
+                                 SVGA_RELOC_WRITE);
+      }
+   }
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetScissorRects(struct svga_winsys_context *swc,
+                              unsigned count,
+                              const SVGASignedRect *rects)
+{
+   SVGA3dCmdDXSetScissorRects *cmd;
+
+   assert(count > 0);
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SCISSORRECTS,
+                            sizeof(SVGA3dCmdDXSetScissorRects) +
+                            count * sizeof(SVGASignedRect),
+                            0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   memcpy(cmd + 1, rects, count * sizeof(SVGASignedRect));
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetStreamOutput(struct svga_winsys_context *swc,
+                              SVGA3dStreamOutputId soid)
+{
+   SVGA3D_CREATE_COMMAND(SetStreamOutput, SET_STREAMOUTPUT);
+
+   cmd->soid = soid;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc,
+                   uint32 vertexCount,
+                   uint32 startVertexLocation)
+{
+   SVGA3D_CREATE_COMMAND(Draw, DRAW);
+
+   SVGA3D_COPY_BASIC_2(vertexCount, startVertexLocation);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc,
+                          uint32 indexCount,
+                          uint32 startIndexLocation,
+                          int32 baseVertexLocation)
+{
+   SVGA3D_CREATE_COMMAND(DrawIndexed, DRAW_INDEXED);
+
+   SVGA3D_COPY_BASIC_3(indexCount, startIndexLocation,
+                       baseVertexLocation);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc,
+                            uint32 vertexCountPerInstance,
+                            uint32 instanceCount,
+                            uint32 startVertexLocation,
+                            uint32 startInstanceLocation)
+{
+   SVGA3D_CREATE_COMMAND(DrawInstanced, DRAW_INSTANCED);
+
+   SVGA3D_COPY_BASIC_4(vertexCountPerInstance, instanceCount,
+                       startVertexLocation, startInstanceLocation);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc,
+                                   uint32 indexCountPerInstance,
+                                   uint32 instanceCount,
+                                   uint32 startIndexLocation,
+                                   int32  baseVertexLocation,
+                                   uint32 startInstanceLocation)
+{
+   SVGA3D_CREATE_COMMAND(DrawIndexedInstanced, DRAW_INDEXED_INSTANCED);
+
+   SVGA3D_COPY_BASIC_5(indexCountPerInstance, instanceCount,
+                       startIndexLocation, baseVertexLocation,
+                       startInstanceLocation);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc)
+{
+   SVGA3D_CREATE_COMMAND(DrawAuto, DRAW_AUTO);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DefineQuery(struct svga_winsys_context *swc,
+                          SVGA3dQueryId queryId,
+                          SVGA3dQueryType type,
+                          SVGA3dDXQueryFlags flags)
+{
+   SVGA3D_CREATE_COMMAND(DefineQuery, DEFINE_QUERY);
+
+   SVGA3D_COPY_BASIC_3(queryId, type, flags);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyQuery(struct svga_winsys_context *swc,
+                           SVGA3dQueryId queryId)
+{
+   SVGA3D_CREATE_COMMAND(DestroyQuery, DESTROY_QUERY);
+
+   cmd->queryId = queryId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_BindQuery(struct svga_winsys_context *swc,
+                        struct svga_winsys_gb_query *gbQuery,
+                        SVGA3dQueryId queryId)
+{
+   SVGA3dCmdDXBindQuery *cmd = SVGA3D_FIFOReserve(swc,
+                                                  SVGA_3D_CMD_DX_BIND_QUERY,
+                                                  sizeof *cmd,
+                                                  1);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->queryId = queryId;
+   swc->query_relocation(swc, &cmd->mobid, gbQuery);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetQueryOffset(struct svga_winsys_context *swc,
+                             SVGA3dQueryId queryId,
+                             uint32 mobOffset)
+{
+   SVGA3D_CREATE_COMMAND(SetQueryOffset, SET_QUERY_OFFSET);
+   SVGA3D_COPY_BASIC_2(queryId, mobOffset);
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_BeginQuery(struct svga_winsys_context *swc,
+                         SVGA3dQueryId queryId)
+{
+   SVGA3D_CREATE_COMMAND(BeginQuery, BEGIN_QUERY);
+   cmd->queryId = queryId;
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_EndQuery(struct svga_winsys_context *swc,
+                       SVGA3dQueryId queryId)
+{
+   SVGA3D_CREATE_COMMAND(EndQuery, END_QUERY);
+   cmd->queryId = queryId;
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_vgpu10_ClearDepthStencilView(struct svga_winsys_context *swc,
+                                    struct pipe_surface *ds_surf,
+                                    uint16 flags,
+                                    uint16 stencil,
+                                    float depth)
+{
+   SVGA3dCmdDXClearDepthStencilView *cmd;
+   struct svga_surface *ss = svga_surface(ds_surf);
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW,
+                            sizeof(SVGA3dCmdDXClearDepthStencilView),
+                            1); /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* NOTE: The following is pretty tricky.  We need to emit a view/surface
+    * relocation and we have to provide a pointer to an ID which lies in
+    * the bounds of the command space which we just allocated.  However,
+    * we then need to overwrite it with the original DepthStencilViewId.
+    */
+   view_relocation(swc, ds_surf, &cmd->depthStencilViewId,
+                   SVGA_RELOC_WRITE);
+   cmd->depthStencilViewId = ss->view_id;
+   cmd->flags = flags;
+   cmd->stencil = stencil;
+   cmd->depth = depth;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DefineShaderResourceView(struct svga_winsys_context *swc,
+                             SVGA3dShaderResourceViewId shaderResourceViewId,
+                             struct svga_winsys_surface *surface,
+                             SVGA3dSurfaceFormat format,
+                             SVGA3dResourceType resourceDimension,
+                             const SVGA3dShaderResourceViewDesc *desc)
+{
+   SVGA3dCmdDXDefineShaderResourceView *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW,
+                            sizeof(SVGA3dCmdDXDefineShaderResourceView),
+                            1); /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   SVGA3D_COPY_BASIC_3(shaderResourceViewId, format, resourceDimension);
+
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface,
+                           SVGA_RELOC_READ);
+
+   cmd->desc = *desc;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyShaderResourceView(struct svga_winsys_context *swc,
+                             SVGA3dShaderResourceViewId shaderResourceViewId)
+{
+   SVGA3D_CREATE_COMMAND(DestroyShaderResourceView,
+                         DESTROY_SHADERRESOURCE_VIEW);
+
+   cmd->shaderResourceViewId = shaderResourceViewId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_vgpu10_DefineRenderTargetView(struct svga_winsys_context *swc,
+                                  SVGA3dRenderTargetViewId renderTargetViewId,
+                                  struct svga_winsys_surface *surface,
+                                  SVGA3dSurfaceFormat format,
+                                  SVGA3dResourceType resourceDimension,
+                                  const SVGA3dRenderTargetViewDesc *desc)
+{
+   SVGA3dCmdDXDefineRenderTargetView *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW,
+                            sizeof(SVGA3dCmdDXDefineRenderTargetView),
+                            1); /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   SVGA3D_COPY_BASIC_3(renderTargetViewId, format, resourceDimension);
+   cmd->desc = *desc;
+
+   surface_to_resourceid(swc, surface,
+                         &cmd->sid,
+                         SVGA_RELOC_READ | SVGA_RELOC_WRITE);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyRenderTargetView(struct svga_winsys_context *swc,
+                                 SVGA3dRenderTargetViewId renderTargetViewId)
+{
+   SVGA3D_CREATE_COMMAND(DestroyRenderTargetView, DESTROY_RENDERTARGET_VIEW);
+
+   cmd->renderTargetViewId = renderTargetViewId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_vgpu10_DefineDepthStencilView(struct svga_winsys_context *swc,
+                                  SVGA3dDepthStencilViewId depthStencilViewId,
+                                  struct svga_winsys_surface *surface,
+                                  SVGA3dSurfaceFormat format,
+                                  SVGA3dResourceType resourceDimension,
+                                  const SVGA3dRenderTargetViewDesc *desc)
+{
+   SVGA3dCmdDXDefineDepthStencilView *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW,
+                            sizeof(SVGA3dCmdDXDefineDepthStencilView),
+                            1); /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   SVGA3D_COPY_BASIC_3(depthStencilViewId, format, resourceDimension);
+   cmd->mipSlice = desc->tex.mipSlice;
+   cmd->firstArraySlice = desc->tex.firstArraySlice;
+   cmd->arraySize = desc->tex.arraySize;
+
+   surface_to_resourceid(swc, surface,
+                         &cmd->sid,
+                         SVGA_RELOC_READ | SVGA_RELOC_WRITE);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyDepthStencilView(struct svga_winsys_context *swc,
+                                 SVGA3dDepthStencilViewId depthStencilViewId)
+{
+   SVGA3D_CREATE_COMMAND(DestroyDepthStencilView, DESTROY_DEPTHSTENCIL_VIEW);
+
+   cmd->depthStencilViewId = depthStencilViewId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DefineElementLayout(struct svga_winsys_context *swc,
+                                  unsigned count,
+                                  SVGA3dElementLayoutId elementLayoutId,
+                                  const SVGA3dInputElementDesc *elements)
+{
+   SVGA3dCmdDXDefineElementLayout *cmd;
+   unsigned i;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT,
+                            sizeof(SVGA3dCmdDXDefineElementLayout) +
+                            count * sizeof(SVGA3dInputElementDesc), 0);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* check that all offsets are multiples of four */
+   for (i = 0; i < count; i++) {
+      assert(elements[i].alignedByteOffset % 4 == 0);
+   }
+   (void) i; /* silence unused var in release build */
+
+   cmd->elementLayoutId = elementLayoutId;
+   memcpy(cmd + 1, elements, count * sizeof(SVGA3dInputElementDesc));
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyElementLayout(struct svga_winsys_context *swc,
+                                   SVGA3dElementLayoutId elementLayoutId)
+{
+   SVGA3D_CREATE_COMMAND(DestroyElementLayout, DESTROY_ELEMENTLAYOUT);
+
+   cmd->elementLayoutId = elementLayoutId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DefineBlendState(struct svga_winsys_context *swc,
+                               SVGA3dBlendStateId blendId,
+                               uint8 alphaToCoverageEnable,
+                               uint8 independentBlendEnable,
+                               const SVGA3dDXBlendStatePerRT *perRT)
+{
+   SVGA3D_CREATE_COMMAND(DefineBlendState, DEFINE_BLEND_STATE);
+
+   cmd->blendId = blendId;
+   cmd->alphaToCoverageEnable = alphaToCoverageEnable;
+   cmd->independentBlendEnable = independentBlendEnable;
+   memcpy(cmd->perRT, perRT, sizeof(cmd->perRT));
+   cmd->pad0 = 0;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyBlendState(struct svga_winsys_context *swc,
+                                SVGA3dBlendStateId blendId)
+{
+   SVGA3D_CREATE_COMMAND(DestroyBlendState, DESTROY_BLEND_STATE);
+
+   cmd->blendId = blendId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DefineDepthStencilState(struct svga_winsys_context *swc,
+                                      SVGA3dDepthStencilStateId depthStencilId,
+                                      uint8 depthEnable,
+                                      SVGA3dDepthWriteMask depthWriteMask,
+                                      SVGA3dComparisonFunc depthFunc,
+                                      uint8 stencilEnable,
+                                      uint8 frontEnable,
+                                      uint8 backEnable,
+                                      uint8 stencilReadMask,
+                                      uint8 stencilWriteMask,
+                                      uint8 frontStencilFailOp,
+                                      uint8 frontStencilDepthFailOp,
+                                      uint8 frontStencilPassOp,
+                                      SVGA3dComparisonFunc frontStencilFunc,
+                                      uint8 backStencilFailOp,
+                                      uint8 backStencilDepthFailOp,
+                                      uint8 backStencilPassOp,
+                                      SVGA3dComparisonFunc backStencilFunc)
+{
+   SVGA3D_CREATE_COMMAND(DefineDepthStencilState, DEFINE_DEPTHSTENCIL_STATE);
+
+   SVGA3D_COPY_BASIC_9(depthStencilId, depthEnable,
+                       depthWriteMask, depthFunc,
+                       stencilEnable, frontEnable,
+                       backEnable, stencilReadMask,
+                       stencilWriteMask);
+   SVGA3D_COPY_BASIC_8(frontStencilFailOp, frontStencilDepthFailOp,
+                       frontStencilPassOp, frontStencilFunc,
+                       backStencilFailOp, backStencilDepthFailOp,
+                       backStencilPassOp, backStencilFunc);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyDepthStencilState(struct svga_winsys_context *swc,
+                                    SVGA3dDepthStencilStateId depthStencilId)
+{
+   SVGA3D_CREATE_COMMAND(DestroyDepthStencilState,
+                         DESTROY_DEPTHSTENCIL_STATE);
+
+   cmd->depthStencilId = depthStencilId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DefineRasterizerState(struct svga_winsys_context *swc,
+                                    SVGA3dRasterizerStateId rasterizerId,
+                                    uint8 fillMode,
+                                    SVGA3dCullMode cullMode,
+                                    uint8 frontCounterClockwise,
+                                    int32 depthBias,
+                                    float depthBiasClamp,
+                                    float slopeScaledDepthBias,
+                                    uint8 depthClipEnable,
+                                    uint8 scissorEnable,
+                                    uint8 multisampleEnable,
+                                    uint8 antialiasedLineEnable,
+                                    float lineWidth,
+                                    uint8 lineStippleEnable,
+                                    uint8 lineStippleFactor,
+                                    uint16 lineStipplePattern,
+                                    uint8 provokingVertexLast)
+{
+   SVGA3D_CREATE_COMMAND(DefineRasterizerState, DEFINE_RASTERIZER_STATE);
+
+   SVGA3D_COPY_BASIC_5(rasterizerId, fillMode,
+                       cullMode, frontCounterClockwise,
+                       depthBias);
+   SVGA3D_COPY_BASIC_6(depthBiasClamp, slopeScaledDepthBias,
+                       depthClipEnable, scissorEnable,
+                       multisampleEnable, antialiasedLineEnable);
+   cmd->lineWidth = lineWidth;
+   cmd->lineStippleEnable = lineStippleEnable;
+   cmd->lineStippleFactor = lineStippleFactor;
+   cmd->lineStipplePattern = lineStipplePattern;
+   cmd->provokingVertexLast = provokingVertexLast;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyRasterizerState(struct svga_winsys_context *swc,
+                                     SVGA3dRasterizerStateId rasterizerId)
+{
+   SVGA3D_CREATE_COMMAND(DestroyRasterizerState, DESTROY_RASTERIZER_STATE);
+
+   cmd->rasterizerId = rasterizerId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DefineSamplerState(struct svga_winsys_context *swc,
+                                 SVGA3dSamplerId samplerId,
+                                 SVGA3dFilter filter,
+                                 uint8 addressU,
+                                 uint8 addressV,
+                                 uint8 addressW,
+                                 float mipLODBias,
+                                 uint8 maxAnisotropy,
+                                 uint8 comparisonFunc,
+                                 SVGA3dRGBAFloat borderColor,
+                                 float minLOD,
+                                 float maxLOD)
+{
+   SVGA3D_CREATE_COMMAND(DefineSamplerState, DEFINE_SAMPLER_STATE);
+
+   SVGA3D_COPY_BASIC_6(samplerId, filter,
+                       addressU, addressV,
+                       addressW, mipLODBias);
+   SVGA3D_COPY_BASIC_5(maxAnisotropy, comparisonFunc,
+                       borderColor, minLOD,
+                       maxLOD);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroySamplerState(struct svga_winsys_context *swc,
+                                  SVGA3dSamplerId samplerId)
+{
+   SVGA3D_CREATE_COMMAND(DestroySamplerState, DESTROY_SAMPLER_STATE);
+
+   cmd->samplerId = samplerId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_vgpu10_DefineAndBindShader(struct svga_winsys_context *swc,
+                                  struct svga_winsys_gb_shader *gbshader,
+                                  SVGA3dShaderId shaderId,
+                                  SVGA3dShaderType type,
+                                  uint32 sizeInBytes)
+{
+   SVGA3dCmdHeader *header;
+   SVGA3dCmdDXDefineShader *dcmd;
+   SVGA3dCmdDXBindShader *bcmd;
+   unsigned totalSize = 2 * sizeof(*header) +
+                        sizeof(*dcmd) + sizeof(*bcmd);
+
+   /* Make sure there is room for both commands */
+   header = swc->reserve(swc, totalSize, 2);
+   if (!header)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* DXDefineShader command */
+   header->id = SVGA_3D_CMD_DX_DEFINE_SHADER;
+   header->size = sizeof(*dcmd);
+   dcmd = (SVGA3dCmdDXDefineShader *)(header + 1);
+   dcmd->shaderId = shaderId;
+   dcmd->type = type;
+   dcmd->sizeInBytes = sizeInBytes;
+
+   /* DXBindShader command */
+   header = (SVGA3dCmdHeader *)(dcmd + 1);
+
+   header->id = SVGA_3D_CMD_DX_BIND_SHADER;
+   header->size = sizeof(*bcmd);
+   bcmd = (SVGA3dCmdDXBindShader *)(header + 1);
+
+   bcmd->cid = swc->cid;
+   swc->shader_relocation(swc, NULL, &bcmd->mobid,
+                          &bcmd->offsetInBytes, gbshader, 0);
+
+   bcmd->shid = shaderId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
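DefineAndBindShader reserves space for both commands in one call so a flush can never split them across command buffers. The manual layout (write a header, step past its body, write the next header) generalizes; a small illustrative helper, not part of this patch:

/* Illustrative only.  Writes one command header at *p, returns a
 * pointer to its body, and advances *p past the body so the next
 * command can be appended back to back in the same reservation.
 */
static inline void *
begin_command(SVGA3dCmdHeader **p, uint32 cmd_id, uint32 body_size)
{
   SVGA3dCmdHeader *header = *p;
   header->id = cmd_id;
   header->size = body_size;
   *p = (SVGA3dCmdHeader *) ((uint8 *) (header + 1) + body_size);
   return header + 1;
}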
+enum pipe_error
+SVGA3D_vgpu10_DestroyShader(struct svga_winsys_context *swc,
+                            SVGA3dShaderId shaderId)
+{
+   SVGA3D_CREATE_COMMAND(DestroyShader, DESTROY_SHADER);
+
+   cmd->shaderId = shaderId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DefineStreamOutput(struct svga_winsys_context *swc,
+       SVGA3dStreamOutputId soid,
+       uint32 numOutputStreamEntries,
+       uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS],
+       const SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS])
+{
+   unsigned i;
+   SVGA3D_CREATE_COMMAND(DefineStreamOutput, DEFINE_STREAMOUTPUT);
+
+   cmd->soid = soid;
+   cmd->numOutputStreamEntries = numOutputStreamEntries;
+
+   for (i = 0; i < Elements(cmd->streamOutputStrideInBytes); i++)
+      cmd->streamOutputStrideInBytes[i] = streamOutputStrideInBytes[i];
+
+   memcpy(cmd->decl, decl,
+          sizeof(SVGA3dStreamOutputDeclarationEntry)
+          * SVGA3D_MAX_STREAMOUT_DECLS);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyStreamOutput(struct svga_winsys_context *swc,
+                                  SVGA3dStreamOutputId soid)
+{
+   SVGA3D_CREATE_COMMAND(DestroyStreamOutput, DESTROY_STREAMOUTPUT);
+
+   cmd->soid = soid;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetInputLayout(struct svga_winsys_context *swc,
+                             SVGA3dElementLayoutId elementLayoutId)
+{
+   SVGA3D_CREATE_COMMAND(SetInputLayout, SET_INPUT_LAYOUT);
+
+   cmd->elementLayoutId = elementLayoutId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetVertexBuffers(struct svga_winsys_context *swc,
+                               unsigned count,
+                               uint32 startBuffer,
+                               const SVGA3dVertexBuffer *bufferInfo,
+                               struct svga_winsys_surface **surfaces)
+{
+   SVGA3dCmdDXSetVertexBuffers *cmd;
+   SVGA3dVertexBuffer *bufs;
+   unsigned i;
+
+   assert(count > 0);
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS,
+                            sizeof(SVGA3dCmdDXSetVertexBuffers) +
+                            count * sizeof(SVGA3dVertexBuffer),
+                            count); /* 'count' relocations */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->startBuffer = startBuffer;
+
+   bufs = (SVGA3dVertexBuffer *) &cmd[1];
+   for (i = 0; i < count; i++) {
+      bufs[i].stride = bufferInfo[i].stride;
+      bufs[i].offset = bufferInfo[i].offset;
+      assert(bufs[i].stride % 4 == 0);
+      assert(bufs[i].offset % 4 == 0);
+      swc->surface_relocation(swc, &bufs[i].sid, NULL, surfaces[i],
+                              SVGA_RELOC_READ);
+   }
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetTopology(struct svga_winsys_context *swc,
+                          SVGA3dPrimitiveType topology)
+{
+   SVGA3D_CREATE_COMMAND(SetTopology, SET_TOPOLOGY);
+
+   cmd->topology = topology;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetIndexBuffer(struct svga_winsys_context *swc,
+                             struct svga_winsys_surface *indexes,
+                             SVGA3dSurfaceFormat format,
+                             uint32 offset)
+{
+   SVGA3dCmdDXSetIndexBuffer *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_INDEX_BUFFER,
+                            sizeof(SVGA3dCmdDXSetIndexBuffer),
+                            1); /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->sid, NULL, indexes, SVGA_RELOC_READ);
+   SVGA3D_COPY_BASIC_2(format, offset);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc,
+                                      unsigned slot,
+                                      SVGA3dShaderType type,
+                                      struct svga_winsys_surface *surface,
+                                      uint32 offsetInBytes,
+                                      uint32 sizeInBytes)
+{
+   SVGA3dCmdDXSetSingleConstantBuffer *cmd;
+
+   assert(offsetInBytes % 256 == 0);
+   if (!surface)
+      assert(sizeInBytes == 0);
+   else
+      assert(sizeInBytes > 0);
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER,
+                            sizeof(SVGA3dCmdDXSetSingleConstantBuffer),
+                            1);  /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->slot = slot;
+   cmd->type = type;
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface, SVGA_RELOC_READ);
+   cmd->offsetInBytes = offsetInBytes;
+   cmd->sizeInBytes = sizeInBytes;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
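The 256-byte offset restriction asserted above matches the CONST0_UPLOAD_ALIGNMENT that the context code below passes to its constant-buffer upload manager, so offsets produced there are legal by construction. A caller computing offsets by hand could round up like this (illustrative helper, not from the patch):

/* Round a constant-buffer offset up to the required 256-byte boundary. */
static inline uint32
align_const_buf_offset(uint32 offset)
{
   return (offset + 255u) & ~255u;
}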
+
+enum pipe_error
+SVGA3D_vgpu10_ReadbackSubResource(struct svga_winsys_context *swc,
+                                  struct svga_winsys_surface *surface,
+                                  unsigned subResource)
+{
+   SVGA3dCmdDXReadbackSubResource *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_READBACK_SUBRESOURCE,
+                            sizeof(SVGA3dCmdDXReadbackSubResource),
+                            1);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface,
+                           SVGA_RELOC_READ | SVGA_RELOC_INTERNAL);
+   cmd->subResource = subResource;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+enum pipe_error
+SVGA3D_vgpu10_UpdateSubResource(struct svga_winsys_context *swc,
+                                struct svga_winsys_surface *surface,
+                                const SVGA3dBox *box,
+                                unsigned subResource)
+{
+   SVGA3dCmdDXUpdateSubResource *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE,
+                            sizeof(SVGA3dCmdDXUpdateSubResource),
+                            1);
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface,
+                           SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
+   cmd->subResource = subResource;
+   cmd->box = *box;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
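Every entry point in this file follows the same reserve/fill/relocate/commit lifecycle. An illustrative call sequence (a sketch, not code from this patch; the primitive-type constant is assumed from the existing SVGA3D headers):

/* Emit a topology change followed by one indexed draw. */
enum pipe_error ret;

ret = SVGA3D_vgpu10_SetTopology(swc, SVGA3D_PRIMITIVE_TRIANGLELIST);
if (ret == PIPE_OK)
   ret = SVGA3D_vgpu10_DrawIndexed(swc, index_count,
                                   0,   /* startIndexLocation */
                                   0);  /* baseVertexLocation */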
index 673d17a..2bf795d 100644 (file)
@@ -30,6 +30,7 @@
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 #include "util/u_bitmask.h"
+#include "util/u_upload_mgr.h"
 
 #include "svga_context.h"
 #include "svga_screen.h"
 #include "svga_draw.h"
 #include "svga_debug.h"
 #include "svga_state.h"
+#include "svga_winsys.h"
+
+#define CONST0_UPLOAD_DEFAULT_SIZE 65536
+#define CONST0_UPLOAD_ALIGNMENT 256
 
 DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE)
 DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE);
@@ -53,27 +58,67 @@ DEBUG_GET_ONCE_BOOL_OPTION(force_hw_line_stipple, "SVGA_FORCE_HW_LINE_STIPPLE",
 static void svga_destroy( struct pipe_context *pipe )
 {
    struct svga_context *svga = svga_context( pipe );
-   struct svga_winsys_screen *sws = svga_screen(pipe->screen)->sws;
-   unsigned shader;
+   unsigned shader, i;
+
+   /* free any alternate rasterizer states used for point sprites */
+   for (i = 0; i < Elements(svga->rasterizer_no_cull); i++) {
+      if (svga->rasterizer_no_cull[i]) {
+         pipe->delete_rasterizer_state(pipe, svga->rasterizer_no_cull[i]);
+      }
+   }
+
+   /* free polygon stipple state */
+   if (svga->polygon_stipple.sampler) {
+      pipe->delete_sampler_state(pipe, svga->polygon_stipple.sampler);
+   }
+   if (svga->polygon_stipple.sampler_view) {
+      pipe->sampler_view_destroy(pipe,
+                                 &svga->polygon_stipple.sampler_view->base);
+   }
+   pipe_resource_reference(&svga->polygon_stipple.texture, NULL);
+
+   /* free HW constant buffers */
+   for (shader = 0; shader < Elements(svga->state.hw_draw.constbuf); shader++) {
+      pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], NULL);
+   }
+
+   pipe->delete_blend_state(pipe, svga->noop_blend);
+
+   /* free query gb object */
+   if (svga->gb_query) {
+      pipe->destroy_query(pipe, NULL);
+      svga->gb_query = NULL;
+   }
 
    util_blitter_destroy(svga->blitter);
 
    svga_cleanup_framebuffer( svga );
    svga_cleanup_tss_binding( svga );
 
-   svga_hwtnl_destroy( svga->hwtnl );
-
    svga_cleanup_vertex_state(svga);
    
-   svga->swc->destroy(svga->swc);
-   
    svga_destroy_swtnl( svga );
+   svga_hwtnl_destroy( svga->hwtnl );
 
-   util_bitmask_destroy( svga->shader_id_bm );
+   svga->swc->destroy(svga->swc);
 
+   util_bitmask_destroy(svga->blend_object_id_bm);
+   util_bitmask_destroy(svga->ds_object_id_bm);
+   util_bitmask_destroy(svga->input_element_object_id_bm);
+   util_bitmask_destroy(svga->rast_object_id_bm);
+   util_bitmask_destroy(svga->sampler_object_id_bm);
+   util_bitmask_destroy(svga->sampler_view_id_bm);
+   util_bitmask_destroy(svga->shader_id_bm);
+   util_bitmask_destroy(svga->surface_view_id_bm);
+   util_bitmask_destroy(svga->stream_output_id_bm);
+   util_bitmask_destroy(svga->query_id_bm);
+   u_upload_destroy(svga->const0_upload);
+
+   /* free user's constant buffers */
    for (shader = 0; shader < PIPE_SHADER_TYPES; ++shader) {
-      pipe_resource_reference( &svga->curr.cbufs[shader].buffer, NULL );
-      sws->surface_reference(sws, &svga->state.hw_draw.hw_cb[shader], NULL);
+      for (i = 0; i < Elements(svga->curr.constbufs[shader]); ++i) {
+         pipe_resource_reference(&svga->curr.constbufs[shader][i].buffer, NULL);
+      }
    }
 
    FREE( svga );
@@ -90,7 +135,7 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
 
    svga = CALLOC_STRUCT(svga_context);
    if (svga == NULL)
-      goto no_svga;
+      goto cleanup;
 
    LIST_INITHEAD(&svga->dirty_buffers);
 
@@ -100,8 +145,8 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
    svga->pipe.clear = svga_clear;
 
    svga->swc = svgascreen->sws->context_create(svgascreen->sws);
-   if(!svga->swc)
-      goto no_swc;
+   if (!svga->swc)
+      goto cleanup;
 
    svga_init_resource_functions(svga);
    svga_init_blend_functions(svga);
@@ -114,11 +159,15 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
    svga_init_sampler_functions(svga);
    svga_init_fs_functions(svga);
    svga_init_vs_functions(svga);
+   svga_init_gs_functions(svga);
    svga_init_vertex_functions(svga);
    svga_init_constbuffer_functions(svga);
    svga_init_query_functions(svga);
    svga_init_surface_functions(svga);
+   svga_init_stream_output_functions(svga);
 
+   /* init misc state */
+   svga->curr.sample_mask = ~0;
 
    /* debug */
    svga->debug.no_swtnl = debug_get_option_no_swtnl();
@@ -128,21 +177,54 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
    svga->debug.no_line_width = debug_get_option_no_line_width();
    svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple();
 
-   svga->shader_id_bm = util_bitmask_create();
-   if (svga->shader_id_bm == NULL)
-      goto no_shader_bm;
+   if (!(svga->blend_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->ds_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->input_element_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->rast_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->sampler_object_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->sampler_view_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->shader_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->surface_view_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->stream_output_id_bm = util_bitmask_create()))
+      goto cleanup;
+
+   if (!(svga->query_id_bm = util_bitmask_create()))
+      goto cleanup;
 
    svga->hwtnl = svga_hwtnl_create(svga);
    if (svga->hwtnl == NULL)
-      goto no_hwtnl;
+      goto cleanup;
 
    if (!svga_init_swtnl(svga))
-      goto no_swtnl;
+      goto cleanup;
 
    ret = svga_emit_initial_state( svga );
    if (ret != PIPE_OK)
-      goto no_state;
-   
+      goto cleanup;
+
+   svga->const0_upload = u_upload_create(&svga->pipe,
+                                         CONST0_UPLOAD_DEFAULT_SIZE,
+                                         CONST0_UPLOAD_ALIGNMENT,
+                                         PIPE_BIND_CONSTANT_BUFFER);
+   if (!svga->const0_upload)
+      goto cleanup;
+
    /* Avoid shortcircuiting state with initial value of zero.
     */
    memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear));
@@ -151,24 +233,64 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
 
    memset(&svga->state.hw_draw, 0xcd, sizeof(svga->state.hw_draw));
    memset(&svga->state.hw_draw.views, 0x0, sizeof(svga->state.hw_draw.views));
+   memset(&svga->state.hw_draw.num_sampler_views, 0,
+      sizeof(svga->state.hw_draw.num_sampler_views));
    svga->state.hw_draw.num_views = 0;
-   memset(&svga->state.hw_draw.hw_cb, 0x0, sizeof(svga->state.hw_draw.hw_cb));
+
+   /* Initialize the shader pointers */
+   svga->state.hw_draw.vs = NULL;
+   svga->state.hw_draw.gs = NULL;
+   svga->state.hw_draw.fs = NULL;
+   memset(svga->state.hw_draw.constbuf, 0,
+          sizeof(svga->state.hw_draw.constbuf));
+   memset(svga->state.hw_draw.default_constbuf_size, 0,
+          sizeof(svga->state.hw_draw.default_constbuf_size));
+   memset(svga->state.hw_draw.enabled_constbufs, 0,
+          sizeof(svga->state.hw_draw.enabled_constbufs));
+
+   /* Create a no-operation blend state which we will bind whenever the
+    * requested blend state is impossible (e.g. due to having an integer
+    * render target attached).
+    *
+    * XXX: We will probably actually need 16 of these, one for each possible
+    * RGBA color mask (4 bits).  Then, we would bind the one with a color mask
+    * matching the blend state it is replacing.
+    */
+   {
+      struct pipe_blend_state noop_tmpl = {0};
+      unsigned i;
+
+      for (i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
+         /* Set the color mask to all-ones.  Later this may change. */
+         noop_tmpl.rt[i].colormask = PIPE_MASK_RGBA;
+      }
+      svga->noop_blend = svga->pipe.create_blend_state(&svga->pipe, &noop_tmpl);
+   }
 
    svga->dirty = ~0;
 
    return &svga->pipe;
 
-no_state:
+cleanup:
    svga_destroy_swtnl(svga);
-no_swtnl:
-   svga_hwtnl_destroy( svga->hwtnl );
-no_hwtnl:
-   util_bitmask_destroy( svga->shader_id_bm );
-no_shader_bm:
-   svga->swc->destroy(svga->swc);
-no_swc:
+
+   if (svga->const0_upload)
+      u_upload_destroy(svga->const0_upload);
+   if (svga->hwtnl)
+      svga_hwtnl_destroy(svga->hwtnl);
+   if (svga->swc)
+      svga->swc->destroy(svga->swc);
+   util_bitmask_destroy(svga->blend_object_id_bm);
+   util_bitmask_destroy(svga->ds_object_id_bm);
+   util_bitmask_destroy(svga->input_element_object_id_bm);
+   util_bitmask_destroy(svga->rast_object_id_bm);
+   util_bitmask_destroy(svga->sampler_object_id_bm);
+   util_bitmask_destroy(svga->sampler_view_id_bm);
+   util_bitmask_destroy(svga->shader_id_bm);
+   util_bitmask_destroy(svga->surface_view_id_bm);
+   util_bitmask_destroy(svga->stream_output_id_bm);
+   util_bitmask_destroy(svga->query_id_bm);
    FREE(svga);
-no_svga:
    return NULL;
 }
 
@@ -195,11 +317,19 @@ void svga_context_flush( struct svga_context *svga,
    /* To force the re-emission of rendertargets and texture sampler bindings on
     * the next command buffer.
     */
-   svga->rebind.rendertargets = TRUE;
-   svga->rebind.texture_samplers = TRUE;
+   svga->rebind.flags.rendertargets = TRUE;
+   svga->rebind.flags.texture_samplers = TRUE;
+
    if (svga_have_gb_objects(svga)) {
-      svga->rebind.vs = TRUE;
-      svga->rebind.fs = TRUE;
+
+      svga->rebind.flags.constbufs = TRUE;
+      svga->rebind.flags.vs = TRUE;
+      svga->rebind.flags.fs = TRUE;
+      svga->rebind.flags.gs = TRUE;
+
+      if (svga_need_to_rebind_resources(svga)) {
+         svga->rebind.flags.query = TRUE;
+      }
    }
 
    if (SVGA_DEBUG & DEBUG_SYNC) {
@@ -215,6 +345,26 @@ void svga_context_flush( struct svga_context *svga,
 }
 
 
+/**
+ * Flush pending commands and wait for completion with a fence.
+ */
+void
+svga_context_finish(struct svga_context *svga)
+{
+   struct pipe_screen *screen = svga->pipe.screen;
+   struct pipe_fence_handle *fence = NULL;
+
+   svga_context_flush(svga, &fence);
+   screen->fence_finish(screen, fence, PIPE_TIMEOUT_INFINITE);
+   screen->fence_reference(screen, &fence, NULL);
+}
+
+
+/**
+ * Emit pending drawing commands to the command buffer.
+ * If the command buffer overflows, we flush it and retry.
+ * \sa svga_hwtnl_flush()
+ */
 void svga_hwtnl_flush_retry( struct svga_context *svga )
 {
    enum pipe_error ret = PIPE_OK;
@@ -225,7 +375,7 @@ void svga_hwtnl_flush_retry( struct svga_context *svga )
       ret = svga_hwtnl_flush( svga->hwtnl );
    }
 
-   assert(ret == 0);
+   assert(ret == PIPE_OK);
 }
 
 
index 2726346..e8575f3 100644 (file)
@@ -38,7 +38,6 @@
 
 #include "svga_screen.h"
 #include "svga_state.h"
-#include "svga_tgsi.h"
 #include "svga_winsys.h"
 #include "svga_hw_reg.h"
 #include "svga3d_shaderdefs.h"
 #define SVGA_QUERY_DRAW_CALLS   (PIPE_QUERY_DRIVER_SPECIFIC + 0)
 #define SVGA_QUERY_FALLBACKS    (PIPE_QUERY_DRIVER_SPECIFIC + 1)
 #define SVGA_QUERY_MEMORY_USED  (PIPE_QUERY_DRIVER_SPECIFIC + 2)
+#define SVGA_QUERY_MAX          (PIPE_QUERY_DRIVER_SPECIFIC + 3)
 
+/**
+ * Maximum supported number of constant buffers per shader
+ */
+#define SVGA_MAX_CONST_BUFS 14
+
+/**
+ * Maximum constant buffer size that can be set in the
+ * DXSetSingleConstantBuffer command: the DX10 constant buffer element
+ * count (4096) times four 4-byte components, i.e. 64 KiB.
+ */
+#define SVGA_MAX_CONST_BUF_SIZE (4096 * 4 * sizeof(int))
 
 struct draw_vertex_shader;
 struct draw_fragment_shader;
@@ -57,49 +68,16 @@ struct SVGACmdMemory;
 struct util_bitmask;
 
 
-struct svga_shader
-{
-   const struct tgsi_token *tokens;
-
-   struct tgsi_shader_info info;
-
-   /** Head of linked list of variants */
-   struct svga_shader_variant *variants;
-
-   unsigned id;  /**< for debugging only */
-};
-
-
-struct svga_fragment_shader
-{
-   struct svga_shader base;
-
-   struct draw_fragment_shader *draw_shader;
-
-   /** Mask of which generic varying variables are read by this shader */
-   unsigned generic_inputs;
-   /** Table mapping original TGSI generic indexes to low integers */
-   int8_t generic_remap_table[MAX_GENERIC_VARYING];
-};
-
-
-struct svga_vertex_shader
-{
-   struct svga_shader base;
-
-   struct draw_vertex_shader *draw_shader;
-};
-
-
 struct svga_cache_context;
 struct svga_tracked_state;
 
 struct svga_blend_state {
+   unsigned need_white_fragments:1;
+   unsigned independent_blend_enable:1;
+   unsigned alpha_to_coverage:1;
+   unsigned blend_color_alpha:1;  /**< set blend color to alpha value */
 
-   boolean need_white_fragments;
-
-   /* Should be per-render-target:
-    */
+   /** Per-render target state */
    struct {
       uint8_t writemask;
 
@@ -112,8 +90,9 @@ struct svga_blend_state {
       uint8_t srcblend_alpha;
       uint8_t dstblend_alpha;
       uint8_t blendeq_alpha;
+   } rt[PIPE_MAX_COLOR_BUFS];
 
-   } rt[1];
+   SVGA3dBlendStateId id;  /**< vgpu10 */
 };
 
 struct svga_depth_stencil_state {
@@ -139,6 +118,8 @@ struct svga_depth_stencil_state {
    unsigned stencil_writemask:8;
 
    float    alpharef;
+
+   SVGA3dDepthStencilStateId id;  /**< vgpu10 */
 };
 
 #define SVGA_UNFILLED_DISABLE 0
@@ -167,11 +148,13 @@ struct svga_rasterizer_state {
    float pointsize;
    float linewidth;
    
-   unsigned hw_unfilled:16;         /* PIPE_POLYGON_MODE_x */
+   unsigned hw_fillmode:2;         /* PIPE_POLYGON_MODE_x */
 
    /** Which prims do we need help for?  Bitmask of (1 << PIPE_PRIM_x) flags */
    unsigned need_pipeline:16;
 
+   SVGA3dRasterizerStateId id;    /**< vgpu10 */
+
    /** For debugging: */
    const char* need_pipeline_tris_str;
    const char* need_pipeline_lines_str;
@@ -195,15 +178,45 @@ struct svga_sampler_state {
    unsigned min_lod;
    unsigned view_min_lod;
    unsigned view_max_lod;
+
+   SVGA3dSamplerId id;
 };
 
+
+struct svga_pipe_sampler_view
+{
+   struct pipe_sampler_view base;
+
+   SVGA3dShaderResourceViewId id;
+};
+
+
+static inline struct svga_pipe_sampler_view *
+svga_pipe_sampler_view(struct pipe_sampler_view *v)
+{
+   return (struct svga_pipe_sampler_view *) v;
+}
+
+
 struct svga_velems_state {
    unsigned count;
    struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
    SVGA3dDeclType decl_type[PIPE_MAX_ATTRIBS]; /**< vertex attrib formats */
-   unsigned adjust_attrib_range; /* bitmask of attrs needing range adjustment */
-   unsigned adjust_attrib_w_1;   /* bitmask of attrs needing w = 1 */
+
+   /** Bitmasks indicating which attributes need format conversion */
+   unsigned adjust_attrib_range;     /**< range adjustment */
+   unsigned attrib_is_pure_int;      /**< pure int */
+   unsigned adjust_attrib_w_1;       /**< set w = 1 */
+   unsigned adjust_attrib_itof;      /**< int->float */
+   unsigned adjust_attrib_utof;      /**< uint->float */
+   unsigned attrib_is_bgra;          /**< R / B swizzling */
+   unsigned attrib_puint_to_snorm;   /**< 10_10_10_2 packed uint -> snorm */
+   unsigned attrib_puint_to_uscaled; /**< 10_10_10_2 packed uint -> uscaled */
+   unsigned attrib_puint_to_sscaled; /**< 10_10_10_2 packed uint -> sscaled */
+
    boolean need_swvfetch;
+
+   SVGA3dElementLayoutId id; /**< VGPU10 */
 };
 
 /* Use to calculate differences between state emitted to hardware and
@@ -214,16 +227,22 @@ struct svga_state
    const struct svga_blend_state *blend;
    const struct svga_depth_stencil_state *depth;
    const struct svga_rasterizer_state *rast;
-   const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+   const struct svga_sampler_state *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
    const struct svga_velems_state *velems;
 
-   struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; /* or texture ID's? */
+   struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; /* or texture IDs? */
    struct svga_fragment_shader *fs;
    struct svga_vertex_shader *vs;
+   struct svga_geometry_shader *user_gs; /* user-specified GS */
+   struct svga_geometry_shader *gs;      /* derived GS */
 
    struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
    struct pipe_index_buffer ib;
-   struct pipe_constant_buffer cbufs[PIPE_SHADER_TYPES];
+   /** Constant buffers for each shader.
+    * The size should probably always match that of
+    * svga_shader_emitter_v10.num_shader_consts.
+    */
+   struct pipe_constant_buffer constbufs[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS];
 
    struct pipe_framebuffer_state framebuffer;
    float depthscale;
@@ -240,8 +259,8 @@ struct svga_state
    struct pipe_clip_state clip;
    struct pipe_viewport_state viewport;
 
-   unsigned num_samplers;
-   unsigned num_sampler_views;
+   unsigned num_samplers[PIPE_SHADER_TYPES];
+   unsigned num_sampler_views[PIPE_SHADER_TYPES];
    unsigned num_vertex_buffers;
    unsigned reduced_prim;
 
@@ -249,6 +268,8 @@ struct svga_state
       unsigned flag_1d;
       unsigned flag_srgb;
    } tex_flags;
+
+   unsigned sample_mask;
 };
 
 struct svga_prescale {
@@ -262,9 +283,7 @@ struct svga_prescale {
  */
 struct svga_hw_clear_state
 {
-   struct {
-      unsigned x,y,w,h;
-   } viewport;
+   SVGA3dRect viewport;
 
    struct {
       float zmin, zmax;
@@ -291,16 +310,29 @@ struct svga_hw_draw_state
    unsigned ts[SVGA3D_PIXEL_SAMPLERREG_MAX][SVGA3D_TS_MAX];
    float cb[PIPE_SHADER_TYPES][SVGA3D_CONSTREG_MAX][4];
 
-   /**
-    * For guest backed shader constants only.
-    */
-   struct svga_winsys_surface *hw_cb[PIPE_SHADER_TYPES];
-
    struct svga_shader_variant *fs;
    struct svga_shader_variant *vs;
+   struct svga_shader_variant *gs;
    struct svga_hw_view_state views[PIPE_MAX_SAMPLERS];
-
    unsigned num_views;
+   struct pipe_resource *constbuf[PIPE_SHADER_TYPES];
+
+   /* Bitmask of enabled constant buffers */
+   unsigned enabled_constbufs[PIPE_SHADER_TYPES];
+
+   /* VGPU10 HW state (used to prevent emitting redundant state) */
+   SVGA3dDepthStencilStateId depth_stencil_id;
+   unsigned stencil_ref;
+   SVGA3dBlendStateId blend_id;
+   float blend_factor[4];
+   unsigned blend_sample_mask;
+   SVGA3dRasterizerStateId rasterizer_id;
+   SVGA3dElementLayoutId layout_id;
+   SVGA3dPrimitiveType topology;
+
+   /* used for rebinding */
+   unsigned num_sampler_views[PIPE_SHADER_TYPES];
+   unsigned default_constbuf_size[PIPE_SHADER_TYPES];
 };
 
 
@@ -326,12 +358,14 @@ struct svga_sw_state
 struct svga_hw_queue;
 
 struct svga_query;
+struct svga_qmem_alloc_entry;
 
 struct svga_context
 {
    struct pipe_context pipe;
    struct svga_winsys_context *swc;
    struct blitter_context *blitter;
+   struct u_upload_mgr *const0_upload;
 
    struct {
       boolean no_swtnl;
@@ -355,12 +389,42 @@ struct svga_context
       boolean new_vdecl;
    } swtnl;
 
+   /* Bitmask of blend state object IDs */
+   struct util_bitmask *blend_object_id_bm;
+
+   /* Bitmask of depth/stencil state object IDs */
+   struct util_bitmask *ds_object_id_bm;
+
+   /* Bitmask of input element object IDs */
+   struct util_bitmask *input_element_object_id_bm;
+
+   /* Bitmask of rasterizer object IDs */
+   struct util_bitmask *rast_object_id_bm;
+
+   /* Bitmask of sampler state object IDs */
+   struct util_bitmask *sampler_object_id_bm;
+
+   /* Bitmask of sampler view IDs */
+   struct util_bitmask *sampler_view_id_bm;
+
    /* Bitmask of used shader IDs */
    struct util_bitmask *shader_id_bm;
 
+   /* Bitmask of used surface view IDs */
+   struct util_bitmask *surface_view_id_bm;
+
+   /* Bitmask of used stream output IDs */
+   struct util_bitmask *stream_output_id_bm;
+
+   /* Bitmask of used query IDs */
+   struct util_bitmask *query_id_bm;
+
    struct {
       unsigned dirty[SVGA_STATE_MAX];
 
+      /** bitmasks of which const buffers are changed */
+      unsigned dirty_constbufs[PIPE_SHADER_TYPES];
+
       unsigned texture_timestamp;
 
       /* 
@@ -373,17 +437,28 @@ struct svga_context
    struct svga_state curr;      /* state from the state tracker */
    unsigned dirty;              /* statechanges since last update_state() */
 
-   struct {
-      unsigned rendertargets:1;
-      unsigned texture_samplers:1;
-      unsigned vs:1;
-      unsigned fs:1;
+   union {
+      struct {
+         unsigned rendertargets:1;
+         unsigned texture_samplers:1;
+         unsigned constbufs:1;
+         unsigned vs:1;
+         unsigned fs:1;
+         unsigned gs:1;
+         unsigned query:1;
+      } flags;
+      unsigned val;
    } rebind;
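Wrapping the flag bits in a union with 'val' lets callers test or clear every rebind flag in a single access instead of touching each bit, e.g. (illustrative usage, not from the patch):

/* Clear all rebind flags at once (hypothetical usage). */
svga->rebind.val = 0;

/* Anything still flagged for rebind? */
if (svga->rebind.val) {
   /* ... re-emit the flagged bindings ... */
}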
 
    struct svga_hwtnl *hwtnl;
 
-   /** The occlusion query currently in progress */
-   struct svga_query *sq;
+   /** Query states */
+   struct svga_winsys_gb_query *gb_query;     /**< gb query object, one per context */
+   unsigned gb_query_len;                     /**< gb query object size */
+   struct util_bitmask *gb_query_alloc_mask;  /**< gb query object allocation mask */
+   struct svga_qmem_alloc_entry *gb_query_map[SVGA_QUERY_MAX];
+                                              /**< query mem block mapping */
+   struct svga_query *sq[SVGA_QUERY_MAX];     /**< queries currently in progress */
 
    /** List of buffers with queued transfers */
    struct list_head dirty_buffers;
@@ -391,12 +466,32 @@ struct svga_context
    /** performance / info queries */
    uint64_t num_draw_calls;  /**< SVGA_QUERY_DRAW_CALLS */
    uint64_t num_fallbacks;   /**< SVGA_QUERY_FALLBACKS */
+
+   /** The currently bound stream output targets */
+   unsigned num_so_targets;
+   struct svga_winsys_surface *so_surfaces[SVGA3D_DX_MAX_SOTARGETS];
+   struct pipe_stream_output_target *so_targets[SVGA3D_DX_MAX_SOTARGETS];
+   struct svga_stream_output *current_so;
+
+   /** A blend state with blending disabled, to fall back on when blending
+    * is illegal (e.g. an integer texture is bound)
+    */
+   struct svga_blend_state *noop_blend;
+
+   struct {
+      struct pipe_resource *texture;
+      struct svga_pipe_sampler_view *sampler_view;
+      void *sampler;
+   } polygon_stipple;
+
+   /** Alternate rasterizer states created for point sprite */
+   struct svga_rasterizer_state *rasterizer_no_cull[2];
 };
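
For context, the new rebind field is a union: the per-resource dirty bits live in bitfields that alias a single integer, so the draw path can test svga->rebind.val once instead of checking each flag (draw_vgpu10 does exactly that further down). A minimal, self-contained sketch of the idiom in standard C; bitfield layout is implementation-defined, which is fine here because val is only ever tested against zero and cleared:

#include <stdio.h>

/* Hypothetical stand-in for the rebind union above: per-resource dirty
 * bits aliased with one integer for cheap "anything set?" tests. */
union rebind_state {
   struct {
      unsigned rendertargets:1;
      unsigned texture_samplers:1;
      unsigned constbufs:1;
   } flags;
   unsigned val;
};

int main(void)
{
   union rebind_state rebind = { .val = 0 };

   rebind.flags.constbufs = 1;     /* mark one resource class dirty */

   if (rebind.val)                 /* one test covers every flag */
      printf("rebind needed (val=0x%x)\n", rebind.val);

   rebind.val = 0;                 /* clear all flags at once */
   return 0;
}
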
 
 /* A flag for each state_tracker state object:
  */
 #define SVGA_NEW_BLEND               0x1
-#define SVGA_NEW_DEPTH_STENCIL       0x2
+#define SVGA_NEW_DEPTH_STENCIL_ALPHA 0x2
 #define SVGA_NEW_RAST                0x4
 #define SVGA_NEW_SAMPLER             0x8
 #define SVGA_NEW_TEXTURE             0x10
@@ -422,7 +517,9 @@ struct svga_context
 #define SVGA_NEW_VS_VARIANT          0x1000000
 #define SVGA_NEW_TEXTURE_FLAGS       0x4000000
 #define SVGA_NEW_STENCIL_REF         0x8000000
-
+#define SVGA_NEW_GS                  0x10000000
+#define SVGA_NEW_GS_CONST_BUFFER     0x20000000
+#define SVGA_NEW_GS_VARIANT          0x40000000
 
 
 
@@ -457,11 +554,13 @@ void svga_init_rasterizer_functions( struct svga_context *svga );
 void svga_init_sampler_functions( struct svga_context *svga );
 void svga_init_fs_functions( struct svga_context *svga );
 void svga_init_vs_functions( struct svga_context *svga );
+void svga_init_gs_functions( struct svga_context *svga );
 void svga_init_vertex_functions( struct svga_context *svga );
 void svga_init_constbuffer_functions( struct svga_context *svga );
 void svga_init_draw_functions( struct svga_context *svga );
 void svga_init_query_functions( struct svga_context *svga );
 void svga_init_surface_functions(struct svga_context *svga);
+void svga_init_stream_output_functions( struct svga_context *svga );
 
 void svga_cleanup_vertex_state( struct svga_context *svga );
 void svga_cleanup_tss_binding( struct svga_context *svga );
@@ -470,6 +569,8 @@ void svga_cleanup_framebuffer( struct svga_context *svga );
 void svga_context_flush( struct svga_context *svga,
                          struct pipe_fence_handle **pfence );
 
+void svga_context_finish(struct svga_context *svga);
+
 void svga_hwtnl_flush_retry( struct svga_context *svga );
 void svga_hwtnl_flush_buffer( struct svga_context *svga,
                               struct pipe_resource *buffer );
@@ -504,5 +605,22 @@ svga_have_gb_dma(const struct svga_context *svga)
    return svga_screen(svga->pipe.screen)->sws->have_gb_dma;
 }
 
+static inline boolean
+svga_have_vgpu10(const struct svga_context *svga)
+{
+   return svga_screen(svga->pipe.screen)->sws->have_vgpu10;
+}
+
+static inline boolean
+svga_need_to_rebind_resources(const struct svga_context *svga)
+{
+   return svga_screen(svga->pipe.screen)->sws->need_to_rebind_resources;
+}
+
+static inline boolean
+svga_rects_equal(const SVGA3dRect *r1, const SVGA3dRect *r2)
+{
+   return memcmp(r1, r2, sizeof(*r1)) == 0;
+}
 
 #endif
index 82c9b60..039f79d 100644 (file)
 #include "pipe/p_compiler.h"
 #include "util/u_debug.h"
 
-#define DEBUG_DMA      0x1
-#define DEBUG_TGSI     0x4
-#define DEBUG_PIPE     0x8
-#define DEBUG_STATE    0x10
-#define DEBUG_SCREEN   0x20
-#define DEBUG_TEX      0x40
-#define DEBUG_SWTNL    0x80
-#define DEBUG_CONSTS   0x100
-#define DEBUG_VIEWPORT 0x200
-#define DEBUG_VIEWS    0x400
-#define DEBUG_PERF     0x800    /* print something when we hit any slow path operation */
-#define DEBUG_FLUSH    0x1000   /* flush after every draw */
-#define DEBUG_SYNC     0x2000   /* sync after every flush */
-#define DEBUG_QUERY    0x4000
-#define DEBUG_CACHE    0x8000
+#define DEBUG_DMA          0x1
+#define DEBUG_TGSI         0x4
+#define DEBUG_PIPE         0x8
+#define DEBUG_STATE        0x10
+#define DEBUG_SCREEN       0x20
+#define DEBUG_TEX          0x40
+#define DEBUG_SWTNL        0x80
+#define DEBUG_CONSTS       0x100
+#define DEBUG_VIEWPORT     0x200
+#define DEBUG_VIEWS        0x400
+#define DEBUG_PERF         0x800    /* print something when we hit any slow path operation */
+#define DEBUG_FLUSH        0x1000   /* flush after every draw */
+#define DEBUG_SYNC         0x2000   /* sync after every flush */
+#define DEBUG_QUERY        0x4000
+#define DEBUG_CACHE        0x8000
+#define DEBUG_STREAMOUT    0x10000
 
 #ifdef DEBUG
 extern int SVGA_DEBUG;
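
These flags feed the SVGA_DBG macro used elsewhere in this patch (e.g. SVGA_DBG(DEBUG_DMA, ...) in draw_vgpu9). A rough, self-contained sketch of how such bitmask-filtered logging works; the macro body here is illustrative, not the driver's actual definition:

#include <stdio.h>

#define DEBUG_DMA          0x1
#define DEBUG_STREAMOUT    0x10000

static int SVGA_DEBUG = DEBUG_DMA;   /* normally parsed from an env var */

/* Illustrative macro: print only when the message's category is enabled. */
#define SVGA_DBG(flag, ...)                 \
   do {                                     \
      if (SVGA_DEBUG & (flag))              \
         fprintf(stderr, __VA_ARGS__);      \
   } while (0)

int main(void)
{
   SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", (void *) 0, 1); /* shown */
   SVGA_DBG(DEBUG_STREAMOUT, "stream output bound\n");           /* filtered */
   return 0;
}
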
index 366a2dc..9b6451d 100644 (file)
 #include "pipe/p_compiler.h"
 #include "util/u_inlines.h"
 #include "pipe/p_defines.h"
+#include "util/u_helpers.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
-#include "util/u_upload_mgr.h"
 
 #include "svga_context.h"
 #include "svga_draw.h"
 #include "svga_draw_private.h"
 #include "svga_debug.h"
 #include "svga_screen.h"
+#include "svga_resource.h"
 #include "svga_resource_buffer.h"
 #include "svga_resource_texture.h"
+#include "svga_shader.h"
 #include "svga_surface.h"
 #include "svga_winsys.h"
 #include "svga_cmd.h"
@@ -71,8 +73,8 @@ svga_hwtnl_destroy(struct svga_hwtnl *hwtnl)
       }
    }
 
-   for (i = 0; i < hwtnl->cmd.vdecl_count; i++)
-      pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], NULL);
+   for (i = 0; i < hwtnl->cmd.vbuf_count; i++)
+      pipe_resource_reference(&hwtnl->cmd.vbufs[i].buffer, NULL);
 
    for (i = 0; i < hwtnl->cmd.prim_count; i++)
       pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
@@ -85,45 +87,55 @@ void
 svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl,
                          boolean flatshade, boolean flatshade_first)
 {
-   hwtnl->hw_pv = PV_FIRST;
+   struct svga_screen *svgascreen = svga_screen(hwtnl->svga->pipe.screen);
+
+   /* User-specified PV */
    hwtnl->api_pv = (flatshade && !flatshade_first) ? PV_LAST : PV_FIRST;
+
+   /* Device supported PV */
+   if (svgascreen->haveProvokingVertex) {
+      /* use the mode specified by the user */
+      hwtnl->hw_pv = hwtnl->api_pv;
+   }
+   else {
+      /* the device only supports the first-provoking-vertex mode */
+      hwtnl->hw_pv = PV_FIRST;
+   }
 }
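
To make the provoking-vertex logic concrete, here is the input/output mapping it implements, sketched as standalone C; only the PV_FIRST/PV_LAST names match the driver, the enum values are assumptions for illustration:

/* Standalone sketch of the mapping above. */
enum pv_mode { PV_FIRST, PV_LAST };

static enum pv_mode
api_pv_for(int flatshade, int flatshade_first)
{
   /* Only flat shading with "last vertex provokes" needs PV_LAST; smooth
    * shading, or flat shading with the first vertex, maps to PV_FIRST. */
   return (flatshade && !flatshade_first) ? PV_LAST : PV_FIRST;
}

/* api_pv_for(0, 0) == PV_FIRST    smooth shading, PV irrelevant
 * api_pv_for(0, 1) == PV_FIRST    smooth shading, PV irrelevant
 * api_pv_for(1, 1) == PV_FIRST    flat, first vertex provokes
 * api_pv_for(1, 0) == PV_LAST     flat, last vertex provokes
 *
 * When haveProvokingVertex is false, hw_pv stays PV_FIRST; an api_pv of
 * PV_LAST then has to be honored by reordering vertices elsewhere. */
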
 
 
 void
-svga_hwtnl_set_unfilled(struct svga_hwtnl *hwtnl, unsigned mode)
+svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode)
 {
    hwtnl->api_fillmode = mode;
 }
 
 
 void
-svga_hwtnl_reset_vdecl(struct svga_hwtnl *hwtnl, unsigned count)
+svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl,
+                        unsigned count,
+                        const SVGA3dVertexDecl * decls,
+                        const unsigned *buffer_indexes,
+                        SVGA3dElementLayoutId layout_id)
 {
-   unsigned i;
-
    assert(hwtnl->cmd.prim_count == 0);
-
-   for (i = count; i < hwtnl->cmd.vdecl_count; i++) {
-      pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], NULL);
-   }
-
    hwtnl->cmd.vdecl_count = count;
+   hwtnl->cmd.vdecl_layout_id = layout_id;
+   memcpy(hwtnl->cmd.vdecl, decls, count * sizeof(*decls));
+   memcpy(hwtnl->cmd.vdecl_buffer_index, buffer_indexes,
+          count * sizeof(unsigned));
 }
 
 
+/**
+ * Specify vertex buffers for hardware drawing.
+ */
 void
-svga_hwtnl_vdecl(struct svga_hwtnl *hwtnl,
-                 unsigned i,
-                 const SVGA3dVertexDecl * decl, struct pipe_resource *vb)
+svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl,
+                          unsigned count, struct pipe_vertex_buffer *buffers)
 {
-   assert(hwtnl->cmd.prim_count == 0);
-
-   assert(i < hwtnl->cmd.vdecl_count);
-
-   hwtnl->cmd.vdecl[i] = *decl;
-
-   pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], vb);
+   util_set_vertex_buffers_count(hwtnl->cmd.vbufs,
+                                 &hwtnl->cmd.vbuf_count, buffers, 0, count);
 }
 
 
@@ -145,8 +157,8 @@ svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl,
       return FALSE;
    }
 
-   for (i = 0; i < hwtnl->cmd.vdecl_count; ++i) {
-      if (hwtnl->cmd.vdecl_vb[i] == buffer) {
+   for (i = 0; i < hwtnl->cmd.vbuf_count; ++i) {
+      if (hwtnl->cmd.vbufs[i].buffer == buffer) {
          return TRUE;
       }
    }
@@ -161,120 +173,444 @@ svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl,
 }
 
 
-enum pipe_error
-svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
+static enum pipe_error
+draw_vgpu9(struct svga_hwtnl *hwtnl)
 {
    struct svga_winsys_context *swc = hwtnl->cmd.swc;
    struct svga_context *svga = hwtnl->svga;
    enum pipe_error ret;
+   struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
+   struct svga_winsys_surface *ib_handle[QSZ];
+   struct svga_winsys_surface *handle;
+   SVGA3dVertexDecl *vdecl;
+   SVGA3dPrimitiveRange *prim;
+   unsigned i;
 
-   if (hwtnl->cmd.prim_count) {
-      struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
-      struct svga_winsys_surface *ib_handle[QSZ];
-      struct svga_winsys_surface *handle;
-      SVGA3dVertexDecl *vdecl;
-      SVGA3dPrimitiveRange *prim;
-      unsigned i;
+   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+      unsigned j = hwtnl->cmd.vdecl_buffer_index[i];
+      handle = svga_buffer_handle(svga, hwtnl->cmd.vbufs[j].buffer);
+      if (handle == NULL)
+         return PIPE_ERROR_OUT_OF_MEMORY;
 
-      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
-         assert(!svga_buffer_is_user_buffer(hwtnl->cmd.vdecl_vb[i]));
-         handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]);
+      vb_handle[i] = handle;
+   }
+
+   for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+      if (hwtnl->cmd.prim_ib[i]) {
+         handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
          if (handle == NULL)
             return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+      else
+         handle = NULL;
+
+      ib_handle[i] = handle;
+   }
+
+   if (svga->rebind.flags.rendertargets) {
+      ret = svga_reemit_framebuffer_bindings(svga);
+      if (ret != PIPE_OK) {
+         return ret;
+      }
+   }
+
+   if (svga->rebind.flags.texture_samplers) {
+      ret = svga_reemit_tss_bindings(svga);
+      if (ret != PIPE_OK) {
+         return ret;
+      }
+   }
+
+   if (svga->rebind.flags.vs) {
+      ret = svga_reemit_vs_bindings(svga);
+      if (ret != PIPE_OK) {
+         return ret;
+      }
+   }
+
+   if (svga->rebind.flags.fs) {
+      ret = svga_reemit_fs_bindings(svga);
+      if (ret != PIPE_OK) {
+         return ret;
+      }
+   }
+
+   SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
+            svga->curr.framebuffer.cbufs[0] ?
+            svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
+            hwtnl->cmd.prim_count);
 
-         vb_handle[i] = handle;
+   ret = SVGA3D_BeginDrawPrimitives(swc,
+                                    &vdecl,
+                                    hwtnl->cmd.vdecl_count,
+                                    &prim, hwtnl->cmd.prim_count);
+   if (ret != PIPE_OK)
+      return ret;
+
+   memcpy(vdecl,
+          hwtnl->cmd.vdecl,
+          hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);
+
+   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+      /* check for 4-byte alignment */
+      assert(vdecl[i].array.offset % 4 == 0);
+      assert(vdecl[i].array.stride % 4 == 0);
+
+      /* Given rangeHint is considered to be relative to indexBias, and
+       * indexBias varies per primitive, we cannot accurately supply a
+       * rangeHint when emitting more than one primitive per draw command.
+       */
+      if (hwtnl->cmd.prim_count == 1) {
+         vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
+         vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
+      }
+      else {
+         vdecl[i].rangeHint.first = 0;
+         vdecl[i].rangeHint.last = 0;
       }
 
-      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
-         if (hwtnl->cmd.prim_ib[i]) {
-            assert(!svga_buffer_is_user_buffer(hwtnl->cmd.prim_ib[i]));
-            handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
-            if (handle == NULL)
-               return PIPE_ERROR_OUT_OF_MEMORY;
+      swc->surface_relocation(swc,
+                              &vdecl[i].array.surfaceId,
+                              NULL, vb_handle[i], SVGA_RELOC_READ);
+   }
+
+   memcpy(prim,
+          hwtnl->cmd.prim, hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);
+
+   for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+      swc->surface_relocation(swc,
+                              &prim[i].indexArray.surfaceId,
+                              NULL, ib_handle[i], SVGA_RELOC_READ);
+      pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
+   }
+
+   SVGA_FIFOCommitAll(swc);
+
+   hwtnl->cmd.prim_count = 0;
+
+   return PIPE_OK;
+}
+
+
+static SVGA3dSurfaceFormat
+xlate_index_format(unsigned indexWidth)
+{
+   if (indexWidth == 2) {
+      return SVGA3D_R16_UINT;
+   }
+   else if (indexWidth == 4) {
+      return SVGA3D_R32_UINT;
+   }
+   else {
+      assert(!"Bad indexWidth");
+      return SVGA3D_R32_UINT;
+   }
+}
+
+
+static enum pipe_error
+validate_sampler_resources(struct svga_context *svga)
+{
+   unsigned shader;
+
+   assert(svga_have_vgpu10(svga));
+
+   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+      unsigned count = svga->curr.num_sampler_views[shader];
+      unsigned i;
+      struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS];
+      enum pipe_error ret;
+
+      /*
+       * Reference bound sampler resources to ensure pending updates are
+       * noticed by the device.
+       */
+      for (i = 0; i < count; i++) {
+         struct svga_pipe_sampler_view *sv =
+            svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);
+
+         if (sv) {
+            if (sv->base.texture->target == PIPE_BUFFER) {
+               surfaces[i] = svga_buffer_handle(svga, sv->base.texture);
+            }
+            else {
+               surfaces[i] = svga_texture(sv->base.texture)->handle;
+            }
          }
          else {
-            handle = NULL;
+            surfaces[i] = NULL;
          }
-
-         ib_handle[i] = handle;
       }
 
-      if (svga->rebind.rendertargets) {
-         ret = svga_reemit_framebuffer_bindings(svga);
-         if (ret != PIPE_OK) {
-            return ret;
-         }
+      if (shader == PIPE_SHADER_FRAGMENT &&
+          svga->curr.rast->templ.poly_stipple_enable) {
+         const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+         struct svga_pipe_sampler_view *sv =
+            svga->polygon_stipple.sampler_view;
+
+         assert(sv);
+         surfaces[unit] = svga_texture(sv->base.texture)->handle;
+         count = MAX2(count, unit+1);
       }
 
-      if (svga->rebind.texture_samplers) {
-         ret = svga_reemit_tss_bindings(svga);
-         if (ret != PIPE_OK) {
-            return ret;
+      /* rebind the shader resources if needed */
+      if (svga->rebind.flags.texture_samplers) {
+         for (i = 0; i < count; i++) {
+            if (surfaces[i]) {
+               ret = svga->swc->resource_rebind(svga->swc,
+                                                surfaces[i],
+                                                NULL,
+                                                SVGA_RELOC_READ);
+               if (ret != PIPE_OK)
+                  return ret;
+            }
          }
       }
+   }
+   svga->rebind.flags.texture_samplers = FALSE;
 
-      if (svga->rebind.vs) {
-         ret = svga_reemit_vs_bindings(svga);
-         if (ret != PIPE_OK) {
-            return ret;
+   return PIPE_OK;
+}
+
+
+static enum pipe_error
+validate_constant_buffers(struct svga_context *svga)
+{
+   unsigned shader;
+
+   assert(svga_have_vgpu10(svga));
+
+   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+      enum pipe_error ret;
+      struct svga_buffer *buffer;
+      struct svga_winsys_surface *handle;
+      unsigned enabled_constbufs;
+
+      /* Rebind the default constant buffer if needed */
+      if (svga->rebind.flags.constbufs) {
+         buffer = svga_buffer(svga->state.hw_draw.constbuf[shader]);
+         if (buffer) {
+            ret = svga->swc->resource_rebind(svga->swc,
+                                             buffer->handle,
+                                             NULL,
+                                             SVGA_RELOC_READ);
+            if (ret != PIPE_OK)
+               return ret;
          }
       }
 
-      if (svga->rebind.fs) {
-         ret = svga_reemit_fs_bindings(svga);
-         if (ret != PIPE_OK) {
-            return ret;
+      /*
+       * Reference other bound constant buffers to ensure pending updates are
+       * noticed by the device.
+       */
+      enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] & ~1u;
+      while (enabled_constbufs) {
+         unsigned i = u_bit_scan(&enabled_constbufs);
+         buffer = svga_buffer(svga->curr.constbufs[shader][i].buffer);
+         if (buffer) {
+            handle = svga_buffer_handle(svga, &buffer->b.b);
+
+            if (svga->rebind.flags.constbufs) {
+               ret = svga->swc->resource_rebind(svga->swc,
+                                                handle,
+                                                NULL,
+                                                SVGA_RELOC_READ);
+               if (ret != PIPE_OK)
+                  return ret;
+            }
          }
       }
+   }
+   svga->rebind.flags.constbufs = FALSE;
+
+   return PIPE_OK;
+}
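
validate_constant_buffers() walks enabled_constbufs with u_bit_scan(), masking off bit 0 first because the default constant buffer in slot 0 was already rebound above. A self-contained sketch of that iteration pattern, substituting a GCC/Clang builtin for Mesa's u_bit_scan() from util/u_math.h:

#include <stdio.h>

/* Minimal stand-in for u_bit_scan(): return the index of the lowest set
 * bit and clear it from the mask (assumes a GCC/Clang builtin). */
static int bit_scan(unsigned *mask)
{
   int i = __builtin_ctz(*mask);
   *mask &= *mask - 1;
   return i;
}

int main(void)
{
   unsigned enabled = 0x2D;        /* slots 0, 2, 3, 5 enabled */
   unsigned iter = enabled & ~1u;  /* skip slot 0: handled separately */

   while (iter) {
      int i = bit_scan(&iter);
      printf("validate constant buffer %d\n", i);  /* prints 2, 3, 5 */
   }
   return 0;
}
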
 
-      SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
-               svga->curr.framebuffer.cbufs[0] ?
-               svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
-               hwtnl->cmd.prim_count);
 
-      ret = SVGA3D_BeginDrawPrimitives(swc, &vdecl, hwtnl->cmd.vdecl_count,
-                                       &prim, hwtnl->cmd.prim_count);
+static enum pipe_error
+draw_vgpu10(struct svga_hwtnl *hwtnl,
+            const SVGA3dPrimitiveRange *range,
+            unsigned vcount,
+            unsigned min_index,
+            unsigned max_index, struct pipe_resource *ib,
+            unsigned start_instance, unsigned instance_count)
+{
+   struct svga_context *svga = hwtnl->svga;
+   struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
+   struct svga_winsys_surface *ib_handle;
+   const unsigned vbuf_count = hwtnl->cmd.vbuf_count;
+   enum pipe_error ret;
+   unsigned i;
+
+   assert(svga_have_vgpu10(svga));
+   assert(hwtnl->cmd.prim_count == 0);
+
+   /* We need to reemit all the current resource bindings along with the Draw
+    * command to be sure that the referenced resources are available for the
+    * Draw command, just in case the surfaces associated with the resources
+    * are paged out.
+    */
+   if (svga->rebind.val) {
+      ret = svga_rebind_framebuffer_bindings(svga);
       if (ret != PIPE_OK)
          return ret;
 
-      memcpy(vdecl, hwtnl->cmd.vdecl,
-             hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);
-
-      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
-         /* Given rangeHint is considered to be relative to indexBias, and 
-          * indexBias varies per primitive, we cannot accurately supply an 
-          * rangeHint when emitting more than one primitive per draw command.
-          */
-         if (hwtnl->cmd.prim_count == 1) {
-            vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
-            vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
-         }
-         else {
-            vdecl[i].rangeHint.first = 0;
-            vdecl[i].rangeHint.last = 0;
-         }
+      ret = svga_rebind_shaders(svga);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+
+   ret = validate_sampler_resources(svga);
+   if (ret != PIPE_OK)
+      return ret;
 
-         swc->surface_relocation(swc, &vdecl[i].array.surfaceId, NULL,
-                                 vb_handle[i], SVGA_RELOC_READ);
+   ret = validate_constant_buffers(svga);
+   if (ret != PIPE_OK)
+      return ret;
+
+   /* Get handle for each referenced vertex buffer */
+   for (i = 0; i < vbuf_count; i++) {
+      struct svga_buffer *sbuf = svga_buffer(hwtnl->cmd.vbufs[i].buffer);
+
+      if (sbuf) {
+         assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER);
+         vb_handle[i] = svga_buffer_handle(svga, &sbuf->b.b);
+         if (vb_handle[i] == NULL)
+            return PIPE_ERROR_OUT_OF_MEMORY;
       }
+      else {
+         vb_handle[i] = NULL;
+      }
+   }
 
-      memcpy(prim, hwtnl->cmd.prim,
-             hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);
+   /* Get handles for the index buffers */
+   if (ib) {
+      struct svga_buffer *sbuf = svga_buffer(ib);
 
-      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
-         swc->surface_relocation(swc, &prim[i].indexArray.surfaceId, NULL,
-                                 ib_handle[i], SVGA_RELOC_READ);
-         pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
+      assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER);
+      (void) sbuf; /* silence unused var warning */
+
+      ib_handle = svga_buffer_handle(svga, ib);
+      if (ib_handle == NULL)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+   else {
+      ib_handle = NULL;
+   }
+
+   /* setup vertex attribute input layout */
+   if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) {
+      ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
+                                         hwtnl->cmd.vdecl_layout_id);
+      if (ret != PIPE_OK)
+         return ret;
+
+      svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id;
+   }
+
+   /* setup vertex buffers */
+   {
+      SVGA3dVertexBuffer buffers[PIPE_MAX_ATTRIBS];
+
+      for (i = 0; i < vbuf_count; i++) {
+         buffers[i].stride = hwtnl->cmd.vbufs[i].stride;
+         buffers[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
+      }
+      if (vbuf_count > 0) {
+         ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, vbuf_count,
+                                              0,    /* startBuffer */
+                                              buffers, vb_handle);
+         if (ret != PIPE_OK)
+            return ret;
       }
+   }
+
+   /* Set primitive type (line, tri, etc) */
+   if (svga->state.hw_draw.topology != range->primType) {
+      ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType);
+      if (ret != PIPE_OK)
+         return ret;
+
+      svga->state.hw_draw.topology = range->primType;
+   }
 
-      SVGA_FIFOCommitAll(swc);
-      hwtnl->cmd.prim_count = 0;
+   if (ib_handle) {
+      /* indexed drawing */
+      SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth);
+
+      /* setup index buffer */
+      ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
+                                         indexFormat,
+                                         range->indexArray.offset);
+      if (ret != PIPE_OK)
+         return ret;
+
+      if (instance_count > 1) {
+         ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
+                                                  vcount,
+                                                  instance_count,
+                                                  0, /* startIndexLocation */
+                                                  range->indexBias,
+                                                  start_instance);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+      else {
+         /* non-instanced drawing */
+         ret = SVGA3D_vgpu10_DrawIndexed(svga->swc,
+                                         vcount,
+                                         0,      /* startIndexLocation */
+                                         range->indexBias);
+         if (ret != PIPE_OK)
+            return ret;
+      }
    }
+   else {
+      /* non-indexed drawing */
+      if (instance_count > 1) {
+         ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
+                                           vcount,
+                                           instance_count,
+                                           range->indexBias,
+                                           start_instance);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+      else {
+         /* non-instanced */
+         ret = SVGA3D_vgpu10_Draw(svga->swc,
+                                  vcount,
+                                  range->indexBias);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+   }
+
+   hwtnl->cmd.prim_count = 0;
 
    return PIPE_OK;
 }
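
For orientation, draw_vgpu10() above issues a fixed sequence of commands, caching redundant state where it can; a condensed outline of the calls as they appear in the function:

/* Condensed call sequence of draw_vgpu10(), as implemented above:
 *
 *   svga_rebind_framebuffer_bindings() + svga_rebind_shaders()
 *                                          ...only if svga->rebind.val is set
 *   validate_sampler_resources()
 *   validate_constant_buffers()
 *   SVGA3D_vgpu10_SetInputLayout()         ...only if the layout id changed
 *   SVGA3D_vgpu10_SetVertexBuffers()       ...only if vbuf_count > 0
 *   SVGA3D_vgpu10_SetTopology()            ...only if the topology changed
 *   SVGA3D_vgpu10_SetIndexBuffer()         ...indexed draws only
 *   SVGA3D_vgpu10_Draw / DrawInstanced / DrawIndexed / DrawIndexedInstanced
 */
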
 
 
+
+/**
+ * Emit any pending drawing commands to the command buffer.
+ * When we receive VGPU9 drawing commands we accumulate them instead of
+ * emitting them into the command buffer immediately.
+ * This function needs to be called before we change any state that could
+ * affect those pending draws.
+ */
+enum pipe_error
+svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
+{
+   if (!svga_have_vgpu10(hwtnl->svga) && hwtnl->cmd.prim_count) {
+      /* we only queue up primitives for VGPU9 */
+      return draw_vgpu9(hwtnl);
+   }
+   return PIPE_OK;
+}
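
The batching contract behind this flush: any state change that queued VGPU9 primitives depend on must be preceded by a flush, which is why setters such as svga_hwtnl_vertex_decls() assert cmd.prim_count == 0. A generic, self-contained sketch of that contract (names are illustrative, not the driver's):

#include <assert.h>
#include <stdio.h>

/* Generic sketch of the contract svga_hwtnl_flush() enforces for VGPU9:
 * queued work must be emitted before the state it depends on changes. */
struct batcher {
   int prim_count;      /* queued primitives */
};

static void flush(struct batcher *b)
{
   if (b->prim_count) {
      printf("emitting %d queued primitives\n", b->prim_count);
      b->prim_count = 0;
   }
}

static void set_vertex_layout(struct batcher *b)
{
   assert(b->prim_count == 0);   /* mirrors svga_hwtnl_vertex_decls() */
   printf("layout changed\n");
}

int main(void)
{
   struct batcher b = { .prim_count = 3 };
   flush(&b);              /* must come first... */
   set_vertex_layout(&b);  /* ...or the assert above would fire */
   return 0;
}
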
+
+
 void
 svga_hwtnl_set_index_bias(struct svga_hwtnl *hwtnl, int index_bias)
 {
@@ -298,18 +634,28 @@ check_draw_params(struct svga_hwtnl *hwtnl,
 {
    unsigned i;
 
+   assert(!svga_have_vgpu10(hwtnl->svga));
+
    for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
-      struct pipe_resource *vb = hwtnl->cmd.vdecl_vb[i];
-      unsigned size = vb ? vb->width0 : 0;
+      unsigned j = hwtnl->cmd.vdecl_buffer_index[i];
+      const struct pipe_vertex_buffer *vb = &hwtnl->cmd.vbufs[j];
+      unsigned size = vb->buffer ? vb->buffer->width0 : 0;
       unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
       unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
       int index_bias = (int) range->indexBias + hwtnl->index_bias;
       unsigned width;
 
+      if (size == 0)
+         continue;
+
       assert(vb);
       assert(size);
       assert(offset < size);
       assert(min_index <= max_index);
+      (void) width;
+      (void) stride;
+      (void) offset;
+      (void) size;
 
       switch (hwtnl->cmd.vdecl[i].identity.type) {
       case SVGA3D_DECLTYPE_FLOAT1:
@@ -390,6 +736,9 @@ check_draw_params(struct svga_hwtnl *hwtnl,
       assert(size);
       assert(offset < size);
       assert(stride);
+      (void) size;
+      (void) offset;
+      (void) stride;
 
       switch (range->primType) {
       case SVGA3D_PRIMITIVE_POINTLIST:
@@ -421,33 +770,57 @@ check_draw_params(struct svga_hwtnl *hwtnl,
 }
 
 
+/**
+ * All drawing filters down into this function, either directly
+ * on the hardware path or after doing software vertex processing.
+ */
 enum pipe_error
 svga_hwtnl_prim(struct svga_hwtnl *hwtnl,
                 const SVGA3dPrimitiveRange * range,
+                unsigned vcount,
                 unsigned min_index,
-                unsigned max_index, struct pipe_resource *ib)
+                unsigned max_index, struct pipe_resource *ib,
+                unsigned start_instance, unsigned instance_count)
 {
    enum pipe_error ret = PIPE_OK;
 
+   if (svga_have_vgpu10(hwtnl->svga)) {
+      /* draw immediately */
+      ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib,
+                        start_instance, instance_count);
+      if (ret != PIPE_OK) {
+         svga_context_flush(hwtnl->svga, NULL);
+         ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib,
+                           start_instance, instance_count);
+         assert(ret == PIPE_OK);
+      }
+   }
+   else {
+      /* batch up drawing commands */
 #ifdef DEBUG
-   check_draw_params(hwtnl, range, min_index, max_index, ib);
+      check_draw_params(hwtnl, range, min_index, max_index, ib);
+      assert(start_instance == 0);
+      assert(instance_count <= 1);
+#else
+      (void) check_draw_params;
 #endif
 
-   if (hwtnl->cmd.prim_count + 1 >= QSZ) {
-      ret = svga_hwtnl_flush(hwtnl);
-      if (ret != PIPE_OK)
-         return ret;
-   }
+      if (hwtnl->cmd.prim_count + 1 >= QSZ) {
+         ret = svga_hwtnl_flush(hwtnl);
+         if (ret != PIPE_OK)
+            return ret;
+      }
 
-   /* min/max indices are relative to bias */
-   hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
-   hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
+      /* min/max indices are relative to bias */
+      hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
+      hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
 
-   hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
-   hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
+      hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
+      hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
 
-   pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
-   hwtnl->cmd.prim_count++;
+      pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
+      hwtnl->cmd.prim_count++;
+   }
 
    return ret;
 }
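
The VGPU10 branch above uses the driver's standard out-of-space recovery: attempt the draw, and if it fails, flush the command buffer and retry exactly once, since the retry starts with an empty buffer. A generic sketch of the pattern (the error value here stands in for any command-buffer-full failure):

#include <stdio.h>

enum pipe_error { PIPE_OK, PIPE_ERROR_OUT_OF_MEMORY };

/* Fails once to simulate a full command buffer, then succeeds. */
static int buffer_full = 1;

static enum pipe_error try_emit(void)
{
   return buffer_full ? PIPE_ERROR_OUT_OF_MEMORY : PIPE_OK;
}

static void flush_commands(void)
{
   buffer_full = 0;   /* flushing frees up command buffer space */
}

int main(void)
{
   enum pipe_error ret = try_emit();
   if (ret != PIPE_OK) {
      flush_commands();      /* make room... */
      ret = try_emit();      /* ...and retry exactly once */
   }
   printf("draw %s\n", ret == PIPE_OK ? "succeeded" : "failed");
   return 0;
}
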
index 1db79cd..af8ecab 100644 (file)
@@ -35,54 +35,50 @@ struct svga_winsys_context;
 struct svga_screen;
 struct svga_context;
 struct pipe_resource;
+struct u_upload_mgr;
 
-struct svga_hwtnl *
-svga_hwtnl_create(struct svga_context *svga);
+struct svga_hwtnl *svga_hwtnl_create(struct svga_context *svga);
 
-void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl );
+void svga_hwtnl_destroy(struct svga_hwtnl *hwtnl);
 
-void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl,
-                               boolean flatshade,
-                               boolean flatshade_first );
+void svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl,
+                              boolean flatshade, boolean flatshade_first);
 
-void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl,
-                              unsigned mode );
+void svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode);
 
-void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl,
-                       unsigned i,
-                       const SVGA3dVertexDecl *decl,
-                       struct pipe_resource *vb);
+void
+svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl,
+                        unsigned count,
+                        const SVGA3dVertexDecl * decls,
+                        const unsigned *buffer_indexes,
+                        SVGA3dElementLayoutId layoutId);
 
-void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl,
-                             unsigned count );
+void
+svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl,
+                          unsigned count, struct pipe_vertex_buffer *buffers);
 
-
-enum pipe_error 
-svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl,
-                        unsigned prim, 
-                        unsigned start, 
-                        unsigned count);
+enum pipe_error
+svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
+                       unsigned prim, unsigned start, unsigned count,
+                       unsigned start_instance, unsigned instance_count);
 
 enum pipe_error
-svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
-                                struct pipe_resource *indexBuffer,
-                                unsigned index_size,
-                                int index_bias,
-                                unsigned min_index,
-                                unsigned max_index,
-                                unsigned prim, 
-                                unsigned start, 
-                                unsigned count );
+svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
+                               struct pipe_resource *indexBuffer,
+                               unsigned index_size,
+                               int index_bias,
+                               unsigned min_index,
+                               unsigned max_index,
+                               unsigned prim, unsigned start, unsigned count,
+                               unsigned start_instance, unsigned instance_count);
 
 boolean
-svga_hwtnl_is_buffer_referred( struct svga_hwtnl *hwtnl,
-                               struct pipe_resource *buffer );
+svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl,
+                              struct pipe_resource *buffer);
 
-enum pipe_error
-svga_hwtnl_flush( struct svga_hwtnl *hwtnl );
+enum pipe_error svga_hwtnl_flush(struct svga_hwtnl *hwtnl);
 
-void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
-                                int index_bias);
+void svga_hwtnl_set_index_bias(struct svga_hwtnl *hwtnl, int index_bias);
 
 
 #endif /* SVGA_DRAW_H_ */
index d4d7720..5635411 100644 (file)
@@ -49,8 +49,8 @@ generate_indices(struct svga_hwtnl *hwtnl,
    struct pipe_resource *dst = NULL;
    void *dst_map = NULL;
 
-   dst = pipe_buffer_create(pipe->screen,
-                            PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_DEFAULT, size);
+   dst = pipe_buffer_create(pipe->screen, PIPE_BIND_INDEX_BUFFER,
+                            PIPE_USAGE_IMMUTABLE, size);
    if (dst == NULL)
       goto fail;
 
@@ -168,7 +168,8 @@ retrieve_or_generate_indices(struct svga_hwtnl *hwtnl,
 
 static enum pipe_error
 simple_draw_arrays(struct svga_hwtnl *hwtnl,
-                   unsigned prim, unsigned start, unsigned count)
+                   unsigned prim, unsigned start, unsigned count,
+                   unsigned start_instance, unsigned instance_count)
 {
    SVGA3dPrimitiveRange range;
    unsigned hw_prim;
@@ -191,13 +192,16 @@ simple_draw_arrays(struct svga_hwtnl *hwtnl,
     * looking at those numbers knows to adjust them by
     * range.indexBias.
     */
-   return svga_hwtnl_prim(hwtnl, &range, 0, count - 1, NULL);
+   return svga_hwtnl_prim(hwtnl, &range, count,
+                          0, count - 1, NULL,
+                          start_instance, instance_count);
 }
 
 
 enum pipe_error
 svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
-                       unsigned prim, unsigned start, unsigned count)
+                       unsigned prim, unsigned start, unsigned count,
+                       unsigned start_instance, unsigned instance_count)
 {
    unsigned gen_prim, gen_size, gen_nr, gen_type;
    u_generate_func gen_func;
@@ -228,7 +232,8 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
    }
 
    if (gen_type == U_GENERATE_LINEAR) {
-      return simple_draw_arrays(hwtnl, gen_prim, start, count);
+      return simple_draw_arrays(hwtnl, gen_prim, start, count,
+                                start_instance, instance_count);
    }
    else {
       struct pipe_resource *gen_buf = NULL;
@@ -250,8 +255,9 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
                                                   start,
                                                   0,
                                                   count - 1,
-                                                  gen_prim, 0, gen_nr);
-
+                                                  gen_prim, 0, gen_nr,
+                                                  start_instance,
+                                                  instance_count);
       if (ret != PIPE_OK)
          goto done;
 
index 038500a..9df8f6e 100644 (file)
@@ -25,6 +25,7 @@
 
 #include "util/u_inlines.h"
 #include "util/u_prim.h"
+#include "util/u_upload_mgr.h"
 #include "indices/u_indices.h"
 
 #include "svga_cmd.h"
@@ -45,7 +46,7 @@ translate_indices(struct svga_hwtnl *hwtnl, struct pipe_resource *src,
    struct pipe_context *pipe = &hwtnl->svga->pipe;
    struct pipe_transfer *src_transfer = NULL;
    struct pipe_transfer *dst_transfer = NULL;
-   unsigned size;
+   unsigned size = index_size * nr;
    const void *src_map = NULL;
    struct pipe_resource *dst = NULL;
    void *dst_map = NULL;
@@ -98,7 +99,9 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
                                       unsigned index_size, int index_bias,
                                       unsigned min_index, unsigned max_index,
                                       unsigned prim, unsigned start,
-                                      unsigned count)
+                                      unsigned count,
+                                      unsigned start_instance,
+                                      unsigned instance_count)
 {
    SVGA3dPrimitiveRange range;
    unsigned hw_prim;
@@ -109,12 +112,6 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
    if (hw_count == 0)
       return PIPE_OK; /* nothing to draw */
 
-   /* We should never see user-space buffers in the driver.  The vbuf
-    * module should have converted them into real buffers.
-    */
-   if (index_buffer)
-      assert(!svga_buffer_is_user_buffer(index_buffer));
-
    range.primType = hw_prim;
    range.primitiveCount = hw_count;
    range.indexArray.offset = index_offset;
@@ -122,7 +119,9 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
    range.indexWidth = index_size;
    range.indexBias = index_bias;
 
-   return svga_hwtnl_prim(hwtnl, &range, min_index, max_index, index_buffer);
+   return svga_hwtnl_prim(hwtnl, &range, count,
+                          min_index, max_index, index_buffer,
+                          start_instance, instance_count);
 }
 
 
@@ -131,7 +130,8 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
                                struct pipe_resource *index_buffer,
                                unsigned index_size, int index_bias,
                                unsigned min_index, unsigned max_index,
-                               unsigned prim, unsigned start, unsigned count)
+                               unsigned prim, unsigned start, unsigned count,
+                               unsigned start_instance, unsigned instance_count)
 {
    unsigned gen_prim, gen_size, gen_nr, gen_type;
    u_translate_func gen_func;
@@ -165,7 +165,9 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
                                                    index_bias,
                                                    min_index,
                                                    max_index,
-                                                   gen_prim, start, count);
+                                                   gen_prim, start, count,
+                                                   start_instance,
+                                                   instance_count);
    }
    else {
       struct pipe_resource *gen_buf = NULL;
@@ -190,7 +192,9 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
                                                   index_bias,
                                                   min_index,
                                                   max_index,
-                                                  gen_prim, 0, gen_nr);
+                                                  gen_prim, 0, gen_nr,
+                                                  start_instance,
+                                                  instance_count);
       if (ret != PIPE_OK)
          goto done;
 
index 9ab87e8..c821742 100644 (file)
@@ -46,7 +46,11 @@ static const unsigned svga_hw_prims =
     (1 << PIPE_PRIM_LINE_STRIP) |
     (1 << PIPE_PRIM_TRIANGLES) |
     (1 << PIPE_PRIM_TRIANGLE_STRIP) |
-    (1 << PIPE_PRIM_TRIANGLE_FAN));
+    (1 << PIPE_PRIM_TRIANGLE_FAN) |
+    (1 << PIPE_PRIM_LINES_ADJACENCY) |
+    (1 << PIPE_PRIM_LINE_STRIP_ADJACENCY) |
+    (1 << PIPE_PRIM_TRIANGLES_ADJACENCY) |
+    (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY));
 
 
 /**
@@ -57,8 +61,8 @@ static const unsigned svga_hw_prims =
  * PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP or PIPE_PRIM_POLYGON.  We convert
  * those to other types of primitives with index/translation code.
  */
-static inline unsigned
-svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count)
+static inline SVGA3dPrimitiveType
+svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count)
 {
    switch (mode) {
    case PIPE_PRIM_POINTS:
@@ -85,6 +89,22 @@ svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count)
       *prim_count = vcount - 2;
       return SVGA3D_PRIMITIVE_TRIANGLEFAN; 
 
+   case PIPE_PRIM_LINES_ADJACENCY:
+      *prim_count = vcount / 4;
+      return SVGA3D_PRIMITIVE_LINELIST_ADJ;
+
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      *prim_count = vcount - 3;
+      return SVGA3D_PRIMITIVE_LINESTRIP_ADJ;
+
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+      *prim_count = vcount / 6;
+      return SVGA3D_PRIMITIVE_TRIANGLELIST_ADJ;
+
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      *prim_count = vcount / 2 - 2;
+      return SVGA3D_PRIMITIVE_TRIANGLESTRIP_ADJ;
+
    default:
       assert(0);
       *prim_count = 0;
@@ -110,13 +130,19 @@ struct index_cache {
 struct draw_cmd {
    struct svga_winsys_context *swc;
 
+   /* vertex layout info */
    SVGA3dVertexDecl vdecl[SVGA3D_INPUTREG_MAX];
-   struct pipe_resource *vdecl_vb[SVGA3D_INPUTREG_MAX];
    unsigned vdecl_count;
+   SVGA3dElementLayoutId vdecl_layout_id;
+   unsigned vdecl_buffer_index[SVGA3D_INPUTREG_MAX];
+
+   /* vertex buffer info */
+   struct pipe_vertex_buffer vbufs[SVGA3D_INPUTREG_MAX];
+   unsigned vbuf_count;
 
    SVGA3dPrimitiveRange prim[QSZ];
    struct pipe_resource *prim_ib[QSZ];
-   unsigned prim_count;
+   unsigned prim_count;   /**< number of primitives for this draw */
    unsigned min_index[QSZ];
    unsigned max_index[QSZ];
 };
@@ -158,9 +184,11 @@ struct svga_hwtnl {
 enum pipe_error 
 svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
                  const SVGA3dPrimitiveRange *range,
+                 unsigned vcount,
                  unsigned min_index,
                  unsigned max_index,
-                 struct pipe_resource *ib );
+                 struct pipe_resource *ib,
+                 unsigned start_instance, unsigned instance_count);
 
 enum pipe_error
 svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
@@ -171,7 +199,9 @@ svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
                                        unsigned max_index,
                                        unsigned prim, 
                                        unsigned start,
-                                       unsigned count );
+                                       unsigned count,
+                                       unsigned start_instance,
+                                       unsigned instance_count);
 
 
 #endif
index 8c1b161..28b8064 100644 (file)
@@ -26,6 +26,7 @@
 
 #include "pipe/p_format.h"
 #include "util/u_debug.h"
+#include "util/u_format.h"
 #include "util/u_memory.h"
 
 #include "svga_winsys.h"
 #include "svga_format.h"
 
 
+/** Describes mapping from gallium formats to SVGA vertex/pixel formats */
+struct vgpu10_format_entry
+{
+   enum pipe_format pformat;
+   SVGA3dSurfaceFormat vertex_format;
+   SVGA3dSurfaceFormat pixel_format;
+   unsigned flags;
+};
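
Note that the table below is positionally indexed: row i describes the gallium format with enum value i, and holes in the enum are padded with bare numbers (73, 78, 79, ...) so the lookup stays a direct array index. A hypothetical sketch of the lookup this layout implies, with an assert to catch the table drifting out of sync (the function name and the ARRAY_SIZE macro are stand-ins, not necessarily what this file uses):

/* Hypothetical lookup over the positionally-indexed table below. */
static SVGA3dSurfaceFormat
translate_vertex_format(enum pipe_format format)
{
   assert(format < ARRAY_SIZE(format_conversion_table));
   /* Row i must describe format i, or the table has drifted out of sync. */
   assert(format_conversion_table[format].pformat == format);
   return format_conversion_table[format].vertex_format;
}
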
+
+
+static const struct vgpu10_format_entry format_conversion_table[] =
+{
+   /* Gallium format                    SVGA3D vertex format        SVGA3D pixel format          Flags */
+   { PIPE_FORMAT_NONE,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B8G8R8A8_UNORM,        SVGA3D_B8G8R8A8_UNORM,      SVGA3D_B8G8R8A8_UNORM,       0 },
+   { PIPE_FORMAT_B8G8R8X8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_B8G8R8X8_UNORM,       0 },
+   { PIPE_FORMAT_A8R8G8B8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_X8R8G8B8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B5G5R5A1_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_B5G5R5A1_UNORM,       0 },
+   { PIPE_FORMAT_B4G4R4A4_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_A4R4G4B4,             0 },
+   { PIPE_FORMAT_B5G6R5_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_B5G6R5_UNORM,         0 },
+   { PIPE_FORMAT_R10G10B10A2_UNORM,     SVGA3D_R10G10B10A2_UNORM,   SVGA3D_R10G10B10A2_UNORM,    0 },
+   { PIPE_FORMAT_L8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_LUMINANCE8,           0 },
+   { PIPE_FORMAT_A8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_A8_UNORM,             0 },
+   { PIPE_FORMAT_I8_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8A8_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_UYVY,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_YUYV,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_Z16_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_Z_D16,                0 },
+   { PIPE_FORMAT_Z32_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_Z32_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_D32_FLOAT,            0 },
+   { PIPE_FORMAT_Z24_UNORM_S8_UINT,     SVGA3D_FORMAT_INVALID,      SVGA3D_D24_UNORM_S8_UINT,    0 },
+   { PIPE_FORMAT_S8_UINT_Z24_UNORM,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_Z24X8_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_D24_UNORM_S8_UINT,    0 },
+   { PIPE_FORMAT_X8Z24_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_S8_UINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R64_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R64G64_FLOAT,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R64G64B64_FLOAT,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R64G64B64A64_FLOAT,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32_FLOAT,             SVGA3D_R32_FLOAT,           SVGA3D_R32_FLOAT,            0 },
+   { PIPE_FORMAT_R32G32_FLOAT,          SVGA3D_R32G32_FLOAT,        SVGA3D_R32G32_FLOAT,         0 },
+   { PIPE_FORMAT_R32G32B32_FLOAT,       SVGA3D_R32G32B32_FLOAT,     SVGA3D_R32G32B32_FLOAT,      0 },
+   { PIPE_FORMAT_R32G32B32A32_FLOAT,    SVGA3D_R32G32B32A32_FLOAT,  SVGA3D_R32G32B32A32_FLOAT,   0 },
+   { PIPE_FORMAT_R32_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32A32_UNORM,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32_USCALED,           SVGA3D_R32_UINT,            SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R32G32_USCALED,        SVGA3D_R32G32_UINT,         SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R32G32B32_USCALED,     SVGA3D_R32G32B32_UINT,      SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R32G32B32A32_USCALED,  SVGA3D_R32G32B32A32_UINT,   SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R32_SNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32_SNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32_SNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32A32_SNORM,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32_SSCALED,           SVGA3D_R32_SINT,            SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R32G32_SSCALED,        SVGA3D_R32G32_SINT,         SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R32G32B32_SSCALED,     SVGA3D_R32G32B32_SINT,      SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R32G32B32A32_SSCALED,  SVGA3D_R32G32B32A32_SINT,   SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R16_UNORM,             SVGA3D_R16_UNORM,           SVGA3D_R16_UNORM,            0 },
+   { PIPE_FORMAT_R16G16_UNORM,          SVGA3D_R16G16_UNORM,        SVGA3D_R16G16_UNORM,         0 },
+   { PIPE_FORMAT_R16G16B16_UNORM,       SVGA3D_R16G16B16A16_UNORM,  SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R16G16B16A16_UNORM,    SVGA3D_R16G16B16A16_UNORM,  SVGA3D_R16G16B16A16_UNORM,   0 },
+   { PIPE_FORMAT_R16_USCALED,           SVGA3D_R16_UINT,            SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R16G16_USCALED,        SVGA3D_R16G16_UINT,         SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R16G16B16_USCALED,     SVGA3D_R16G16B16A16_UINT,   SVGA3D_FORMAT_INVALID,       VF_W_TO_1 | VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R16G16B16A16_USCALED,  SVGA3D_R16G16B16A16_UINT,   SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R16_SNORM,             SVGA3D_R16_SNORM,           SVGA3D_R16_SNORM,            0 },
+   { PIPE_FORMAT_R16G16_SNORM,          SVGA3D_R16G16_SNORM,        SVGA3D_R16G16_SNORM,         0 },
+   { PIPE_FORMAT_R16G16B16_SNORM,       SVGA3D_R16G16B16A16_SNORM,  SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R16G16B16A16_SNORM,    SVGA3D_R16G16B16A16_SNORM,  SVGA3D_R16G16B16A16_SNORM,   0 },
+   { PIPE_FORMAT_R16_SSCALED,           SVGA3D_R16_SINT,            SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R16G16_SSCALED,        SVGA3D_R16G16_SINT,         SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R16G16B16_SSCALED,     SVGA3D_R16G16B16A16_SINT,   SVGA3D_FORMAT_INVALID,       VF_W_TO_1 | VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R16G16B16A16_SSCALED,  SVGA3D_R16G16B16A16_SINT,   SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R8_UNORM,              SVGA3D_R8_UNORM,            SVGA3D_R8_UNORM,             0 },
+   { PIPE_FORMAT_R8G8_UNORM,            SVGA3D_R8G8_UNORM,          SVGA3D_R8G8_UNORM,           0 },
+   { PIPE_FORMAT_R8G8B8_UNORM,          SVGA3D_R8G8B8A8_UNORM,      SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R8G8B8A8_UNORM,        SVGA3D_R8G8B8A8_UNORM,      SVGA3D_R8G8B8A8_UNORM,       0 },
+   { PIPE_FORMAT_X8B8G8R8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8_USCALED,            SVGA3D_R8_UINT,             SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R8G8_USCALED,          SVGA3D_R8G8_UINT,           SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R8G8B8_USCALED,        SVGA3D_R8G8B8A8_UINT,       SVGA3D_FORMAT_INVALID,       VF_W_TO_1 | VF_U_TO_F_CAST },
+   { PIPE_FORMAT_R8G8B8A8_USCALED,      SVGA3D_R8G8B8A8_UINT,       SVGA3D_FORMAT_INVALID,       VF_U_TO_F_CAST },
+   { 73,                                SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8_SNORM,              SVGA3D_R8_SNORM,            SVGA3D_R8_SNORM,             0 },
+   { PIPE_FORMAT_R8G8_SNORM,            SVGA3D_R8G8_SNORM,          SVGA3D_R8G8_SNORM,           0 },
+   { PIPE_FORMAT_R8G8B8_SNORM,          SVGA3D_R8G8B8A8_SNORM,      SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R8G8B8A8_SNORM,        SVGA3D_R8G8B8A8_SNORM,      SVGA3D_R8G8B8A8_SNORM,       0 },
+   { 78,                                SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { 79,                                SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { 80,                                SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { 81,                                SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8_SSCALED,            SVGA3D_R8_SINT,             SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R8G8_SSCALED,          SVGA3D_R8G8_SINT,           SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R8G8B8_SSCALED,        SVGA3D_R8G8B8A8_SINT,       SVGA3D_FORMAT_INVALID,       VF_W_TO_1 | VF_I_TO_F_CAST },
+   { PIPE_FORMAT_R8G8B8A8_SSCALED,      SVGA3D_R8G8B8A8_SINT,       SVGA3D_FORMAT_INVALID,       VF_I_TO_F_CAST },
+   { 86,                                SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32_FIXED,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32_FIXED,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32_FIXED,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32A32_FIXED,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16_FLOAT,             SVGA3D_R16_FLOAT,           SVGA3D_R16_FLOAT,            0 },
+   { PIPE_FORMAT_R16G16_FLOAT,          SVGA3D_R16G16_FLOAT,        SVGA3D_R16G16_FLOAT,         0 },
+   { PIPE_FORMAT_R16G16B16_FLOAT,       SVGA3D_R16G16B16A16_FLOAT,  SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R16G16B16A16_FLOAT,    SVGA3D_R16G16B16A16_FLOAT,  SVGA3D_R16G16B16A16_FLOAT,   0 },
+   { PIPE_FORMAT_L8_SRGB,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8A8_SRGB,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8_SRGB,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8B8G8R8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_X8B8G8R8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B8G8R8A8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_B8G8R8A8_UNORM_SRGB,  0 },
+   { PIPE_FORMAT_B8G8R8X8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_B8G8R8X8_UNORM_SRGB,  0 },
+   { PIPE_FORMAT_A8R8G8B8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_X8R8G8B8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8A8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_R8G8B8A8_UNORM_SRGB,  0 },
+   { PIPE_FORMAT_DXT1_RGB,              SVGA3D_FORMAT_INVALID,      SVGA3D_DXT1,                 0 },
+   { PIPE_FORMAT_DXT1_RGBA,             SVGA3D_FORMAT_INVALID,      SVGA3D_DXT1,                 0 },
+   { PIPE_FORMAT_DXT3_RGBA,             SVGA3D_FORMAT_INVALID,      SVGA3D_DXT3,                 0 },
+   { PIPE_FORMAT_DXT5_RGBA,             SVGA3D_FORMAT_INVALID,      SVGA3D_DXT5,                 0 },
+   { PIPE_FORMAT_DXT1_SRGB,             SVGA3D_FORMAT_INVALID,      SVGA3D_DXT1,                 0 },
+   { PIPE_FORMAT_DXT1_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_DXT1,                 0 },
+   { PIPE_FORMAT_DXT3_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_DXT3,                 0 },
+   { PIPE_FORMAT_DXT5_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_DXT5,                 0 },
+   { PIPE_FORMAT_RGTC1_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_BC4_UNORM,            0 },
+   { PIPE_FORMAT_RGTC1_SNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_BC4_SNORM,            0 },
+   { PIPE_FORMAT_RGTC2_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_BC5_UNORM,            0 },
+   { PIPE_FORMAT_RGTC2_SNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_BC5_SNORM,            0 },
+   { PIPE_FORMAT_R8G8_B8G8_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_G8R8_G8B8_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8SG8SB8UX8U_NORM,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R5SG5SB6U_NORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8B8G8R8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B5G5R5X1_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R10G10B10A2_USCALED,   SVGA3D_R10G10B10A2_UNORM,   SVGA3D_FORMAT_INVALID,       VF_PUINT_TO_USCALED },
+   { PIPE_FORMAT_R11G11B10_FLOAT,       SVGA3D_FORMAT_INVALID,      SVGA3D_R11G11B10_FLOAT,      0 },
+   { PIPE_FORMAT_R9G9B9E5_FLOAT,        SVGA3D_FORMAT_INVALID,      SVGA3D_R9G9B9E5_SHAREDEXP,   0 },
+   { PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,  SVGA3D_FORMAT_INVALID,      SVGA3D_D32_FLOAT_S8X24_UINT, 0 },
+   { PIPE_FORMAT_R1_UNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R10G10B10X2_USCALED,   SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R10G10B10X2_SNORM,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L4A4_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B10G10R10A2_UNORM,     SVGA3D_R10G10B10A2_UNORM,   SVGA3D_FORMAT_INVALID,       VF_BGRA },
+   { PIPE_FORMAT_R10SG10SB10SA2U_NORM,  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8Bx_SNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8X8_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B4G4R4X4_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_X24S8_UINT,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_S8X24_UINT,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_X32_S8X24_UINT,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B2G3R3_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16A16_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A16_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I16_UNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_LATC1_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_LATC1_SNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_LATC2_UNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_LATC2_SNORM,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8_SNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8_SNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8A8_SNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I8_SNORM,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A16_SNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16_SNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16A16_SNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I16_SNORM,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A16_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16A16_FLOAT,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I16_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A32_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L32_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L32A32_FLOAT,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I32_FLOAT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_YV12,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_YV16,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_IYUV,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_NV12,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_NV21,                  SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A4R4_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R4A4_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8A8_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8R8_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R10G10B10A2_SSCALED,   SVGA3D_R32_UINT,            SVGA3D_FORMAT_INVALID,       VF_PUINT_TO_SSCALED },
+   { PIPE_FORMAT_R10G10B10A2_SNORM,     SVGA3D_R10G10B10A2_UNORM,   SVGA3D_FORMAT_INVALID,       VF_PUINT_TO_SNORM },
+   { PIPE_FORMAT_B10G10R10A2_USCALED,   SVGA3D_R10G10B10A2_UNORM,   SVGA3D_FORMAT_INVALID,       VF_BGRA | VF_PUINT_TO_USCALED },
+   { PIPE_FORMAT_B10G10R10A2_SSCALED,   SVGA3D_R32_UINT,            SVGA3D_FORMAT_INVALID,       VF_BGRA | VF_PUINT_TO_SSCALED },
+   { PIPE_FORMAT_B10G10R10A2_SNORM,     SVGA3D_R10G10B10A2_UNORM,   SVGA3D_FORMAT_INVALID,       VF_BGRA | VF_PUINT_TO_SNORM },
+   { PIPE_FORMAT_R8_UINT,               SVGA3D_R8_UINT,             SVGA3D_R8_UINT,              0 },
+   { PIPE_FORMAT_R8G8_UINT,             SVGA3D_R8G8_UINT,           SVGA3D_R8G8_UINT,            0 },
+   { PIPE_FORMAT_R8G8B8_UINT,           SVGA3D_R8G8B8A8_UINT,       SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R8G8B8A8_UINT,         SVGA3D_R8G8B8A8_UINT,       SVGA3D_R8G8B8A8_UINT,        0 },
+   { PIPE_FORMAT_R8_SINT,               SVGA3D_R8_SINT,             SVGA3D_R8_SINT,              0 },
+   { PIPE_FORMAT_R8G8_SINT,             SVGA3D_R8G8_SINT,           SVGA3D_R8G8_SINT,            0 },
+   { PIPE_FORMAT_R8G8B8_SINT,           SVGA3D_R8G8B8A8_SINT,       SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R8G8B8A8_SINT,         SVGA3D_R8G8B8A8_SINT,       SVGA3D_R8G8B8A8_SINT,        0 },
+   { PIPE_FORMAT_R16_UINT,              SVGA3D_R16_UINT,            SVGA3D_R16_UINT,             0 },
+   { PIPE_FORMAT_R16G16_UINT,           SVGA3D_R16G16_UINT,         SVGA3D_R16G16_UINT,          0 },
+   { PIPE_FORMAT_R16G16B16_UINT,        SVGA3D_R16G16B16A16_UINT,   SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R16G16B16A16_UINT,     SVGA3D_R16G16B16A16_UINT,   SVGA3D_R16G16B16A16_UINT,    0 },
+   { PIPE_FORMAT_R16_SINT,              SVGA3D_R16_SINT,            SVGA3D_R16_SINT,             0 },
+   { PIPE_FORMAT_R16G16_SINT,           SVGA3D_R16G16_SINT,         SVGA3D_R16G16_SINT,          0 },
+   { PIPE_FORMAT_R16G16B16_SINT,        SVGA3D_R16G16B16A16_SINT,   SVGA3D_FORMAT_INVALID,       VF_W_TO_1 },
+   { PIPE_FORMAT_R16G16B16A16_SINT,     SVGA3D_R16G16B16A16_SINT,   SVGA3D_R16G16B16A16_SINT,    0 },
+   { PIPE_FORMAT_R32_UINT,              SVGA3D_R32_UINT,            SVGA3D_R32_UINT,             0 },
+   { PIPE_FORMAT_R32G32_UINT,           SVGA3D_R32G32_UINT,         SVGA3D_R32G32_UINT,          0 },
+   { PIPE_FORMAT_R32G32B32_UINT,        SVGA3D_R32G32B32_UINT,      SVGA3D_R32G32B32_UINT,       0 },
+   { PIPE_FORMAT_R32G32B32A32_UINT,     SVGA3D_R32G32B32A32_UINT,   SVGA3D_R32G32B32A32_UINT,    0 },
+   { PIPE_FORMAT_R32_SINT,              SVGA3D_R32_SINT,            SVGA3D_R32_SINT,             0 },
+   { PIPE_FORMAT_R32G32_SINT,           SVGA3D_R32G32_SINT,         SVGA3D_R32G32_SINT,          0 },
+   { PIPE_FORMAT_R32G32B32_SINT,        SVGA3D_R32G32B32_SINT,      SVGA3D_R32G32B32_SINT,       0 },
+   { PIPE_FORMAT_R32G32B32A32_SINT,     SVGA3D_R32G32B32A32_SINT,   SVGA3D_R32G32B32A32_SINT,    0 },
+   { PIPE_FORMAT_A8_UINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I8_UINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8_UINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8A8_UINT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8_SINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I8_SINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8_SINT,               SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L8A8_SINT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A16_UINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I16_UINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16_UINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16A16_UINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A16_SINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I16_SINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16_SINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L16A16_SINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A32_UINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I32_UINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L32_UINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L32A32_UINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A32_SINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_I32_SINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L32_SINT,              SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_L32A32_SINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B10G10R10A2_UINT,      SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC1_RGB8,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8_R8B8_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_G8R8_B8R8_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8X8_SNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8X8_SRGB,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8X8_UINT,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8G8B8X8_SINT,         SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_B10G10R10X2_UNORM,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16G16B16X16_UNORM,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16G16B16X16_SNORM,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16G16B16X16_FLOAT,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16G16B16X16_UINT,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16G16B16X16_SINT,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32X32_FLOAT,    SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32X32_UINT,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32G32B32X32_SINT,     SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8A8_SNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16A16_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16A16_SNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16A16_FLOAT,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32A32_FLOAT,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8A8_UINT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R8A8_SINT,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16A16_UINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R16A16_SINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32A32_UINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R32A32_SINT,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_R10G10B10A2_UINT,      SVGA3D_R10G10B10A2_UINT,    SVGA3D_R10G10B10A2_UINT,     0 },
+   { PIPE_FORMAT_B5G6R5_SRGB,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_BPTC_RGBA_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_BPTC_SRGBA,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_BPTC_RGB_FLOAT,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_BPTC_RGB_UFLOAT,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8L8_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8L8_SNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8L8_SRGB,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A16L16_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_G8R8_UNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_G8R8_SNORM,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_G16R16_UNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_G16R16_SNORM,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_A8B8G8R8_SNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_X8B8G8R8_SNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_RGB8,             SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_SRGB8,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_RGB8A1,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_SRGB8A1,          SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_RGBA8,            SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_SRGBA8,           SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_R11_UNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_R11_SNORM,        SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_RG11_UNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+   { PIPE_FORMAT_ETC2_RG11_SNORM,       SVGA3D_FORMAT_INVALID,      SVGA3D_FORMAT_INVALID,       0 },
+};
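+
+/*
+ * Reading the table above: each row pairs a gallium format with the
+ * SVGA3D format used for vertex fetch and the one used for pixel data,
+ * plus VF_* emulation flags.  VF_BGRA presumably requests a red/blue
+ * swizzle, VF_W_TO_1 forces the missing W component to 1, and the
+ * VF_PUINT_TO_* flags appear to request reinterpretation of packed-uint
+ * data where the host has no native 10-10-10-2 scaled/snorm vertex
+ * format.
+ */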
+
+
+/**
+ * Translate a gallium vertex format to a vgpu10 vertex format.
+ * Also, return any special vertex format flags.
+ */
+void
+svga_translate_vertex_format_vgpu10(enum pipe_format format,
+                                    SVGA3dSurfaceFormat *svga_format,
+                                    unsigned *vf_flags)
+{
+   assert(format < Elements(format_conversion_table));
+   if (format >= Elements(format_conversion_table)) {
+      format = PIPE_FORMAT_NONE;
+   }
+   *svga_format = format_conversion_table[format].vertex_format;
+   *vf_flags = format_conversion_table[format].flags;
+}
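+
+/*
+ * A minimal usage sketch (hypothetical caller, illustrative only):
+ *
+ *    SVGA3dSurfaceFormat vf;
+ *    unsigned flags;
+ *    svga_translate_vertex_format_vgpu10(PIPE_FORMAT_B10G10R10A2_UNORM,
+ *                                        &vf, &flags);
+ *    assert(vf == SVGA3D_R10G10B10A2_UNORM && (flags & VF_BGRA));
+ *
+ * Per the table, the caller is presumably expected to compensate for
+ * VF_BGRA with a red/blue swizzle when setting up the vertex element.
+ */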
+
+
 /*
  * Translate from gallium format to SVGA3D format.
  */
@@ -41,8 +355,16 @@ svga_translate_format(struct svga_screen *ss,
                       enum pipe_format format,
                       unsigned bind)
 {
-   switch(format) {
+   if (ss->sws->have_vgpu10) {
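+      /* With vgpu10 the conversion table is authoritative: vertex/index
+       * buffers use the vertex-format column, everything else the
+       * pixel-format column.
+       */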
+      if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
+         return format_conversion_table[format].vertex_format;
+      }
+      else {
+         return format_conversion_table[format].pixel_format;
+      }
+   }
 
+   switch(format) {
    case PIPE_FORMAT_B8G8R8A8_UNORM:
       return SVGA3D_A8R8G8B8;
    case PIPE_FORMAT_B8G8R8X8_UNORM:
@@ -70,10 +392,13 @@ svga_translate_format(struct svga_screen *ss,
       return SVGA3D_A16B16G16R16;
 
    case PIPE_FORMAT_Z16_UNORM:
+      assert(!ss->sws->have_vgpu10);
       return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.z16 : SVGA3D_Z_D16;
    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+      assert(!ss->sws->have_vgpu10);
       return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.s8z24 : SVGA3D_Z_D24S8;
    case PIPE_FORMAT_X8Z24_UNORM:
+      assert(!ss->sws->have_vgpu10);
       return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.x8z24 : SVGA3D_Z_D24X8;
 
    case PIPE_FORMAT_A8_UNORM:
@@ -116,12 +441,17 @@ svga_translate_format(struct svga_screen *ss,
  * Format capability description entry.
  */
 struct format_cap {
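+   /* human-readable format name, e.g. for debug output */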
+   const char *name;
+
    SVGA3dSurfaceFormat format;
 
    /*
     * Capability index corresponding to the format.
     */
-   SVGA3dDevCapIndex index;
+   SVGA3dDevCapIndex devcap;
+
+   /* size of each pixel/block */
+   unsigned block_width, block_height, block_bytes;
 
    /*
     * Mask of supported SVGA3dFormatOp operations, to be inferred when the
@@ -134,598 +464,1637 @@ struct format_cap {
 /*
  * Format capability description table.
  *
- * Ordererd by increasing SVGA3dSurfaceFormat value, but with gaps.
+ * Ordered by increasing SVGA3dSurfaceFormat value, but with gaps.
+ *
+ * Note: there are some special cases below where we set devcap=0 and
+ * avoid querying the host; in particular, depth/stencil formats that
+ * must be both rendered to and sampled from.  For example, the gallium
+ * format PIPE_FORMAT_Z24_UNORM_S8_UINT is converted to
+ * SVGA3D_D24_UNORM_S8_UINT for rendering but to
+ * SVGA3D_R24_UNORM_X8_TYPELESS for sampling.  If we asked the host
+ * whether a format supports both rendering and sampling, it would say
+ * no for both SVGA3D_D24_UNORM_S8_UINT and SVGA3D_R24_UNORM_X8_TYPELESS.
+ * So we override the host query for those formats and report that each
+ * supports both rendering and sampling.
  */
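+
+/*
+ * Sketch of how a lookup is expected to consume the devcap field
+ * (illustrative pseudo-code, assuming the reworked cap query keeps the
+ * old fallback behavior; "entry" is the table row for the format):
+ *
+ *    if (entry->devcap && sws->get_cap(sws, entry->devcap, &result))
+ *       caps->value = result.u;                 // host-reported caps
+ *    else
+ *       caps->value = entry->defaultOperations; // devcap==0 special case
+ */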
 static const struct format_cap format_cap_table[] = {
    {
+      "SVGA3D_FORMAT_INVALID",
+      SVGA3D_FORMAT_INVALID, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_X8R8G8B8",
       SVGA3D_X8R8G8B8,
       SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
       SVGA3DFORMAT_OP_DISPLAYMODE |
-      SVGA3DFORMAT_OP_3DACCELERATION |
-      SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
-      SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_A8R8G8B8",
       SVGA3D_A8R8G8B8,
       SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
-      SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
-      SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_R5G6B5",
       SVGA3D_R5G6B5,
       SVGA3D_DEVCAP_SURFACEFMT_R5G6B5,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
       SVGA3DFORMAT_OP_DISPLAYMODE |
-      SVGA3DFORMAT_OP_3DACCELERATION |
-      SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
-      SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_X1R5G5B5",
       SVGA3D_X1R5G5B5,
       SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
-      SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_A1R5G5B5",
       SVGA3D_A1R5G5B5,
       SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
-      SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
-      SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_A4R4G4B4",
       SVGA3D_A4R4G4B4,
       SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
-      SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
-   /*
-    * SVGA3D_Z_D32 is not yet supported, and has no corresponding
-    * SVGA3D_DEVCAP_xxx.
-    */
    {
+      /*
+       * SVGA3D_Z_D32 is not yet supported, and has no corresponding
+       * SVGA3D_DEVCAP_xxx.
+       */
+      "SVGA3D_Z_D32",
+      SVGA3D_Z_D32, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_Z_D16",
       SVGA3D_Z_D16,
       SVGA3D_DEVCAP_SURFACEFMT_Z_D16,
-      SVGA3DFORMAT_OP_ZSTENCIL |
-      SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH
+      1, 1, 2,
+      SVGA3DFORMAT_OP_ZSTENCIL
    },
    {
+      "SVGA3D_Z_D24S8",
       SVGA3D_Z_D24S8,
       SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8,
-      SVGA3DFORMAT_OP_ZSTENCIL |
-      SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH
+      1, 1, 4,
+      SVGA3DFORMAT_OP_ZSTENCIL
    },
    {
+      "SVGA3D_Z_D15S1",
       SVGA3D_Z_D15S1,
       SVGA3D_DEVCAP_MAX,
-      SVGA3DFORMAT_OP_ZSTENCIL |
-      SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH
+      1, 1, 2,
+      SVGA3DFORMAT_OP_ZSTENCIL
    },
    {
+      "SVGA3D_LUMINANCE8",
       SVGA3D_LUMINANCE8,
       SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8,
+      1, 1, 1,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_VOLUMETEXTURE
    },
    {
-      SVGA3D_LUMINANCE8_ALPHA8,
-      SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8,
-      SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      /*
+       * SVGA3D_LUMINANCE4_ALPHA4 is not supported, and has no corresponding
+       * SVGA3D_DEVCAP_xxx.
+       */
+      "SVGA3D_LUMINANCE4_ALPHA4",
+      SVGA3D_LUMINANCE4_ALPHA4, 0, 0, 0, 0, 0
    },
-   /*
-    * SVGA3D_LUMINANCE4_ALPHA4 is not supported, and has no corresponding
-    * SVGA3D_DEVCAP_xxx.
-    */
    {
+      "SVGA3D_LUMINANCE16",
       SVGA3D_LUMINANCE16,
       SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_VOLUMETEXTURE
+   },
+   {
+      "SVGA3D_LUMINANCE8_ALPHA8",
+      SVGA3D_LUMINANCE8_ALPHA8,
+      SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE
    },
    {
+      "SVGA3D_DXT1",
       SVGA3D_DXT1,
       SVGA3D_DEVCAP_SURFACEFMT_DXT1,
+      4, 4, 8,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
+      "SVGA3D_DXT2",
       SVGA3D_DXT2,
       SVGA3D_DEVCAP_SURFACEFMT_DXT2,
+      4, 4, 8,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
+      "SVGA3D_DXT3",
       SVGA3D_DXT3,
       SVGA3D_DEVCAP_SURFACEFMT_DXT3,
+      4, 4, 16,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
+      "SVGA3D_DXT4",
       SVGA3D_DXT4,
       SVGA3D_DEVCAP_SURFACEFMT_DXT4,
+      4, 4, 16,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
+      "SVGA3D_DXT5",
       SVGA3D_DXT5,
       SVGA3D_DEVCAP_SURFACEFMT_DXT5,
+      4, 4, 16,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
+      "SVGA3D_BUMPU8V8",
       SVGA3D_BUMPU8V8,
       SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_BUMPMAP |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_VOLUMETEXTURE
    },
-   /*
-    * SVGA3D_BUMPL6V5U5 is unsupported; it has no corresponding
-    * SVGA3D_DEVCAP_xxx.
-    */
    {
+      /*
+       * SVGA3D_BUMPL6V5U5 is unsupported; it has no corresponding
+       * SVGA3D_DEVCAP_xxx.
+       */
+      "SVGA3D_BUMPL6V5U5",
+      SVGA3D_BUMPL6V5U5, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BUMPX8L8V8U8",
       SVGA3D_BUMPX8L8V8U8,
       SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_BUMPMAP |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
+   },
+   {
+      "SVGA3D_FORMAT_DEAD1",
+      SVGA3D_FORMAT_DEAD1, 0, 0, 0, 0, 0
    },
-   /*
-    * SVGA3D_BUMPL8V8U8 is unsupported; it has no corresponding
-    * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead.
-    */
    {
+      "SVGA3D_ARGB_S10E5",
       SVGA3D_ARGB_S10E5,
       SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5,
+      1, 1, 8,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_ARGB_S23E8",
       SVGA3D_ARGB_S23E8,
       SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8,
+      1, 1, 16,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_A2R10G10B10",
       SVGA3D_A2R10G10B10,
       SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
-      SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
-   /*
-    * SVGA3D_V8U8 is unsupported; it has no corresponding
-    * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPU8V8 should be used instead.
-    */
    {
+      /*
+       * SVGA3D_V8U8 is unsupported; it has no corresponding
+       * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPU8V8 should be used instead.
+       */
+      "SVGA3D_V8U8",
+      SVGA3D_V8U8, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_Q8W8V8U8",
       SVGA3D_Q8W8V8U8,
       SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_BUMPMAP |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_CUBETEXTURE
    },
    {
+      "SVGA3D_CxV8U8",
       SVGA3D_CxV8U8,
       SVGA3D_DEVCAP_SURFACEFMT_CxV8U8,
-      SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_BUMPMAP |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE
+   },
+   {
+      /*
+       * SVGA3D_X8L8V8U8 is unsupported; it has no corresponding
+       * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead.
+       */
+      "SVGA3D_X8L8V8U8",
+      SVGA3D_X8L8V8U8, 0, 0, 0, 0, 0
    },
-   /*
-    * SVGA3D_X8L8V8U8 is unsupported; it has no corresponding
-    * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead.
-    */
    {
+      "SVGA3D_A2W10V10U10",
       SVGA3D_A2W10V10U10,
       SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10,
-      SVGA3DFORMAT_OP_TEXTURE |
-      SVGA3DFORMAT_OP_BUMPMAP |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE
    },
    {
+      "SVGA3D_ALPHA8",
       SVGA3D_ALPHA8,
       SVGA3D_DEVCAP_SURFACEFMT_ALPHA8,
+      1, 1, 1,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_VOLUMETEXTURE
    },
    {
+      "SVGA3D_R_S10E5",
       SVGA3D_R_S10E5,
       SVGA3D_DEVCAP_SURFACEFMT_R_S10E5,
+      1, 1, 2,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_R_S23E8",
       SVGA3D_R_S23E8,
       SVGA3D_DEVCAP_SURFACEFMT_R_S23E8,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_RG_S10E5",
       SVGA3D_RG_S10E5,
       SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_RG_S23E8",
       SVGA3D_RG_S23E8,
       SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8,
+      1, 1, 8,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SRGBREAD |
-      SVGA3DFORMAT_OP_SRGBWRITE |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
-   /*
-    * SVGA3D_BUFFER is a placeholder format for index/vertex buffers.
-    */
    {
+      /*
+       * SVGA3D_BUFFER is a placeholder format for index/vertex buffers.
+       */
+      "SVGA3D_BUFFER",
+      SVGA3D_BUFFER, 0, 1, 1, 1, 0
+   },
+   {
+      "SVGA3D_Z_D24X8",
       SVGA3D_Z_D24X8,
       SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8,
-      SVGA3DFORMAT_OP_ZSTENCIL |
-      SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH
+      1, 1, 4,
+      SVGA3DFORMAT_OP_ZSTENCIL
    },
    {
+      "SVGA3D_V16U16",
       SVGA3D_V16U16,
       SVGA3D_DEVCAP_SURFACEFMT_V16U16,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
-      SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_BUMPMAP |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN
+      SVGA3DFORMAT_OP_VOLUMETEXTURE
    },
    {
+      "SVGA3D_G16R16",
       SVGA3D_G16R16,
       SVGA3D_DEVCAP_SURFACEFMT_G16R16,
+      1, 1, 4,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_A16B16G16R16",
       SVGA3D_A16B16G16R16,
       SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16,
+      1, 1, 8,
       SVGA3DFORMAT_OP_TEXTURE |
       SVGA3DFORMAT_OP_CUBETEXTURE |
       SVGA3DFORMAT_OP_VOLUMETEXTURE |
-      SVGA3DFORMAT_OP_OFFSCREENPLAIN |
-      SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
       SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
+      "SVGA3D_UYVY",
       SVGA3D_UYVY,
       SVGA3D_DEVCAP_SURFACEFMT_UYVY,
+      0, 0, 0,
       0
    },
    {
+      "SVGA3D_YUY2",
       SVGA3D_YUY2,
       SVGA3D_DEVCAP_SURFACEFMT_YUY2,
+      0, 0, 0,
       0
    },
    {
+      "SVGA3D_NV12",
       SVGA3D_NV12,
       SVGA3D_DEVCAP_SURFACEFMT_NV12,
+      0, 0, 0,
       0
    },
    {
+      "SVGA3D_AYUV",
       SVGA3D_AYUV,
       SVGA3D_DEVCAP_SURFACEFMT_AYUV,
+      0, 0, 0,
       0
    },
    {
-      SVGA3D_Z_DF16,
-      SVGA3D_DEVCAP_SURFACEFMT_Z_DF16,
-      0
+      "SVGA3D_R32G32B32A32_TYPELESS",
+      SVGA3D_R32G32B32A32_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS,
+      1, 1, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
-      SVGA3D_Z_DF24,
-      SVGA3D_DEVCAP_SURFACEFMT_Z_DF24,
-      0
+      "SVGA3D_R32G32B32A32_UINT",
+      SVGA3D_R32G32B32A32_UINT,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT,
+      1, 1, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
    {
-      SVGA3D_Z_D24S8_INT,
-      SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT,
-      0
+      "SVGA3D_R32G32B32A32_SINT",
+      SVGA3D_R32G32B32A32_SINT,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT,
+      1, 1, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
    },
-};
-
-
-/*
- * Get format capabilities from the host.  It takes in consideration
- * deprecated/unsupported formats, and formats which are implicitely assumed to
- * be supported when the host does not provide an explicit capability entry.
- */
-void
-svga_get_format_cap(struct svga_screen *ss,
-                    SVGA3dSurfaceFormat format,
-                    SVGA3dSurfaceFormatCaps *caps)
-{
-   const struct format_cap *entry;
-
-   for (entry = format_cap_table; entry < format_cap_table + Elements(format_cap_table); ++entry) {
-      if (entry->format == format) {
-         struct svga_winsys_screen *sws = ss->sws;
-         SVGA3dDevCapResult result;
-
-         if (sws->get_cap(sws, entry->index, &result)) {
-            /* Explicitly advertised format */
-            caps->value = result.u;
-         } else {
-            /* Implicitly advertised format -- use default caps */
-            caps->value = entry->defaultOperations;
-         }
-
-         return;
-      }
-   }
-
-   /* Unsupported format */
-   caps->value = 0;
-}
-
-
-/**
- * Return block size and bytes per block for the given SVGA3D format.
- * block_width and block_height are one for uncompressed formats and
- * greater than one for compressed formats.
- * Note: we don't handle formats that are unsupported, according to
- * the format_cap_table above.
- */
-void
-svga_format_size(SVGA3dSurfaceFormat format,
-                 unsigned *block_width,
-                 unsigned *block_height,
-                 unsigned *bytes_per_block)
-{
-   *block_width = *block_height = 1;
-
-   switch (format) {
-   case SVGA3D_X8R8G8B8:
-   case SVGA3D_A8R8G8B8:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_R5G6B5:
-   case SVGA3D_X1R5G5B5:
-   case SVGA3D_A1R5G5B5:
-   case SVGA3D_A4R4G4B4:
-      *bytes_per_block = 2;
-      return;
-
-   case SVGA3D_Z_D32:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_Z_D16:
-      *bytes_per_block = 2;
-      return;
-
-   case SVGA3D_Z_D24S8:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_Z_D15S1:
-      *bytes_per_block = 2;
-      return;
-
-   case SVGA3D_LUMINANCE8:
-   case SVGA3D_LUMINANCE4_ALPHA4:
-      *bytes_per_block = 1;
-      return;
-
-   case SVGA3D_LUMINANCE16:
-   case SVGA3D_LUMINANCE8_ALPHA8:
-      *bytes_per_block = 2;
-      return;
-
-   case SVGA3D_DXT1:
-   case SVGA3D_DXT2:
-      *block_width = *block_height = 4;
-      *bytes_per_block = 8;
-      return;
-
-   case SVGA3D_DXT3:
-   case SVGA3D_DXT4:
-   case SVGA3D_DXT5:
-      *block_width = *block_height = 4;
-      *bytes_per_block = 16;
-      return;
-
-   case SVGA3D_BUMPU8V8:
-   case SVGA3D_BUMPL6V5U5:
-      *bytes_per_block = 2;
-      return;
-
-   case SVGA3D_BUMPX8L8V8U8:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_ARGB_S10E5:
-      *bytes_per_block = 8;
-      return;
-
-   case SVGA3D_ARGB_S23E8:
-      *bytes_per_block = 16;
-      return;
-
-   case SVGA3D_A2R10G10B10:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_Q8W8V8U8:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_CxV8U8:
-      *bytes_per_block = 2;
-      return;
-
-   case SVGA3D_X8L8V8U8:
-   case SVGA3D_A2W10V10U10:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_ALPHA8:
-      *bytes_per_block = 1;
-      return;
-
-   case SVGA3D_R_S10E5:
-      *bytes_per_block = 2;
-      return;
-   case SVGA3D_R_S23E8:
-      *bytes_per_block = 4;
-      return;
-   case SVGA3D_RG_S10E5:
-      *bytes_per_block = 4;
-      return;
-   case SVGA3D_RG_S23E8:
-      *bytes_per_block = 8;
-      return;
-
-   case SVGA3D_BUFFER:
-      *bytes_per_block = 1;
-      return;
-
-   case SVGA3D_Z_D24X8:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_V16U16:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_G16R16:
-      *bytes_per_block = 4;
-      return;
-
-   case SVGA3D_A16B16G16R16:
-      *bytes_per_block = 8;
-      return;
-
-   case SVGA3D_Z_DF16:
-      *bytes_per_block = 2;
-      return;
-   case SVGA3D_Z_DF24:
-      *bytes_per_block = 4;
-      return;
-   case SVGA3D_Z_D24S8_INT:
-      *bytes_per_block = 4;
-      return;
-
+   {
+      "SVGA3D_R32G32B32_TYPELESS",
+      SVGA3D_R32G32B32_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32_TYPELESS,
+      1, 1, 12,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32B32_FLOAT",
+      SVGA3D_R32G32B32_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32_FLOAT,
+      1, 1, 12,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32B32_UINT",
+      SVGA3D_R32G32B32_UINT,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32_UINT,
+      1, 1, 12,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32B32_SINT",
+      SVGA3D_R32G32B32_SINT,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32_SINT,
+      1, 1, 12,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16B16A16_TYPELESS",
+      SVGA3D_R16G16B16A16_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R16G16B16A16_TYPELESS,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16B16A16_UINT",
+      SVGA3D_R16G16B16A16_UINT,
+      SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UINT,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16B16A16_SNORM",
+      SVGA3D_R16G16B16A16_SNORM,
+      SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SNORM,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16B16A16_SINT",
+      SVGA3D_R16G16B16A16_SINT,
+      SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SINT,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32_TYPELESS",
+      SVGA3D_R32G32_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R32G32_TYPELESS,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32_UINT",
+      SVGA3D_R32G32_UINT,
+      SVGA3D_DEVCAP_DXFMT_R32G32_UINT,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32_SINT",
+      SVGA3D_R32G32_SINT,
+      SVGA3D_DEVCAP_DXFMT_R32G32_SINT,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G8X24_TYPELESS",
+      SVGA3D_R32G8X24_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R32G8X24_TYPELESS,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      /* Special case: no devcap / report sampler and depth/stencil ability
+       */
+      "SVGA3D_D32_FLOAT_S8X24_UINT",
+      SVGA3D_D32_FLOAT_S8X24_UINT,
+      0, /*SVGA3D_DEVCAP_DXFMT_D32_FLOAT_S8X24_UINT*/
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      /* Special case: no devcap / report sampler and depth/stencil ability
+       */
+      "SVGA3D_R32_FLOAT_X8X24_TYPELESS",
+      SVGA3D_R32_FLOAT_X8X24_TYPELESS,
+      0, /*SVGA3D_DEVCAP_DXFMT_R32_FLOAT_X8X24_TYPELESS*/
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_X32_TYPELESS_G8X24_UINT",
+      SVGA3D_X32_TYPELESS_G8X24_UINT,
+      SVGA3D_DEVCAP_DXFMT_X32_TYPELESS_G8X24_UINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R10G10B10A2_TYPELESS",
+      SVGA3D_R10G10B10A2_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R10G10B10A2_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R10G10B10A2_UINT",
+      SVGA3D_R10G10B10A2_UINT,
+      SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R11G11B10_FLOAT",
+      SVGA3D_R11G11B10_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8B8A8_TYPELESS",
+      SVGA3D_R8G8B8A8_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R8G8B8A8_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8B8A8_UNORM",
+      SVGA3D_R8G8B8A8_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8B8A8_UNORM_SRGB",
+      SVGA3D_R8G8B8A8_UNORM_SRGB,
+      SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM_SRGB,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8B8A8_UINT",
+      SVGA3D_R8G8B8A8_UINT,
+      SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8B8A8_SINT",
+      SVGA3D_R8G8B8A8_SINT,
+      SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16_TYPELESS",
+      SVGA3D_R16G16_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R16G16_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16_UINT",
+      SVGA3D_R16G16_UINT,
+      SVGA3D_DEVCAP_DXFMT_R16G16_UINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16_SINT",
+      SVGA3D_R16G16_SINT,
+      SVGA3D_DEVCAP_DXFMT_R16G16_SINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32_TYPELESS",
+      SVGA3D_R32_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R32_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      /* Special case: no devcap / report sampler and depth/stencil ability
+       */
+      "SVGA3D_D32_FLOAT",
+      SVGA3D_D32_FLOAT,
+      0, /*SVGA3D_DEVCAP_DXFMT_D32_FLOAT*/
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_R32_UINT",
+      SVGA3D_R32_UINT,
+      SVGA3D_DEVCAP_DXFMT_R32_UINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32_SINT",
+      SVGA3D_R32_SINT,
+      SVGA3D_DEVCAP_DXFMT_R32_SINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R24G8_TYPELESS",
+      SVGA3D_R24G8_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R24G8_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      /* Special case: no devcap / report sampler and depth/stencil ability
+       */
+      "SVGA3D_D24_UNORM_S8_UINT",
+      SVGA3D_D24_UNORM_S8_UINT,
+      0, /*SVGA3D_DEVCAP_DXFMT_D24_UNORM_S8_UINT*/
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      /* Special case: no devcap / report sampler and depth/stencil ability
+       */
+      "SVGA3D_R24_UNORM_X8_TYPELESS",
+      SVGA3D_R24_UNORM_X8_TYPELESS,
+      0, /*SVGA3D_DEVCAP_DXFMT_R24_UNORM_X8_TYPELESS*/
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_X24_TYPELESS_G8_UINT",
+      SVGA3D_X24_TYPELESS_G8_UINT,
+      SVGA3D_DEVCAP_DXFMT_X24_TYPELESS_G8_UINT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_R8G8_TYPELESS",
+      SVGA3D_R8G8_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R8G8_TYPELESS,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8_UNORM",
+      SVGA3D_R8G8_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R8G8_UNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8_UINT",
+      SVGA3D_R8G8_UINT,
+      SVGA3D_DEVCAP_DXFMT_R8G8_UINT,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8_SINT",
+      SVGA3D_R8G8_SINT,
+      SVGA3D_DEVCAP_DXFMT_R8G8_SINT,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16_TYPELESS",
+      SVGA3D_R16_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R16_TYPELESS,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_ZSTENCIL |
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16_UNORM",
+      SVGA3D_R16_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R16_UNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16_UINT",
+      SVGA3D_R16_UINT,
+      SVGA3D_DEVCAP_DXFMT_R16_UINT,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16_SNORM",
+      SVGA3D_R16_SNORM,
+      SVGA3D_DEVCAP_DXFMT_R16_SNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16_SINT",
+      SVGA3D_R16_SINT,
+      SVGA3D_DEVCAP_DXFMT_R16_SINT,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8_TYPELESS",
+      SVGA3D_R8_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_R8_TYPELESS,
+      1, 1, 1,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8_UNORM",
+      SVGA3D_R8_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R8_UNORM,
+      1, 1, 1,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8_UINT",
+      SVGA3D_R8_UINT,
+      SVGA3D_DEVCAP_DXFMT_R8_UINT,
+      1, 1, 1,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8_SNORM",
+      SVGA3D_R8_SNORM,
+      SVGA3D_DEVCAP_DXFMT_R8_SNORM,
+      1, 1, 1,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8_SINT",
+      SVGA3D_R8_SINT,
+      SVGA3D_DEVCAP_DXFMT_R8_SINT,
+      1, 1, 1,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_P8",
+      SVGA3D_P8, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_R9G9B9E5_SHAREDEXP",
+      SVGA3D_R9G9B9E5_SHAREDEXP,
+      SVGA3D_DEVCAP_DXFMT_R9G9B9E5_SHAREDEXP,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8_B8G8_UNORM",
+      SVGA3D_R8G8_B8G8_UNORM, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_G8R8_G8B8_UNORM",
+      SVGA3D_G8R8_G8B8_UNORM, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC1_TYPELESS",
+      SVGA3D_BC1_TYPELESS, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC1_UNORM_SRGB",
+      SVGA3D_BC1_UNORM_SRGB, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC2_TYPELESS",
+      SVGA3D_BC2_TYPELESS, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC2_UNORM_SRGB",
+      SVGA3D_BC2_UNORM_SRGB, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC3_TYPELESS",
+      SVGA3D_BC3_TYPELESS, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC3_UNORM_SRGB",
+      SVGA3D_BC3_UNORM_SRGB, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC4_TYPELESS",
+      SVGA3D_BC4_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_BC4_TYPELESS,
+      4, 4, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
+   },
+   {
+      "SVGA3D_ATI1",
+      SVGA3D_ATI1, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC4_SNORM",
+      SVGA3D_BC4_SNORM,
+      SVGA3D_DEVCAP_DXFMT_BC4_SNORM,
+      4, 4, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
+   },
+   {
+      "SVGA3D_BC5_TYPELESS",
+      SVGA3D_BC5_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_BC5_TYPELESS,
+      4, 4, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
+   },
+   {
+      "SVGA3D_ATI2",
+      SVGA3D_ATI2, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC5_SNORM",
+      SVGA3D_BC5_SNORM,
+      SVGA3D_DEVCAP_DXFMT_BC5_SNORM,
+      4, 4, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
+   },
+   {
+      "SVGA3D_R10G10B10_XR_BIAS_A2_UNORM",
+      SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_B8G8R8A8_TYPELESS",
+      SVGA3D_B8G8R8A8_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_B8G8R8A8_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_B8G8R8A8_UNORM_SRGB",
+      SVGA3D_B8G8R8A8_UNORM_SRGB,
+      SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM_SRGB,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_B8G8R8X8_TYPELESS",
+      SVGA3D_B8G8R8X8_TYPELESS,
+      SVGA3D_DEVCAP_DXFMT_B8G8R8X8_TYPELESS,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_B8G8R8X8_UNORM_SRGB",
+      SVGA3D_B8G8R8X8_UNORM_SRGB,
+      SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM_SRGB,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_Z_DF16",
+      SVGA3D_Z_DF16,
+      SVGA3D_DEVCAP_SURFACEFMT_Z_DF16,
+      1, 1, 2,
+      0
+   },
+   {
+      "SVGA3D_Z_DF24",
+      SVGA3D_Z_DF24,
+      SVGA3D_DEVCAP_SURFACEFMT_Z_DF24,
+      1, 1, 4,
+      0
+   },
+   {
+      "SVGA3D_Z_D24S8_INT",
+      SVGA3D_Z_D24S8_INT,
+      SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_YV12",
+      SVGA3D_YV12, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_R32G32B32A32_FLOAT",
+      SVGA3D_R32G32B32A32_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R32G32B32A32_FLOAT,
+      1, 1, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16B16A16_FLOAT",
+      SVGA3D_R16G16B16A16_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R16G16B16A16_FLOAT,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16B16A16_UNORM",
+      SVGA3D_R16G16B16A16_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UNORM,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R32G32_FLOAT",
+      SVGA3D_R32G32_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R32G32_FLOAT,
+      1, 1, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R10G10B10A2_UNORM",
+      SVGA3D_R10G10B10A2_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R8G8B8A8_SNORM",
+      SVGA3D_R8G8B8A8_SNORM,
+      SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16_FLOAT",
+      SVGA3D_R16G16_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R16G16_FLOAT,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16_UNORM",
+      SVGA3D_R16G16_UNORM,
+      SVGA3D_DEVCAP_DXFMT_R16G16_UNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16G16_SNORM",
+      SVGA3D_R16G16_SNORM,
+      SVGA3D_DEVCAP_DXFMT_R16G16_SNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      /* Special case: no devcap / report sampler, render target and
+       * depth/stencil ability
+       */
+      "SVGA3D_R32_FLOAT",
+      SVGA3D_R32_FLOAT,
+      0, /*SVGA3D_DEVCAP_DXFMT_R32_FLOAT*/
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_R8G8_SNORM",
+      SVGA3D_R8G8_SNORM,
+      SVGA3D_DEVCAP_DXFMT_R8G8_SNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_R16_FLOAT",
+      SVGA3D_R16_FLOAT,
+      SVGA3D_DEVCAP_DXFMT_R16_FLOAT,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_D16_UNORM",
+      SVGA3D_D16_UNORM,
+      SVGA3D_DEVCAP_DXFMT_D16_UNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_ZSTENCIL
+   },
+   {
+      "SVGA3D_A8_UNORM",
+      SVGA3D_A8_UNORM,
+      SVGA3D_DEVCAP_DXFMT_A8_UNORM,
+      1, 1, 1,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_BC1_UNORM",
+      SVGA3D_BC1_UNORM, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC2_UNORM",
+      SVGA3D_BC2_UNORM, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_BC3_UNORM",
+      SVGA3D_BC3_UNORM, 0, 0, 0, 0, 0
+   },
+   {
+      "SVGA3D_B5G6R5_UNORM",
+      SVGA3D_B5G6R5_UNORM,
+      SVGA3D_DEVCAP_DXFMT_B5G6R5_UNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_DISPLAYMODE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_B5G5R5A1_UNORM",
+      SVGA3D_B5G5R5A1_UNORM,
+      SVGA3D_DEVCAP_DXFMT_B5G5R5A1_UNORM,
+      1, 1, 2,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_DISPLAYMODE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_B8G8R8A8_UNORM",
+      SVGA3D_B8G8R8A8_UNORM,
+      SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_B8G8R8X8_UNORM",
+      SVGA3D_B8G8R8X8_UNORM,
+      SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM,
+      1, 1, 4,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE |
+      SVGA3DFORMAT_OP_VOLUMETEXTURE |
+      SVGA3DFORMAT_OP_DISPLAYMODE |
+      SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+   },
+   {
+      "SVGA3D_BC4_UNORM",
+      SVGA3D_BC4_UNORM,
+      SVGA3D_DEVCAP_DXFMT_BC4_UNORM,
+      4, 4, 8,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
+   },
+   {
+      "SVGA3D_BC5_UNORM",
+      SVGA3D_BC5_UNORM,
+      SVGA3D_DEVCAP_DXFMT_BC5_UNORM,
+      4, 4, 16,
+      SVGA3DFORMAT_OP_TEXTURE |
+      SVGA3DFORMAT_OP_CUBETEXTURE
+   }
+};
+
+
+/**
+ * Debug only:
+ * 1. check that format_cap_table[i] matches the i-th SVGA3D format.
+ * 2. check that format_conversion_table[i].pformat == i.
+ */
+static void
+check_format_tables(void)
+{
+   static boolean first_call = TRUE;
+
+   if (first_call) {
+      unsigned i;
+
+      STATIC_ASSERT(Elements(format_cap_table) == SVGA3D_FORMAT_MAX);
+      for (i = 0; i < Elements(format_cap_table); i++) {
+         assert(format_cap_table[i].format == i);
+      }
+
+      STATIC_ASSERT(Elements(format_conversion_table) == PIPE_FORMAT_COUNT);
+      for (i = 0; i < Elements(format_conversion_table); i++) {
+         assert(format_conversion_table[i].pformat == i);
+      }
+
+      first_call = FALSE;
+   }
+}
+
+
+/*
+ * Get format capabilities from the host.  This takes into consideration
+ * deprecated/unsupported formats, and formats which are implicitly assumed
+ * to be supported when the host does not provide an explicit capability entry.
+ */
+void
+svga_get_format_cap(struct svga_screen *ss,
+                    SVGA3dSurfaceFormat format,
+                    SVGA3dSurfaceFormatCaps *caps)
+{
+   struct svga_winsys_screen *sws = ss->sws;
+   SVGA3dDevCapResult result;
+   const struct format_cap *entry;
+
+#ifdef DEBUG
+   check_format_tables();
+#else
+   (void) check_format_tables;
+#endif
+
+   assert(format < Elements(format_cap_table));
+   entry = &format_cap_table[format];
+   assert(entry->format == format);
+
+   if (entry->devcap && sws->get_cap(sws, entry->devcap, &result)) {
+      /* Explicitly advertised format */
+      if (entry->devcap > SVGA3D_DEVCAP_DX) {
+         /* Translate DX/VGPU10 format cap to VGPU9 cap */
+         caps->value = 0;
+         if (result.u & SVGA3D_DXFMT_COLOR_RENDERTARGET)
+            caps->value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET;
+         if (!(result.u & SVGA3D_DXFMT_BLENDABLE))
+            caps->value |= SVGA3DFORMAT_OP_NOALPHABLEND;
+         if (result.u & SVGA3D_DXFMT_DEPTH_RENDERTARGET)
+            caps->value |= SVGA3DFORMAT_OP_ZSTENCIL;
+         if (result.u & SVGA3D_DXFMT_SHADER_SAMPLE)
+            caps->value |= (SVGA3DFORMAT_OP_TEXTURE |
+                            SVGA3DFORMAT_OP_CUBETEXTURE);
+         if (result.u & SVGA3D_DXFMT_VOLUME)
+            caps->value |= SVGA3DFORMAT_OP_VOLUMETEXTURE;
+      }
+      else {
+         /* Return VGPU9 format cap as-is */
+         caps->value = result.u;
+      }
+
+   } else {
+      /* Implicitly advertised format -- use default caps */
+      caps->value = entry->defaultOperations;
+   }
+}
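A minimal usage sketch (not part of this patch): svga_get_format_cap() pairs
with the SVGA3DFORMAT_OP_* bits to answer one-off capability questions.
format_is_renderable is a hypothetical helper name, shown only to illustrate
the calling convention.

    static boolean
    format_is_renderable(struct svga_screen *ss, SVGA3dSurfaceFormat fmt)
    {
       SVGA3dSurfaceFormatCaps caps;

       svga_get_format_cap(ss, fmt, &caps);
       /* caps.value carries VGPU9-style operation bits on both code paths */
       return (caps.value & SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET) != 0;
    }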
+
+
+void
+svga_format_size(SVGA3dSurfaceFormat format,
+                 unsigned *block_width,
+                 unsigned *block_height,
+                 unsigned *bytes_per_block)
+{
+   assert(format < Elements(format_cap_table));
+   *block_width = format_cap_table[format].block_width;
+   *block_height = format_cap_table[format].block_height;
+   *bytes_per_block = format_cap_table[format].block_bytes;
+   /* Make sure the table entry was valid */
+   if (*block_width == 0)
+      debug_printf("Bad table entry for %s\n", svga_format_name(format));
+   assert(*block_width);
+   assert(*block_height);
+   assert(*bytes_per_block);
+}
+
+
+const char *
+svga_format_name(SVGA3dSurfaceFormat format)
+{
+   assert(format < Elements(format_cap_table));
+   return format_cap_table[format].name;
+}
+
+
+/**
+ * Is the given SVGA3dSurfaceFormat a signed or unsigned integer color format?
+ */
+boolean
+svga_format_is_integer(SVGA3dSurfaceFormat format)
+{
+   switch (format) {
+   case SVGA3D_R32G32B32A32_SINT:
+   case SVGA3D_R32G32B32_SINT:
+   case SVGA3D_R32G32_SINT:
+   case SVGA3D_R32_SINT:
+   case SVGA3D_R16G16B16A16_SINT:
+   case SVGA3D_R16G16_SINT:
+   case SVGA3D_R16_SINT:
+   case SVGA3D_R8G8B8A8_SINT:
+   case SVGA3D_R8G8_SINT:
+   case SVGA3D_R8_SINT:
+   case SVGA3D_R32G32B32A32_UINT:
+   case SVGA3D_R32G32B32_UINT:
+   case SVGA3D_R32G32_UINT:
+   case SVGA3D_R32_UINT:
+   case SVGA3D_R16G16B16A16_UINT:
+   case SVGA3D_R16G16_UINT:
+   case SVGA3D_R16_UINT:
+   case SVGA3D_R8G8B8A8_UINT:
+   case SVGA3D_R8G8_UINT:
+   case SVGA3D_R8_UINT:
+   case SVGA3D_R10G10B10A2_UINT:
+      return TRUE;
+   default:
+      return FALSE;
+   }
+}
+
+
+/**
+ * Given a texture format, return the data type expected from the texture
+ * sampler.  For example, UNORM8 formats return floating point values while
+ * SINT formats return signed integer values.
+ * Note: this function could be moved into the gallium u_format.[ch] code
+ * if it's useful to anyone else.
+ */
+enum tgsi_return_type
+svga_get_texture_datatype(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   enum tgsi_return_type t;
+
+   if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
+      if (util_format_is_depth_or_stencil(format)) {
+         t = TGSI_RETURN_TYPE_FLOAT; /* XXX revisit this */
+      }
+      else if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) {
+         t = TGSI_RETURN_TYPE_FLOAT;
+      }
+      else if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+         t = desc->channel[0].normalized ? TGSI_RETURN_TYPE_UNORM : TGSI_RETURN_TYPE_UINT;
+      }
+      else if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+         t = desc->channel[0].normalized ? TGSI_RETURN_TYPE_SNORM : TGSI_RETURN_TYPE_SINT;
+      }
+      else {
+         assert(!"Unexpected channel type in svga_get_texture_datatype()");
+         t = TGSI_RETURN_TYPE_FLOAT;
+      }
+   }
+   else {
+      /* compressed format, shared exponent format, etc. */
+      switch (format) {
+      case PIPE_FORMAT_DXT1_RGB:
+      case PIPE_FORMAT_DXT1_RGBA:
+      case PIPE_FORMAT_DXT3_RGBA:
+      case PIPE_FORMAT_DXT5_RGBA:
+      case PIPE_FORMAT_DXT1_SRGB:
+      case PIPE_FORMAT_DXT1_SRGBA:
+      case PIPE_FORMAT_DXT3_SRGBA:
+      case PIPE_FORMAT_DXT5_SRGBA:
+      case PIPE_FORMAT_RGTC1_UNORM:
+      case PIPE_FORMAT_RGTC2_UNORM:
+      case PIPE_FORMAT_LATC1_UNORM:
+      case PIPE_FORMAT_LATC2_UNORM:
+      case PIPE_FORMAT_ETC1_RGB8:
+         t = TGSI_RETURN_TYPE_UNORM;
+         break;
+      case PIPE_FORMAT_RGTC1_SNORM:
+      case PIPE_FORMAT_RGTC2_SNORM:
+      case PIPE_FORMAT_LATC1_SNORM:
+      case PIPE_FORMAT_LATC2_SNORM:
+      case PIPE_FORMAT_R10G10B10X2_SNORM:
+         t = TGSI_RETURN_TYPE_SNORM;
+         break;
+      case PIPE_FORMAT_R11G11B10_FLOAT:
+      case PIPE_FORMAT_R9G9B9E5_FLOAT:
+         t = TGSI_RETURN_TYPE_FLOAT;
+         break;
+      default:
+         assert(!"Unexpected channel type in svga_get_texture_datatype()");
+         t = TGSI_RETURN_TYPE_FLOAT;
+      }
+   }
+
+   return t;
+}
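Two quick data points under the mapping above (a sketch; both PIPE_FORMAT
enums are standard gallium formats):

    /* plain UNORM format -> normalized float samples */
    assert(svga_get_texture_datatype(PIPE_FORMAT_R8G8B8A8_UNORM) ==
           TGSI_RETURN_TYPE_UNORM);
    /* pure-integer format -> signed integer samples */
    assert(svga_get_texture_datatype(PIPE_FORMAT_R32G32B32A32_SINT) ==
           TGSI_RETURN_TYPE_SINT);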
+
+
+/**
+ * Given an svga context, return true iff there are currently any integer color
+ * buffers attached to the framebuffer.
+ */
+boolean
+svga_has_any_integer_cbufs(const struct svga_context *svga)
+{
+   unsigned i;
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
+      struct pipe_surface *cbuf = svga->curr.framebuffer.cbufs[i];
+
+      if (cbuf && util_format_is_pure_integer(cbuf->format)) {
+         return TRUE;
+      }
+   }
+   return FALSE;
+}
+
+
+/**
+ * Given an SVGA format, return the corresponding typeless format.
+ * If there is no typeless format, return the format unchanged.
+ */
+SVGA3dSurfaceFormat
+svga_typeless_format(SVGA3dSurfaceFormat format)
+{
+   switch (format) {
+   case SVGA3D_R32G32B32A32_UINT:
+   case SVGA3D_R32G32B32A32_SINT:
+   case SVGA3D_R32G32B32A32_FLOAT:
+      return SVGA3D_R32G32B32A32_TYPELESS;
+   case SVGA3D_R32G32B32_FLOAT:
+   case SVGA3D_R32G32B32_UINT:
+   case SVGA3D_R32G32B32_SINT:
+      return SVGA3D_R32G32B32_TYPELESS;
+   case SVGA3D_R16G16B16A16_UINT:
+   case SVGA3D_R16G16B16A16_UNORM:
+   case SVGA3D_R16G16B16A16_SNORM:
+   case SVGA3D_R16G16B16A16_SINT:
+   case SVGA3D_R16G16B16A16_FLOAT:
+      return SVGA3D_R16G16B16A16_TYPELESS;
+   case SVGA3D_R32G32_UINT:
+   case SVGA3D_R32G32_SINT:
+   case SVGA3D_R32G32_FLOAT:
+      return SVGA3D_R32G32_TYPELESS;
+   case SVGA3D_D32_FLOAT_S8X24_UINT:
+      return SVGA3D_R32G8X24_TYPELESS;
+   case SVGA3D_X32_TYPELESS_G8X24_UINT:
+      return SVGA3D_R32_FLOAT_X8X24_TYPELESS;
+   case SVGA3D_R10G10B10A2_UINT:
+   case SVGA3D_R10G10B10A2_UNORM:
+      return SVGA3D_R10G10B10A2_TYPELESS;
+   case SVGA3D_R8G8B8A8_UNORM:
+   case SVGA3D_R8G8B8A8_SNORM:
+   case SVGA3D_R8G8B8A8_UNORM_SRGB:
+   case SVGA3D_R8G8B8A8_UINT:
+   case SVGA3D_R8G8B8A8_SINT:
+      return SVGA3D_R8G8B8A8_TYPELESS;
+   case SVGA3D_R16G16_UINT:
+   case SVGA3D_R16G16_SINT:
+   case SVGA3D_R16G16_UNORM:
+   case SVGA3D_R16G16_SNORM:
+   case SVGA3D_R16G16_FLOAT:
+      return SVGA3D_R16G16_TYPELESS;
+   case SVGA3D_D32_FLOAT:
+   case SVGA3D_R32_FLOAT:
+   case SVGA3D_R32_UINT:
+   case SVGA3D_R32_SINT:
+      return SVGA3D_R32_TYPELESS;
+   case SVGA3D_D24_UNORM_S8_UINT:
+      return SVGA3D_R24G8_TYPELESS;
+   case SVGA3D_X24_TYPELESS_G8_UINT:
+      return SVGA3D_R24_UNORM_X8_TYPELESS;
+   case SVGA3D_R8G8_UNORM:
+   case SVGA3D_R8G8_SNORM:
+   case SVGA3D_R8G8_UINT:
+   case SVGA3D_R8G8_SINT:
+      return SVGA3D_R8G8_TYPELESS;
+   case SVGA3D_R16_UNORM:
+   case SVGA3D_R16_UINT:
+   case SVGA3D_R16_SNORM:
+   case SVGA3D_R16_SINT:
+   case SVGA3D_R16_FLOAT:
+      return SVGA3D_R16_TYPELESS;
+   case SVGA3D_R8_UNORM:
+   case SVGA3D_R8_UINT:
+   case SVGA3D_R8_SNORM:
+   case SVGA3D_R8_SINT:
+      return SVGA3D_R8_TYPELESS;
+   case SVGA3D_B8G8R8A8_UNORM_SRGB:
+   case SVGA3D_B8G8R8A8_UNORM:
+      return SVGA3D_B8G8R8A8_TYPELESS;
+   case SVGA3D_B8G8R8X8_UNORM_SRGB:
+   case SVGA3D_B8G8R8X8_UNORM:
+      return SVGA3D_B8G8R8X8_TYPELESS;
+   case SVGA3D_BC4_UNORM:
+   case SVGA3D_BC4_SNORM:
+      return SVGA3D_BC4_TYPELESS;
+   case SVGA3D_BC5_UNORM:
+   case SVGA3D_BC5_SNORM:
+      return SVGA3D_BC5_TYPELESS;
+
+   /* Special cases (no corresponding _TYPELESS formats) */
+   case SVGA3D_A8_UNORM:
+   case SVGA3D_A4R4G4B4:
+   case SVGA3D_B5G5R5A1_UNORM:
+   case SVGA3D_B5G6R5_UNORM:
+   case SVGA3D_DXT1:
+   case SVGA3D_DXT2:
+   case SVGA3D_DXT3:
+   case SVGA3D_DXT4:
+   case SVGA3D_DXT5:
+   case SVGA3D_R11G11B10_FLOAT:
+   case SVGA3D_R9G9B9E5_SHAREDEXP:
+   case SVGA3D_Z_D32:
+   case SVGA3D_Z_D16:
+      return format;
    default:
-      debug_printf("format %u\n", (unsigned) format);
-      assert(!"unexpected format in svga_format_size()");
-      *bytes_per_block = 4;
+      debug_printf("Unexpected format %s in %s\n",
+                   svga_format_name(format), __FUNCTION__);
+      return format;
    }
 }
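For context, the typeless grouping mirrors the D3D10 rule that typed views
may alias one surface when they reduce to the same typeless parent.  A
hypothetical helper (illustrative only, not part of this patch):

    static boolean
    formats_share_typeless_parent(SVGA3dSurfaceFormat a, SVGA3dSurfaceFormat b)
    {
       /* e.g. SVGA3D_R8G8B8A8_UNORM and SVGA3D_R8G8B8A8_UNORM_SRGB both map
        * to SVGA3D_R8G8B8A8_TYPELESS, so they can alias the same storage.
        */
       return svga_typeless_format(a) == svga_typeless_format(b);
    }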
index 94c867a..0af218c 100644
@@ -28,6 +28,7 @@
 
 
 #include "pipe/p_format.h"
+#include "svga_context.h"
 #include "svga_types.h"
 #include "svga_reg.h"
 #include "svga3d_reg.h"
 struct svga_screen;
 
 
+/**
+ * Vertex format flags.  These are used to specify that some vertex formats
+ * need extra processing/conversion in the vertex shader.  For example,
+ * setting the W component to 1, or swapping R/B, or converting packed uint
+ * types to signed int/snorm.
+ */
+#define VF_ADJUST_RANGE     (1 << 0)
+#define VF_W_TO_1           (1 << 1)
+#define VF_U_TO_F_CAST      (1 << 2)  /* convert uint to float */
+#define VF_I_TO_F_CAST      (1 << 3)  /* convert sint to float */
+#define VF_BGRA             (1 << 4)  /* swap R/B */
+#define VF_PUINT_TO_SNORM   (1 << 5)  /* 10_10_10_2 to snorm */
+#define VF_PUINT_TO_USCALED (1 << 6)  /* 10_10_10_2 to uscaled */
+#define VF_PUINT_TO_SSCALED (1 << 7)  /* 10_10_10_2 to sscaled */
+
+
+void
+svga_translate_vertex_format_vgpu10(enum pipe_format format,
+                                    SVGA3dSurfaceFormat *svga_format,
+                                    unsigned *vf_flags);
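To make the VF_* semantics concrete, here is a hypothetical debug helper
(not in the patch; assumes debug_printf from util/u_debug.h) that spells
out the shader-side fixups a given vf_flags value requires:

    static void
    print_vertex_format_adjustments(unsigned vf_flags)
    {
       if (vf_flags & VF_W_TO_1)
          debug_printf("  force the W component to 1.0\n");
       if (vf_flags & VF_BGRA)
          debug_printf("  swap the R/B channels\n");
       if (vf_flags & VF_U_TO_F_CAST)
          debug_printf("  cast uint components to float\n");
       if (vf_flags & VF_PUINT_TO_SNORM)
          debug_printf("  convert packed 10_10_10_2 uint to snorm\n");
    }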
+
 enum SVGA3dSurfaceFormat
 svga_translate_format(struct svga_screen *ss,
                       enum pipe_format format,
@@ -52,5 +74,23 @@ svga_format_size(SVGA3dSurfaceFormat format,
                  unsigned *block_height,
                  unsigned *bytes_per_block);
 
+const char *
+svga_format_name(SVGA3dSurfaceFormat format);
+
+boolean
+svga_format_is_integer(SVGA3dSurfaceFormat format);
+
+enum tgsi_return_type
+svga_get_texture_datatype(enum pipe_format format);
+
+
+// XXX: Move this to svga_context?
+boolean
+svga_has_any_integer_cbufs(const struct svga_context *svga);
+
+
+SVGA3dSurfaceFormat
+svga_typeless_format(SVGA3dSurfaceFormat format);
+
 
 #endif /* SVGA_FORMAT_H_ */
diff --git a/src/gallium/drivers/svga/svga_link.c b/src/gallium/drivers/svga/svga_link.c
new file mode 100644
index 0000000..f3e524d
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2013 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include "svga_context.h"
+#include "svga_link.h"
+
+#include "tgsi/tgsi_strings.h"
+
+
+#define INVALID_INDEX 255
+
+
+/**
+ * Examine the input and output shader info to link outputs of the
+ * output shader to inputs of the input shader.
+ * Basically, we'll remap the input shader's input slots to new numbers
+ * based on the semantic name/index of the outputs from the output shader.
+ */
+void
+svga_link_shaders(const struct tgsi_shader_info *outshader_info,
+                  const struct tgsi_shader_info *inshader_info,
+                  struct shader_linkage *linkage)
+{
+   unsigned i, free_slot;
+
+   for (i = 0; i < Elements(linkage->input_map); i++) {
+      linkage->input_map[i] = INVALID_INDEX;
+   }
+
+   /* Assign input slots for input shader inputs.
+    * Basically, we want to use the same index for the output shader's outputs
+    * and the input shader's inputs that should be linked together.
+    * We'll modify the input shader's inputs to match the output shader.
+    */
+   assert(inshader_info->num_inputs <=
+          Elements(inshader_info->input_semantic_name));
+
+   /* first free register index that can be used for built-in varyings */
+   free_slot = outshader_info->num_outputs + 1;
+
+   for (i = 0; i < inshader_info->num_inputs; i++) {
+      unsigned sem_name = inshader_info->input_semantic_name[i];
+      unsigned sem_index = inshader_info->input_semantic_index[i];
+      unsigned j;
+      /**
+       * Get the clip distance inputs from the output shader's
+       * clip distance shadow copy.
+       */
+      if (sem_name == TGSI_SEMANTIC_CLIPDIST) {
+         linkage->input_map[i] = outshader_info->num_outputs + 1 + sem_index;
+         /* make sure free_slot includes this extra output */
+         free_slot = MAX2(free_slot, linkage->input_map[i] + 1);
+      }
+      else {
+         /* search output shader outputs for same item */
+         for (j = 0; j < outshader_info->num_outputs; j++) {
+            assert(j < Elements(outshader_info->output_semantic_name));
+            if (outshader_info->output_semantic_name[j] == sem_name &&
+                outshader_info->output_semantic_index[j] == sem_index) {
+               linkage->input_map[i] = j;
+               break;
+            }
+         }
+      }
+   }
+
+   linkage->num_inputs = inshader_info->num_inputs;
+
+   /* Things like the front-face register are handled here */
+   for (i = 0; i < inshader_info->num_inputs; i++) {
+      if (linkage->input_map[i] == INVALID_INDEX) {
+         unsigned j = free_slot++;
+         linkage->input_map[i] = j;
+      }
+   }
+
+   /* Debug */
+   if (0) {
+      unsigned reg = 0;
+      for (i = 0; i < linkage->num_inputs; i++) {
+
+         assert(linkage->input_map[i] != INVALID_INDEX);
+
+         debug_printf("input shader input[%d] slot %u  %s %u %s\n",
+                      i,
+                      linkage->input_map[i],
+                      tgsi_semantic_names[inshader_info->input_semantic_name[i]],
+                      inshader_info->input_semantic_index[i],
+                      tgsi_interpolate_names[inshader_info->input_interpolate[i]]);
+
+         /* make sure no register index is repeated */
+         if (reg & 1 << linkage->input_map[i]) {
+            assert(0);
+         }
+         reg |= 1 << linkage->input_map[i];
+      }
+   }
+}
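A short usage sketch (vs_info and fs_info are assumed to come from
tgsi_scan_shader()):

    struct shader_linkage linkage;

    /* link a vertex shader's outputs to a fragment shader's inputs */
    svga_link_shaders(&vs_info, &fs_info, &linkage);
    /* fragment shader input i now reads VS output register
     * linkage.input_map[i]; unmatched inputs were given fresh slots.
     */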
diff --git a/src/gallium/drivers/svga/svga_link.h b/src/gallium/drivers/svga/svga_link.h
new file mode 100644
index 0000000..724c611
--- /dev/null
@@ -0,0 +1,20 @@
+
+#ifndef SVGA_LINK_H
+#define SVGA_LINK_H
+
+#include "pipe/p_defines.h"
+
+struct svga_context;
+
+struct shader_linkage
+{
+   unsigned num_inputs;
+   ubyte input_map[PIPE_MAX_SHADER_INPUTS];
+};
+
+void
+svga_link_shaders(const struct tgsi_shader_info *outshader_info,
+                  const struct tgsi_shader_info *inshader_info,
+                  struct shader_linkage *linkage);
+
+#endif /* SVGA_LINK_H */
index 2890516..06bb3e3 100644
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_bitmask.h"
 
 #include "svga_context.h"
-
 #include "svga_hw_reg.h"
+#include "svga_cmd.h"
 
 
 static inline unsigned
-svga_translate_blend_factor(unsigned factor)
+svga_translate_blend_factor(const struct svga_context *svga, unsigned factor)
 {
    switch (factor) {
    case PIPE_BLENDFACTOR_ZERO:            return SVGA3D_BLENDOP_ZERO;
@@ -50,8 +51,21 @@ svga_translate_blend_factor(unsigned factor)
    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return SVGA3D_BLENDOP_SRCALPHASAT;
    case PIPE_BLENDFACTOR_CONST_COLOR:     return SVGA3D_BLENDOP_BLENDFACTOR;
    case PIPE_BLENDFACTOR_INV_CONST_COLOR: return SVGA3D_BLENDOP_INVBLENDFACTOR;
-   case PIPE_BLENDFACTOR_CONST_ALPHA:     return SVGA3D_BLENDOP_BLENDFACTOR; /* ? */
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return SVGA3D_BLENDOP_INVBLENDFACTOR; /* ? */
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      if (svga_have_vgpu10(svga))
+         return SVGA3D_BLENDOP_BLENDFACTORALPHA;
+      else
+         return SVGA3D_BLENDOP_BLENDFACTOR; /* as close as we can get */
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      if (svga_have_vgpu10(svga))
+         return SVGA3D_BLENDOP_INVBLENDFACTORALPHA;
+      else
+         return SVGA3D_BLENDOP_INVBLENDFACTOR; /* as close as we can get */
+   case PIPE_BLENDFACTOR_SRC1_COLOR:      return SVGA3D_BLENDOP_SRC1COLOR;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:  return SVGA3D_BLENDOP_INVSRC1COLOR;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:      return SVGA3D_BLENDOP_SRC1ALPHA;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:  return SVGA3D_BLENDOP_INVSRC1ALPHA;
+   case 0:                                return SVGA3D_BLENDOP_ONE;
    default:
       assert(0);
       return SVGA3D_BLENDOP_ZERO;
@@ -74,18 +88,64 @@ svga_translate_blend_func(unsigned mode)
 }
 
 
+/**
+ * Define a vgpu10 blend state object for the given
+ * svga blend state.
+ */
+static void
+define_blend_state_object(struct svga_context *svga,
+                          struct svga_blend_state *bs)
+{
+   SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS];
+   unsigned try;
+   int i;
+
+   assert(svga_have_vgpu10(svga));
+
+   bs->id = util_bitmask_add(svga->blend_object_id_bm);
+
+   for (i = 0; i < SVGA3D_DX_MAX_RENDER_TARGETS; i++) {
+      perRT[i].blendEnable = bs->rt[i].blend_enable;
+      perRT[i].srcBlend = bs->rt[i].srcblend;
+      perRT[i].destBlend = bs->rt[i].dstblend;
+      perRT[i].blendOp = bs->rt[i].blendeq;
+      perRT[i].srcBlendAlpha = bs->rt[i].srcblend_alpha;
+      perRT[i].destBlendAlpha = bs->rt[i].dstblend_alpha;
+      perRT[i].blendOpAlpha = bs->rt[i].blendeq_alpha;
+      perRT[i].renderTargetWriteMask = bs->rt[i].writemask;
+      perRT[i].logicOpEnable = 0;
+      perRT[i].logicOp = SVGA3D_LOGICOP_COPY;
+      assert(perRT[i].srcBlend == perRT[0].srcBlend);
+   }
+
+   /* Loop in case command buffer is full and we need to flush and retry */
+   for (try = 0; try < 2; try++) {
+      enum pipe_error ret;
+
+      ret = SVGA3D_vgpu10_DefineBlendState(svga->swc,
+                                           bs->id,
+                                           bs->alpha_to_coverage,
+                                           bs->independent_blend_enable,
+                                           perRT);
+      if (ret == PIPE_OK)
+         return;
+      svga_context_flush(svga, NULL);
+   }
+}
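The two-iteration loop is the driver's standard flush-and-retry idiom: the
only recoverable failure from a Define* command is a full command buffer,
so one flush plus one retry suffices.  Factored generically (a sketch; the
emit callback is hypothetical, not a real entry point):

    static void
    emit_with_retry(struct svga_context *svga,
                    enum pipe_error (*emit)(struct svga_context *svga))
    {
       unsigned try;

       for (try = 0; try < 2; try++) {
          if (emit(svga) == PIPE_OK)
             return;
          svga_context_flush(svga, NULL);   /* make room, then retry once */
       }
    }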
+
+
 static void *
 svga_create_blend_state(struct pipe_context *pipe,
                         const struct pipe_blend_state *templ)
 {
+   struct svga_context *svga = svga_context(pipe);
    struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state );
    unsigned i;
 
    /* Fill in the per-rendertarget blend state.  We currently only
-    * have one rendertarget.
+    * support independent blend enable and colormask per render target.
     */
-   for (i = 0; i < 1; i++) {
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
       /* No way to set this in SVGA3D, and no way to correctly implement it on
        * top of D3D9 API.  Instead we try to simulate with various blend modes.
        */
@@ -107,6 +167,9 @@ svga_create_blend_state(struct pipe_context *pipe,
             break;
          case PIPE_LOGICOP_COPY:
             blend->rt[i].blend_enable = FALSE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_ADD;
             break;
          case PIPE_LOGICOP_COPY_INVERTED:
             blend->rt[i].blend_enable   = TRUE;
@@ -169,38 +232,99 @@ svga_create_blend_state(struct pipe_context *pipe,
          case PIPE_LOGICOP_EQUIV:
             /* Fill these in with plausible values */
             blend->rt[i].blend_enable = FALSE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_ADD;
             break;
          default:
             assert(0);
             break;
          }
+         blend->rt[i].srcblend_alpha = blend->rt[i].srcblend;
+         blend->rt[i].dstblend_alpha = blend->rt[i].dstblend;
+         blend->rt[i].blendeq_alpha = blend->rt[i].blendeq;
       }
       else {
-         blend->rt[i].blend_enable   = templ->rt[0].blend_enable;
-
-         if (templ->rt[0].blend_enable) {
-            blend->rt[i].srcblend       = svga_translate_blend_factor(templ->rt[0].rgb_src_factor);
-            blend->rt[i].dstblend       = svga_translate_blend_factor(templ->rt[0].rgb_dst_factor);
-            blend->rt[i].blendeq        = svga_translate_blend_func(templ->rt[0].rgb_func);
-            blend->rt[i].srcblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_src_factor);
-            blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_dst_factor);
-            blend->rt[i].blendeq_alpha  = svga_translate_blend_func(templ->rt[0].alpha_func);
+         /* Note: the vgpu10 device does not yet support independent
+          * blend terms per render target.  Target[0] always specifies the
+          * blending terms.
+          */
+         if (templ->independent_blend_enable || templ->rt[0].blend_enable) {
+            /* always use the 0th target's blending terms for now */
+            blend->rt[i].srcblend =
+               svga_translate_blend_factor(svga, templ->rt[0].rgb_src_factor);
+            blend->rt[i].dstblend =
+               svga_translate_blend_factor(svga, templ->rt[0].rgb_dst_factor);
+            blend->rt[i].blendeq =
+               svga_translate_blend_func(templ->rt[0].rgb_func);
+            blend->rt[i].srcblend_alpha =
+               svga_translate_blend_factor(svga, templ->rt[0].alpha_src_factor);
+            blend->rt[i].dstblend_alpha =
+               svga_translate_blend_factor(svga, templ->rt[0].alpha_dst_factor);
+            blend->rt[i].blendeq_alpha =
+               svga_translate_blend_func(templ->rt[0].alpha_func);
 
             if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend ||
                 blend->rt[i].dstblend_alpha != blend->rt[i].dstblend ||
-                blend->rt[i].blendeq_alpha  != blend->rt[i].blendeq)
-            {
+                blend->rt[i].blendeq_alpha  != blend->rt[i].blendeq) {
                blend->rt[i].separate_alpha_blend_enable = TRUE;
             }
          }
+         else {
+            /* disabled - default blend terms */
+            blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
+            blend->rt[i].srcblend_alpha = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend_alpha = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq_alpha = SVGA3D_BLENDEQ_ADD;
+         }
+
+         if (templ->independent_blend_enable) {
+            blend->rt[i].blend_enable = templ->rt[i].blend_enable;
+         }
+         else {
+            blend->rt[i].blend_enable = templ->rt[0].blend_enable;
+         }
       }
 
-      blend->rt[i].writemask = templ->rt[0].colormask;
+      /* Some GL blend modes are not supported by the VGPU9 device (there's
+       * no equivalent of PIPE_BLENDFACTOR_[INV_]CONST_ALPHA).
+       * When we set this flag, we copy the constant blend alpha value
+       * to the R, G, B components.
+       * This works as long as the src/dst RGB blend factors don't use
+       * PIPE_BLENDFACTOR_CONST_COLOR and PIPE_BLENDFACTOR_CONST_ALPHA
+       * at the same time.  There's no work-around for that.
+       */
+      if (!svga_have_vgpu10(svga)) {
+         if (templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_CONST_ALPHA ||
+             templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_CONST_ALPHA ||
+             templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA ||
+             templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA) {
+            blend->blend_color_alpha = TRUE;
+         }
+      }
+
+      if (templ->independent_blend_enable) {
+         blend->rt[i].writemask = templ->rt[i].colormask;
+      }
+      else {
+         blend->rt[i].writemask = templ->rt[0].colormask;
+      }
+   }
+
+   blend->independent_blend_enable = templ->independent_blend_enable;
+
+   blend->alpha_to_coverage = templ->alpha_to_coverage;
+
+   if (svga_have_vgpu10(svga)) {
+      define_blend_state_object(svga, blend);
    }
 
    return blend;
 }
 
+
 static void svga_bind_blend_state(struct pipe_context *pipe,
                                   void *blend)
 {
@@ -210,9 +334,30 @@ static void svga_bind_blend_state(struct pipe_context *pipe,
    svga->dirty |= SVGA_NEW_BLEND;
 }
 
-
-static void svga_delete_blend_state(struct pipe_context *pipe, void *blend)
+static void svga_delete_blend_state(struct pipe_context *pipe,
+                                    void *blend)
 {
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_blend_state *bs =
+      (struct svga_blend_state *) blend;
+
+   if (bs->id != SVGA3D_INVALID_ID) {
+      enum pipe_error ret;
+
+      ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id);
+         assert(ret == PIPE_OK);
+      }
+
+      if (bs->id == svga->state.hw_draw.blend_id)
+         svga->state.hw_draw.blend_id = SVGA3D_INVALID_ID;
+
+      util_bitmask_clear(svga->blend_object_id_bm, bs->id);
+      bs->id = SVGA3D_INVALID_ID;
+   }
+
    FREE(blend);
 }
 
@@ -235,6 +380,3 @@ void svga_init_blend_functions( struct svga_context *svga )
 
    svga->pipe.set_blend_color = svga_set_blend_color;
 }
-
-
-
index dbb9f4b..2b34f96 100644
@@ -29,6 +29,7 @@
 #include "svga_cmd.h"
 #include "svga_surface.h"
 
+//#include "util/u_blit_sw.h"
 #include "util/u_format.h"
 #include "util/u_surface.h"
 
@@ -159,7 +160,8 @@ static void svga_blit(struct pipe_context *pipe,
    struct svga_context *svga = svga_context(pipe);
    struct pipe_blit_info info = *blit_info;
 
-   if (info.src.resource->nr_samples > 1 &&
+   if (!svga_have_vgpu10(svga) &&
+       info.src.resource->nr_samples > 1 &&
        info.dst.resource->nr_samples <= 1 &&
        !util_format_is_depth_or_stencil(info.src.resource->format) &&
        !util_format_is_pure_integer(info.src.resource->format)) {
@@ -171,12 +173,8 @@ static void svga_blit(struct pipe_context *pipe,
       return; /* done */
    }
 
-   if (info.mask & PIPE_MASK_S) {
-      debug_printf("svga: cannot blit stencil, skipping\n");
-      info.mask &= ~PIPE_MASK_S;
-   }
-
-   if (!util_blitter_is_blit_supported(svga->blitter, &info)) {
+   if ((info.mask & PIPE_MASK_S) ||
+       !util_blitter_is_blit_supported(svga->blitter, &info)) {
       debug_printf("svga: blit unsupported %s -> %s\n",
                    util_format_short_name(info.src.resource->format),
                    util_format_short_name(info.dst.resource->format));
@@ -188,9 +186,9 @@ static void svga_blit(struct pipe_context *pipe,
    util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb);
    util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems);
    util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs);
-   /*util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs);*/
-   /*util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
-                     (struct pipe_stream_output_target**)svga->so_targets);*/
+   util_blitter_save_geometry_shader(svga->blitter, svga->curr.user_gs);
+   util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
+                     (struct pipe_stream_output_target**)svga->so_targets);
    util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast);
    util_blitter_save_viewport(svga->blitter, &svga->curr.viewport);
    util_blitter_save_scissor(svga->blitter, &svga->curr.scissor);
@@ -199,14 +197,14 @@ static void svga_blit(struct pipe_context *pipe,
    util_blitter_save_depth_stencil_alpha(svga->blitter,
                                          (void*)svga->curr.depth);
    util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref);
-   /*util_blitter_save_sample_mask(svga->blitter, svga->sample_mask);*/
+   util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask);
    util_blitter_save_framebuffer(svga->blitter, &svga->curr.framebuffer);
    util_blitter_save_fragment_sampler_states(svga->blitter,
-                     svga->curr.num_samplers,
-                     (void**)svga->curr.sampler);
+                     svga->curr.num_samplers[PIPE_SHADER_FRAGMENT],
+                     (void**)svga->curr.sampler[PIPE_SHADER_FRAGMENT]);
    util_blitter_save_fragment_sampler_views(svga->blitter,
-                     svga->curr.num_sampler_views,
-                     svga->curr.sampler_views);
+                     svga->curr.num_sampler_views[PIPE_SHADER_FRAGMENT],
+                     svga->curr.sampler_views[PIPE_SHADER_FRAGMENT]);
    /*util_blitter_save_render_condition(svga->blitter, svga->render_cond_query,
                                       svga->render_cond_cond, svga->render_cond_mode);*/
    util_blitter_blit(svga->blitter, &info);
index c4edced..bab6178 100644
 #include "svga_surface.h"
 
 
+/**
+ * Clear the whole color buffer(s) by drawing a quad.  For VGPU10 we use
+ * this when clearing integer render targets.  We'll also clear the
+ * depth and/or stencil buffers if the clear_buffers mask specifies them.
+ */
+static void
+clear_buffers_with_quad(struct svga_context *svga,
+                        unsigned clear_buffers,
+                        const union pipe_color_union *color,
+                        double depth, unsigned stencil)
+{
+   const struct pipe_framebuffer_state *fb = &svga->curr.framebuffer;
+
+   util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb);
+   util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems);
+   util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs);
+   util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs);
+   util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
+                     (struct pipe_stream_output_target**)svga->so_targets);
+   util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast);
+   util_blitter_save_viewport(svga->blitter, &svga->curr.viewport);
+   util_blitter_save_scissor(svga->blitter, &svga->curr.scissor);
+   util_blitter_save_fragment_shader(svga->blitter, svga->curr.fs);
+   util_blitter_save_blend(svga->blitter, (void*)svga->curr.blend);
+   util_blitter_save_depth_stencil_alpha(svga->blitter,
+                                         (void*)svga->curr.depth);
+   util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref);
+   util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask);
+
+   util_blitter_clear(svga->blitter,
+                      fb->width, fb->height,
+                      1, /* num_layers */
+                      clear_buffers, color,
+                      depth, stencil);
+}
+
+
+/**
+ * Check if any of the color buffers are integer buffers.
+ */
+static boolean
+is_integer_target(struct pipe_framebuffer_state *fb, unsigned buffers)
+{
+   unsigned i;
+
+   for (i = 0; i < fb->nr_cbufs; i++) {
+      if ((buffers & (PIPE_CLEAR_COLOR0 << i)) &&
+          fb->cbufs[i] &&
+          util_format_is_pure_integer(fb->cbufs[i]->format)) {
+         return TRUE;
+      }
+   }
+   return FALSE;
+}
+
+
+/**
+ * Check if the integer values in the clear color can be represented
+ * by floats.  If so, we can use the VGPU10 ClearRenderTargetView command.
+ * Otherwise, we need to clear with a quad.
+ */
+static boolean
+ints_fit_in_floats(const union pipe_color_union *color)
+{
+   const int max = 1 << 24;
+   return (color->i[0] <= max &&
+           color->i[1] <= max &&
+           color->i[2] <= max &&
+           color->i[3] <= max);
+}
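The 1 << 24 bound is the 24-bit significand of IEEE-754 single precision:
every integer of magnitude <= 2^24 converts to float exactly, and 2^24 + 1
is the first that does not.  A standalone check:

    #include <assert.h>

    int main(void)
    {
       const int max = 1 << 24;                   /* 16777216 */
       assert((int)(float)max == max);            /* exactly representable */
       assert((int)(float)(max + 1) != max + 1);  /* 16777217 rounds away */
       return 0;
    }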
+
+
 static enum pipe_error
 try_clear(struct svga_context *svga, 
           unsigned buffers,
@@ -52,7 +124,7 @@ try_clear(struct svga_context *svga,
    if (ret != PIPE_OK)
       return ret;
 
-   if (svga->rebind.rendertargets) {
+   if (svga->rebind.flags.rendertargets) {
       ret = svga_reemit_framebuffer_bindings(svga);
       if (ret != PIPE_OK) {
          return ret;
@@ -71,29 +143,72 @@ try_clear(struct svga_context *svga,
       if (buffers & PIPE_CLEAR_DEPTH)
          flags |= SVGA3D_CLEAR_DEPTH;
 
-      if ((svga->curr.framebuffer.zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&
-          (buffers & PIPE_CLEAR_STENCIL))
+      if (buffers & PIPE_CLEAR_STENCIL)
          flags |= SVGA3D_CLEAR_STENCIL;
 
       rect.w = MAX2(rect.w, fb->zsbuf->width);
       rect.h = MAX2(rect.h, fb->zsbuf->height);
    }
 
-   if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
+   if (!svga_have_vgpu10(svga) &&
+       !svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) {
       restore_viewport = TRUE;
       ret = SVGA3D_SetViewport(svga->swc, &rect);
       if (ret != PIPE_OK)
          return ret;
    }
 
-   ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui[0], (float) depth, stencil,
-                          rect.x, rect.y, rect.w, rect.h);
-   if (ret != PIPE_OK)
-      return ret;
+   if (svga_have_vgpu10(svga)) {
+      if (flags & SVGA3D_CLEAR_COLOR) {
+         unsigned i;
+
+         if (is_integer_target(fb, buffers) && !ints_fit_in_floats(color)) {
+            clear_buffers_with_quad(svga, buffers, color, depth, stencil);
+            /* We also cleared depth/stencil, so that's done */
+            flags &= ~(SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL);
+         }
+         else {
+            struct pipe_surface *rtv;
+
+            /* Issue VGPU10 Clear commands */
+            for (i = 0; i < fb->nr_cbufs; i++) {
+               if ((fb->cbufs[i] == NULL) ||
+                   !(buffers & (PIPE_CLEAR_COLOR0 << i)))
+                  continue;
+
+               rtv = svga_validate_surface_view(svga,
+                                                svga_surface(fb->cbufs[i]));
+               if (rtv == NULL)
+                  return PIPE_ERROR_OUT_OF_MEMORY;
+
+               ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc,
+                                                         rtv, color->f);
+               if (ret != PIPE_OK)
+                  return ret;
+            }
+         }
+      }
+      if (flags & (SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL)) {
+         struct pipe_surface *dsv =
+            svga_validate_surface_view(svga, svga_surface(fb->zsbuf));
+         if (dsv == NULL)
+            return PIPE_ERROR_OUT_OF_MEMORY;
+
+         ret = SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv, flags,
+                                                   stencil, (float) depth);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+   }
+   else {
+      ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui[0], (float) depth, stencil,
+                             rect.x, rect.y, rect.w, rect.h);
+      if (ret != PIPE_OK)
+         return ret;
+   }
 
    if (restore_viewport) {
-      memcpy(&rect, &svga->state.hw_clear.viewport, sizeof rect);
-      ret = SVGA3D_SetViewport(svga->swc, &rect);
+      ret = SVGA3D_SetViewport(svga->swc, &svga->state.hw_clear.viewport);
    }
    
    return ret;
index c32b66d..8150879 100644
@@ -48,28 +48,46 @@ static void svga_set_constant_buffer(struct pipe_context *pipe,
                                      uint shader, uint index,
                                      struct pipe_constant_buffer *cb)
 {
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
    struct svga_context *svga = svga_context(pipe);
    struct pipe_resource *buf = cb ? cb->buffer : NULL;
-
-   if (cb && cb->user_buffer) {
-      buf = svga_user_buffer_create(pipe->screen,
-                                    (void *) cb->user_buffer,
-                                    cb->buffer_size,
-                                    PIPE_BIND_CONSTANT_BUFFER);
+   unsigned buffer_size = 0;
+
+   if (cb) {
+      buffer_size = cb->buffer_size;
+      if (cb->user_buffer) {
+         buf = svga_user_buffer_create(pipe->screen,
+                                       (void *) cb->user_buffer,
+                                       cb->buffer_size,
+                                       PIPE_BIND_CONSTANT_BUFFER);
+      }
    }
 
    assert(shader < PIPE_SHADER_TYPES);
-   assert(index == 0);
+   assert(index < Elements(svga->curr.constbufs[shader]));
+   assert(index < svgascreen->max_const_buffers);
+   (void) svgascreen;
+
+   pipe_resource_reference(&svga->curr.constbufs[shader][index].buffer, buf);
+
+   /* Make sure the constant buffer size to be updated is within the
+    * limit supported by the device.
+    */
+   svga->curr.constbufs[shader][index].buffer_size =
+      MIN2(buffer_size, SVGA_MAX_CONST_BUF_SIZE);
 
-   pipe_resource_reference(&svga->curr.cbufs[shader].buffer, buf);
-   svga->curr.cbufs[shader].buffer_size = cb ? cb->buffer_size : 0;
-   svga->curr.cbufs[shader].buffer_offset = cb ? cb->buffer_offset : 0;
-   svga->curr.cbufs[shader].user_buffer = NULL; /* not used */
+   svga->curr.constbufs[shader][index].buffer_offset = cb ? cb->buffer_offset : 0;
+   svga->curr.constbufs[shader][index].user_buffer = NULL; /* not used */
 
    if (shader == PIPE_SHADER_FRAGMENT)
       svga->dirty |= SVGA_NEW_FS_CONST_BUFFER;
-   else
+   else if (shader == PIPE_SHADER_VERTEX)
       svga->dirty |= SVGA_NEW_VS_CONST_BUFFER;
+   else
+      svga->dirty |= SVGA_NEW_GS_CONST_BUFFER;
+
+   /* update bitmask of dirty const buffers */
+   svga->state.dirty_constbufs[shader] |= (1 << index);
 
    if (cb && cb->user_buffer) {
       pipe_resource_reference(&buf, NULL);
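A sketch of how the dirty bitmask set above is typically drained on the
emit side (illustrative; u_bit_scan() is the util/u_math.h helper that
pops the lowest set bit):

    unsigned dirty = svga->state.dirty_constbufs[shader];

    while (dirty) {
       unsigned index = u_bit_scan(&dirty);
       /* re-emit / re-bind constant buffer 'index' for this shader */
    }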
index 8db21fd..5ea623b 100644
  *
  **********************************************************/
 
-#include "util/u_inlines.h"
 #include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
 #include "svga_context.h"
 #include "svga_hw_reg.h"
+#include "svga_cmd.h"
 
 
 static inline unsigned
@@ -69,10 +71,67 @@ svga_translate_stencil_op(unsigned op)
 }
 
 
+/**
+ * Define a vgpu10 depth/stencil state object for the given
+ * svga depth/stencil state.
+ */
+static void
+define_depth_stencil_state_object(struct svga_context *svga,
+                                  struct svga_depth_stencil_state *ds)
+{
+   unsigned try;
+
+   assert(svga_have_vgpu10(svga));
+
+   ds->id = util_bitmask_add(svga->ds_object_id_bm);
+
+   /* spot check that these comparison tokens are the same */
+   assert(SVGA3D_COMPARISON_NEVER == SVGA3D_CMP_NEVER);
+   assert(SVGA3D_COMPARISON_LESS == SVGA3D_CMP_LESS);
+   assert(SVGA3D_COMPARISON_NOT_EQUAL == SVGA3D_CMP_NOTEQUAL);
+
+   /* Loop in case command buffer is full and we need to flush and retry */
+   for (try = 0; try < 2; try++) {
+      enum pipe_error ret;
+
+      /* Note: we use the ds->stencil[0].enabled value for both the front
+       * and back-face enables.  If single-side stencil is used, we'll have
+       * set the back state the same as the front state.
+       */
+      ret = SVGA3D_vgpu10_DefineDepthStencilState(svga->swc,
+                                                  ds->id,
+                                                  /* depth/Z */
+                                                  ds->zenable,
+                                                  ds->zwriteenable,
+                                                  ds->zfunc,
+                                                  /* Stencil */
+                                                  ds->stencil[0].enabled, /*f|b*/
+                                                  ds->stencil[0].enabled, /*f*/
+                                                  ds->stencil[0].enabled, /*b*/
+                                                  ds->stencil_mask,
+                                                  ds->stencil_writemask,
+                                                  /* front stencil */
+                                                  ds->stencil[0].fail,
+                                                  ds->stencil[0].zfail,
+                                                  ds->stencil[0].pass,
+                                                  ds->stencil[0].func,
+                                                  /* back stencil */
+                                                  ds->stencil[1].fail,
+                                                  ds->stencil[1].zfail,
+                                                  ds->stencil[1].pass,
+                                                  ds->stencil[1].func);
+      if (ret == PIPE_OK)
+         return;
+      svga_context_flush(svga, NULL);
+   }
+}
+
+
 static void *
 svga_create_depth_stencil_state(struct pipe_context *pipe,
                                const struct pipe_depth_stencil_alpha_state *templ)
 {
+   struct svga_context *svga = svga_context(pipe);
    struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state );
 
    /* Don't try to figure out CW/CCW correspondence with
@@ -92,10 +151,18 @@ svga_create_depth_stencil_state(struct pipe_context *pipe,
       ds->stencil_mask      = templ->stencil[0].valuemask & 0xff;
       ds->stencil_writemask = templ->stencil[0].writemask & 0xff;
    }
+   else {
+      ds->stencil[0].func = SVGA3D_CMP_ALWAYS;
+      ds->stencil[0].fail = SVGA3D_STENCILOP_KEEP;
+      ds->stencil[0].zfail = SVGA3D_STENCILOP_KEEP;
+      ds->stencil[0].pass = SVGA3D_STENCILOP_KEEP;
+   }
 
 
    ds->stencil[1].enabled = templ->stencil[1].enabled;
    if (templ->stencil[1].enabled) {
+      assert(templ->stencil[0].enabled);
+      /* two-sided stencil */
       ds->stencil[1].func   = svga_translate_compare_func(templ->stencil[1].func);
       ds->stencil[1].fail   = svga_translate_stencil_op(templ->stencil[1].fail_op);
       ds->stencil[1].zfail  = svga_translate_stencil_op(templ->stencil[1].zfail_op);
@@ -104,6 +171,13 @@ svga_create_depth_stencil_state(struct pipe_context *pipe,
       ds->stencil_mask      = templ->stencil[1].valuemask & 0xff;
       ds->stencil_writemask = templ->stencil[1].writemask & 0xff;
    }
+   else {
+      /* back face state is same as front-face state */
+      ds->stencil[1].func = ds->stencil[0].func;
+      ds->stencil[1].fail = ds->stencil[0].fail;
+      ds->stencil[1].zfail = ds->stencil[0].zfail;
+      ds->stencil[1].pass = ds->stencil[0].pass;
+   }
 
 
    ds->zenable = templ->depth.enabled;
@@ -111,12 +185,22 @@ svga_create_depth_stencil_state(struct pipe_context *pipe,
       ds->zfunc = svga_translate_compare_func(templ->depth.func);
       ds->zwriteenable = templ->depth.writemask;
    }
+   else {
+      ds->zfunc = SVGA3D_CMP_ALWAYS;
+   }
 
    ds->alphatestenable = templ->alpha.enabled;
    if (ds->alphatestenable) {
       ds->alphafunc = svga_translate_compare_func(templ->alpha.func);
       ds->alpharef = templ->alpha.ref_value;
    }
+   else {
+      ds->alphafunc = SVGA3D_CMP_ALWAYS;
+   }
+
+   if (svga_have_vgpu10(svga)) {
+      define_depth_stencil_state_object(svga, ds);
+   }
 
    return ds;
 }
@@ -126,13 +210,43 @@ static void svga_bind_depth_stencil_state(struct pipe_context *pipe,
 {
    struct svga_context *svga = svga_context(pipe);
 
+   if (svga_have_vgpu10(svga)) {
+      /* flush any previously queued drawing before changing state */
+      svga_hwtnl_flush_retry(svga);
+   }
+
    svga->curr.depth = (const struct svga_depth_stencil_state *)depth_stencil;
-   svga->dirty |= SVGA_NEW_DEPTH_STENCIL;
+   svga->dirty |= SVGA_NEW_DEPTH_STENCIL_ALPHA;
 }
 
 static void svga_delete_depth_stencil_state(struct pipe_context *pipe,
                                             void *depth_stencil)
 {
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_depth_stencil_state *ds =
+      (struct svga_depth_stencil_state *) depth_stencil;
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret;
+
+      svga_hwtnl_flush_retry(svga);
+
+      assert(ds->id != SVGA3D_INVALID_ID);
+
+      ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id);
+         assert(ret == PIPE_OK);
+      }
+
+      if (ds->id == svga->state.hw_draw.depth_stencil_id)
+         svga->state.hw_draw.depth_stencil_id = SVGA3D_INVALID_ID;
+
+      util_bitmask_clear(svga->ds_object_id_bm, ds->id);
+      ds->id = SVGA3D_INVALID_ID;
+   }
+
    FREE(depth_stencil);
 }
 
@@ -142,6 +256,11 @@ static void svga_set_stencil_ref( struct pipe_context *pipe,
 {
    struct svga_context *svga = svga_context(pipe);
 
+   if (svga_have_vgpu10(svga)) {
+      /* flush any previously queued drawing before changing state */
+      svga_hwtnl_flush_retry(svga);
+   }
+
    svga->curr.stencil_ref = *stencil_ref;
 
    svga->dirty |= SVGA_NEW_STENCIL_REF;
@@ -151,6 +270,11 @@ static void
 svga_set_sample_mask(struct pipe_context *pipe,
                      unsigned sample_mask)
 {
+   struct svga_context *svga = svga_context(pipe);
+
+   svga->curr.sample_mask = sample_mask;
+
+   svga->dirty |= SVGA_NEW_BLEND; /* See emit_rss_vgpu10() */
 }
 
 
index 87f6b3d..303d456 100644
@@ -27,7 +27,9 @@
 #include "util/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_prim.h"
+#include "util/u_prim_restart.h"
 #include "util/u_time.h"
+#include "util/u_upload_mgr.h"
 #include "indices/u_indices.h"
 
 #include "svga_hw_reg.h"
 #include "svga_context.h"
 #include "svga_screen.h"
 #include "svga_draw.h"
+#include "svga_shader.h"
 #include "svga_state.h"
 #include "svga_swtnl.h"
 #include "svga_debug.h"
 #include "svga_resource_buffer.h"
 
-
 static enum pipe_error
 retry_draw_range_elements( struct svga_context *svga,
                            struct pipe_resource *index_buffer,
@@ -51,26 +53,31 @@ retry_draw_range_elements( struct svga_context *svga,
                            unsigned prim,
                            unsigned start,
                            unsigned count,
+                           unsigned start_instance,
                            unsigned instance_count,
                            boolean do_retry )
 {
    enum pipe_error ret = PIPE_OK;
 
-   svga_hwtnl_set_unfilled( svga->hwtnl,
-                            svga->curr.rast->hw_unfilled );
-
-   svga_hwtnl_set_flatshade( svga->hwtnl,
-                             svga->curr.rast->templ.flatshade,
-                             svga->curr.rast->templ.flatshade_first );
+   svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
 
    ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
    if (ret != PIPE_OK)
       goto retry;
 
+   /* Determine whether to flatshade after svga_update_state(), since
+    * the fragment shader may have changed.
+    */
+   svga_hwtnl_set_flatshade(svga->hwtnl,
+                            svga->curr.rast->templ.flatshade ||
+                            svga->state.hw_draw.fs->uses_flat_interp,
+                            svga->curr.rast->templ.flatshade_first);
+
    ret = svga_hwtnl_draw_range_elements( svga->hwtnl,
                                          index_buffer, index_size, index_bias,
                                          min_index, max_index,
-                                         prim, start, count );
+                                         prim, start, count,
+                                         start_instance, instance_count);
    if (ret != PIPE_OK)
       goto retry;
 
@@ -85,7 +92,7 @@ retry:
                                         index_buffer, index_size, index_bias,
                                         min_index, max_index,
                                         prim, start, count,
-                                        instance_count, FALSE );
+                                        start_instance, instance_count, FALSE );
    }
 
    return ret;
@@ -94,27 +101,28 @@ retry:
 
 static enum pipe_error
 retry_draw_arrays( struct svga_context *svga,
-                   unsigned prim,
-                   unsigned start,
-                   unsigned count,
-                   unsigned instance_count,
+                   unsigned prim, unsigned start, unsigned count,
+                   unsigned start_instance, unsigned instance_count,
                    boolean do_retry )
 {
    enum pipe_error ret;
 
-   svga_hwtnl_set_unfilled( svga->hwtnl,
-                            svga->curr.rast->hw_unfilled );
-
-   svga_hwtnl_set_flatshade( svga->hwtnl,
-                             svga->curr.rast->templ.flatshade,
-                             svga->curr.rast->templ.flatshade_first );
+   svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
 
    ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
    if (ret != PIPE_OK)
       goto retry;
 
-   ret = svga_hwtnl_draw_arrays( svga->hwtnl, prim,
-                                 start, count );
+   /* Determine whether flatshading is to be used after svga_update_state()
+    * in case the fragment shader has changed.
+    */
+   svga_hwtnl_set_flatshade(svga->hwtnl,
+                            svga->curr.rast->templ.flatshade ||
+                            svga->state.hw_draw.fs->uses_flat_interp,
+                            svga->curr.rast->templ.flatshade_first);
+
+   ret = svga_hwtnl_draw_arrays(svga->hwtnl, prim, start, count,
+                                start_instance, instance_count);
    if (ret != PIPE_OK)
       goto retry;
 
@@ -125,18 +133,41 @@ retry:
    {
       svga_context_flush( svga, NULL );
 
-      return retry_draw_arrays( svga,
-                                prim,
-                                start,
-                                count,
-                                instance_count,
-                                FALSE );
+      return retry_draw_arrays(svga, prim, start, count,
+                               start_instance, instance_count,
+                               FALSE );
    }
 
    return ret;
 }
 
 
+/**
+ * Determine if we need to implement primitive restart with a fallback
+ * path which breaks the original primitive into sub-primitives at the
+ * restart indices.
+ */
+static boolean
+need_fallback_prim_restart(const struct svga_context *svga,
+                           const struct pipe_draw_info *info)
+{
+   if (info->primitive_restart && info->indexed) {
+      if (!svga_have_vgpu10(svga))
+         return TRUE;
+      else if (!svga->state.sw.need_swtnl) {
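+         /* The VGPU10 device only honors the all-ones restart index for
+          * 16- and 32-bit index buffers; e.g. restart_index == 0xfffe with
+          * ushort indices must take the fallback path.
+          */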
+         if (svga->curr.ib.index_size == 1)
+            return TRUE; /* no device support for 1-byte indices */
+         else if (svga->curr.ib.index_size == 2)
+            return info->restart_index != 0xffff;
+         else
+            return info->restart_index != 0xffffffff;
+      }
+   }
+
+   return FALSE;
+}
+
+
 static void
 svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 {
@@ -148,7 +179,8 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 
    svga->num_draw_calls++;  /* for SVGA_QUERY_DRAW_CALLS */
 
-   if (!u_trim_pipe_prim( info->mode, &count ))
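+   /* Skip the draw entirely when both faces are culled; see
+    * translate_cull_mode(), which maps FRONT_AND_BACK to SVGA3D_CULL_NONE.
+    */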
+   if (u_reduced_prim(info->mode) == PIPE_PRIM_TRIANGLES &&
+       svga->curr.rast->templ.cull_face == PIPE_FACE_FRONT_AND_BACK)
       return;
 
    /*
@@ -165,6 +197,17 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE;
    }
 
+   if (need_fallback_prim_restart(svga, info)) {
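+      /* Split the draw at the restart indices and issue one sub-draw per
+       * segment via the shared gallium fallback helper.
+       */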
+      enum pipe_error r;
+      r = util_draw_vbo_without_prim_restart(pipe, &svga->curr.ib, info);
+      assert(r == PIPE_OK);
+      (void) r;
+      return;
+   }
+
+   if (!u_trim_pipe_prim( info->mode, &count ))
+      return;
+
    needed_swtnl = svga->state.sw.need_swtnl;
 
    svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL );
@@ -208,17 +251,15 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                                           info->max_index,
                                           info->mode,
                                           info->start + offset,
-                                          info->count,
+                                          count,
+                                          info->start_instance,
                                           info->instance_count,
                                           TRUE );
       }
       else {
-         ret = retry_draw_arrays( svga,
-                                  info->mode,
-                                  info->start,
-                                  info->count,
-                                  info->instance_count,
-                                  TRUE );
+         ret = retry_draw_arrays(svga, info->mode, info->start, count,
+                                 info->start_instance, info->instance_count,
+                                 TRUE);
       }
    }
 
index 75299c5..4a9b3c9 100644 (file)
@@ -31,7 +31,6 @@
 #include "draw/draw_context.h"
 
 #include "svga_context.h"
-#include "svga_tgsi.h"
 #include "svga_hw_reg.h"
 #include "svga_cmd.h"
 #include "svga_debug.h"
@@ -63,12 +62,6 @@ svga_create_fs_state(struct pipe_context *pipe,
 
    fs->draw_shader = draw_create_fragment_shader(svga->swtnl.draw, templ);
 
-   if (SVGA_DEBUG & DEBUG_TGSI || 0) {
-      debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
-                   __FUNCTION__, fs->base.id,
-                   fs->base.info.num_inputs, fs->base.info.num_outputs);
-   }
-
    return fs;
 }
 
@@ -94,20 +87,30 @@ svga_delete_fs_state(struct pipe_context *pipe, void *shader)
 
    svga_hwtnl_flush_retry(svga);
 
+   assert(fs->base.parent == NULL);
+
    draw_delete_fragment_shader(svga->swtnl.draw, fs->draw_shader);
 
    for (variant = fs->base.variants; variant; variant = tmp) {
       tmp = variant->next;
 
-      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
-      (void) ret;  /* PIPE_ERROR_ not handled yet */
-
-      /*
-       * Remove stale references to this variant to ensure a new variant on the
-       * same address will be detected as a change.
-       */
-      if (variant == svga->state.hw_draw.fs)
+      /* Check if we're deleting the currently bound shader */
+      if (variant == svga->state.hw_draw.fs) {
+         ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL);
+            assert(ret == PIPE_OK);
+         }
          svga->state.hw_draw.fs = NULL;
+      }
+
+      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+         assert(ret == PIPE_OK);
+      }
    }
 
    FREE((void *)fs->base.tokens);
diff --git a/src/gallium/drivers/svga/svga_pipe_gs.c b/src/gallium/drivers/svga/svga_pipe_gs.c
new file mode 100644 (file)
index 0000000..d614e9d
--- /dev/null
@@ -0,0 +1,142 @@
+/**********************************************************
+ * Copyright 2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_text.h"
+
+#include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
+#include "svga_shader.h"
+#include "svga_streamout.h"
+
+static void *
+svga_create_gs_state(struct pipe_context *pipe,
+                     const struct pipe_shader_state *templ)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_geometry_shader *gs = CALLOC_STRUCT(svga_geometry_shader);
+
+   if (!gs)
+      return NULL;
+
+   gs->base.tokens = tgsi_dup_tokens(templ->tokens);
+
+   /* Collect basic info that we'll need later:
+    */
+   tgsi_scan_shader(gs->base.tokens, &gs->base.info);
+
+   gs->draw_shader = draw_create_geometry_shader(svga->swtnl.draw, templ);
+
+   gs->base.id = svga->debug.shader_id++;
+
+   gs->generic_outputs = svga_get_generic_outputs_mask(&gs->base.info);
+
+   /* check for any stream output declarations */
+   if (templ->stream_output.num_outputs) {
+      gs->base.stream_output = svga_create_stream_output(svga, &gs->base,
+                                                         &templ->stream_output);
+   }
+
+   return gs;
+}
+
+
+static void
+svga_bind_gs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_geometry_shader *gs = (struct svga_geometry_shader *)shader;
+   struct svga_context *svga = svga_context(pipe);
+
+   svga->curr.user_gs = gs;
+   svga->dirty |= SVGA_NEW_GS;
+}
+
+
+static void
+svga_delete_gs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_geometry_shader *gs = (struct svga_geometry_shader *)shader;
+   struct svga_geometry_shader *next_gs;
+   struct svga_shader_variant *variant, *tmp;
+   enum pipe_error ret;
+
+   svga_hwtnl_flush_retry(svga);
+
+   /* Start deletion from the original geometry shader state */
+   if (gs->base.parent != NULL)
+      gs = (struct svga_geometry_shader *)gs->base.parent;
+
+   /* Free the list of geometry shaders */
+   while (gs) {
+      next_gs = (struct svga_geometry_shader *)gs->base.next;
+
+      if (gs->base.stream_output != NULL)
+         svga_delete_stream_output(svga, gs->base.stream_output);
+
+      draw_delete_geometry_shader(svga->swtnl.draw, gs->draw_shader);
+
+      for (variant = gs->base.variants; variant; variant = tmp) {
+         tmp = variant->next;
+
+         /* Check if we're deleting the currently bound shader */
+         if (variant == svga->state.hw_draw.gs) {
+            ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL);
+            if (ret != PIPE_OK) {
+               svga_context_flush(svga, NULL);
+               ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL);
+               assert(ret == PIPE_OK);
+            }
+            svga->state.hw_draw.gs = NULL;
+         }
+
+         ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, variant);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS,
+                                              variant);
+            assert(ret == PIPE_OK);
+         }
+      }
+
+      FREE((void *)gs->base.tokens);
+      FREE(gs);
+      gs = next_gs;
+   }
+}
+
+
+void
+svga_init_gs_functions(struct svga_context *svga)
+{
+   svga->pipe.create_gs_state = svga_create_gs_state;
+   svga->pipe.bind_gs_state = svga_bind_gs_state;
+   svga->pipe.delete_gs_state = svga_delete_gs_state;
+}
index 1df32a1..c8020da 100644 (file)
@@ -27,6 +27,7 @@
 
 #include "util/u_framebuffer.h"
 #include "util/u_inlines.h"
+#include "util/u_pstipple.h"
 
 #include "svga_context.h"
 #include "svga_screen.h"
@@ -46,10 +47,37 @@ static void svga_set_scissor_states( struct pipe_context *pipe,
 }
 
 
-static void svga_set_polygon_stipple( struct pipe_context *pipe,
-                                      const struct pipe_poly_stipple *stipple )
+static void
+svga_set_polygon_stipple(struct pipe_context *pipe,
+                         const struct pipe_poly_stipple *stipple)
 {
-   /* overridden by the draw module */
+   struct svga_context *svga = svga_context(pipe);
+
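+   /* Realize the 32x32 stipple pattern as a texture plus sampler state;
+    * a fragment shader variant samples it to discard masked-off fragments
+    * (see util_pstipple_*).
+    */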
+   /* release old texture */
+   pipe_resource_reference(&svga->polygon_stipple.texture, NULL);
+
+   /* release old sampler view */
+   if (svga->polygon_stipple.sampler_view) {
+      pipe->sampler_view_destroy(pipe,
+                                 &svga->polygon_stipple.sampler_view->base);
+   }
+
+   /* create new stipple texture */
+   svga->polygon_stipple.texture =
+      util_pstipple_create_stipple_texture(pipe, stipple->stipple);
+
+   /* create new sampler view */
+   svga->polygon_stipple.sampler_view =
+      (struct svga_pipe_sampler_view *)
+      util_pstipple_create_sampler_view(pipe,
+                                        svga->polygon_stipple.texture);
+
+   /* allocate sampler state, if first time */
+   if (!svga->polygon_stipple.sampler) {
+      svga->polygon_stipple.sampler = util_pstipple_create_sampler(pipe);
+   }
+
+   svga->dirty |= SVGA_NEW_STIPPLE;
 }
 
 
@@ -83,6 +111,11 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
    boolean propagate = FALSE;
    unsigned i;
 
+   /* make sure any pending drawing calls are flushed before changing
+    * the framebuffer state
+    */
+   svga_hwtnl_flush_retry(svga);
+
    dst->width = fb->width;
    dst->height = fb->height;
    dst->nr_cbufs = fb->nr_cbufs;
@@ -99,9 +132,6 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
    }
 
    if (propagate) {
-      /* make sure that drawing calls comes before propagation calls */
-      svga_hwtnl_flush_retry( svga );
-   
       for (i = 0; i < dst->nr_cbufs; i++) {
          struct pipe_surface *s = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;
          if (dst->cbufs[i] && dst->cbufs[i] != s)
@@ -109,13 +139,30 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
       }
    }
 
-   /* XXX: Actually the virtual hardware may support rendertargets with
-    * different size, depending on the host API and driver, but since we cannot
-    * know that make no such assumption here. */
-   for(i = 0; i < fb->nr_cbufs; ++i) {
-      if (fb->zsbuf && fb->cbufs[i]) {
-         assert(fb->zsbuf->width == fb->cbufs[i]->width); 
-         assert(fb->zsbuf->height == fb->cbufs[i]->height); 
+   /* Check that all surfaces are the same size.
+    * The virtual hardware may actually support render targets with
+    * different sizes, depending on the host API and driver, so only
+    * warn rather than assert.
+    */
+   {
+      int width = 0, height = 0;
+      if (fb->zsbuf) {
+         width = fb->zsbuf->width;
+         height = fb->zsbuf->height;
+      }
+      for (i = 0; i < fb->nr_cbufs; ++i) {
+         if (fb->cbufs[i]) {
+            if (width && height) {
+               if (fb->cbufs[i]->width != width ||
+                   fb->cbufs[i]->height != height) {
+                  debug_warning("Mixed-size color and depth/stencil surfaces "
+                                "may not work properly");
+               }
+            }
+            else {
+               width = fb->cbufs[i]->width;
+               height = fb->cbufs[i]->height;
+            }
+         }
       }
    }
 
index 208a2cd..7081e5a 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ * Copyright 2008-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -25,6 +25,8 @@
 
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
+
+#include "util/u_bitmask.h"
 #include "util/u_memory.h"
 
 #include "svga_cmd.h"
@@ -42,16 +44,26 @@ struct pipe_query {
    int dummy;
 };
 
-
 struct svga_query {
    struct pipe_query base;
    unsigned type;                  /**< PIPE_QUERY_x or SVGA_QUERY_x */
    SVGA3dQueryType svga_type;      /**< SVGA3D_QUERYTYPE_x or unused */
 
+   unsigned id;                    /**< Per-context query identifier */
+
+   struct pipe_fence_handle *fence;
+
    /** For PIPE_QUERY_OCCLUSION_COUNTER / SVGA3D_QUERYTYPE_OCCLUSION */
+
+   /* For VGPU9 */
    struct svga_winsys_buffer *hwbuf;
    volatile SVGA3dQueryResult *queryResult;
-   struct pipe_fence_handle *fence;
+
+   /** For VGPU10 */
+   struct svga_winsys_gb_query *gb_query;
+   SVGA3dDXQueryFlags flags;
+   unsigned offset;                /**< offset to the gb_query memory */
+   struct pipe_query *predicate;   /**< The associated query that can be used for predication */
 
    /** For non-GPU SVGA_QUERY_x queries */
    uint64_t begin_count, end_count;
@@ -72,50 +84,641 @@ svga_get_query_result(struct pipe_context *pipe,
                       boolean wait,
                       union pipe_query_result *result);
 
+static enum pipe_error
+define_query_vgpu9(struct svga_context *svga,
+                   struct svga_query *sq)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+
+   sq->hwbuf = svga_winsys_buffer_create(svga, 1,
+                                         SVGA_BUFFER_USAGE_PINNED,
+                                         sizeof *sq->queryResult);
+   if (!sq->hwbuf)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   sq->queryResult = (SVGA3dQueryResult *)
+                     sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE);
+   if (!sq->queryResult) {
+      sws->buffer_destroy(sws, sq->hwbuf);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   sq->queryResult->totalSize = sizeof *sq->queryResult;
+   sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
+
+   /* We request the buffer to be pinned and assume it is always mapped.
+    * The reason is that we don't want to wait for fences when checking the
+    * query status.
+    */
+   sws->buffer_unmap(sws, sq->hwbuf);
+
+   return PIPE_OK;
+}
+
+static enum pipe_error
+begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error ret = PIPE_OK;
+
+   if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
+      /* The application doesn't care for the pending query result.
+       * We cannot let go of the existing buffer and just get a new one
+       * because its storage may be reused for other purposes and clobbered
+       * by the host when it determines the query result.  So the only
+       * option here is to wait for the existing query's result -- not a
+       * big deal, given that no sane application would do this.
+       */
+       uint64_t result;
+       svga_get_query_result(&svga->pipe, &sq->base, TRUE, (void*)&result);
+       assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
+   }
+
+   sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
+   sws->fence_reference(sws, &sq->fence, NULL);
+
+   ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
+   }
+   return ret;
+}
+
+static enum pipe_error
+end_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
+{
+   enum pipe_error ret = PIPE_OK;
+
+   /* Set to PENDING before sending EndQuery. */
+   sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
+
+   ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
+   }
+   return ret;
+}
+
+static boolean
+get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
+                       boolean wait, uint64_t *result)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error ret;
+   SVGA3dQueryState state;
+
+   if (!sq->fence) {
+      /* The query status won't be updated by the host unless
+       * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause
+       * a synchronous wait on the host.
+       */
+      ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
+      }
+      assert (ret == PIPE_OK);
+      svga_context_flush(svga, &sq->fence);
+      assert(sq->fence);
+   }
+
+   state = sq->queryResult->state;
+   if (state == SVGA3D_QUERYSTATE_PENDING) {
+      if (!wait)
+         return FALSE;
+      sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+      state = sq->queryResult->state;
+   }
+
+   assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||
+          state == SVGA3D_QUERYSTATE_FAILED);
+
+   *result = (uint64_t)sq->queryResult->result32;
+   return TRUE;
+}
+
+
+/**
+ * VGPU10
+ *
+ * There is one query mob allocated per context, shared by all query
+ * types. The mob holds the queries' states and results. Since each
+ * query result type has a different length, the mob is divided into
+ * memory blocks to ease allocation management. Each memory block holds
+ * queries of the same type, and multiple memory blocks can be allocated
+ * for a particular query type.
+ *
+ * Currently each memory block is 184 bytes and we support up to 128
+ * memory blocks; the query memory size is arbitrary right now. Each
+ * occlusion query takes about 8 bytes, so one memory block can
+ * accommodate 23 occlusion queries and 128 blocks can hold up to 2944
+ * occlusion queries. That seems reasonable for now; if the limit proves
+ * too small, we can raise it or grow the mob at runtime.
+ * Note that the SVGA device does not impose one mob per context for
+ * queries, so we could allocate multiple query mobs; however, the WDDM
+ * KMD does not currently support that.
+ *
+ * Also note that the GL guest driver does not issue any of the
+ * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery.
+ */
+#define SVGA_QUERY_MEM_BLOCK_SIZE    (sizeof(SVGADXQueryResultUnion) * 2)
+#define SVGA_QUERY_MEM_SIZE          (128 * SVGA_QUERY_MEM_BLOCK_SIZE)
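+
+/*
+ * Layout sketch (sizes per the comment above): block i occupies bytes
+ * [i * SVGA_QUERY_MEM_BLOCK_SIZE, (i + 1) * SVGA_QUERY_MEM_BLOCK_SIZE)
+ * of the mob, and a query of size len in slot s of block i lives at
+ * byte offset i * SVGA_QUERY_MEM_BLOCK_SIZE + s * len
+ * (see allocate_query()).
+ */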
+
+struct svga_qmem_alloc_entry
+{
+   unsigned start_offset;               /* start offset of the memory block */
+   unsigned block_index;                /* block index of the memory block */
+   unsigned query_size;                 /* query size in this memory block */
+   unsigned nquery;                     /* number of queries allocated */
+   struct util_bitmask *alloc_mask;     /* allocation mask */
+   struct svga_qmem_alloc_entry *next;  /* next memory block */
+};
+
+
+/**
+ * Allocate a memory block from the query object memory
+ * \return -1 if out of memory, else index of the query memory block
+ */
+static int
+allocate_query_block(struct svga_context *svga)
+{
+   int index;
+   unsigned offset;
+
+   /* Find the next available query block */
+   index = util_bitmask_add(svga->gb_query_alloc_mask);
+
+   if (index == UTIL_BITMASK_INVALID_INDEX)
+      return -1;
+
+   offset = index * SVGA_QUERY_MEM_BLOCK_SIZE;
+   if (offset >= svga->gb_query_len) {
+      unsigned i;
+
+      /*
+       * All the memory blocks have been allocated; see if there is an
+       * empty memory block that can be recycled.
+       */
+      index = -1;
+      for (i = 0; i < SVGA_QUERY_MAX && index == -1; i++) {
+         struct svga_qmem_alloc_entry *alloc_entry;
+         struct svga_qmem_alloc_entry *prev_alloc_entry = NULL;
+
+         alloc_entry = svga->gb_query_map[i];
+         while (alloc_entry && index == -1) {
+            if (alloc_entry->nquery == 0) {
+               /* This memory block is empty, it can be recycled. */
+               if (prev_alloc_entry) {
+                  prev_alloc_entry->next = alloc_entry->next;
+               } else {
+                  svga->gb_query_map[i] = alloc_entry->next;
+               }
+               index = alloc_entry->block_index;
+            } else {
+               prev_alloc_entry = alloc_entry;
+               alloc_entry = alloc_entry->next;
+            }
+         }
+      }
+   }
+
+   return index;
+}
+
+/**
+ * Allocate a slot in the specified memory block.
+ * All slots in this memory block are of the same size.
+ *
+ * \return -1 if out of memory, else index of the query slot
+ */
+static int
+allocate_query_slot(struct svga_context *svga,
+                    struct svga_qmem_alloc_entry *alloc)
+{
+   int index;
+   unsigned offset;
+
+   /* Find the next available slot */
+   index = util_bitmask_add(alloc->alloc_mask);
+
+   if (index == UTIL_BITMASK_INVALID_INDEX)
+      return -1;
+
+   offset = index * alloc->query_size;
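+   /* The bitmask can hand out indices past the block's capacity, so
+    * reject any slot whose byte offset falls outside the fixed-size block.
+    */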
+   if (offset >= SVGA_QUERY_MEM_BLOCK_SIZE)
+      return -1;
+
+   alloc->nquery++;
+
+   return index;
+}
+
+/**
+ * Deallocate the specified slot in the memory block.
+ * Once all slots in a block are freed, the block becomes eligible for
+ * recycling by allocate_query_block(), so it can be reused for another
+ * query type.
+ */
+static void
+deallocate_query_slot(struct svga_context *svga,
+                      struct svga_qmem_alloc_entry *alloc,
+                      unsigned index)
+{
+   assert(index != UTIL_BITMASK_INVALID_INDEX);
+
+   util_bitmask_clear(alloc->alloc_mask, index);
+   alloc->nquery--;
+
+   /*
+    * Don't worry about deallocating the empty memory block here.
+    * The empty memory block will be recycled when no more memory blocks
+    * can be allocated.
+    */
+}
+
+static struct svga_qmem_alloc_entry *
+allocate_query_block_entry(struct svga_context *svga,
+                           unsigned len)
+{
+   struct svga_qmem_alloc_entry *alloc_entry;
+   int block_index = -1;
+
+   block_index = allocate_query_block(svga);
+   if (block_index == -1)
+      return NULL;
+   alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);
+   if (alloc_entry == NULL)
+      return NULL;
+
+   alloc_entry->block_index = block_index;
+   alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE;
+   alloc_entry->nquery = 0;
+   alloc_entry->alloc_mask = util_bitmask_create();
+   alloc_entry->next = NULL;
+   alloc_entry->query_size = len;
+
+   return alloc_entry;
+}
+
+/**
+ * Allocate a memory slot for a query of the specified type.
+ * It will first search through the memory blocks that are allocated
+ * for the query type. If no memory slot is available, it will try
+ * to allocate another memory block within the query object memory for
+ * this query type.
+ */
+static int
+allocate_query(struct svga_context *svga,
+               SVGA3dQueryType type,
+               unsigned len)
+{
+   struct svga_qmem_alloc_entry *alloc_entry;
+   int slot_index = -1;
+   unsigned offset;
+
+   assert(type < SVGA_QUERY_MAX);
+
+   alloc_entry = svga->gb_query_map[type];
+
+   if (alloc_entry == NULL) {
+      /*
+       * No query memory block has been allocated for this query type;
+       * allocate one now.
+       */
+      alloc_entry = allocate_query_block_entry(svga, len);
+      if (alloc_entry == NULL)
+         return -1;
+      svga->gb_query_map[type] = alloc_entry;
+   }
+
+   /* Allocate a slot within the memory block allocated for this query type */
+   slot_index = allocate_query_slot(svga, alloc_entry);
+
+   if (slot_index == -1) {
+      /* This query memory block is full, allocate another one */
+      alloc_entry = allocate_query_block_entry(svga, len);
+      if (alloc_entry == NULL)
+         return -1;
+      alloc_entry->next = svga->gb_query_map[type];
+      svga->gb_query_map[type] = alloc_entry;
+      slot_index = allocate_query_slot(svga, alloc_entry);
+   }
+
+   assert(slot_index != -1);
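+   /* Convert the slot index to a byte offset within the query mob. */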
+   offset = slot_index * len + alloc_entry->start_offset;
+
+   return offset;
+}
+
+
+/**
+ * Deallocate memory slot allocated for the specified query
+ */
+static void
+deallocate_query(struct svga_context *svga,
+                 struct svga_query *sq)
+{
+   struct svga_qmem_alloc_entry *alloc_entry;
+   unsigned slot_index;
+   unsigned offset = sq->offset;
+
+   alloc_entry = svga->gb_query_map[sq->svga_type];
+
+   while (alloc_entry) {
+      if (offset >= alloc_entry->start_offset &&
+          offset < alloc_entry->start_offset + SVGA_QUERY_MEM_BLOCK_SIZE) {
+
+         /* The slot belongs to this memory block, deallocate it */
+         slot_index = (offset - alloc_entry->start_offset) /
+                      alloc_entry->query_size;
+         deallocate_query_slot(svga, alloc_entry, slot_index);
+         alloc_entry = NULL;
+      } else {
+         alloc_entry = alloc_entry->next;
+      }
+   }
+}
+
+
+/**
+ * Destroy the gb query object and all the related query structures
+ */
+static void
+destroy_gb_query_obj(struct svga_context *svga)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   unsigned i;
+
+   for (i = 0; i < SVGA_QUERY_MAX; i++) {
+      struct svga_qmem_alloc_entry *alloc_entry, *next;
+      alloc_entry = svga->gb_query_map[i];
+      while (alloc_entry) {
+         next = alloc_entry->next;
+         util_bitmask_destroy(alloc_entry->alloc_mask);
+         FREE(alloc_entry);
+         alloc_entry = next;
+      }
+      svga->gb_query_map[i] = NULL;
+   }
+
+   if (svga->gb_query)
+      sws->query_destroy(sws, svga->gb_query);
+   svga->gb_query = NULL;
+
+   util_bitmask_destroy(svga->gb_query_alloc_mask);
+}
+
+/**
+ * Define query and create the gb query object if it is not already created.
+ * There is only one gb query object per context which will be shared by
+ * queries of all types.
+ */
+static enum pipe_error
+define_query_vgpu10(struct svga_context *svga,
+                    struct svga_query *sq, int resultLen)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   int qlen;
+   enum pipe_error ret = PIPE_OK;
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+
+   if (svga->gb_query == NULL) {
+      /* Create a gb query object */
+      svga->gb_query = sws->query_create(sws, SVGA_QUERY_MEM_SIZE);
+      if (!svga->gb_query)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      svga->gb_query_len = SVGA_QUERY_MEM_SIZE;
+      memset (svga->gb_query_map, 0, sizeof(svga->gb_query_map));
+      svga->gb_query_alloc_mask = util_bitmask_create();
+
+      /* Bind the query object to the context */
+      if (svga->swc->query_bind(svga->swc, svga->gb_query,
+                                SVGA_QUERY_FLAG_SET) != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         svga->swc->query_bind(svga->swc, svga->gb_query,
+                               SVGA_QUERY_FLAG_SET);
+      }
+   }
+
+   sq->gb_query = svga->gb_query;
+
+   /* Allocate an integer ID for this query */
+   sq->id = util_bitmask_add(svga->query_id_bm);
+   if (sq->id == UTIL_BITMASK_INVALID_INDEX)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* Find a slot for this query in the gb object */
+   qlen = resultLen + sizeof(SVGA3dQueryState);
+   sq->offset = allocate_query(svga, sq->svga_type, qlen);
+   if (sq->offset == -1)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   SVGA_DBG(DEBUG_QUERY, "   query type=%d qid=0x%x offset=%d\n",
+            sq->svga_type, sq->id, sq->offset);
+
+   /* Send the SVGA3D commands to define and bind the query and to set
+    * its offset within the mob.
+    */
+   ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
+   }
+   if (ret != PIPE_OK)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
+   }
+   assert(ret == PIPE_OK);
+
+   ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
+   }
+   assert(ret == PIPE_OK);
+
+   return PIPE_OK;
+}
+
+static enum pipe_error
+destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
+{
+   enum pipe_error ret;
+
+   ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id);
+
+   /* Deallocate the memory slot allocated for this query */
+   deallocate_query(svga, sq);
+
+   return ret;
+}
+
+
+/**
+ * Rebind queries to the context.
+ */
+static void
+rebind_vgpu10_query(struct svga_context *svga)
+{
+   if (svga->swc->query_bind(svga->swc, svga->gb_query,
+                             SVGA_QUERY_FLAG_REF) != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      svga->swc->query_bind(svga->swc, svga->gb_query,
+                            SVGA_QUERY_FLAG_REF);
+   }
+
+   svga->rebind.flags.query = FALSE;
+}
+
+
+static enum pipe_error
+begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error ret = PIPE_OK;
+   int status = 0;
+
+   sws->fence_reference(sws, &sq->fence, NULL);
+
+   /* Initialize the query state to NEW */
+   status = sws->query_init(sws, sq->gb_query, sq->offset, SVGA3D_QUERYSTATE_NEW);
+   if (status)
+      return PIPE_ERROR;
+
+   if (svga->rebind.flags.query) {
+      rebind_vgpu10_query(svga);
+   }
+
+   /* Send the BeginQuery command to the device */
+   ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
+   }
+   return ret;
+}
+
+static enum pipe_error
+end_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error ret = PIPE_OK;
+
+   if (svga->rebind.flags.query) {
+      rebind_vgpu10_query(svga);
+   }
+
+   ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
+   }
+
+   /* The fence-finish logic is copied here from get_query_result_vgpu10().
+    * This helps with cases where svga_begin_query() might be called again
+    * before svga_get_query_result(), such as with GL_TIME_ELAPSED.
+    */
+   if (!sq->fence) {
+      svga_context_flush(svga, &sq->fence);
+   }
+   sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+
+   return ret;
+}
+
+static boolean
+get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq,
+                        boolean wait, void *result, int resultLen)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   SVGA3dQueryState queryState;
+
+   if (svga->rebind.flags.query) {
+      rebind_vgpu10_query(svga);
+   }
+
+   sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
+
+   if (queryState == SVGA3D_QUERYSTATE_PENDING) {
+      if (!wait)
+         return FALSE;
+      sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+      sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
+   }
+
+   assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED ||
+          queryState == SVGA3D_QUERYSTATE_FAILED);
+
+   return TRUE;
+}
 
 static struct pipe_query *
 svga_create_query(struct pipe_context *pipe,
                   unsigned query_type,
                   unsigned index)
 {
-   struct svga_context *svga = svga_context( pipe );
-   struct svga_screen *svgascreen = svga_screen(pipe->screen);
-   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_context *svga = svga_context(pipe);
    struct svga_query *sq;
 
-   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   assert(query_type < SVGA_QUERY_MAX);
 
    sq = CALLOC_STRUCT(svga_query);
    if (!sq)
-      goto no_sq;
+      goto fail;
+
+   /* Allocate an integer ID for the query */
+   sq->id = util_bitmask_add(svga->query_id_bm);
+   if (sq->id == UTIL_BITMASK_INVALID_INDEX)
+      goto fail;
+
+   SVGA_DBG(DEBUG_QUERY, "%s type=%d sq=0x%x id=%d\n", __FUNCTION__,
+            query_type, sq, sq->id);
 
    switch (query_type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
       sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;
+      if (svga_have_vgpu10(svga)) {
+         define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult));
 
-      sq->hwbuf = svga_winsys_buffer_create(svga, 1,
-                                            SVGA_BUFFER_USAGE_PINNED,
-                                            sizeof *sq->queryResult);
-      if (!sq->hwbuf) {
-         debug_printf("svga: failed to alloc query object!\n");
-         goto no_hwbuf;
-      }
+         /*
+          * In OpenGL, an occlusion counter query can be used for
+          * conditional rendering; in DX10, however, only an
+          * OCCLUSION_PREDICATE query can be used for predication.
+          * Hence, we create an occlusion predicate query alongside the
+          * occlusion counter query, and when the counter query is used
+          * for predication, the associated predicate query is the one
+          * passed to the SetPredication command.
+          */
+         sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index);
 
-      sq->queryResult = (SVGA3dQueryResult *)
-         sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE);
-      if (!sq->queryResult) {
-         debug_printf("svga: failed to map query object!\n");
-         goto no_query_result;
+      } else {
+         define_query_vgpu9(svga, sq);
       }
-
-      sq->queryResult->totalSize = sizeof *sq->queryResult;
-      sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
-
-      /* We request the buffer to be pinned and assume it is always mapped.
-       * The reason is that we don't want to wait for fences when checking the
-       * query status.
-       */
-      sws->buffer_unmap(sws, sq->hwbuf);
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      assert(svga_have_vgpu10(svga));
+      sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE;
+      define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult));
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+      assert(svga_have_vgpu10(svga));
+      sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;
+      define_query_vgpu10(svga, sq,
+                          sizeof(SVGADXStreamOutStatisticsQueryResult));
+      break;
+   case PIPE_QUERY_TIMESTAMP:
+      assert(svga_have_vgpu10(svga));
+      sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP;
+      define_query_vgpu10(svga, sq,
+                          sizeof(SVGADXTimestampQueryResult));
       break;
    case SVGA_QUERY_DRAW_CALLS:
    case SVGA_QUERY_FALLBACKS:
@@ -129,28 +732,50 @@ svga_create_query(struct pipe_context *pipe,
 
    return &sq->base;
 
-no_query_result:
-   sws->buffer_destroy(sws, sq->hwbuf);
-no_hwbuf:
+fail:
    FREE(sq);
-no_sq:
    return NULL;
 }
 
-
 static void
 svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
 {
-   struct svga_screen *svgascreen = svga_screen(pipe->screen);
-   struct svga_winsys_screen *sws = svgascreen->sws;
-   struct svga_query *sq = svga_query( q );
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   struct svga_query *sq;
 
-   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
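+   /* A NULL query means: destroy the per-context gb query object and all
+    * of the related query structures.
+    */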
+   if (q == NULL) {
+      destroy_gb_query_obj(svga);
+      return;
+   }
+
+   sq = svga_query(q);
+
+   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
+            sq, sq->id);
 
    switch (sq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      sws->buffer_destroy(sws, sq->hwbuf);
-      sq->hwbuf = NULL;
+      if (svga_have_vgpu10(svga)) {
+         /* make sure to also destroy any associated predicate query */
+         if (sq->predicate)
+            svga_destroy_query(pipe, sq->predicate);
+         destroy_query_vgpu10(svga, sq);
+      } else {
+         sws->buffer_destroy(sws, sq->hwbuf);
+      }
+      sws->fence_reference(sws, &sq->fence, NULL);
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      assert(svga_have_vgpu10(svga));
+      destroy_query_vgpu10(svga, sq);
+      sws->fence_reference(sws, &sq->fence, NULL);
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_TIMESTAMP:
+      assert(svga_have_vgpu10(svga));
+      destroy_query_vgpu10(svga, sq);
       sws->fence_reference(sws, &sq->fence, NULL);
       break;
    case SVGA_QUERY_DRAW_CALLS:
@@ -162,6 +787,9 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
       assert(!"svga: unexpected query type in svga_destroy_query()");
    }
 
+   /* Free the query id */
+   util_bitmask_clear(svga->query_id_bm, sq->id);
+
    FREE(sq);
 }
 
@@ -169,13 +797,15 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
 static boolean
 svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
 {
-   struct svga_screen *svgascreen = svga_screen(pipe->screen);
-   struct svga_winsys_screen *sws = svgascreen->sws;
-   struct svga_context *svga = svga_context( pipe );
-   struct svga_query *sq = svga_query( q );
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_query *sq = svga_query(q);
    enum pipe_error ret;
 
-   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   assert(sq);
+   assert(sq->type < SVGA_QUERY_MAX);
+
+   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
+            sq, sq->id);
 
    /* Need to flush out buffered drawing commands so that they don't
     * get counted in the query results.
@@ -184,31 +814,33 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
 
    switch (sq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      assert(!svga->sq);
-      if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
-         /* The application doesn't care for the pending query result.
-          * We cannot let go of the existing buffer and just get a new one
-          * because its storage may be reused for other purposes and clobbered
-          * by the host when it determines the query result.  So the only
-          * option here is to wait for the existing query's result -- not a
-          * big deal, given that no sane application would do this.
-          */
-         uint64_t result;
-         svga_get_query_result(pipe, q, TRUE, (void*)&result);
-         assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
-      }
-
-      sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
-      sws->fence_reference(sws, &sq->fence, NULL);
-
-      ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
-      if (ret != PIPE_OK) {
-         svga_context_flush(svga, NULL);
-         ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
-         assert(ret == PIPE_OK);
+      if (svga_have_vgpu10(svga)) {
+         ret = begin_query_vgpu10(svga, sq);
+         /* also need to start the associated occlusion predicate query */
+         if (sq->predicate) {
+            enum pipe_error status;
+            status = begin_query_vgpu10(svga, svga_query(sq->predicate));
+            assert(status == PIPE_OK);
+            (void) status;
+         }
+      } else {
+         ret = begin_query_vgpu9(svga, sq);
       }
-
-      svga->sq = sq;
+      assert(ret == PIPE_OK);
+      (void) ret;
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      assert(svga_have_vgpu10(svga));
+      ret = begin_query_vgpu10(svga, sq);
+      assert(ret == PIPE_OK);
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_TIMESTAMP:
+      assert(svga_have_vgpu10(svga));
+      ret = begin_query_vgpu10(svga, sq);
+      assert(ret == PIPE_OK);
       break;
    case SVGA_QUERY_DRAW_CALLS:
       sq->begin_count = svga->num_draw_calls;
@@ -222,6 +854,9 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
    default:
       assert(!"unexpected query type in svga_begin_query()");
    }
+
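+   /* Remember the active query of this type; unlike the old single
+    * svga->sq pointer, one query per query type can now be outstanding.
+    */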
+   svga->sq[sq->type] = sq;
+
    return true;
 }
 
@@ -229,35 +864,57 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
 static void
 svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
 {
-   struct svga_context *svga = svga_context( pipe );
-   struct svga_query *sq = svga_query( q );
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_query *sq = svga_query(q);
    enum pipe_error ret;
 
-   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   assert(sq);
+   assert(sq->type < SVGA_QUERY_MAX);
+
+   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
+            sq, sq->id);
+
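+   /* Gallium allows a TIMESTAMP query to reach end_query without a
+    * matching begin_query; begin it implicitly here in that case.
+    */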
+   if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq)
+      svga_begin_query(pipe, q);
 
    svga_hwtnl_flush_retry(svga);
 
+   assert(svga->sq[sq->type] == sq);
+
    switch (sq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      assert(svga->sq == sq);
-
-      /* Set to PENDING before sending EndQuery. */
-      sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
-
-      ret = SVGA3D_EndQuery( svga->swc, sq->svga_type, sq->hwbuf);
-      if (ret != PIPE_OK) {
-         svga_context_flush(svga, NULL);
-         ret = SVGA3D_EndQuery( svga->swc, sq->svga_type, sq->hwbuf);
-         assert(ret == PIPE_OK);
+      if (svga_have_vgpu10(svga)) {
+         ret = end_query_vgpu10(svga, sq);
+         /* also need to end the associated occlusion predicate query */
+         if (sq->predicate) {
+            enum pipe_error status;
+            status = end_query_vgpu10(svga, svga_query(sq->predicate));
+            assert(status == PIPE_OK);
+            (void) status;
+         }
+      } else {
+         ret = end_query_vgpu9(svga, sq);
       }
-
+      assert(ret == PIPE_OK);
+      (void) ret;
       /* TODO: Delay flushing. We don't really need to flush here, just ensure
        * that there is one flush before svga_get_query_result attempts to get
        * the result.
        */
       svga_context_flush(svga, NULL);
-
-      svga->sq = NULL;
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      assert(svga_have_vgpu10(svga));
+      ret = end_query_vgpu10(svga, sq);
+      assert(ret == PIPE_OK);
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_TIMESTAMP:
+      assert(svga_have_vgpu10(svga));
+      ret = end_query_vgpu10(svga, sq);
+      assert(ret == PIPE_OK);
       break;
    case SVGA_QUERY_DRAW_CALLS:
       sq->end_count = svga->num_draw_calls;
@@ -271,6 +928,7 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
    default:
       assert(!"unexpected query type in svga_end_query()");
    }
+   svga->sq[sq->type] = NULL;
 }
 
 
@@ -280,49 +938,75 @@ svga_get_query_result(struct pipe_context *pipe,
                       boolean wait,
                       union pipe_query_result *vresult)
 {
-   struct svga_context *svga = svga_context( pipe );
-   struct svga_screen *svgascreen = svga_screen( pipe->screen );
-   struct svga_winsys_screen *sws = svgascreen->sws;
-   struct svga_query *sq = svga_query( q );
-   SVGA3dQueryState state;
-   uint64_t *result = (uint64_t *) vresult;
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_query *sq = svga_query(q);
+   uint64_t *result = (uint64_t *)vresult;
+   boolean ret = TRUE;
+
+   assert(sq);
 
-   SVGA_DBG(DEBUG_QUERY, "%s wait: %d\n", __FUNCTION__);
+   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d wait: %d\n",
+            __FUNCTION__, sq, sq->id, wait);
 
    switch (sq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      /* The query status won't be updated by the host unless
-       * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause
-       * a synchronous wait on the host.
-       */
-      if (!sq->fence) {
-         enum pipe_error ret;
-
-         ret = SVGA3D_WaitForQuery( svga->swc, sq->svga_type, sq->hwbuf);
-         if (ret != PIPE_OK) {
-            svga_context_flush(svga, NULL);
-            ret = SVGA3D_WaitForQuery( svga->swc, sq->svga_type, sq->hwbuf);
-            assert(ret == PIPE_OK);
-         }
-
-         svga_context_flush(svga, &sq->fence);
-
-         assert(sq->fence);
+      if (svga_have_vgpu10(svga)) {
+         SVGADXOcclusionQueryResult occResult;
+         ret = get_query_result_vgpu10(svga, sq, wait,
+                                       (void *)&occResult, sizeof(occResult));
+         *result = (uint64_t)occResult.samplesRendered;
+      } else {
+         ret = get_query_result_vgpu9(svga, sq, wait, (uint64_t *)result);
       }
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE: {
+      SVGADXOcclusionPredicateQueryResult occResult;
+      assert(svga_have_vgpu10(svga));
+      ret = get_query_result_vgpu10(svga, sq, wait,
+                                    (void *)&occResult, sizeof(occResult));
+      vresult->b = occResult.anySamplesRendered != 0;
+      break;
+   }
+   case PIPE_QUERY_SO_STATISTICS: {
+      SVGADXStreamOutStatisticsQueryResult sResult;
+      struct pipe_query_data_so_statistics *pResult =
+         (struct pipe_query_data_so_statistics *)vresult;
 
-      state = sq->queryResult->state;
-      if (state == SVGA3D_QUERYSTATE_PENDING) {
-         if (!wait)
-            return FALSE;
-         sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
-         state = sq->queryResult->state;
-      }
+      assert(svga_have_vgpu10(svga));
+      ret = get_query_result_vgpu10(svga, sq, wait,
+                                    (void *)&sResult, sizeof(sResult));
+      pResult->num_primitives_written = sResult.numPrimitivesWritten;
+      pResult->primitives_storage_needed = sResult.numPrimitivesRequired;
+      break;
+   }
+   case PIPE_QUERY_TIMESTAMP: {
+      SVGADXTimestampQueryResult sResult;
+
+      assert(svga_have_vgpu10(svga));
+      ret = get_query_result_vgpu10(svga, sq, wait,
+                                    (void *)&sResult, sizeof(sResult));
+      *result = (uint64_t)sResult.timestamp;
+      break;
+   }
+   case PIPE_QUERY_PRIMITIVES_GENERATED: {
+      SVGADXStreamOutStatisticsQueryResult sResult;
 
-      assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||
-             state == SVGA3D_QUERYSTATE_FAILED);
+      assert(svga_have_vgpu10(svga));
+      ret = get_query_result_vgpu10(svga, sq, wait,
+                                    (void *)&sResult, sizeof sResult);
+      *result = (uint64_t)sResult.numPrimitivesRequired;
+      break;
+   }
+   case PIPE_QUERY_PRIMITIVES_EMITTED: {
+      SVGADXStreamOutStatisticsQueryResult sResult;
 
-      *result = (uint64_t) sq->queryResult->result32;
+      assert(svga_have_vgpu10(svga));
+      ret = get_query_result_vgpu10(svga, sq, wait,
+                                    (void *)&sResult, sizeof sResult);
+      *result = (uint64_t)sResult.numPrimitivesWritten;
       break;
+   }
    case SVGA_QUERY_DRAW_CALLS:
       /* fall-through */
    case SVGA_QUERY_FALLBACKS:
@@ -335,9 +1019,73 @@ svga_get_query_result(struct pipe_context *pipe,
       assert(!"unexpected query type in svga_get_query_result");
    }
 
-   SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, (unsigned)*result);
+   SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, *((uint64_t *)vresult));
 
-   return TRUE;
+   return ret;
+}
+
+static void
+svga_render_condition(struct pipe_context *pipe, struct pipe_query *q,
+                      boolean condition, uint mode)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   struct svga_query *sq = svga_query(q);
+   SVGA3dQueryId queryId;
+   enum pipe_error ret;
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+
+   assert(svga_have_vgpu10(svga));
+   if (sq == NULL) {
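+      /* A NULL query disables predication: SVGA3D_INVALID_ID tells the
+       * device to render unconditionally.
+       */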
+      queryId = SVGA3D_INVALID_ID;
+   }
+   else {
+      assert(sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION ||
+             sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE);
+
+      if (sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION) {
+         assert(sq->predicate);
+         /**
+          * For conditional rendering, make sure to use the associated
+          * predicate query.
+          */
+         sq = svga_query(sq->predicate);
+      }
+      queryId = sq->id;
+
+      if ((mode == PIPE_RENDER_COND_WAIT ||
+           mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) {
+         sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+      }
+   }
+
+   ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
+                                      (uint32) condition);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
+                                         (uint32) condition);
+   }
+}
+
+
+/*
+ * This function is a workaround for the fact that we cannot query the
+ * renderer's time synchronously.
+ */
+static uint64_t
+svga_get_timestamp(struct pipe_context *pipe)
+{
+   struct pipe_query *q = svga_create_query(pipe, PIPE_QUERY_TIMESTAMP, 0);
+   union pipe_query_result result;
+
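+   /* Run a throw-away TIMESTAMP query through a full begin/end cycle and
+    * block for its result.
+    */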
+   svga_begin_query(pipe, q);
+   svga_end_query(pipe, q);
+   svga_get_query_result(pipe, q, TRUE, &result);
+   svga_destroy_query(pipe, q);
+
+   return result.u64;
 }
 
 
@@ -349,4 +1097,6 @@ svga_init_query_functions(struct svga_context *svga)
    svga->pipe.begin_query = svga_begin_query;
    svga->pipe.end_query = svga_end_query;
    svga->pipe.get_query_result = svga_get_query_result;
+   svga->pipe.render_condition = svga_render_condition;
+   svga->pipe.get_timestamp = svga_get_timestamp;
 }
index 356898a..a7aadac 100644 (file)
  *
  **********************************************************/
 
+#include "pipe/p_defines.h"
 #include "draw/draw_context.h"
+#include "util/u_bitmask.h"
 #include "util/u_inlines.h"
-#include "pipe/p_defines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
+#include "svga_cmd.h"
 #include "svga_context.h"
+#include "svga_hw_reg.h"
 #include "svga_screen.h"
 
-#include "svga_hw_reg.h"
 
 /* Hardware frontwinding is always set up as SVGA3D_FRONTWINDING_CW.
  */
@@ -61,6 +63,96 @@ static SVGA3dShadeMode svga_translate_flatshade( unsigned mode )
 }
 
 
+static unsigned
+translate_fill_mode(unsigned fill)
+{
+   switch (fill) {
+   case PIPE_POLYGON_MODE_POINT:
+      return SVGA3D_FILLMODE_POINT;
+   case PIPE_POLYGON_MODE_LINE:
+      return SVGA3D_FILLMODE_LINE;
+   case PIPE_POLYGON_MODE_FILL:
+      return SVGA3D_FILLMODE_FILL;
+   default:
+      assert(!"Bad fill mode");
+      return SVGA3D_FILLMODE_FILL;
+   }
+}
+
+
+static unsigned
+translate_cull_mode(unsigned cull)
+{
+   switch (cull) {
+   case PIPE_FACE_NONE:
+      return SVGA3D_CULL_NONE;
+   case PIPE_FACE_FRONT:
+      return SVGA3D_CULL_FRONT;
+   case PIPE_FACE_BACK:
+      return SVGA3D_CULL_BACK;
+   case PIPE_FACE_FRONT_AND_BACK:
+      /* NOTE: we simply no-op polygon drawing in svga_draw_vbo() */
+      return SVGA3D_CULL_NONE;
+   default:
+      assert(!"Bad cull mode");
+      return SVGA3D_CULL_NONE;
+   }
+}
+
+
+static void
+define_rasterizer_object(struct svga_context *svga,
+                         struct svga_rasterizer_state *rast)
+{
+   unsigned fill_mode = translate_fill_mode(rast->templ.fill_front);
+   unsigned cull_mode = translate_cull_mode(rast->templ.cull_face);
+   int depth_bias = rast->templ.offset_units;
+   float slope_scaled_depth_bias = rast->templ.offset_scale;
+   float depth_bias_clamp = 0.0; /* XXX fix me */
+   unsigned try;
+   const float line_width = rast->templ.line_width > 0.0f ?
+      rast->templ.line_width : 1.0f;
+   const uint8 line_factor = rast->templ.line_stipple_enable ?
+      rast->templ.line_stipple_factor : 0;
+   const uint16 line_pattern = rast->templ.line_stipple_enable ?
+      rast->templ.line_stipple_pattern : 0;
+
+   rast->id = util_bitmask_add(svga->rast_object_id_bm);
+
+   if (rast->templ.fill_front != rast->templ.fill_back) {
+      /* The VGPU10 device can't handle different front/back fill modes.
+       * We'll handle that with a swtnl/draw fallback.  But we need to
+       * make sure we always fill triangles in that case.
+       */
+      fill_mode = SVGA3D_FILLMODE_FILL;
+   }
+
+   for (try = 0; try < 2; try++) {
+      enum pipe_error ret =
+         SVGA3D_vgpu10_DefineRasterizerState(svga->swc,
+                                             rast->id,
+                                             fill_mode,
+                                             cull_mode,
+                                             rast->templ.front_ccw,
+                                             depth_bias,
+                                             depth_bias_clamp,
+                                             slope_scaled_depth_bias,
+                                             rast->templ.depth_clip,
+                                             rast->templ.scissor,
+                                             rast->templ.multisample,
+                                             rast->templ.line_smooth,
+                                             line_width,
+                                             rast->templ.line_stipple_enable,
+                                             line_factor,
+                                             line_pattern,
+                                             !rast->templ.flatshade_first);
+      if (ret == PIPE_OK)
+         return;
+      svga_context_flush(svga, NULL);
+   }
+}
+
+
 static void *
 svga_create_rasterizer_state(struct pipe_context *pipe,
                              const struct pipe_rasterizer_state *templ)
@@ -92,17 +184,24 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
    rast->antialiasedlineenable = templ->line_smooth;
    rast->lastpixel = templ->line_last_pixel;
    rast->pointsprite = templ->sprite_coord_enable != 0x0;
-   rast->pointsize = templ->point_size;
-   rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+
+   if (templ->point_smooth) {
+      /* For smooth points we need to generate fragments for at least
+       * a 2x2 region.  Otherwise the quad we draw may be too small and
+       * we may generate no fragments at all.
+       */
+      rast->pointsize = MAX2(2.0f, templ->point_size);
+   }
+   else {
+      rast->pointsize = templ->point_size;
+   }
+
+   rast->hw_fillmode = PIPE_POLYGON_MODE_FILL;
 
    /* Use swtnl + decomposition to implement these:
     */
-   if (templ->poly_stipple_enable) {
-      rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
-      rast->need_pipeline_tris_str = "poly stipple";
-   }
 
-   if (screen->maxLineWidth > 1.0F) {
+   if (templ->line_width <= screen->maxLineWidth) {
       /* pass line width to device */
       rast->linewidth = MAX2(1.0F, templ->line_width);
    }
@@ -129,7 +228,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
       }
    } 
 
-   if (templ->point_smooth) {
+   if (!svga_have_vgpu10(svga) && templ->point_smooth) {
       rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS;
       rast->need_pipeline_points_str = "smooth points";
    }
@@ -231,13 +330,13 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
          rast->depthbias = templ->offset_units;
       }
 
-      rast->hw_unfilled = fill;
+      rast->hw_fillmode = fill;
    }
 
    if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) {
       /* Turn off stuff which will get done in the draw module:
        */
-      rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+      rast->hw_fillmode = PIPE_POLYGON_MODE_FILL;
       rast->slopescaledepthbias = 0;
       rast->depthbias = 0;
    }
@@ -249,6 +348,10 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
       debug_printf(" tris: %s \n", rast->need_pipeline_tris_str);
    }
 
+   if (svga_have_vgpu10(svga)) {
+      define_rasterizer_object(svga, rast);
+   }
+
    return rast;
 }
 
@@ -258,18 +361,37 @@ static void svga_bind_rasterizer_state( struct pipe_context *pipe,
    struct svga_context *svga = svga_context(pipe);
    struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state;
 
-
-   draw_set_rasterizer_state(svga->swtnl.draw, raster ? &raster->templ : NULL,
-                             state);
    svga->curr.rast = raster;
 
    svga->dirty |= SVGA_NEW_RAST;
+
+   if (raster && raster->templ.poly_stipple_enable) {
+      svga->dirty |= SVGA_NEW_STIPPLE;
+   }
 }
 
-static void svga_delete_rasterizer_state(struct pipe_context *pipe,
-                                         void *raster)
+static void
+svga_delete_rasterizer_state(struct pipe_context *pipe, void *state)
 {
-   FREE(raster);
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_rasterizer_state *raster =
+      (struct svga_rasterizer_state *) state;
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret =
+         SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id);
+      }
+
+      if (raster->id == svga->state.hw_draw.rasterizer_id)
+         svga->state.hw_draw.rasterizer_id = SVGA3D_INVALID_ID;
+
+      util_bitmask_clear(svga->rast_object_id_bm, raster->id);
+   }
+
+   FREE(state);
 }
 
 
index effd490..60e2d44 100644 (file)
  *
  **********************************************************/
 
-#include "util/u_inlines.h"
 #include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
 #include "util/u_format.h"
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "tgsi/tgsi_parse.h"
 
 #include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
 #include "svga_resource_texture.h"
+#include "svga_surface.h"
+#include "svga_sampler_view.h"
 
-#include "svga_debug.h"
 
 static inline unsigned
 translate_wrap_mode(unsigned wrap)
@@ -91,6 +95,126 @@ static inline unsigned translate_mip_filter( unsigned filter )
    }
 }
 
+
+static uint8
+translate_comparison_func(unsigned func)
+{
+   switch (func) {
+   case PIPE_FUNC_NEVER:
+      return SVGA3D_COMPARISON_NEVER;
+   case PIPE_FUNC_LESS:
+      return SVGA3D_COMPARISON_LESS;
+   case PIPE_FUNC_EQUAL:
+      return SVGA3D_COMPARISON_EQUAL;
+   case PIPE_FUNC_LEQUAL:
+      return SVGA3D_COMPARISON_LESS_EQUAL;
+   case PIPE_FUNC_GREATER:
+      return SVGA3D_COMPARISON_GREATER;
+   case PIPE_FUNC_NOTEQUAL:
+      return SVGA3D_COMPARISON_NOT_EQUAL;
+   case PIPE_FUNC_GEQUAL:
+      return SVGA3D_COMPARISON_GREATER_EQUAL;
+   case PIPE_FUNC_ALWAYS:
+      return SVGA3D_COMPARISON_ALWAYS;
+   default:
+      assert(!"Invalid comparison function");
+      return SVGA3D_COMPARISON_ALWAYS;
+   }
+}
+
+
+/**
+ * Translate filtering state to vgpu10 format.
+ */
+static SVGA3dFilter
+translate_filter_mode(unsigned img_filter,
+                      unsigned min_filter,
+                      unsigned mag_filter,
+                      boolean anisotropic,
+                      boolean compare)
+{
+   SVGA3dFilter mode = 0;
+
+   if (img_filter == PIPE_TEX_FILTER_LINEAR)
+      mode |= SVGA3D_FILTER_MIP_LINEAR;
+   if (min_filter == PIPE_TEX_FILTER_LINEAR)
+      mode |= SVGA3D_FILTER_MIN_LINEAR;
+   if (mag_filter == PIPE_TEX_FILTER_LINEAR)
+      mode |= SVGA3D_FILTER_MAG_LINEAR;
+   if (anisotropic)
+      mode |= SVGA3D_FILTER_ANISOTROPIC;
+   if (compare)
+      mode |= SVGA3D_FILTER_COMPARE;
+
+   return mode;
+}
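
For illustration, a sketch of what the translation yields for trilinear sampling (hypothetical function; note the call site below passes ps->min_mip_filter as the first argument):

static SVGA3dFilter
example_trilinear_filter(void)
{
   /* linear mip/min/mag, no anisotropy, no shadow compare */
   return translate_filter_mode(PIPE_TEX_FILTER_LINEAR,
                                PIPE_TEX_FILTER_LINEAR,
                                PIPE_TEX_FILTER_LINEAR,
                                FALSE, FALSE);
   /* == SVGA3D_FILTER_MIP_LINEAR | SVGA3D_FILTER_MIN_LINEAR |
    *    SVGA3D_FILTER_MAG_LINEAR */
}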
+
+
+/**
+ * Define a vgpu10 sampler state.
+ */
+static void
+define_sampler_state_object(struct svga_context *svga,
+                            struct svga_sampler_state *ss,
+                            const struct pipe_sampler_state *ps)
+{
+   uint8_t max_aniso = (uint8_t) 255; /* XXX fix me */
+   boolean anisotropic;
+   uint8 compare_func;
+   SVGA3dFilter filter;
+   SVGA3dRGBAFloat bcolor;
+   unsigned try;
+   float min_lod, max_lod;
+
+   assert(svga_have_vgpu10(svga));
+
+   anisotropic = ss->aniso_level > 1.0f;
+
+   filter = translate_filter_mode(ps->min_mip_filter,
+                                  ps->min_img_filter,
+                                  ps->mag_img_filter,
+                                  anisotropic,
+                                  ss->compare_mode);
+
+   compare_func = translate_comparison_func(ss->compare_func);
+
+   COPY_4V(bcolor.value, ps->border_color.f);
+
+   ss->id = util_bitmask_add(svga->sampler_object_id_bm);
+
+   assert(ps->min_lod <= ps->max_lod);
+
+   if (ps->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+      /* just use the base level image */
+      min_lod = max_lod = 0.0f;
+   }
+   else {
+      min_lod = ps->min_lod;
+      max_lod = ps->max_lod;
+   }
+
+   /* Loop in case the command buffer is full and we need to flush and retry */
+   for (try = 0; try < 2; try++) {
+      enum pipe_error ret =
+         SVGA3D_vgpu10_DefineSamplerState(svga->swc,
+                                          ss->id,
+                                          filter,
+                                          ss->addressu,
+                                          ss->addressv,
+                                          ss->addressw,
+                                          ss->lod_bias, /* float */
+                                          max_aniso,
+                                          compare_func,
+                                          bcolor,
+                                          min_lod,       /* float */
+                                          max_lod);      /* float */
+      if (ret == PIPE_OK)
+         return;
+      svga_context_flush(svga, NULL);
+   }
+}
+
+
 static void *
 svga_create_sampler_state(struct pipe_context *pipe,
                           const struct pipe_sampler_state *sampler)
@@ -141,6 +265,10 @@ svga_create_sampler_state(struct pipe_context *pipe,
       }
    }
 
+   if (svga_have_vgpu10(svga)) {
+      define_sampler_state_object(svga, cso, sampler);
+   }
+
    SVGA_DBG(DEBUG_VIEWS, "min %u, view(min %u, max %u) lod, mipfilter %s\n",
             cso->min_lod, cso->view_min_lod, cso->view_max_lod,
             cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING");
@@ -161,19 +289,19 @@ svga_bind_sampler_states(struct pipe_context *pipe,
    assert(shader < PIPE_SHADER_TYPES);
    assert(start + num <= PIPE_MAX_SAMPLERS);
 
-   /* we only support fragment shader samplers at this time */
-   if (shader != PIPE_SHADER_FRAGMENT)
+   /* Pre-VGPU10 only supports FS textures */
+   if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
       return;
 
    for (i = 0; i < num; i++)
-      svga->curr.sampler[start + i] = samplers[i];
+      svga->curr.sampler[shader][start + i] = samplers[i];
 
    /* find highest non-null sampler[] entry */
    {
-      unsigned j = MAX2(svga->curr.num_samplers, start + num);
-      while (j > 0 && svga->curr.sampler[j - 1] == NULL)
+      unsigned j = MAX2(svga->curr.num_samplers[shader], start + num);
+      while (j > 0 && svga->curr.sampler[shader][j - 1] == NULL)
          j--;
-      svga->curr.num_samplers = j;
+      svga->curr.num_samplers[shader] = j;
    }
 
    svga->dirty |= SVGA_NEW_SAMPLER;
@@ -183,6 +311,22 @@ svga_bind_sampler_states(struct pipe_context *pipe,
 static void svga_delete_sampler_state(struct pipe_context *pipe,
                                       void *sampler)
 {
+   struct svga_sampler_state *ss = (struct svga_sampler_state *) sampler;
+   struct svga_context *svga = svga_context(pipe);
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret;
+
+      svga_hwtnl_flush_retry(svga);
+
+      ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id);
+      }
+      util_bitmask_clear(svga->sampler_object_id_bm, ss->id);
+   }
+
    FREE(sampler);
 }
 
@@ -192,17 +336,21 @@ svga_create_sampler_view(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          const struct pipe_sampler_view *templ)
 {
-   struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
-
-   if (view) {
-      *view = *templ;
-      view->reference.count = 1;
-      view->texture = NULL;
-      pipe_resource_reference(&view->texture, texture);
-      view->context = pipe;
+   struct svga_pipe_sampler_view *sv = CALLOC_STRUCT(svga_pipe_sampler_view);
+
+   if (!sv) {
+      return NULL;
    }
 
-   return view;
+   sv->base = *templ;
+   sv->base.reference.count = 1;
+   sv->base.texture = NULL;
+   pipe_resource_reference(&sv->base.texture, texture);
+
+   sv->base.context = pipe;
+   sv->id = SVGA3D_INVALID_ID;
+
+   return &sv->base;
 }
 
 
@@ -210,8 +358,37 @@ static void
 svga_sampler_view_destroy(struct pipe_context *pipe,
                           struct pipe_sampler_view *view)
 {
-   pipe_resource_reference(&view->texture, NULL);
-   FREE(view);
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_pipe_sampler_view *sv = svga_pipe_sampler_view(view);
+
+   if (svga_have_vgpu10(svga) && sv->id != SVGA3D_INVALID_ID) {
+      if (view->context != pipe) {
+         /* The SVGA3D device will generate an error (and on Linux, cause
+          * us to abort) if we try to destroy a shader resource view from
+          * a context other than the one it was created with.  Skip the
+          * SVGA3D_vgpu10_DestroyShaderResourceView() and leak the sampler
+          * view for now.  This should happen only occasionally, when a
+          * shared texture is deleted.
+          */
+         _debug_printf("context mismatch in %s\n", __func__);
+      }
+      else {
+         enum pipe_error ret;
+
+         svga_hwtnl_flush_retry(svga); /* XXX is this needed? */
+
+         ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
+         }
+         util_bitmask_clear(svga->sampler_view_id_bm, sv->id);
+      }
+   }
+
+   pipe_resource_reference(&sv->base.texture, NULL);
+
+   FREE(sv);
 }
 
 static void
@@ -227,20 +404,20 @@ svga_set_sampler_views(struct pipe_context *pipe,
    uint i;
 
    assert(shader < PIPE_SHADER_TYPES);
-   assert(start + num <= Elements(svga->curr.sampler_views));
+   assert(start + num <= Elements(svga->curr.sampler_views[shader]));
 
-   /* we only support fragment shader sampler views at this time */
-   if (shader != PIPE_SHADER_FRAGMENT)
+   /* Pre-VGPU10 only supports FS textures */
+   if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
       return;
 
    for (i = 0; i < num; i++) {
-      if (svga->curr.sampler_views[start + i] != views[i]) {
+      if (svga->curr.sampler_views[shader][start + i] != views[i]) {
          /* Note: we're using pipe_sampler_view_release() here to work around
           * a possible crash when the old view belongs to another context that
           * was already destroyed.
           */
-         pipe_sampler_view_release(pipe, &svga->curr.sampler_views[start + i]);
-         pipe_sampler_view_reference(&svga->curr.sampler_views[start + i],
+         pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][start + i]);
+         pipe_sampler_view_reference(&svga->curr.sampler_views[shader][start + i],
                                      views[i]);
       }
 
@@ -256,10 +433,10 @@ svga_set_sampler_views(struct pipe_context *pipe,
 
    /* find highest non-null sampler_views[] entry */
    {
-      unsigned j = MAX2(svga->curr.num_sampler_views, start + num);
-      while (j > 0 && svga->curr.sampler_views[j - 1] == NULL)
+      unsigned j = MAX2(svga->curr.num_sampler_views[shader], start + num);
+      while (j > 0 && svga->curr.sampler_views[shader][j - 1] == NULL)
          j--;
-      svga->curr.num_sampler_views = j;
+      svga->curr.num_sampler_views[shader] = j;
    }
 
    svga->dirty |= SVGA_NEW_TEXTURE_BINDING;
@@ -270,7 +447,31 @@ svga_set_sampler_views(struct pipe_context *pipe,
       svga->dirty |= SVGA_NEW_TEXTURE_FLAGS;
       svga->curr.tex_flags.flag_1d = flag_1d;
       svga->curr.tex_flags.flag_srgb = flag_srgb;
-   }  
+   }
+
+   /* Check if any of the sampler view resources collide with the framebuffer
+    * color buffers or the depth/stencil resource.  If so, set the
+    * SVGA_NEW_FRAME_BUFFER dirty bit so that emit_framebuffer can be invoked
+    * to create a backing view for the conflicting surface view.
+    */
+   for (i = 0; i < svga->curr.framebuffer.nr_cbufs; i++) {
+      if (svga->curr.framebuffer.cbufs[i]) {
+         struct svga_surface *s = svga_surface(svga->curr.framebuffer.cbufs[i]);
+         if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) {
+            svga->dirty |= SVGA_NEW_FRAME_BUFFER;
+            break;
+         }
+      }
+   }
+
+   if (svga->curr.framebuffer.zsbuf) {
+      struct svga_surface *s = svga_surface(svga->curr.framebuffer.zsbuf);
+      if (s) {
+         if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) {
+            svga->dirty |= SVGA_NEW_FRAME_BUFFER;
+         }
+      }
+   }
 }
 
 
diff --git a/src/gallium/drivers/svga/svga_pipe_streamout.c b/src/gallium/drivers/svga/svga_pipe_streamout.c
new file mode 100644 (file)
index 0000000..1da6320
--- /dev/null
@@ -0,0 +1,320 @@
+/**********************************************************
+ * Copyright 2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+
+#include "svga_cmd.h"
+#include "svga_context.h"
+#include "svga_resource_buffer.h"
+#include "svga_shader.h"
+#include "svga_debug.h"
+#include "svga_streamout.h"
+
+struct svga_stream_output_target {
+   struct pipe_stream_output_target base;
+};
+
+/** cast wrapper */
+static inline struct svga_stream_output_target *
+svga_stream_output_target(struct pipe_stream_output_target *s)
+{
+   return (struct svga_stream_output_target *)s;
+}
+
+struct svga_stream_output *
+svga_create_stream_output(struct svga_context *svga,
+                          struct svga_shader *shader,
+                          const struct pipe_stream_output_info *info)
+{
+   struct svga_stream_output *streamout;
+   SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS];
+   unsigned strides[SVGA3D_DX_MAX_SOTARGETS];
+   unsigned i;
+   enum pipe_error ret;
+   unsigned id;
+
+   assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS);
+
+   /* The Gallium utility code can create shaders with stream output.
+    * For non-VGPU10 devices, just return NULL.
+    */
+   if (!svga_have_vgpu10(svga))
+      return NULL;
+
+   assert(info->num_outputs <= SVGA3D_MAX_STREAMOUT_DECLS);
+
+   /* Allocate an integer ID for the stream output */
+   id = util_bitmask_add(svga->stream_output_id_bm);
+   if (id == UTIL_BITMASK_INVALID_INDEX) {
+      return NULL;
+   }
+
+   /* Allocate the streamout data structure */
+   streamout = CALLOC_STRUCT(svga_stream_output);
+
+   if (streamout == NULL)
+      return NULL;
+
+   streamout->info = *info;
+   streamout->id = id;
+   streamout->pos_out_index = -1;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s, num_outputs=%d id=%d\n", __FUNCTION__,
+            info->num_outputs, id);
+
+   /* init whole decls and stride arrays to zero to avoid garbage values */
+   memset(decls, 0, sizeof(decls));
+   memset(strides, 0, sizeof(strides));
+
+   for (i = 0; i < info->num_outputs; i++) {
+      unsigned reg_idx = info->output[i].register_index;
+      unsigned buf_idx = info->output[i].output_buffer;
+      const unsigned sem_name = shader->info.output_semantic_name[reg_idx];
+
+      assert(buf_idx < PIPE_MAX_SO_BUFFERS);
+
+      if (sem_name == TGSI_SEMANTIC_POSITION) {
+         /* Streaming out POSITION: replace the register index with
+          * the index of the NON_ADJUSTED position output.
+          */
+         decls[i].registerIndex = shader->info.num_outputs;
+
+         /* Save this output index so we can tell later whether this
+          * stream output includes a vertex position output.
+          */
+         streamout->pos_out_index = i;
+      }
+      else if (sem_name == TGSI_SEMANTIC_CLIPDIST) {
+         /* Use the shadow copy for clip distance because the CLIPDIST
+          * instruction is only emitted for enabled clip planes, while
+          * it's still valid to write to the ClipDistance variable for
+          * non-enabled clip planes.
+          */
+         decls[i].registerIndex = shader->info.num_outputs + 1 +
+                                  shader->info.output_semantic_index[reg_idx];
+      }
+      else {
+         decls[i].registerIndex = reg_idx;
+      }
+
+      decls[i].outputSlot = buf_idx;
+      decls[i].registerMask =
+         ((1 << info->output[i].num_components) - 1)
+            << info->output[i].start_component;
+
+      SVGA_DBG(DEBUG_STREAMOUT, "%d slot=%d regIdx=%d regMask=0x%x\n",
+               i, decls[i].outputSlot, decls[i].registerIndex,
+               decls[i].registerMask);
+
+      strides[buf_idx] = info->stride[buf_idx] * sizeof(float);
+   }
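
A standalone worked example of the registerMask computation above (hypothetical values):

#include <stdio.h>

int main(void)
{
   /* e.g. a vec3 output occupying components .yzw:
    * num_components = 3, start_component = 1
    */
   unsigned num_components = 3, start_component = 1;
   unsigned mask = ((1u << num_components) - 1u) << start_component;
   printf("registerMask = 0x%x\n", mask);   /* prints 0xe (binary 1110) */
   return 0;
}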
+
+   ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id,
+                                          info->num_outputs,
+                                          strides,
+                                          decls);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id,
+                                             info->num_outputs,
+                                             strides,
+                                             decls);
+      if (ret != PIPE_OK) {
+         util_bitmask_clear(svga->stream_output_id_bm, id);
+         FREE(streamout);
+         streamout = NULL;
+      }
+   }
+   return streamout;
+}
+
+enum pipe_error
+svga_set_stream_output(struct svga_context *svga,
+                       struct svga_stream_output *streamout)
+{
+   enum pipe_error ret = PIPE_OK;
+   unsigned id = streamout ? streamout->id : SVGA3D_INVALID_ID;
+
+   if (!svga_have_vgpu10(svga)) {
+      return PIPE_OK;
+   }
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x id=%d\n", __FUNCTION__,
+            streamout, id);
+
+   if (svga->current_so != streamout) {
+      /* Save current SO state */
+      svga->current_so = streamout;
+
+      ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id);
+      }
+   }
+
+   return ret;
+}
+
+void
+svga_delete_stream_output(struct svga_context *svga,
+                          struct svga_stream_output *streamout)
+{
+   enum pipe_error ret;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x\n", __FUNCTION__, streamout);
+
+   assert(svga_have_vgpu10(svga));
+   assert(streamout != NULL);
+
+   ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id);
+   }
+
+   /* Release the ID */
+   util_bitmask_clear(svga->stream_output_id_bm, streamout->id);
+
+   /* Free streamout structure */
+   FREE(streamout);
+}
+
+static struct pipe_stream_output_target *
+svga_create_stream_output_target(struct pipe_context *pipe,
+                                 struct pipe_resource *buffer,
+                                 unsigned buffer_offset,
+                                 unsigned buffer_size)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_stream_output_target *sot;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s offset=%d size=%d\n", __FUNCTION__,
+            buffer_offset, buffer_size);
+
+   assert(svga_have_vgpu10(svga));
+   (void) svga;
+
+   sot = CALLOC_STRUCT(svga_stream_output_target);
+   if (!sot)
+      return NULL;
+
+   pipe_reference_init(&sot->base.reference, 1);
+   pipe_resource_reference(&sot->base.buffer, buffer);
+   sot->base.context = pipe;
+   sot->base.buffer = buffer;
+   sot->base.buffer_offset = buffer_offset;
+   sot->base.buffer_size = buffer_size;
+
+   return &sot->base;
+}
+
+static void
+svga_destroy_stream_output_target(struct pipe_context *pipe,
+                                  struct pipe_stream_output_target *target)
+{
+   struct svga_stream_output_target *sot = svga_stream_output_target(target);
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s\n", __FUNCTION__);
+
+   pipe_resource_reference(&sot->base.buffer, NULL);
+   FREE(sot);
+}
+
+static void
+svga_set_stream_output_targets(struct pipe_context *pipe,
+                               unsigned num_targets,
+                               struct pipe_stream_output_target **targets,
+                               const unsigned *offsets)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS];
+   enum pipe_error ret;
+   unsigned i;
+   unsigned num_so_targets;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__,
+            num_targets);
+
+   assert(svga_have_vgpu10(svga));
+
+   /* Mark the streamout buffers as dirty so that we'll issue readbacks
+    * before mapping.
+    */
+   for (i = 0; i < svga->num_so_targets; i++) {
+      struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer);
+      sbuf->dirty = TRUE;
+   }
+
+   assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS);
+
+   for (i = 0; i < num_targets; i++) {
+      struct svga_stream_output_target *sot
+         = svga_stream_output_target(targets[i]);
+      struct svga_buffer *sbuf = svga_buffer(sot->base.buffer);
+      unsigned size;
+
+      assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT);
+      (void) sbuf;
+
+      svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer);
+      svga->so_targets[i] = &sot->base;
+      soBindings[i].offset = sot->base.buffer_offset;
+
+      /* The size cannot extend beyond the end of the buffer.  Clamp it. */
+      size = MIN2(sot->base.buffer_size,
+                  sot->base.buffer->width0 - sot->base.buffer_offset);
+
+      soBindings[i].sizeInBytes = size;
+   }
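
A quick standalone example of the size clamp above (hypothetical numbers):

#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   unsigned width0 = 1024, buffer_offset = 896, buffer_size = 256;
   /* the binding must not extend past the end of the buffer */
   unsigned size = MIN2(buffer_size, width0 - buffer_offset);
   printf("sizeInBytes = %u\n", size);   /* prints 128, not 256 */
   return 0;
}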
+
+   /* unbind any previously bound stream output buffers */
+   for (; i < svga->num_so_targets; i++) {
+      svga->so_surfaces[i] = NULL;
+      svga->so_targets[i] = NULL;
+   }
+
+   num_so_targets = MAX2(svga->num_so_targets, num_targets);
+   ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
+                                    soBindings, svga->so_surfaces);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
+                                       soBindings, svga->so_surfaces);
+   }
+
+   svga->num_so_targets = num_targets;
+}
+
+void
+svga_init_stream_output_functions(struct svga_context *svga)
+{
+   svga->pipe.create_stream_output_target = svga_create_stream_output_target;
+   svga->pipe.stream_output_target_destroy = svga_destroy_stream_output_target;
+   svga->pipe.set_stream_output_targets = svga_set_stream_output_targets;
+}
index faf77f3..e0932a9 100644 (file)
  *
  **********************************************************/
 
+#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
+#include "util/u_format.h"
 #include "util/u_helpers.h"
 #include "util/u_inlines.h"
-#include "pipe/p_defines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_transfer.h"
 #include "tgsi/tgsi_parse.h"
 
-#include "svga_screen.h"
-#include "svga_resource_buffer.h"
 #include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_format.h"
+#include "svga_resource_buffer.h"
+#include "svga_screen.h"
 
 
 static void svga_set_vertex_buffers(struct pipe_context *pipe,
@@ -55,25 +59,33 @@ static void svga_set_index_buffer(struct pipe_context *pipe,
 {
    struct svga_context *svga = svga_context(pipe);
 
-   if (ib) {
-      pipe_resource_reference(&svga->curr.ib.buffer, ib->buffer);
-      memcpy(&svga->curr.ib, ib, sizeof(svga->curr.ib));
-   }
-   else {
-      pipe_resource_reference(&svga->curr.ib.buffer, NULL);
-      memset(&svga->curr.ib, 0, sizeof(svga->curr.ib));
-   }
+   util_set_index_buffer(&svga->curr.ib, ib);
+}
 
-   /* TODO make this more like a state */
+
+/**
+ * Does the given vertex attrib format need range adjustment in the VS?
+ * Range adjustment scales and biases values from [0,1] to [-1,1].
+ * This lets us avoid the swtnl path.
+ */
+static boolean
+attrib_needs_range_adjustment(enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8_SNORM:
+      return TRUE;
+   default:
+      return FALSE;
+   }
 }
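
The adjustment itself is a per-component scale and bias; a sketch of the fixup the vertex shader applies (hypothetical helper):

/* Map a value fetched as UBYTE4N in [0,1] (e.g. for R8G8B8_SNORM)
 * back to the [-1,1] range the application expects.
 */
static inline float
range_adjust(float v)
{
   return v * 2.0f - 1.0f;
}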
 
 
 /**
- * Given a gallium vertex element format, return the corresponding SVGA3D
- * format.  Return SVGA3D_DECLTYPE_MAX for unsupported gallium formats.
+ * Given a gallium vertex element format, return the corresponding
+ * SVGA3dDeclType.
  */
 static SVGA3dDeclType
-translate_vertex_format(enum pipe_format format)
+translate_vertex_format_to_decltype(enum pipe_format format)
 {
    switch (format) {
    case PIPE_FORMAT_R32_FLOAT:            return SVGA3D_DECLTYPE_FLOAT1;
@@ -94,10 +106,10 @@ translate_vertex_format(enum pipe_format format)
    case PIPE_FORMAT_R16G16_FLOAT:         return SVGA3D_DECLTYPE_FLOAT16_2;
    case PIPE_FORMAT_R16G16B16A16_FLOAT:   return SVGA3D_DECLTYPE_FLOAT16_4;
 
-   /* See attrib_needs_adjustment() and attrib_needs_w_to_1() below */
+   /* See attrib_needs_range_adjustment() above and VF_W_TO_1 below */
    case PIPE_FORMAT_R8G8B8_SNORM:         return SVGA3D_DECLTYPE_UBYTE4N;
 
-   /* See attrib_needs_w_to_1() below */
+   /* See the VF_W_TO_1 handling below */
    case PIPE_FORMAT_R16G16B16_SNORM:      return SVGA3D_DECLTYPE_SHORT4N;
    case PIPE_FORMAT_R16G16B16_UNORM:      return SVGA3D_DECLTYPE_USHORT4N;
    case PIPE_FORMAT_R8G8B8_UNORM:         return SVGA3D_DECLTYPE_UBYTE4N;
@@ -111,38 +123,121 @@ translate_vertex_format(enum pipe_format format)
 }
 
 
-/**
- * Does the given vertex attrib format need range adjustment in the VS?
- * Range adjustment scales and biases values from [0,1] to [-1,1].
- * This lets us avoid the swtnl path.
- */
-static boolean
-attrib_needs_range_adjustment(enum pipe_format format)
+static void
+define_input_element_object(struct svga_context *svga,
+                            struct svga_velems_state *velems)
 {
-   switch (format) {
-   case PIPE_FORMAT_R8G8B8_SNORM:
-      return TRUE;
-   default:
-      return FALSE;
+   SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS];
+   enum pipe_error ret;
+   unsigned i;
+
+   assert(velems->count <= PIPE_MAX_ATTRIBS);
+   assert(svga_have_vgpu10(svga));
+
+   for (i = 0; i < velems->count; i++) {
+      const struct pipe_vertex_element *elem = velems->velem + i;
+      SVGA3dSurfaceFormat svga_format;
+      unsigned vf_flags;
+
+      svga_translate_vertex_format_vgpu10(elem->src_format,
+                                          &svga_format, &vf_flags);
+
+      velems->decl_type[i] =
+         translate_vertex_format_to_decltype(elem->src_format);
+      elements[i].inputSlot = elem->vertex_buffer_index;
+      elements[i].alignedByteOffset = elem->src_offset;
+      elements[i].format = svga_format;
+
+      if (elem->instance_divisor) {
+         elements[i].inputSlotClass = SVGA3D_INPUT_PER_INSTANCE_DATA;
+         elements[i].instanceDataStepRate = elem->instance_divisor;
+      }
+      else {
+         elements[i].inputSlotClass = SVGA3D_INPUT_PER_VERTEX_DATA;
+         elements[i].instanceDataStepRate = 0;
+      }
+      elements[i].inputRegister = i;
+
+      if (elements[i].format == SVGA3D_FORMAT_INVALID) {
+         velems->need_swvfetch = TRUE;
+      }
+
+      if (util_format_is_pure_integer(elem->src_format)) {
+         velems->attrib_is_pure_int |= (1 << i);
+      }
+
+      if (vf_flags & VF_W_TO_1) {
+         velems->adjust_attrib_w_1 |= (1 << i);
+      }
+
+      if (vf_flags & VF_U_TO_F_CAST) {
+         velems->adjust_attrib_utof |= (1 << i);
+      }
+      else if (vf_flags & VF_I_TO_F_CAST) {
+         velems->adjust_attrib_itof |= (1 << i);
+      }
+
+      if (vf_flags & VF_BGRA) {
+         velems->attrib_is_bgra |= (1 << i);
+      }
+
+      if (vf_flags & VF_PUINT_TO_SNORM) {
+         velems->attrib_puint_to_snorm |= (1 << i);
+      }
+      else if (vf_flags & VF_PUINT_TO_USCALED) {
+         velems->attrib_puint_to_uscaled |= (1 << i);
+      }
+      else if (vf_flags & VF_PUINT_TO_SSCALED) {
+         velems->attrib_puint_to_sscaled |= (1 << i);
+      }
+   }
+
+   velems->id = util_bitmask_add(svga->input_element_object_id_bm);
+
+   ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count,
+                                           velems->id, elements);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count,
+                                              velems->id, elements);
+      assert(ret == PIPE_OK);
    }
 }
 
 
 /**
- * Does the given vertex attrib format need to have the W component set
- * to one in the VS?
+ * Translate the vertex element types to SVGA3dDeclType and check
+ * for VS-based vertex attribute adjustments.
  */
-static boolean
-attrib_needs_w_to_1(enum pipe_format format)
+static void
+translate_vertex_decls(struct svga_context *svga,
+                       struct svga_velems_state *velems)
 {
-   switch (format) {
-   case PIPE_FORMAT_R8G8B8_SNORM:
-   case PIPE_FORMAT_R8G8B8_UNORM:
-   case PIPE_FORMAT_R16G16B16_SNORM:
-   case PIPE_FORMAT_R16G16B16_UNORM:
-      return TRUE;
-   default:
-      return FALSE;
+   unsigned i;
+
+   assert(!svga_have_vgpu10(svga));
+
+   for (i = 0; i < velems->count; i++) {
+      const enum pipe_format f = velems->velem[i].src_format;
+      SVGA3dSurfaceFormat svga_format;
+      unsigned vf_flags;
+
+      svga_translate_vertex_format_vgpu10(f, &svga_format, &vf_flags);
+
+      velems->decl_type[i] = translate_vertex_format_to_decltype(f);
+      if (velems->decl_type[i] == SVGA3D_DECLTYPE_MAX) {
+         /* Unsupported format - use software fetch */
+         velems->need_swvfetch = TRUE;
+      }
+
+      /* Check for VS-based adjustments */
+      if (attrib_needs_range_adjustment(f)) {
+         velems->adjust_attrib_range |= (1 << i);
+      }
+
+      if (vf_flags & VF_W_TO_1) {
+         velems->adjust_attrib_w_1 |= (1 << i);
+      }
    }
 }
 
@@ -152,53 +247,73 @@ svga_create_vertex_elements_state(struct pipe_context *pipe,
                                   unsigned count,
                                   const struct pipe_vertex_element *attribs)
 {
+   struct svga_context *svga = svga_context(pipe);
    struct svga_velems_state *velems;
+
    assert(count <= PIPE_MAX_ATTRIBS);
    velems = (struct svga_velems_state *) MALLOC(sizeof(struct svga_velems_state));
    if (velems) {
-      unsigned i;
-
       velems->count = count;
       memcpy(velems->velem, attribs, sizeof(*attribs) * count);
 
       velems->need_swvfetch = FALSE;
       velems->adjust_attrib_range = 0x0;
+      velems->attrib_is_pure_int = 0x0;
       velems->adjust_attrib_w_1 = 0x0;
-
-      /* Translate Gallium vertex format to SVGA3dDeclType */
-      for (i = 0; i < count; i++) {
-         enum pipe_format f = attribs[i].src_format;
-         velems->decl_type[i] = translate_vertex_format(f);
-         if (velems->decl_type[i] == SVGA3D_DECLTYPE_MAX) {
-            /* Unsupported format - use software fetch */
-            velems->need_swvfetch = TRUE;
-            break;
-         }
-
-         if (attrib_needs_range_adjustment(f)) {
-            velems->adjust_attrib_range |= (1 << i);
-         }
-         if (attrib_needs_w_to_1(f)) {
-            velems->adjust_attrib_w_1 |= (1 << i);
-         }
+      velems->adjust_attrib_itof = 0x0;
+      velems->adjust_attrib_utof = 0x0;
+      velems->attrib_is_bgra = 0x0;
+      velems->attrib_puint_to_snorm = 0x0;
+      velems->attrib_puint_to_uscaled = 0x0;
+      velems->attrib_puint_to_sscaled = 0x0;
+
+      if (svga_have_vgpu10(svga)) {
+         define_input_element_object(svga, velems);
+      }
+      else {
+         translate_vertex_decls(svga, velems);
       }
    }
    return velems;
 }
 
-static void svga_bind_vertex_elements_state(struct pipe_context *pipe,
-                                            void *velems)
+
+static void
+svga_bind_vertex_elements_state(struct pipe_context *pipe, void *state)
 {
    struct svga_context *svga = svga_context(pipe);
-   struct svga_velems_state *svga_velems = (struct svga_velems_state *) velems;
+   struct svga_velems_state *velems = (struct svga_velems_state *) state;
 
-   svga->curr.velems = svga_velems;
+   svga->curr.velems = velems;
    svga->dirty |= SVGA_NEW_VELEMENT;
 }
 
-static void svga_delete_vertex_elements_state(struct pipe_context *pipe,
-                                              void *velems)
+
+static void
+svga_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
 {
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_velems_state *velems = (struct svga_velems_state *) state;
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret;
+
+      svga_hwtnl_flush_retry(svga);
+
+      ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id);
+         assert(ret == PIPE_OK);
+      }
+
+      if (velems->id == svga->state.hw_draw.layout_id)
+         svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID;
+
+      util_bitmask_clear(svga->input_element_object_id_bm, velems->id);
+      velems->id = SVGA3D_INVALID_ID;
+   }
+
    FREE(velems);
 }
 
@@ -219,5 +334,3 @@ void svga_init_vertex_functions( struct svga_context *svga )
    svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state;
    svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state;
 }
-
-
index c3ac663..630f490 100644 (file)
 #include "tgsi/tgsi_text.h"
 
 #include "svga_context.h"
-#include "svga_tgsi.h"
 #include "svga_hw_reg.h"
 #include "svga_cmd.h"
 #include "svga_debug.h"
 #include "svga_shader.h"
+#include "svga_streamout.h"
 
 
 /**
@@ -100,6 +100,7 @@ svga_create_vs_state(struct pipe_context *pipe,
 {
    struct svga_context *svga = svga_context(pipe);
    struct svga_vertex_shader *vs = CALLOC_STRUCT(svga_vertex_shader);
+
    if (!vs)
       return NULL;
 
@@ -123,10 +124,12 @@ svga_create_vs_state(struct pipe_context *pipe,
 
    vs->base.id = svga->debug.shader_id++;
 
-   if (SVGA_DEBUG & DEBUG_TGSI || 0) {
-      debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
-                   __FUNCTION__, vs->base.id,
-                   vs->base.info.num_inputs, vs->base.info.num_outputs);
+   vs->generic_outputs = svga_get_generic_outputs_mask(&vs->base.info);
+
+   /* check for any stream output declarations */
+   if (templ->stream_output.num_outputs) {
+      vs->base.stream_output = svga_create_stream_output(svga, &vs->base,
+                                                         &templ->stream_output);
    }
 
    return vs;
@@ -139,6 +142,17 @@ svga_bind_vs_state(struct pipe_context *pipe, void *shader)
    struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader;
    struct svga_context *svga = svga_context(pipe);
 
+   if (vs == svga->curr.vs)
+      return;
+
+   /* If the currently bound vertex shader has a generated geometry shader,
+    * then unbind the geometry shader before binding a new vertex shader.
+    * We need to unbind the geometry shader here because there is no
+    * pipe_shader associated with the generated geometry shader.
+    */
+   if (svga->curr.vs != NULL && svga->curr.vs->gs != NULL)
+      svga->pipe.bind_gs_state(&svga->pipe, NULL);
+
    svga->curr.vs = vs;
    svga->dirty |= SVGA_NEW_VS;
 }
@@ -154,20 +168,40 @@ svga_delete_vs_state(struct pipe_context *pipe, void *shader)
 
    svga_hwtnl_flush_retry(svga);
 
+   assert(vs->base.parent == NULL);
+
+   /* Check if there is a generated geometry shader to go with this
+    * vertex shader. If there is, then delete the geometry shader as well.
+    */
+   if (vs->gs != NULL) {
+      svga->pipe.delete_gs_state(&svga->pipe, vs->gs);
+   }
+
+   if (vs->base.stream_output != NULL)
+      svga_delete_stream_output(svga, vs->base.stream_output);
+
    draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader);
 
    for (variant = vs->base.variants; variant; variant = tmp) {
       tmp = variant->next;
 
-      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
-      (void) ret;  /* PIPE_ERROR_ not handled yet */
-
-      /*
-       * Remove stale references to this variant to ensure a new variant on the
-       * same address will be detected as a change.
-       */
-      if (variant == svga->state.hw_draw.vs)
+      /* Check if deleting currently bound shader */
+      if (variant == svga->state.hw_draw.vs) {
+         ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL);
+            assert(ret == PIPE_OK);
+         }
          svga->state.hw_draw.vs = NULL;
+      }
+
+      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+         assert(ret == PIPE_OK);
+      }
    }
 
    FREE((void *)vs->base.tokens);
index b295b44..a910ae0 100644 (file)
@@ -69,18 +69,21 @@ svga_can_create_resource(struct pipe_screen *screen,
    struct svga_winsys_screen *sws = svgascreen->sws;
    SVGA3dSurfaceFormat format;
    SVGA3dSize base_level_size;
-   uint32 numFaces;
    uint32 numMipLevels;
+   uint32 arraySize;
 
    if (res->target == PIPE_BUFFER) {
       format = SVGA3D_BUFFER;
       base_level_size.width = res->width0;
       base_level_size.height = 1;
       base_level_size.depth = 1;
-      numFaces = 1;
       numMipLevels = 1;
+      arraySize = 1;
 
    } else {
+      if (res->target == PIPE_TEXTURE_CUBE)
+         assert(res->array_size == 6);
+
       format = svga_translate_format(svgascreen, res->format, res->bind);
       if (format == SVGA3D_FORMAT_INVALID)
          return FALSE;
@@ -88,12 +91,12 @@ svga_can_create_resource(struct pipe_screen *screen,
       base_level_size.width = res->width0;
       base_level_size.height = res->height0;
       base_level_size.depth = res->depth0;
-      numFaces = (res->target == PIPE_TEXTURE_CUBE) ? 6 : 1;
       numMipLevels = res->last_level + 1;
+      arraySize = res->array_size;
    }
 
    return sws->surface_can_create(sws, format, base_level_size, 
-                                  numFaces, numMipLevels);
+                                  arraySize, numMipLevels);
 }
 
 
index 13f85cd..7ef36b3 100644 (file)
@@ -48,7 +48,8 @@
 static inline boolean
 svga_buffer_needs_hw_storage(unsigned usage)
 {
-   return usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER);
+   return (usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
+                    PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT)) != 0;
 }
 
 
@@ -87,6 +88,26 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
    transfer->usage = usage;
    transfer->box = *box;
 
+   if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) {
+      /* Only need to test for vgpu10 since only vgpu10 features (streamout,
+       * buffer copy) can modify buffers on the device.
+       */
+      if (svga_have_vgpu10(svga)) {
+         enum pipe_error ret;
+         assert(sbuf->handle);
+         ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
+            assert(ret == PIPE_OK);
+         }
+
+         svga_context_finish(svga);
+
+         sbuf->dirty = FALSE;
+      }
+   }
+
    if (usage & PIPE_TRANSFER_WRITE) {
       if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
          /*
@@ -343,13 +364,43 @@ svga_buffer_create(struct pipe_screen *screen,
    sbuf->b.vtbl = &svga_buffer_vtbl;
    pipe_reference_init(&sbuf->b.b.reference, 1);
    sbuf->b.b.screen = screen;
+   sbuf->bind_flags = template->bind;
+
+   if (template->bind & PIPE_BIND_CONSTANT_BUFFER) {
+      /* Constant buffers can only have the PIPE_BIND_CONSTANT_BUFFER
+       * flag set.
+       */
+      if (ss->sws->have_vgpu10) {
+         sbuf->bind_flags = PIPE_BIND_CONSTANT_BUFFER;
+
+         /* Constant buffer size needs to be in multiples of 16. */
+         sbuf->b.b.width0 = align(sbuf->b.b.width0, 16);
+      }
+   }
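
For reference, a sketch equivalent to the align() call above under its power-of-two assumption (hypothetical helper):

/* Round 'value' up to the next multiple of 'alignment' (a power of
 * two), e.g. align_up(17, 16) == 32 and align_up(16, 16) == 16.
 */
static inline unsigned
align_up(unsigned value, unsigned alignment)
{
   return (value + alignment - 1) & ~(alignment - 1);
}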
 
    if(svga_buffer_needs_hw_storage(template->bind)) {
+
+      /* If the buffer will be used for vertex/index/stream data, set all
+       * the flags so that the buffer will be accepted for all those uses.
+       * Note that the PIPE_BIND_ flags we get from the state tracker are
+       * just a hint about how the buffer may be used.  And OpenGL buffer
+       * object may be used for many different things.
+       */
+      if (!(template->bind & PIPE_BIND_CONSTANT_BUFFER)) {
+         /* Not a constant buffer.  The buffer may be used for vertex data,
+          * indexes or stream-out.
+          */
+         sbuf->bind_flags |= (PIPE_BIND_VERTEX_BUFFER |
+                              PIPE_BIND_INDEX_BUFFER);
+         if (ss->sws->have_vgpu10)
+            sbuf->bind_flags |= PIPE_BIND_STREAM_OUTPUT;
+      }
+
       if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
          goto error2;
    }
    else {
-      sbuf->swbuf = align_malloc(template->width0, 64);
+      sbuf->swbuf = align_malloc(sbuf->b.b.width0, 64);
       if(!sbuf->swbuf)
          goto error2;
    }
@@ -357,7 +408,7 @@ svga_buffer_create(struct pipe_screen *screen,
    debug_reference(&sbuf->b.b.reference,
                    (debug_reference_descriptor)debug_describe_resource, 0);
 
-   sbuf->size = util_resource_size(template);
+   sbuf->size = util_resource_size(&sbuf->b.b);
    ss->total_resource_bytes += sbuf->size;
 
    return &sbuf->b.b; 
@@ -391,6 +442,7 @@ svga_user_buffer_create(struct pipe_screen *screen,
    sbuf->b.b.depth0 = 1;
    sbuf->b.b.array_size = 1;
 
+   sbuf->bind_flags = bind;
    sbuf->swbuf = ptr;
    sbuf->user = TRUE;
 
index e838beb..75e12c3 100644 (file)
@@ -65,6 +65,9 @@ struct svga_buffer
 {
    struct u_resource b;
 
+   /** This is a superset of b.b.bind */
+   unsigned bind_flags;
+
    /**
     * Regular (non DMA'able) memory.
     * 
@@ -187,6 +190,8 @@ struct svga_buffer
    struct list_head head;
 
    unsigned size;  /**< Approximate size in bytes */
+
+   boolean dirty;  /**< Need to do a readback before mapping? */
 };
 
 
index 5686531..69e5f75 100644 (file)
@@ -149,10 +149,22 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
       sbuf->key.flags = 0;
 
       sbuf->key.format = SVGA3D_BUFFER;
-      if (sbuf->b.b.bind & PIPE_BIND_VERTEX_BUFFER)
+      if (sbuf->bind_flags & PIPE_BIND_VERTEX_BUFFER) {
          sbuf->key.flags |= SVGA3D_SURFACE_HINT_VERTEXBUFFER;
-      if (sbuf->b.b.bind & PIPE_BIND_INDEX_BUFFER)
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_VERTEX_BUFFER;
+      }
+      if (sbuf->bind_flags & PIPE_BIND_INDEX_BUFFER) {
          sbuf->key.flags |= SVGA3D_SURFACE_HINT_INDEXBUFFER;
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_INDEX_BUFFER;
+      }
+      if (sbuf->bind_flags & PIPE_BIND_CONSTANT_BUFFER)
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_CONSTANT_BUFFER;
+
+      if (sbuf->bind_flags & PIPE_BIND_STREAM_OUTPUT)
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_STREAM_OUTPUT;
+
+      if (sbuf->bind_flags & PIPE_BIND_SAMPLER_VIEW)
+         sbuf->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE;
 
       sbuf->key.size.width = sbuf->b.b.width0;
       sbuf->key.size.height = 1;
@@ -161,10 +173,12 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
       sbuf->key.numFaces = 1;
       sbuf->key.numMipLevels = 1;
       sbuf->key.cachable = 1;
+      sbuf->key.arraySize = 1;
 
       SVGA_DBG(DEBUG_DMA, "surface_create for buffer sz %d\n", sbuf->b.b.width0);
 
-      sbuf->handle = svga_screen_surface_create(ss, &sbuf->key);
+      sbuf->handle = svga_screen_surface_create(ss, sbuf->b.b.bind,
+                                                sbuf->b.b.usage, &sbuf->key);
       if (!sbuf->handle)
          return PIPE_ERROR_OUT_OF_MEMORY;
 
@@ -203,8 +217,8 @@ svga_buffer_upload_gb_command(struct svga_context *svga,
                              struct svga_buffer *sbuf)
 {
    struct svga_winsys_context *swc = svga->swc;
-   SVGA3dCmdUpdateGBImage *cmd;
-   struct svga_3d_update_gb_image *ccmd = NULL;
+   SVGA3dCmdUpdateGBImage *update_cmd;
+   struct svga_3d_update_gb_image *whole_update_cmd = NULL;
    uint32 numBoxes = sbuf->map.num_ranges;
    struct pipe_resource *dummy;
    unsigned int i;
@@ -214,68 +228,78 @@ svga_buffer_upload_gb_command(struct svga_context *svga,
 
    if (sbuf->dma.flags.discard) {
       struct svga_3d_invalidate_gb_image *cicmd = NULL;
-      SVGA3dCmdInvalidateGBImage *icmd;
+      SVGA3dCmdInvalidateGBImage *invalidate_cmd;
+      const unsigned total_commands_size =
+         sizeof(*invalidate_cmd) + numBoxes * sizeof(*whole_update_cmd);
 
       /* Allocate FIFO space for one INVALIDATE_GB_IMAGE command followed by
        * 'numBoxes' UPDATE_GB_IMAGE commands.  Allocate all at once rather
        * than with separate commands because we need to properly deal with
        * filling the command buffer.
        */
-      icmd = SVGA3D_FIFOReserve(swc,
-                               SVGA_3D_CMD_INVALIDATE_GB_IMAGE,
-                               sizeof *icmd + numBoxes * sizeof *ccmd,
-                               2);
-      if (!icmd)
+      invalidate_cmd = SVGA3D_FIFOReserve(swc,
+                                          SVGA_3D_CMD_INVALIDATE_GB_IMAGE,
+                                          total_commands_size, 1 + numBoxes);
+      if (!invalidate_cmd)
         return PIPE_ERROR_OUT_OF_MEMORY;
 
-      cicmd = container_of(icmd, cicmd, body);
-      cicmd->header.size = sizeof *icmd;
-      swc->surface_relocation(swc, &icmd->image.sid, NULL, sbuf->handle,
+      cicmd = container_of(invalidate_cmd, cicmd, body);
+      cicmd->header.size = sizeof(*invalidate_cmd);
+      swc->surface_relocation(swc, &invalidate_cmd->image.sid, NULL, sbuf->handle,
                               (SVGA_RELOC_WRITE |
                                SVGA_RELOC_INTERNAL |
                                SVGA_RELOC_DMA));
-      icmd->image.face = 0;
-      icmd->image.mipmap = 0;
+      invalidate_cmd->image.face = 0;
+      invalidate_cmd->image.mipmap = 0;
 
+      /* The whole_update_cmd is an SVGA3dCmdHeader plus the
+       * SVGA3dCmdUpdateGBImage command.
+       */
+      whole_update_cmd = (struct svga_3d_update_gb_image *) &invalidate_cmd[1];
       /* initialize the first UPDATE_GB_IMAGE command */
-      ccmd = (struct svga_3d_update_gb_image *) &icmd[1];
-      ccmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
-      cmd = &ccmd->body;
+      whole_update_cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
+      update_cmd = &whole_update_cmd->body;
 
    } else {
       /* Allocate FIFO space for 'numBoxes' UPDATE_GB_IMAGE commands */
-      cmd = SVGA3D_FIFOReserve(swc,
-                              SVGA_3D_CMD_UPDATE_GB_IMAGE,
-                              sizeof *cmd + (numBoxes - 1) * sizeof *ccmd,
-                              1);
-      if (!cmd)
+      const unsigned total_commands_size =
+         sizeof(*update_cmd) + (numBoxes - 1) * sizeof(*whole_update_cmd);
+
+      update_cmd = SVGA3D_FIFOReserve(swc,
+                                      SVGA_3D_CMD_UPDATE_GB_IMAGE,
+                                      total_commands_size, numBoxes);
+      if (!update_cmd)
         return PIPE_ERROR_OUT_OF_MEMORY;
 
-      ccmd = container_of(cmd, ccmd, body);
+      /* The whole_update_cmd is an SVGA3dCmdHeader plus the
+       * SVGA3dCmdUpdateGBImage command.
+       */
+      whole_update_cmd = container_of(update_cmd, whole_update_cmd, body);
    }
 
    /* Init the first UPDATE_GB_IMAGE command */
-   ccmd->header.size = sizeof *cmd;
-   swc->surface_relocation(swc, &cmd->image.sid, NULL, sbuf->handle,
+   whole_update_cmd->header.size = sizeof(*update_cmd);
+   swc->surface_relocation(swc, &update_cmd->image.sid, NULL, sbuf->handle,
                           SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
-   cmd->image.face = 0;
-   cmd->image.mipmap = 0;
+   update_cmd->image.face = 0;
+   update_cmd->image.mipmap = 0;
 
    /* Save pointer to the first UPDATE_GB_IMAGE command so that we can
     * fill in the box info below.
     */
-   sbuf->dma.updates = ccmd;
+   sbuf->dma.updates = whole_update_cmd;
 
    /*
-    * Copy the relocation info, face and mipmap to all
-    * subsequent commands. NOTE: For winsyses that actually
-    * patch the image.sid member at flush time, this will fail
-    * miserably. For those we need to add as many relocations
-    * as there are copy boxes.
+    * Copy the face, mipmap, etc. info to all subsequent commands.
+    * Also do the surface relocation for each subsequent command.
     */
-
    for (i = 1; i < numBoxes; ++i) {
-      memcpy(++ccmd, sbuf->dma.updates, sizeof *ccmd);
+      whole_update_cmd++;
+      memcpy(whole_update_cmd, sbuf->dma.updates, sizeof(*whole_update_cmd));
+
+      swc->surface_relocation(swc, &whole_update_cmd->body.image.sid, NULL,
+                              sbuf->handle,
+                              SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
    }
 
    /* Increment reference count */
index 64fd245..90787be 100644 (file)
 #include "svga_debug.h"
 
 
-/* XXX: This isn't a real hardware flag, but just a hack for kernel to
- * know about primary surfaces. Find a better way to accomplish this.
- */
-#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
-
-
 static void
 svga_transfer_dma_band(struct svga_context *svga,
                        struct svga_transfer *st,
@@ -59,10 +53,10 @@ svga_transfer_dma_band(struct svga_context *svga,
                        unsigned y, unsigned h, unsigned srcy,
                        SVGA3dSurfaceDMAFlags flags)
 {
-   struct svga_texture *texture = svga_texture(st->base.resource); 
+   struct svga_texture *texture = svga_texture(st->base.resource);
    SVGA3dCopyBox box;
    enum pipe_error ret;
+
    assert(!st->use_direct_map);
 
    box.x = st->base.box.x;
@@ -75,28 +69,23 @@ svga_transfer_dma_band(struct svga_context *svga,
    box.srcy = srcy;
    box.srcz = 0;
 
-   if (st->base.resource->target == PIPE_TEXTURE_CUBE) {
-      st->face = st->base.box.z;
-      box.z = 0;
-   }
-   else
-      st->face = 0;
-
-   SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n",
-                transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from", 
-                texture->handle,
-                st->face,
-                st->base.box.x,
-                y,
-                box.z,
-                st->base.box.x + st->base.box.width,
-                y + h,
-                box.z + 1,
-                util_format_get_blocksize(texture->b.b.format) * 8 /
-                (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format)));
+   SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - "
+            "(%u, %u, %u), %ubpp\n",
+            transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from",
+            texture->handle,
+            st->slice,
+            st->base.box.x,
+            y,
+            box.z,
+            st->base.box.x + st->base.box.width,
+            y + h,
+            box.z + 1,
+            util_format_get_blocksize(texture->b.b.format) * 8 /
+            (util_format_get_blockwidth(texture->b.b.format)
+             * util_format_get_blockheight(texture->b.b.format)));
 
    ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags);
-   if(ret != PIPE_OK) {
+   if (ret != PIPE_OK) {
       svga_context_flush(svga, NULL);
       ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags);
       assert(ret == PIPE_OK);
@@ -110,7 +99,7 @@ svga_transfer_dma(struct svga_context *svga,
                   SVGA3dTransferType transfer,
                   SVGA3dSurfaceDMAFlags flags)
 {
-   struct svga_texture *texture = svga_texture(st->base.resource); 
+   struct svga_texture *texture = svga_texture(st->base.resource);
    struct svga_screen *screen = svga_screen(texture->b.b.screen);
    struct svga_winsys_screen *sws = screen->sws;
    struct pipe_fence_handle *fence = NULL;
@@ -126,14 +115,13 @@ svga_transfer_dma(struct svga_context *svga,
     */
    svga_surfaces_flush( svga );
 
-   if(!st->swbuf) {
+   if (!st->swbuf) {
       /* Do the DMA transfer in a single go */
-
       svga_transfer_dma_band(svga, st, transfer,
                              st->base.box.y, st->base.box.height, 0,
                              flags);
 
-      if(transfer == SVGA3D_READ_HOST_VRAM) {
+      if (transfer == SVGA3D_READ_HOST_VRAM) {
          svga_context_flush(svga, &fence);
          sws->fence_finish(sws, fence, 0);
          sws->fence_reference(sws, &fence, NULL);
@@ -141,10 +129,13 @@ svga_transfer_dma(struct svga_context *svga,
    }
    else {
       int y, h, srcy;
-      unsigned blockheight = util_format_get_blockheight(st->base.resource->format);
+      unsigned blockheight =
+         util_format_get_blockheight(st->base.resource->format);
+
       h = st->hw_nblocksy * blockheight;
       srcy = 0;
-      for(y = 0; y < st->base.box.height; y += h) {
+
+      for (y = 0; y < st->base.box.height; y += h) {
          unsigned offset, length;
          void *hw, *sw;
 
@@ -158,7 +149,7 @@ svga_transfer_dma(struct svga_context *svga,
          offset = y * st->base.stride / blockheight;
          length = h * st->base.stride / blockheight;
 
-         sw = (uint8_t *)st->swbuf + offset;
+         sw = (uint8_t *) st->swbuf + offset;
 
          if (transfer == SVGA3D_WRITE_HOST_VRAM) {
             unsigned usage = PIPE_TRANSFER_WRITE;
@@ -184,16 +175,15 @@ svga_transfer_dma(struct svga_context *svga,
          * Prevent the texture contents from being discarded on the next band
           * upload.
           */
-
          flags.discard = FALSE;
 
-         if(transfer == SVGA3D_READ_HOST_VRAM) {
+         if (transfer == SVGA3D_READ_HOST_VRAM) {
             svga_context_flush(svga, &fence);
             sws->fence_finish(sws, fence, 0);
 
             hw = sws->buffer_map(sws, st->hwbuf, PIPE_TRANSFER_READ);
             assert(hw);
-            if(hw) {
+            if (hw) {
                memcpy(sw, hw, length);
                sws->buffer_unmap(sws, st->hwbuf);
             }
@@ -203,19 +193,22 @@ svga_transfer_dma(struct svga_context *svga,
 }
 
 
-static boolean 
+static boolean
 svga_texture_get_handle(struct pipe_screen *screen,
-                               struct pipe_resource *texture,
-                               struct winsys_handle *whandle)
+                        struct pipe_resource *texture,
+                        struct winsys_handle *whandle)
 {
    struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen);
    unsigned stride;
 
    assert(svga_texture(texture)->key.cachable == 0);
    svga_texture(texture)->key.cachable = 0;
+
    stride = util_format_get_nblocksx(texture->format, texture->width0) *
             util_format_get_blocksize(texture->format);
-   return sws->surface_get_handle(sws, svga_texture(texture)->handle, stride, whandle);
+
+   return sws->surface_get_handle(sws, svga_texture(texture)->handle,
+                                  stride, whandle);
 }
 
 
@@ -238,6 +231,7 @@ svga_texture_destroy(struct pipe_screen *screen,
 
    ss->total_resource_bytes -= tex->size;
 
+   FREE(tex->defined);
    FREE(tex->rendered_to);
    FREE(tex);
 }
@@ -274,10 +268,43 @@ need_tex_readback(struct pipe_transfer *transfer)
 }
 
 
+static enum pipe_error
+readback_image_vgpu9(struct svga_context *svga,
+                   struct svga_winsys_surface *surf,
+                   unsigned slice,
+                   unsigned level)
+{
+   enum pipe_error ret;
+
+   ret = SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level);
+   }
+   return ret;
+}
+
+
+static enum pipe_error
+readback_image_vgpu10(struct svga_context *svga,
+                    struct svga_winsys_surface *surf,
+                    unsigned slice,
+                    unsigned level,
+                    unsigned numMipLevels)
+{
+   enum pipe_error ret;
+   unsigned subResource;
+
+   subResource = slice * numMipLevels + level;
+   ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource);
+   }
+   return ret;
+}
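Both VGPU10 helpers address a single subresource; assuming (as the code above does) that the mip chain of each array slice is numbered consecutively, the index is slice * numMipLevels + level:

/* Illustrative subresource indexing, matching the computation above. */
static unsigned
subresource_index(unsigned slice, unsigned level, unsigned num_mip_levels)
{
   return slice * num_mip_levels + level;
}
/* e.g. 10 mip levels: slice 2, level 3 -> subresource 23 */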
+
 
-/* XXX: Still implementing this as if it was a screen function, but
- * can now modify it to queue transfers on the context.
- */
 static void *
 svga_texture_transfer_map(struct pipe_context *pipe,
                           struct pipe_resource *texture,
@@ -289,6 +316,7 @@ svga_texture_transfer_map(struct pipe_context *pipe,
    struct svga_context *svga = svga_context(pipe);
    struct svga_screen *ss = svga_screen(pipe->screen);
    struct svga_winsys_screen *sws = ss->sws;
+   struct svga_texture *tex = svga_texture(texture);
    struct svga_transfer *st;
    unsigned nblocksx, nblocksy;
    boolean use_direct_map = svga_have_gb_objects(svga) &&
@@ -326,25 +354,34 @@ svga_texture_transfer_map(struct pipe_context *pipe,
    }
 
    pipe_resource_reference(&st->base.resource, texture);
+
    st->base.level = level;
    st->base.usage = usage;
    st->base.box = *box;
    st->base.stride = nblocksx*util_format_get_blocksize(texture->format);
    st->base.layer_stride = st->base.stride * nblocksy;
 
+   switch (tex->b.b.target) {
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_1D_ARRAY:
+      st->slice = st->base.box.z;
+      st->base.box.z = 0;   /* so we don't apply the z offset twice below */
+      break;
+   default:
+      st->slice = 0;
+      break;
+   }
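For cube and array targets the transfer box z selects the layer or face, so it is saved in st->slice and cleared from the box; otherwise the z offset would be applied a second time when computing buffer offsets. The same normalization as a standalone sketch (the box type here is a simplified placeholder):

struct box3 { int x, y, z; };   /* simplified stand-in for the transfer box */

static unsigned
split_slice_from_box(int is_layered_target, struct box3 *box)
{
   unsigned slice = 0;
   if (is_layered_target) {
      slice = box->z;   /* z picks the array slice / cube face */
      box->z = 0;       /* don't apply the z offset again later */
   }
   return slice;
}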
+
    if (!use_direct_map) {
       /* Use a DMA buffer */
       st->hw_nblocksy = nblocksy;
 
-      st->hwbuf = svga_winsys_buffer_create(svga,
-                                            1, 
-                                            0,
-                                            st->hw_nblocksy * st->base.stride * d);
+      st->hwbuf = svga_winsys_buffer_create(svga, 1, 0,
+                                   st->hw_nblocksy * st->base.stride * d);
       while(!st->hwbuf && (st->hw_nblocksy /= 2)) {
-         st->hwbuf = svga_winsys_buffer_create(svga,
-                                               1, 
-                                               0,
-                                               st->hw_nblocksy * st->base.stride * d);
+         st->hwbuf = svga_winsys_buffer_create(svga, 1, 0,
+                                   st->hw_nblocksy * st->base.stride * d);
       }
 
       if (!st->hwbuf) {
@@ -352,8 +389,8 @@ svga_texture_transfer_map(struct pipe_context *pipe,
          return NULL;
       }
 
-      if(st->hw_nblocksy < nblocksy) {
-         /* We couldn't allocate a hardware buffer big enough for the transfer, 
+      if (st->hw_nblocksy < nblocksy) {
+         /* We couldn't allocate a hardware buffer big enough for the transfer,
           * so allocate regular malloc memory instead */
          if (0) {
             debug_printf("%s: failed to allocate %u KB of DMA, "
@@ -379,45 +416,27 @@ svga_texture_transfer_map(struct pipe_context *pipe,
       }
    } else {
       struct pipe_transfer *transfer = &st->base;
-      struct svga_texture *tex = svga_texture(transfer->resource);
       struct svga_winsys_surface *surf = tex->handle;
-      unsigned face;
-
-      assert(surf);
 
-      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
-        face = transfer->box.z;
-      } else {
-        face = 0;
+      if (!surf) {
+         FREE(st);
+         return NULL;
       }
 
       if (need_tex_readback(transfer)) {
-        SVGA3dBox box;
         enum pipe_error ret;
 
-        box.x = transfer->box.x;
-        box.y = transfer->box.y;
-        box.w = transfer->box.width;
-        box.h = transfer->box.height;
-        box.d = transfer->box.depth;
-        if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
-           box.z = 0;
-        }
-        else {
-           box.z = transfer->box.z;
-        }
-
-         (void) box;  /* not used at this time */
-
          svga_surfaces_flush(svga);
 
-        ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level);
+         if (svga_have_vgpu10(svga)) {
+            ret = readback_image_vgpu10(svga, surf, st->slice, transfer->level,
+                                        tex->b.b.last_level + 1);
+         } else {
+            ret = readback_image_vgpu9(svga, surf, st->slice, transfer->level);
+         }
 
-        if (ret != PIPE_OK) {
-           svga_context_flush(svga, NULL);
-           ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level);
-           assert(ret == PIPE_OK);
-        }
+         assert(ret == PIPE_OK);
+         (void) ret;
 
         svga_context_flush(svga, NULL);
 
@@ -425,7 +444,7 @@ svga_texture_transfer_map(struct pipe_context *pipe,
           * Note: if PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE were specified
           * we could potentially clear the flag for all faces/layers/mips.
           */
-         svga_clear_texture_rendered_to(tex, face, transfer->level);
+         svga_clear_texture_rendered_to(tex, st->slice, transfer->level);
       }
       else {
         assert(transfer->usage & PIPE_TRANSFER_WRITE);
@@ -451,17 +470,15 @@ svga_texture_transfer_map(struct pipe_context *pipe,
       return sws->buffer_map(sws, st->hwbuf, usage);
    }
    else {
-      struct svga_screen *screen = svga_screen(svga->pipe.screen);
-      SVGA3dSurfaceFormat format;
       SVGA3dSize baseLevelSize;
       struct svga_texture *tex = svga_texture(texture);
       struct svga_winsys_surface *surf = tex->handle;
       uint8_t *map;
       boolean retry;
-      unsigned face, offset, mip_width, mip_height;
-      unsigned xoffset = box->x;
-      unsigned yoffset = box->y;
-      unsigned zoffset = box->z;
+      unsigned offset, mip_width, mip_height;
+      unsigned xoffset = st->base.box.x;
+      unsigned yoffset = st->base.box.y;
+      unsigned zoffset = st->base.box.z;
 
       map = svga->swc->surface_map(svga->swc, surf, usage, &retry);
       if (map == NULL && retry) {
@@ -484,21 +501,13 @@ svga_texture_transfer_map(struct pipe_context *pipe,
       /**
        * Compute the offset to the specific texture slice in the buffer.
        */
-      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
-         face = zoffset;
-         zoffset = 0;
-      } else {
-         face = 0;
-      }
-
-      format = svga_translate_format(screen, tex->b.b.format, 0);
       baseLevelSize.width = tex->b.b.width0;
       baseLevelSize.height = tex->b.b.height0;
       baseLevelSize.depth = tex->b.b.depth0;
 
-      offset = svga3dsurface_get_image_offset(format, baseLevelSize,
+      offset = svga3dsurface_get_image_offset(tex->key.format, baseLevelSize,
                                               tex->b.b.last_level + 1, /* numMips */
-                                              face, level);
+                                              st->slice, level);
       if (level > 0) {
          assert(offset > 0);
       }
@@ -506,7 +515,8 @@ svga_texture_transfer_map(struct pipe_context *pipe,
       mip_width = u_minify(tex->b.b.width0, level);
       mip_height = u_minify(tex->b.b.height0, level);
 
-      offset += svga3dsurface_get_pixel_offset(format, mip_width, mip_height,
+      offset += svga3dsurface_get_pixel_offset(tex->key.format,
+                                               mip_width, mip_height,
                                                xoffset, yoffset, zoffset);
 
       return (void *) (map + offset);
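The mapped pointer is therefore base map + (offset of the slice/mip image) + (offset of the pixel within that mip). For a plain linear, uncompressed, single-slice layout the same two-step addressing looks as follows; this is only an approximation, since the real svga3dsurface_* helpers also account for block-compressed formats and multiple faces/slices:

/* Linear-layout approximation of the two-step offset above. */
static unsigned
linear_image_offset(unsigned w0, unsigned h0, unsigned bytes_per_pixel,
                    unsigned level, unsigned x, unsigned y)
{
   unsigned offset = 0, w = w0, h = h0, l;
   for (l = 0; l < level; l++) {          /* skip the earlier mip images */
      offset += w * h * bytes_per_pixel;
      w = w > 1 ? w / 2 : 1;
      h = h > 1 ? h / 2 : 1;
   }
   return offset + (y * w + x) * bytes_per_pixel;  /* pixel within the mip */
}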
@@ -541,9 +551,45 @@ svga_texture_surface_unmap(struct svga_context *svga,
 }
 
 
-/* XXX: Still implementing this as if it was a screen function, but
- * can now modify it to queue transfers on the context.
- */
+static enum pipe_error
+update_image_vgpu9(struct svga_context *svga,
+                   struct svga_winsys_surface *surf,
+                   const SVGA3dBox *box,
+                   unsigned slice,
+                   unsigned level)
+{
+   enum pipe_error ret;
+
+   ret = SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level);
+   }
+   return ret;
+}
+
+
+static enum pipe_error
+update_image_vgpu10(struct svga_context *svga,
+                    struct svga_winsys_surface *surf,
+                    const SVGA3dBox *box,
+                    unsigned slice,
+                    unsigned level,
+                    unsigned numMipLevels)
+{
+   enum pipe_error ret;
+   unsigned subResource;
+
+   subResource = slice * numMipLevels + level;
+   ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource);
+   }
+   return ret;
+}
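All four vgpu9/vgpu10 helpers share the same flush-and-retry idiom: a failed emit usually means the command buffer is full, so the context is flushed once and the command is re-issued. The idiom in isolation (the enum values are assumed placeholders for the real pipe_error codes):

enum err { OK = 0, OUT_OF_SPACE = -1 };   /* placeholder error codes */

/* Generic flush-and-retry wrapper: emit() stands in for an SVGA3D_*
 * command emitter, flush() for svga_context_flush(). */
static enum err
emit_with_retry(enum err (*emit)(void *ctx), void (*flush)(void *ctx),
                void *ctx)
{
   enum err ret = emit(ctx);
   if (ret != OK) {
      flush(ctx);        /* make room in the command buffer */
      ret = emit(ctx);   /* the second attempt is expected to succeed */
   }
   return ret;
}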
+
+
 static void
 svga_texture_transfer_unmap(struct pipe_context *pipe,
                            struct pipe_transfer *transfer)
@@ -579,26 +625,25 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
    } else if (transfer->usage & PIPE_TRANSFER_WRITE) {
       struct svga_winsys_surface *surf =
         svga_texture(transfer->resource)->handle;
-      unsigned face;
       SVGA3dBox box;
       enum pipe_error ret;
 
       assert(svga_have_gb_objects(svga));
 
      /* update the affected region */
-      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
-        face = transfer->box.z;
-      } else {
-        face = 0;
-      }
-
       box.x = transfer->box.x;
       box.y = transfer->box.y;
-      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+      switch (tex->b.b.target) {
+      case PIPE_TEXTURE_CUBE:
+      case PIPE_TEXTURE_2D_ARRAY:
          box.z = 0;
-      }
-      else {
+         break;
+      case PIPE_TEXTURE_1D_ARRAY:
+         box.y = box.z = 0;
+         break;
+      default:
          box.z = transfer->box.z;
+         break;
       }
       box.w = transfer->box.width;
       box.h = transfer->box.height;
@@ -610,18 +655,21 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
                       box.x, box.y, box.z,
                       box.w, box.h, box.d);
 
-      ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level);
-      if (ret != PIPE_OK) {
-         svga_context_flush(svga, NULL);
-         ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level);
-         assert(ret == PIPE_OK);
+      if (svga_have_vgpu10(svga)) {
+         ret = update_image_vgpu10(svga, surf, &box, st->slice, transfer->level,
+                                   tex->b.b.last_level + 1);
+      } else {
+         ret = update_image_vgpu9(svga, surf, &box, st->slice, transfer->level);
       }
+
+      assert(ret == PIPE_OK);
+      (void) ret;
    }
 
    ss->texture_timestamp++;
    svga_age_texture_view(tex, transfer->level);
    if (transfer->resource->target == PIPE_TEXTURE_CUBE)
-      svga_define_texture_level(tex, transfer->box.z, transfer->level);
+      svga_define_texture_level(tex, st->slice, transfer->level);
    else
       svga_define_texture_level(tex, 0, transfer->level);
 
@@ -635,7 +683,18 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
 }
 
 
-struct u_resource_vtbl svga_texture_vtbl = 
+/**
+ * Does format store depth values?
+ */
+static inline boolean
+format_has_depth(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   return util_format_has_depth(desc);
+}
+
+
+struct u_resource_vtbl svga_texture_vtbl =
 {
    svga_texture_get_handle,          /* get_handle */
    svga_texture_destroy,             /* resource_destroy */
@@ -651,57 +710,119 @@ svga_texture_create(struct pipe_screen *screen,
                     const struct pipe_resource *template)
 {
    struct svga_screen *svgascreen = svga_screen(screen);
-   struct svga_texture *tex = CALLOC_STRUCT(svga_texture);
+   struct svga_texture *tex;
+   unsigned bindings = template->bind;
 
-   if (!tex)
-      goto error1;
+   assert(template->last_level < SVGA_MAX_TEXTURE_LEVELS);
+   if (template->last_level >= SVGA_MAX_TEXTURE_LEVELS) {
+      return NULL;
+   }
+
+   tex = CALLOC_STRUCT(svga_texture);
+   if (!tex) {
+      return NULL;
+   }
+
+   tex->defined = CALLOC(template->depth0 * template->array_size,
+                         sizeof(tex->defined[0]));
+   if (!tex->defined) {
+      FREE(tex);
+      return NULL;
+   }
+
+   tex->rendered_to = CALLOC(template->depth0 * template->array_size,
+                             sizeof(tex->rendered_to[0]));
+   if (!tex->rendered_to) {
+      FREE(tex->defined);
+      FREE(tex);
+      return NULL;
+   }
 
    tex->b.b = *template;
    tex->b.vtbl = &svga_texture_vtbl;
    pipe_reference_init(&tex->b.b.reference, 1);
    tex->b.b.screen = screen;
 
-   assert(template->last_level < SVGA_MAX_TEXTURE_LEVELS);
-   if(template->last_level >= SVGA_MAX_TEXTURE_LEVELS)
-      goto error2;
-   
    tex->key.flags = 0;
    tex->key.size.width = template->width0;
    tex->key.size.height = template->height0;
    tex->key.size.depth = template->depth0;
+   tex->key.arraySize = 1;
+   tex->key.numFaces = 1;
+   tex->key.sampleCount = template->nr_samples;
 
-   if(template->target == PIPE_TEXTURE_CUBE) {
-      tex->key.flags |= SVGA3D_SURFACE_CUBEMAP;
-      tex->key.numFaces = 6;
-   }
-   else {
-      tex->key.numFaces = 1;
+   if (template->nr_samples > 1) {
+      tex->key.flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS;
    }
 
-   if (template->target == PIPE_TEXTURE_3D) {
-      tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+   if (svgascreen->sws->have_vgpu10) {
+      switch (template->target) {
+      case PIPE_TEXTURE_1D:
+         tex->key.flags |= SVGA3D_SURFACE_1D;
+         break;
+      case PIPE_TEXTURE_1D_ARRAY:
+         tex->key.flags |= SVGA3D_SURFACE_1D;
+         /* fall-through */
+      case PIPE_TEXTURE_2D_ARRAY:
+         tex->key.flags |= SVGA3D_SURFACE_ARRAY;
+         tex->key.arraySize = template->array_size;
+         break;
+      case PIPE_TEXTURE_3D:
+         tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+         break;
+      case PIPE_TEXTURE_CUBE:
+         tex->key.flags |= (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_ARRAY);
+         tex->key.numFaces = 6;
+         break;
+      default:
+         break;
+      }
+   }
+   else {
+      switch (template->target) {
+      case PIPE_TEXTURE_3D:
+         tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+         break;
+      case PIPE_TEXTURE_CUBE:
+         tex->key.flags |= SVGA3D_SURFACE_CUBEMAP;
+         tex->key.numFaces = 6;
+         break;
+      default:
+         break;
+      }
    }
 
    tex->key.cachable = 1;
 
-   if (template->bind & PIPE_BIND_SAMPLER_VIEW)
+   if (bindings & PIPE_BIND_SAMPLER_VIEW) {
       tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE;
+      tex->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE;
+
+      if (!(bindings & PIPE_BIND_RENDER_TARGET)) {
+         /* Also check if the format is renderable */
+         if (screen->is_format_supported(screen, template->format,
+                                         template->target,
+                                         template->nr_samples,
+                                         PIPE_BIND_RENDER_TARGET)) {
+            bindings |= PIPE_BIND_RENDER_TARGET;
+         }
+      }
+   }
 
-   if (template->bind & PIPE_BIND_DISPLAY_TARGET) {
+   if (bindings & PIPE_BIND_DISPLAY_TARGET) {
       tex->key.cachable = 0;
    }
 
-   if (template->bind & PIPE_BIND_SHARED) {
+   if (bindings & PIPE_BIND_SHARED) {
       tex->key.cachable = 0;
    }
 
-   if (template->bind & (PIPE_BIND_SCANOUT |
-                         PIPE_BIND_CURSOR)) {
-      tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT;
+   if (bindings & (PIPE_BIND_SCANOUT | PIPE_BIND_CURSOR)) {
+      tex->key.scanout = 1;
       tex->key.cachable = 0;
    }
 
-   /* 
+   /*
     * Note: Previously we never passed the
     * SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot
     * know beforehand whether a texture will be used as a rendertarget or not
@@ -712,23 +833,55 @@ svga_texture_create(struct pipe_screen *screen,
     * (XA for example) uses it accurately and certain device versions
    * rely on it in certain situations to render correctly.
     */
-   if((template->bind & PIPE_BIND_RENDER_TARGET) &&
-      !util_format_is_s3tc(template->format))
+   if ((bindings & PIPE_BIND_RENDER_TARGET) &&
+       !util_format_is_s3tc(template->format)) {
       tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET;
-   
-   if(template->bind & PIPE_BIND_DEPTH_STENCIL)
+      tex->key.flags |= SVGA3D_SURFACE_BIND_RENDER_TARGET;
+   }
+
+   if (bindings & PIPE_BIND_DEPTH_STENCIL) {
       tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
-   
+      tex->key.flags |= SVGA3D_SURFACE_BIND_DEPTH_STENCIL;
+   }
+
    tex->key.numMipLevels = template->last_level + 1;
-   
-   tex->key.format = svga_translate_format(svgascreen, template->format, template->bind);
-   if(tex->key.format == SVGA3D_FORMAT_INVALID)
-      goto error2;
+
+   tex->key.format = svga_translate_format(svgascreen, template->format,
+                                           bindings);
+   if (tex->key.format == SVGA3D_FORMAT_INVALID) {
+      FREE(tex->defined);
+      FREE(tex->rendered_to);
+      FREE(tex);
+      return NULL;
+   }
+
+   /* Use typeless formats for sRGB and depth resources.  Typeless
+    * formats can be reinterpreted as other formats.  For example,
+    * SVGA3D_R8G8B8A8_UNORM_TYPELESS can be interpreted as
+    * SVGA3D_R8G8B8A8_UNORM_SRGB or SVGA3D_R8G8B8A8_UNORM.
+    */
+   if (svgascreen->sws->have_vgpu10 &&
+       (util_format_is_srgb(template->format) ||
+        format_has_depth(template->format))) {
+      SVGA3dSurfaceFormat typeless = svga_typeless_format(tex->key.format);
+      if (0) {
+         debug_printf("Convert resource type %s -> %s (bind 0x%x)\n",
+                      svga_format_name(tex->key.format),
+                      svga_format_name(typeless),
+                      bindings);
+      }
+      tex->key.format = typeless;
+   }
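A typeless format fixes only the bit layout, so a single allocation can later be viewed through sRGB or linear views. A sketch of the kind of mapping svga_typeless_format() performs, restricted to the one example named in the comment above (the real table covers many more formats):

/* Subset of the typeless mapping described above. */
enum fmt { R8G8B8A8_UNORM, R8G8B8A8_UNORM_SRGB, R8G8B8A8_TYPELESS };

static enum fmt
to_typeless(enum fmt f)
{
   switch (f) {
   case R8G8B8A8_UNORM:
   case R8G8B8A8_UNORM_SRGB:
      return R8G8B8A8_TYPELESS;  /* same bits, interpretation left open */
   default:
      return f;                  /* already typeless or not remapped */
   }
}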
 
    SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle);
-   tex->handle = svga_screen_surface_create(svgascreen, &tex->key);
-   if (!tex->handle)
-       goto error2;
+   tex->handle = svga_screen_surface_create(svgascreen, bindings,
+                                            tex->b.b.usage, &tex->key);
+   if (!tex->handle) {
+      FREE(tex->defined);
+      FREE(tex->rendered_to);
+      FREE(tex);
+      return NULL;
+   }
 
    SVGA_DBG(DEBUG_DMA, "  --> got sid %p (texture)\n", tex->handle);
 
@@ -738,18 +891,7 @@ svga_texture_create(struct pipe_screen *screen,
    tex->size = util_resource_size(template);
    svgascreen->total_resource_bytes += tex->size;
 
-   tex->rendered_to = CALLOC(template->depth0 * template->array_size,
-                             sizeof(tex->rendered_to[0]));
-   if (!tex->rendered_to)
-      goto error2;
-
    return &tex->b.b;
-
-error2:
-   FREE(tex->rendered_to);
-   FREE(tex);
-error1:
-   return NULL;
 }
 
 
@@ -777,16 +919,28 @@ svga_texture_from_handle(struct pipe_screen *screen,
    if (!srf)
       return NULL;
 
-   if (svga_translate_format(svga_screen(screen), template->format, template->bind) != format) {
-      unsigned f1 = svga_translate_format(svga_screen(screen), template->format, template->bind);
+   if (svga_translate_format(svga_screen(screen), template->format,
+                             template->bind) != format) {
+      unsigned f1 = svga_translate_format(svga_screen(screen),
+                                          template->format, template->bind);
       unsigned f2 = format;
 
-      /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */
-      if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) ||
+      /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up.
+       */
+      if (f1 == SVGA3D_B8G8R8A8_UNORM)
+         f1 = SVGA3D_A8R8G8B8;
+      if (f1 == SVGA3D_B8G8R8X8_UNORM)
+         f1 = SVGA3D_X8R8G8B8;
+
+      if ( !( (f1 == f2) ||
+              (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) ||
+              (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_B8G8R8X8_UNORM) ||
               (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) ||
+              (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_B8G8R8A8_UNORM) ||
               (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ||
               (f1 == SVGA3D_Z_DF24 && f2 == SVGA3D_Z_D24S8_INT) ) ) {
-         debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2);
+         debug_printf("%s wrong format %s != %s\n", __FUNCTION__,
+                      svga_format_name(f1), svga_format_name(f2));
          return NULL;
       }
    }
@@ -795,6 +949,13 @@ svga_texture_from_handle(struct pipe_screen *screen,
    if (!tex)
       return NULL;
 
+   tex->defined = CALLOC(template->depth0 * template->array_size,
+                         sizeof(tex->defined[0]));
+   if (!tex->defined) {
+      FREE(tex);
+      return NULL;
+   }
+
    tex->b.b = *template;
    tex->b.vtbl = &svga_texture_vtbl;
    pipe_reference_init(&tex->b.b.reference, 1);
@@ -803,9 +964,11 @@ svga_texture_from_handle(struct pipe_screen *screen,
    SVGA_DBG(DEBUG_DMA, "wrap surface sid %p\n", srf);
 
    tex->key.cachable = 0;
+   tex->key.format = format;
    tex->handle = srf;
 
    tex->rendered_to = CALLOC(1, sizeof(tex->rendered_to[0]));
+   tex->imported = TRUE;
 
    return &tex->b.b;
 }
index 19dadfb..0326907 100644 (file)
@@ -51,7 +51,7 @@ struct svga_texture
 {
    struct u_resource b;
 
-   boolean defined[6][SVGA_MAX_TEXTURE_LEVELS];
+   ushort *defined;
    
    struct svga_sampler_view *cached_view;
 
@@ -77,6 +77,12 @@ struct svga_texture
     */
    struct svga_winsys_surface *handle;
 
+   /**
+    * Whether the host side surface is imported and not created by this
+    * driver.
+    */
+   boolean imported;
+
    unsigned size;  /**< Approximate size in bytes */
 
    /** array indexed by cube face or 3D/array slice, one bit per mipmap level */
@@ -91,7 +97,7 @@ struct svga_transfer
 {
    struct pipe_transfer base;
 
-   unsigned face;
+   unsigned slice;  /**< array slice or cube face */
 
    struct svga_winsys_buffer *hwbuf;
 
@@ -135,29 +141,6 @@ svga_age_texture_view(struct svga_texture *tex, unsigned level)
 }
 
 
-/**
- * Mark the given texture face/level as being defined.
- */
-static inline void
-svga_define_texture_level(struct svga_texture *tex,
-                          unsigned face,unsigned level)
-{
-   assert(face < Elements(tex->defined));
-   assert(level < Elements(tex->defined[0]));
-   tex->defined[face][level] = TRUE;
-}
-
-
-static inline bool
-svga_is_texture_level_defined(const struct svga_texture *tex,
-                              unsigned face, unsigned level)
-{
-   assert(face < Elements(tex->defined));
-   assert(level < Elements(tex->defined[0]));
-   return tex->defined[face][level];
-}
-
-
 /** For debugging, check that face and level are legal */
 static inline void
 check_face_level(const struct svga_texture *tex,
@@ -177,6 +160,27 @@ check_face_level(const struct svga_texture *tex,
 }
 
 
+/**
+ * Mark the given texture face/level as being defined.
+ */
+static inline void
+svga_define_texture_level(struct svga_texture *tex,
+                          unsigned face, unsigned level)
+{
+   check_face_level(tex, face, level);
+   tex->defined[face] |= 1 << level;
+}
+
+
+static inline bool
+svga_is_texture_level_defined(const struct svga_texture *tex,
+                              unsigned face, unsigned level)
+{
+   check_face_level(tex, face, level);
+   return (tex->defined[face] & (1 << level)) != 0;
+}
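The defined state moved from a fixed boolean[6][levels] matrix to one ushort per slice, with bit N covering mip level N; that is what lets the allocation in svga_texture_create() scale with depth0 * array_size. The set/test pattern on its own (16 bits per slice, which assumes SVGA_MAX_TEXTURE_LEVELS <= 16):

typedef unsigned short ushort;

static void
set_level_defined(ushort *defined, unsigned slice, unsigned level)
{
   defined[slice] |= (ushort)(1u << level);       /* mark mip 'level' */
}

static int
is_level_defined(const ushort *defined, unsigned slice, unsigned level)
{
   return (defined[slice] & (1u << level)) != 0;  /* test mip 'level' */
}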
+
+
 static inline void
 svga_set_texture_rendered_to(struct svga_texture *tex,
                              unsigned face, unsigned level)
index 55dc49f..ffa5bce 100644 (file)
@@ -67,7 +67,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
    assert(pt);
    assert(min_lod <= max_lod);
    assert(max_lod <= pt->last_level);
-
+   assert(!svga_have_vgpu10(svga));
 
    /* Is a view needed */
    {
@@ -143,10 +143,12 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
             pt->last_level);
 
    sv->age = tex->age;
-   sv->handle = svga_texture_view_surface(svga, tex, flags, format,
+   sv->handle = svga_texture_view_surface(svga, tex,
+                                          PIPE_BIND_SAMPLER_VIEW,
+                                          flags, format,
                                           min_lod,
                                           max_lod - min_lod + 1,
-                                          -1, -1,
+                                          -1, 1, -1,
                                           &sv->key);
 
    if (!sv->handle) {
@@ -177,6 +179,7 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *
    unsigned k;
 
    assert(svga);
+   assert(!svga_have_vgpu10(svga));
 
    if (v->handle == tex->handle)
       return;
index 7f14323..4ca7fb7 100644 (file)
@@ -36,6 +36,7 @@ struct pipe_context;
 struct pipe_screen;
 struct svga_context;
 struct svga_winsys_surface;
+struct svga_surface;
 enum SVGA3dSurfaceFormat;
 
 
@@ -97,5 +98,8 @@ svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_
    *ptr = v;
 }
 
-
+boolean
+svga_check_sampler_view_resource_collision(struct svga_context *svga,
+                                           struct svga_winsys_surface *res,
+                                           unsigned shader);
 #endif
index 66c3dea..44b6f4a 100644 (file)
 #include "svga_context.h"
 #include "svga_format.h"
 #include "svga_screen.h"
+#include "svga_tgsi.h"
 #include "svga_resource_texture.h"
 #include "svga_resource.h"
 #include "svga_debug.h"
 
 #include "svga3d_shaderdefs.h"
+#include "VGPU10ShaderTokens.h"
 
+/* NOTE: this constant may get moved into a svga3d*.h header file */
+#define SVGA3D_DX_MAX_RESOURCE_SIZE (128 * 1024 * 1024)
 
 #ifdef DEBUG
 int SVGA_DEBUG = 0;
 
 static const struct debug_named_value svga_debug_flags[] = {
-   { "dma",      DEBUG_DMA, NULL },
-   { "tgsi",     DEBUG_TGSI, NULL },
-   { "pipe",     DEBUG_PIPE, NULL },
-   { "state",    DEBUG_STATE, NULL },
-   { "screen",   DEBUG_SCREEN, NULL },
-   { "tex",      DEBUG_TEX, NULL },
-   { "swtnl",    DEBUG_SWTNL, NULL },
-   { "const",    DEBUG_CONSTS, NULL },
-   { "viewport", DEBUG_VIEWPORT, NULL },
-   { "views",    DEBUG_VIEWS, NULL },
-   { "perf",     DEBUG_PERF, NULL },
-   { "flush",    DEBUG_FLUSH, NULL },
-   { "sync",     DEBUG_SYNC, NULL },
-   { "cache",    DEBUG_CACHE, NULL },
+   { "dma",         DEBUG_DMA, NULL },
+   { "tgsi",        DEBUG_TGSI, NULL },
+   { "pipe",        DEBUG_PIPE, NULL },
+   { "state",       DEBUG_STATE, NULL },
+   { "screen",      DEBUG_SCREEN, NULL },
+   { "tex",         DEBUG_TEX, NULL },
+   { "swtnl",       DEBUG_SWTNL, NULL },
+   { "const",       DEBUG_CONSTS, NULL },
+   { "viewport",    DEBUG_VIEWPORT, NULL },
+   { "views",       DEBUG_VIEWS, NULL },
+   { "perf",        DEBUG_PERF, NULL },
+   { "flush",       DEBUG_FLUSH, NULL },
+   { "sync",        DEBUG_SYNC, NULL },
+   { "cache",       DEBUG_CACHE, NULL },
+   { "streamout",   DEBUG_STREAMOUT, NULL },
+   { "query",       DEBUG_QUERY, NULL },
    DEBUG_NAMED_VALUE_END
 };
 #endif
@@ -80,18 +86,52 @@ svga_get_name( struct pipe_screen *pscreen )
     */
    build = "build: DEBUG;";
    mutex = "mutex: " PIPE_ATOMIC ";";
-#ifdef HAVE_LLVM
-   llvm = "LLVM;";
-#endif
 #else
    build = "build: RELEASE;";
 #endif
+#ifdef HAVE_LLVM
+   llvm = "LLVM;";
+#endif
 
    util_snprintf(name, sizeof(name), "SVGA3D; %s %s %s", build, mutex, llvm);
    return name;
 }
 
 
+/** Helper for querying float-valued device cap */
+static float
+get_float_cap(struct svga_winsys_screen *sws, unsigned cap, float defaultVal)
+{
+   SVGA3dDevCapResult result;
+   if (sws->get_cap(sws, cap, &result))
+      return result.f;
+   else
+      return defaultVal;
+}
+
+
+/** Helper for querying uint-valued device cap */
+static unsigned
+get_uint_cap(struct svga_winsys_screen *sws, unsigned cap, unsigned defaultVal)
+{
+   SVGA3dDevCapResult result;
+   if (sws->get_cap(sws, cap, &result))
+      return result.u;
+   else
+      return defaultVal;
+}
+
+
+/** Helper for querying boolean-valued device cap */
+static boolean
+get_bool_cap(struct svga_winsys_screen *sws, unsigned cap, boolean defaultVal)
+{
+   SVGA3dDevCapResult result;
+   if (sws->get_cap(sws, cap, &result))
+      return result.b;
+   else
+      return defaultVal;
+}
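Each helper folds the recurring query-or-default pattern into a single call. A standalone demonstration with a mocked get_cap (the cap id and values are made up for illustration):

/* Mock of sws->get_cap(): succeeds only for one known cap id. */
static int
mock_get_cap(unsigned cap, unsigned *out)
{
   if (cap == 42) { *out = 8; return 1; }
   return 0;  /* cap not supported by this "device" */
}

static unsigned
get_uint_cap_demo(unsigned cap, unsigned default_val)
{
   unsigned v;
   return mock_get_cap(cap, &v) ? v : default_val;
}
/* get_uint_cap_demo(42, 4) == 8; get_uint_cap_demo(7, 4) == 4 */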
 
 
 static float
@@ -99,7 +139,6 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
 {
    struct svga_screen *svgascreen = svga_screen(screen);
    struct svga_winsys_screen *sws = svgascreen->sws;
-   SVGA3dDevCapResult result;
 
    switch (param) {
    case PIPE_CAPF_MAX_LINE_WIDTH:
@@ -113,12 +152,11 @@ svga_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
       return svgascreen->maxPointSize;
 
    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
-      if(!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY, &result))
-         return 4.0f;
-      return (float) result.u;
+      return (float) get_uint_cap(sws, SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY, 4);
 
    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
       return 15.0;
+
    case PIPE_CAPF_GUARD_BAND_LEFT:
    case PIPE_CAPF_GUARD_BAND_TOP:
    case PIPE_CAPF_GUARD_BAND_RIGHT:
@@ -145,7 +183,12 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TWO_SIDED_STENCIL:
       return 1;
    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
-      return 0;
+      /*
+       * "In virtually every OpenGL implementation and hardware,
+       * GL_MAX_DUAL_SOURCE_DRAW_BUFFERS is 1"
+       * http://www.opengl.org/wiki/Blending
+       */
+      return sws->have_vgpu10 ? 1 : 0;
    case PIPE_CAP_ANISOTROPIC_FILTER:
       return 1;
    case PIPE_CAP_POINT_SPRITE:
@@ -158,6 +201,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return 1;
    case PIPE_CAP_QUERY_TIME_ELAPSED:
       return 0;
+   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+      return sws->have_vgpu10;
    case PIPE_CAP_TEXTURE_SHADOW_MAP:
       return 1;
    case PIPE_CAP_TEXTURE_SWIZZLE:
@@ -170,7 +215,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_USER_CONSTANT_BUFFERS:
       return 1;
    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
-      return 16;
+      return 256;
 
    case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
       {
@@ -199,17 +244,20 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return MIN2(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS),
                   12 /* 2048x2048 */);
 
+   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+      return sws->have_vgpu10 ? SVGA3D_MAX_SURFACE_ARRAYSIZE : 0;
+
    case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */
       return 1;
 
    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
       return 1;
    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
-      return 0;
+      return sws->have_vgpu10;
    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
       return 0;
    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
-      return 1;
+      return !sws->have_vgpu10;
 
    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
       return 1; /* The color outputs of vertex shaders are not clamped */
@@ -222,7 +270,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return 1; /* expected for GL_ARB_framebuffer_object */
 
    case PIPE_CAP_GLSL_FEATURE_LEVEL:
-      return 120;
+      return sws->have_vgpu10 ? 330 : 120;
 
    case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
       return 0;
@@ -230,49 +278,65 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_SM3:
       return 1;
 
-   /* Unsupported features */
-   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
-   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
-   case PIPE_CAP_SHADER_STENCIL_EXPORT:
    case PIPE_CAP_DEPTH_CLIP_DISABLE:
-   case PIPE_CAP_SEAMLESS_CUBE_MAP:
-   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
    case PIPE_CAP_INDEP_BLEND_ENABLE:
-   case PIPE_CAP_INDEP_BLEND_FUNC:
-   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
-   case PIPE_CAP_PRIMITIVE_RESTART:
+   case PIPE_CAP_CONDITIONAL_RENDER:
+   case PIPE_CAP_QUERY_TIMESTAMP:
    case PIPE_CAP_TGSI_INSTANCEID:
    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
-   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+   case PIPE_CAP_SEAMLESS_CUBE_MAP:
+   case PIPE_CAP_FAKE_SW_MSAA:
+      return sws->have_vgpu10;
+
+   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+      return sws->have_vgpu10 ? SVGA3D_DX_MAX_SOTARGETS : 0;
+   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+      return sws->have_vgpu10 ? 4 : 0;
+   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+      return sws->have_vgpu10 ? SVGA3D_MAX_STREAMOUT_DECLS : 0;
+   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+      return 0;
+   case PIPE_CAP_TEXTURE_MULTISAMPLE:
+      return svgascreen->ms_samples ? 1 : 0;
+
+   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+      return SVGA3D_DX_MAX_RESOURCE_SIZE;
+
    case PIPE_CAP_MIN_TEXEL_OFFSET:
+      return sws->have_vgpu10 ? VGPU10_MIN_TEXEL_FETCH_OFFSET : 0;
    case PIPE_CAP_MAX_TEXEL_OFFSET:
+      return sws->have_vgpu10 ? VGPU10_MAX_TEXEL_FETCH_OFFSET : 0;
+
    case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
    case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
-   case PIPE_CAP_CONDITIONAL_RENDER:
-   case PIPE_CAP_TEXTURE_BARRIER:
-   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
-   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
-   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+      return 0;
+
    case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+      return sws->have_vgpu10 ? 256 : 0;
    case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+      return sws->have_vgpu10 ? 1024 : 0;
+
+   case PIPE_CAP_PRIMITIVE_RESTART:
+      return 1; /* may be a sw fallback, depending on restart index */
+
+   /* Unsupported features */
+   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+   case PIPE_CAP_SHADER_STENCIL_EXPORT:
+   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+   case PIPE_CAP_INDEP_BLEND_FUNC:
+   case PIPE_CAP_TEXTURE_BARRIER:
    case PIPE_CAP_MAX_VERTEX_STREAMS:
    case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
-   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
-   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_COMPUTE:
    case PIPE_CAP_START_INSTANCE:
-   case PIPE_CAP_QUERY_TIMESTAMP:
-   case PIPE_CAP_TEXTURE_MULTISAMPLE:
    case PIPE_CAP_CUBE_MAP_ARRAY:
-   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
    case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
-   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
    case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
    case PIPE_CAP_TEXTURE_GATHER_SM5:
    case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
-   case PIPE_CAP_FAKE_SW_MSAA:
    case PIPE_CAP_TEXTURE_QUERY_LOD:
    case PIPE_CAP_SAMPLE_SHADING:
    case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
@@ -288,8 +352,10 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return 0;
    case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
       return 64;
+   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
-      return 1;
+      return 1;  /* need 4-byte alignment for all offsets and strides */
    case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
       return 2048;
    case PIPE_CAP_MAX_VIEWPORTS:
@@ -313,6 +379,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
       return 0;
    }
 
@@ -320,11 +387,16 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
    return 0;
 }
 
-static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
+
+static int
+vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
+                       enum pipe_shader_cap param)
 {
    struct svga_screen *svgascreen = svga_screen(screen);
    struct svga_winsys_screen *sws = svgascreen->sws;
-   SVGA3dDevCapResult result;
+   unsigned val;
+
+   assert(!sws->have_vgpu10);
 
    switch (shader)
    {
@@ -347,9 +419,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
       case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
          return 1;
       case PIPE_SHADER_CAP_MAX_TEMPS:
-         if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, &result))
-            return 32;
-         return MIN2(result.u, SVGA3D_TEMPREG_MAX);
+         val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, 32);
+         return MIN2(val, SVGA3D_TEMPREG_MAX);
       case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
         /* 
          * Although PS 3.0 has some addressing abilities it can only represent
@@ -392,9 +463,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
       {
       case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
       case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
-         if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS, &result))
-            return 512;
-         return result.u;
+         return get_uint_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS,
+                             512);
       case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
       case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
          /* XXX: until we have vertex texture support */
@@ -410,9 +480,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
       case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
          return 1;
       case PIPE_SHADER_CAP_MAX_TEMPS:
-         if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, &result))
-            return 32;
-         return MIN2(result.u, SVGA3D_TEMPREG_MAX);
+         val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, 32);
+         return MIN2(val, SVGA3D_TEMPREG_MAX);
       case PIPE_SHADER_CAP_MAX_PREDS:
          return 1;
       case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -459,8 +528,102 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
 }
 
 
+static int
+vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader,
+                        enum pipe_shader_cap param)
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+
+   assert(sws->have_vgpu10);
+   (void) sws;  /* silence unused var warnings in non-debug builds */
+
+   /* Only VS, GS, FS supported */
+   if (shader != PIPE_SHADER_VERTEX &&
+       shader != PIPE_SHADER_GEOMETRY &&
+       shader != PIPE_SHADER_FRAGMENT) {
+      return 0;
+   }
+
+   /* NOTE: we do not query the device for any caps/limits at this time */
+
+   /* Generally the same limits for vertex, geometry and fragment shaders */
+   switch (param) {
+   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+      return 64 * 1024;
+   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+      return 64;
+   case PIPE_SHADER_CAP_MAX_INPUTS:
+      if (shader == PIPE_SHADER_FRAGMENT)
+         return VGPU10_MAX_FS_INPUTS;
+      else if (shader == PIPE_SHADER_GEOMETRY)
+         return VGPU10_MAX_GS_INPUTS;
+      else
+         return VGPU10_MAX_VS_INPUTS;
+   case PIPE_SHADER_CAP_MAX_OUTPUTS:
+      if (shader == PIPE_SHADER_FRAGMENT)
+         return VGPU10_MAX_FS_OUTPUTS;
+      else if (shader == PIPE_SHADER_GEOMETRY)
+         return VGPU10_MAX_GS_OUTPUTS;
+      else
+         return VGPU10_MAX_VS_OUTPUTS;
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+      return VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT * sizeof(float[4]);
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+      return svgascreen->max_const_buffers;
+   case PIPE_SHADER_CAP_MAX_TEMPS:
+      return VGPU10_MAX_TEMPS;
+   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+      return TRUE; /* XXX verify */
+   case PIPE_SHADER_CAP_MAX_PREDS:
+      return 0;
+   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+   case PIPE_SHADER_CAP_SUBROUTINES:
+   case PIPE_SHADER_CAP_INTEGERS:
+      return TRUE;
+   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+      return SVGA3D_DX_MAX_SAMPLERS;
+   case PIPE_SHADER_CAP_PREFERRED_IR:
+      return PIPE_SHADER_IR_TGSI;
+   case PIPE_SHADER_CAP_DOUBLES:
+   case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+      return 0;
+   default:
+      debug_printf("Unexpected vgpu10 shader query %u\n", param);
+      return 0;
+   }
+   return 0;
+}
+
+
+static int
+svga_get_shader_param(struct pipe_screen *screen, unsigned shader,
+                      enum pipe_shader_cap param)
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   if (sws->have_vgpu10) {
+      return vgpu10_get_shader_param(screen, shader, param);
+   }
+   else {
+      return vgpu9_get_shader_param(screen, shader, param);
+   }
+}
+
+
 /**
- * Implemnt pipe_screen::is_format_supported().
+ * Implement pipe_screen::is_format_supported().
  * \param bindings  bitmask of PIPE_BIND_x flags
  */
 static boolean
@@ -478,7 +641,12 @@ svga_is_format_supported( struct pipe_screen *screen,
    assert(bindings);
 
    if (sample_count > 1) {
-      return FALSE;
+      /* In ms_samples, if bit N is set it means that we support
+       * multisample with N+1 samples per pixel.
+       */
+      if ((ss->ms_samples & (1 << (sample_count - 1))) == 0) {
+         return FALSE;
+      }
    }
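So ms_samples is a capability bitmask in which bit N announces support for N+1 samples per pixel. The test in isolation:

/* Bit N of ms_samples set => N+1 samples supported.
 * e.g. ms_samples == 0x9 (bits 0 and 3) => 1x and 4x. */
static int
msaa_supported(unsigned ms_samples, unsigned sample_count)
{
   return (ms_samples & (1u << (sample_count - 1))) != 0;
}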
 
    svga_format = svga_translate_format(ss, format, bindings);
@@ -486,6 +654,22 @@ svga_is_format_supported( struct pipe_screen *screen,
       return FALSE;
    }
 
+   /* we don't support sRGB rendering into display targets */
+   if (util_format_is_srgb(format) && (bindings & PIPE_BIND_DISPLAY_TARGET)) {
+      return FALSE;
+   }
+
+   /*
+    * For VGPU10 vertex formats, skip querying host capabilities
+    */
+
+   if (ss->sws->have_vgpu10 && (bindings & PIPE_BIND_VERTEX_BUFFER)) {
+      SVGA3dSurfaceFormat svga_format;
+      unsigned flags;
+      svga_translate_vertex_format_vgpu10(format, &svga_format, &flags);
+      return svga_format != SVGA3D_FORMAT_INVALID;
+   }
+
    /*
     * Override host capabilities, so that we end up with the same
     * visuals for all virtual hardware implementations.
@@ -498,6 +682,12 @@ svga_is_format_supported( struct pipe_screen *screen,
       case SVGA3D_R5G6B5:
          break;
 
+      /* VGPU10 formats */
+      case SVGA3D_B8G8R8A8_UNORM:
+      case SVGA3D_B8G8R8X8_UNORM:
+      case SVGA3D_B5G6R5_UNORM:
+         break;
+
       /* Often unsupported/problematic. This means we end up with the same
        * visuals for all virtual hardware implementations.
        */
@@ -516,22 +706,32 @@ svga_is_format_supported( struct pipe_screen *screen,
 
    svga_get_format_cap(ss, svga_format, &caps);
 
+   if (bindings & PIPE_BIND_RENDER_TARGET) {
+      /* Check that the color surface is blendable, unless it's an
+       * integer format.
+       */
+      if (!svga_format_is_integer(svga_format) &&
+          (caps.value & SVGA3DFORMAT_OP_NOALPHABLEND)) {
+         return FALSE;
+      }
+   }
+
    mask.value = 0;
    if (bindings & PIPE_BIND_RENDER_TARGET) {
-      mask.offscreenRenderTarget = 1;
+      mask.value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET;
    }
    if (bindings & PIPE_BIND_DEPTH_STENCIL) {
-      mask.zStencil = 1;
+      mask.value |= SVGA3DFORMAT_OP_ZSTENCIL;
    }
    if (bindings & PIPE_BIND_SAMPLER_VIEW) {
-      mask.texture = 1;
+      mask.value |= SVGA3DFORMAT_OP_TEXTURE;
    }
 
    if (target == PIPE_TEXTURE_CUBE) {
-      mask.cubeTexture = 1;
+      mask.value |= SVGA3DFORMAT_OP_CUBETEXTURE;
    }
-   if (target == PIPE_TEXTURE_3D) {
-      mask.volumeTexture = 1;
+   else if (target == PIPE_TEXTURE_3D) {
+      mask.value |= SVGA3DFORMAT_OP_VOLUMETEXTURE;
    }
 
    return (caps.value & mask.value) == mask.value;
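The final check is a subset test: every SVGA3DFORMAT_OP_* bit requested in the mask must also be present in the queried caps. In isolation:

/* True iff every bit set in 'needed' is also set in 'caps'. */
static int
has_all_caps(unsigned caps, unsigned needed)
{
   return (caps & needed) == needed;
}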
@@ -611,8 +811,6 @@ svga_screen_create(struct svga_winsys_screen *sws)
 {
    struct svga_screen *svgascreen;
    struct pipe_screen *screen;
-   SVGA3dDevCapResult result;
-   boolean use_vs30, use_ps30;
 
 #ifdef DEBUG
    SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 );
@@ -642,6 +840,7 @@ svga_screen_create(struct svga_winsys_screen *sws)
    screen->get_param = svga_get_param;
    screen->get_shader_param = svga_get_shader_param;
    screen->get_paramf = svga_get_paramf;
+   screen->get_timestamp = NULL;
    screen->is_format_supported = svga_is_format_supported;
    screen->context_create = svga_context_create;
    screen->fence_reference = svga_fence_reference;
@@ -657,18 +856,6 @@ svga_screen_create(struct svga_winsys_screen *sws)
       svgascreen->hw_version = SVGA3D_HWVERSION_WS65_B1;
    }
 
-   use_ps30 =
-      sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) &&
-      result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE;
-
-   use_vs30 =
-      sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) &&
-      result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE;
-
-   /* we require Shader model 3.0 or later */
-   if (!use_ps30 || !use_vs30)
-      goto error2;
-
    /*
     * The D16, D24X8, and D24S8 formats always do an implicit shadow compare
     * when sampled from, where as the DF16, DF24, and D24S8_INT do not.  So
@@ -716,46 +903,77 @@ svga_screen_create(struct svga_winsys_screen *sws)
 
    /* Query device caps
     */
-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, &result))
-      svgascreen->haveLineStipple = FALSE;
-   else
-      svgascreen->haveLineStipple = result.u;
+   if (sws->have_vgpu10) {
+      svgascreen->haveProvokingVertex
+         = get_bool_cap(sws, SVGA3D_DEVCAP_DX_PROVOKING_VERTEX, FALSE);
+      svgascreen->haveLineSmooth = TRUE;
+      svgascreen->maxPointSize = 80.0F;
+      svgascreen->max_color_buffers = SVGA3D_DX_MAX_RENDER_TARGETS;
+
+      /* Multisample samples per pixel */
+      svgascreen->ms_samples =
+         get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0);
+
+      /* Maximum number of constant buffers */
+      svgascreen->max_const_buffers =
+         get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1);
+      assert(svgascreen->max_const_buffers <= SVGA_MAX_CONST_BUFS);
+   }
+   else {
+      /* VGPU9 */
+      unsigned vs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION,
+                                     SVGA3DVSVERSION_NONE);
+      unsigned fs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION,
+                                     SVGA3DPSVERSION_NONE);
+
+      /* we require Shader model 3.0 or later */
+      if (fs_ver < SVGA3DPSVERSION_30 || vs_ver < SVGA3DVSVERSION_30) {
+         goto error2;
+      }
 
-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_AA, &result))
-      svgascreen->haveLineSmooth = FALSE;
-   else
-      svgascreen->haveLineSmooth = result.u;
+      svgascreen->haveProvokingVertex = FALSE;
 
-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, &result))
-      svgascreen->maxLineWidth = 1.0F;
-   else
-      svgascreen->maxLineWidth = result.f;
+      svgascreen->haveLineSmooth =
+         get_bool_cap(sws, SVGA3D_DEVCAP_LINE_AA, FALSE);
 
-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, &result))
-      svgascreen->maxLineWidthAA = 1.0F;
-   else
-      svgascreen->maxLineWidthAA = result.f;
+      svgascreen->maxPointSize =
+         get_float_cap(sws, SVGA3D_DEVCAP_MAX_POINT_SIZE, 1.0f);
+      /* Keep this to a reasonable size to avoid failures in conform/pntaa.c */
+      svgascreen->maxPointSize = MIN2(svgascreen->maxPointSize, 80.0f);
+
+      /* The SVGA3D device always supports 4 targets at this time, regardless
+       * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return.
+       */
+      svgascreen->max_color_buffers = 4;
+
+      /* Only support one constant buffer
+       */
+      svgascreen->max_const_buffers = 1;
 
-   if (0)
+      /* No multisampling */
+      svgascreen->ms_samples = 0;
+   }
+
+   /* common VGPU9 / VGPU10 caps */
+   svgascreen->haveLineStipple =
+      get_bool_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, FALSE);
+
+   svgascreen->maxLineWidth =
+      get_float_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, 1.0f);
+
+   svgascreen->maxLineWidthAA =
+      get_float_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, 1.0f);
+
+   if (0) {
+      debug_printf("svga: haveProvokingVertex %u\n",
+                   svgascreen->haveProvokingVertex);
       debug_printf("svga: haveLineStip %u  "
                    "haveLineSmooth %u  maxLineWidth %f\n",
                    svgascreen->haveLineStipple, svgascreen->haveLineSmooth,
                    svgascreen->maxLineWidth);
-
-   if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_POINT_SIZE, &result)) {
-      svgascreen->maxPointSize = 1.0F;
-   } else {
-      /* Keep this to a reasonable size to avoid failures in
-       * conform/pntaa.c:
-       */
-      svgascreen->maxPointSize = MIN2(result.f, 80.0f);
+      debug_printf("svga: maxPointSize %g\n", svgascreen->maxPointSize);
    }
 
-   /* The SVGA3D device always supports 4 targets at this time, regardless
-    * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return.
-    */
-   svgascreen->max_color_buffers = 4;
-
    pipe_mutex_init(svgascreen->tex_mutex);
    pipe_mutex_init(svgascreen->swc_mutex);
 
index ea1e743..5581d2e 100644 (file)
@@ -1,4 +1,4 @@
-/**********************************************************
+ /**********************************************************
  * Copyright 2008-2009 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
@@ -48,10 +48,13 @@ struct svga_screen
    SVGA3dHardwareVersion hw_version;
 
    /** Device caps */
+   boolean haveProvokingVertex;
    boolean haveLineStipple, haveLineSmooth;
    float maxLineWidth, maxLineWidthAA;
    float maxPointSize;
    unsigned max_color_buffers;
+   unsigned max_const_buffers;
+   unsigned ms_samples;
 
    struct {
       boolean force_level_surface_view;
@@ -69,6 +72,7 @@ struct svga_screen
    /* which formats to translate depth formats into */
    struct {
      enum SVGA3dSurfaceFormat z16;
+
      /* note gallium order */
      enum SVGA3dSurfaceFormat x8z24;
      enum SVGA3dSurfaceFormat s8z24;
index 3c76539..5b44129 100644 (file)
@@ -115,8 +115,14 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen,
 
       assert(entry->handle);
 
+      /* If the key matches and the fence is signalled (i.e. the surface
+       * is no longer needed), the lookup is successful and we found a
+       * surface that can be reused.  We unlink the surface from the
+       * cache entry and add the entry to the 'empty' list.
+       */
       if (memcmp(&entry->key, key, sizeof *key) == 0 &&
-         sws->fence_signalled(sws, entry->fence, 0) == 0) {
+          sws->fence_signalled(sws, entry->fence, 0) == 0) {
          unsigned surf_size;
 
          assert(sws->surface_is_flushed(sws, entry->handle));
@@ -124,10 +130,13 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen,
          handle = entry->handle; /* Reference is transferred here. */
          entry->handle = NULL;
 
+         /* Remove from hash table */
          LIST_DEL(&entry->bucket_head);
 
+         /* remove from LRU list */
          LIST_DEL(&entry->head);
 
+         /* Add the cache entry (but not the surface!) to the empty list */
          LIST_ADD(&entry->head, &cache->empty);
 
          /* update the cache size */
@@ -195,7 +204,8 @@ svga_screen_cache_shrink(struct svga_screen *svgascreen,
 
 
 /**
- * Transfers a handle reference.
+ * Add a surface to the cache.  This is done when the driver deletes
+ * the surface.  Note: transfers a handle reference.
  */
 static void
 svga_screen_cache_add(struct svga_screen *svgascreen,
@@ -207,17 +217,17 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
    struct svga_host_surface_cache_entry *entry = NULL;
    struct svga_winsys_surface *handle = *p_handle;
    unsigned surf_size;
-   
+
    assert(key->cachable);
 
    if (!handle)
       return;
-   
+
    surf_size = surface_size(key);
 
    *p_handle = NULL;
    pipe_mutex_lock(cache->mutex);
-   
+
    if (surf_size >= SVGA_HOST_SURFACE_CACHE_BYTES) {
       /* this surface is too large to cache, just free it */
       sws->surface_reference(sws, &handle, NULL);
@@ -245,10 +255,13 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
    }
 
    if (!LIST_IS_EMPTY(&cache->empty)) {
-      /* use the first empty entry */
+      /* An empty entry has no surface associated with it.
+       * Use the first empty entry.
+       */
       entry = LIST_ENTRY(struct svga_host_surface_cache_entry,
                          cache->empty.next, head);
 
+      /* Remove from LRU list */
       LIST_DEL(&entry->head);
    }
    else if (!LIST_IS_EMPTY(&cache->unused)) {
@@ -262,12 +275,15 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
 
       sws->surface_reference(sws, &entry->handle, NULL);
 
+      /* Remove from hash table */
       LIST_DEL(&entry->bucket_head);
 
+      /* Remove from LRU list */
       LIST_DEL(&entry->head);
    }
 
    if (entry) {
+      assert(entry->handle == NULL);
       entry->handle = handle;
       memcpy(&entry->key, key, sizeof entry->key);
 
@@ -304,6 +320,7 @@ svga_screen_cache_flush(struct svga_screen *svgascreen,
 
    pipe_mutex_lock(cache->mutex);
 
+   /* Loop over entries in the validated list */
    curr = cache->validated.next;
    next = curr->next;
    while (curr != &cache->validated) {
@@ -312,12 +329,15 @@ svga_screen_cache_flush(struct svga_screen *svgascreen,
       assert(entry->handle);
 
       if (sws->surface_is_flushed(sws, entry->handle)) {
+         /* remove entry from LRU list */
          LIST_DEL(&entry->head);
 
          svgascreen->sws->fence_reference(svgascreen->sws, &entry->fence, fence);
 
+         /* Add entry to the unused list */
          LIST_ADD(&entry->head, &cache->unused);
 
+         /* Add entry to the hash table bucket */
          bucket = svga_screen_cache_bucket(&entry->key);
          LIST_ADD(&entry->bucket_head, &cache->bucket[bucket]);
       }
@@ -388,9 +408,12 @@ svga_screen_cache_init(struct svga_screen *svgascreen)
  * Allocate a new host-side surface.  If the surface is marked as cachable,
  * first try re-using a surface in the cache of freed surfaces.  Otherwise,
  * allocate a new surface.
+ * \param bind_flags  bitmask of PIPE_BIND_x flags
+ * \param usage  one of PIPE_USAGE_x values
  */
 struct svga_winsys_surface *
 svga_screen_surface_create(struct svga_screen *svgascreen,
+                           unsigned bind_flags, unsigned usage,
                            struct svga_host_surface_cache_key *key)
 {
    struct svga_winsys_screen *sws = svgascreen->sws;
@@ -398,17 +421,20 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
    boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable;
 
    SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
-            "%s sz %dx%dx%d mips %d faces %d cachable %d\n",
+            "%s sz %dx%dx%d mips %d faces %d arraySize %d cachable %d\n",
             __FUNCTION__,
             key->size.width,
             key->size.height,
             key->size.depth,
             key->numMipLevels,
             key->numFaces,
+            key->arraySize,
             key->cachable);
 
    if (cachable) {
       if (key->format == SVGA3D_BUFFER) {
+         SVGA3dSurfaceFlags hint_flag;
+
          /* For buffers, round the buffer size up to the nearest power
           * of two to increase the probability of cache hits.  Keep
           * texture surface dimensions unchanged.
@@ -417,15 +443,33 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
          while (size < key->size.width)
             size <<= 1;
          key->size.width = size;
-        /* Since we're reusing buffers we're effectively transforming all
-         * of them into dynamic buffers.
-         *
-         * It would be nice to not cache long lived static buffers. But there
-         * is no way to detect the long lived from short lived ones yet. A
-         * good heuristic would be buffer size.
-         */
-        key->flags &= ~SVGA3D_SURFACE_HINT_STATIC;
-        key->flags |= SVGA3D_SURFACE_HINT_DYNAMIC;
+
+         /* Determine whether the buffer is static or dynamic.
+          * This is a bit of a heuristic which can be tuned as needed.
+          */
+         if (usage == PIPE_USAGE_DEFAULT ||
+             usage == PIPE_USAGE_IMMUTABLE) {
+            hint_flag = SVGA3D_SURFACE_HINT_STATIC;
+         }
+         else if (bind_flags & PIPE_BIND_INDEX_BUFFER) {
+            /* Index buffers don't change too often.  Mark them as static.
+             */
+            hint_flag = SVGA3D_SURFACE_HINT_STATIC;
+         }
+         else {
+            /* Since we're reusing buffers we're effectively transforming all
+             * of them into dynamic buffers.
+             *
+             * It would be nice to not cache long lived static buffers. But there
+             * is no way to detect the long lived from short lived ones yet. A
+             * good heuristic would be buffer size.
+             */
+            hint_flag = SVGA3D_SURFACE_HINT_DYNAMIC;
+         }
+
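+         /* Replace any previous static/dynamic hint with the new one */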
+         key->flags &= ~(SVGA3D_SURFACE_HINT_STATIC |
+                         SVGA3D_SURFACE_HINT_DYNAMIC);
+         key->flags |= hint_flag;
       }
 
       handle = svga_screen_cache_lookup(svgascreen, key);
@@ -436,24 +480,32 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
                      key->size.width);
          else
             SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
-                     "reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle,
+                     "reuse sid %p sz %dx%dx%d mips %d faces %d arraySize %d\n", handle,
                      key->size.width,
                      key->size.height,
                      key->size.depth,
                      key->numMipLevels,
-                     key->numFaces);
+                     key->numFaces,
+                     key->arraySize);
       }
    }
 
    if (!handle) {
+      unsigned usage = 0;
+
+      if (!key->cachable)
+         usage |= SVGA_SURFACE_USAGE_SHARED;
+      if (key->scanout)
+         usage |= SVGA_SURFACE_USAGE_SCANOUT;
+
       handle = sws->surface_create(sws,
                                    key->flags,
                                    key->format,
-                                   key->cachable ?
-                                   0 : SVGA_SURFACE_USAGE_SHARED,
+                                   usage,
                                    key->size,
-                                   key->numFaces,
-                                   key->numMipLevels);
+                                   key->numFaces * key->arraySize,
+                                   key->numMipLevels,
+                                   key->sampleCount);
       if (handle)
          SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
                   "  CREATE sid %p sz %dx%dx%d\n",
index 56ac62b..424eb2c 100644 (file)
@@ -62,9 +62,12 @@ struct svga_host_surface_cache_key
    SVGA3dSurfaceFlags flags;
    SVGA3dSurfaceFormat format;
    SVGA3dSize size;
-   uint32_t numFaces:24;
-   uint32_t numMipLevels:7;
+   uint32_t numFaces:3;
+   uint32_t arraySize:16;
+   uint32_t numMipLevels:6;
    uint32_t cachable:1;         /* False if this is a shared surface */
+   uint32_t sampleCount:5;
+   uint32_t scanout:1;
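+   /* Note: the bitfields above add up to exactly 32 bits */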
 };
 
 
@@ -137,6 +140,7 @@ svga_screen_cache_init(struct svga_screen *svgascreen);
 
 struct svga_winsys_surface *
 svga_screen_surface_create(struct svga_screen *svgascreen,
+                           unsigned bind_flags, unsigned usage,
                            struct svga_host_surface_cache_key *key);
 
 void
index 46efa07..d46e7eb 100644 (file)
 #include "util/u_memory.h"
 #include "svga_context.h"
 #include "svga_cmd.h"
+#include "svga_format.h"
 #include "svga_shader.h"
 
 
+/**
+ * This bit isn't really used anywhere.  It only serves to help
+ * generate a unique "signature" for the vertex shader output bitmask.
+ * Shader input/output signatures are used to resolve shader linking
+ * issues.
+ */
+#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)
+
+
+/**
+ * Use the shader info to generate a bitmask indicating which generic
+ * inputs are used by the shader.  A set bit indicates that GENERIC[i]
+ * is used.
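+ * For example, a shader that reads GENERIC[0] and GENERIC[2] produces
+ * the mask 0x5.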
+ */
+uint64_t
+svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
+{
+   unsigned i;
+   uint64_t mask = 0x0;
+
+   for (i = 0; i < info->num_inputs; i++) {
+      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
+         unsigned j = info->input_semantic_index[i];
+         assert(j < sizeof(mask) * 8);
+         mask |= ((uint64_t) 1) << j;
+      }
+   }
+
+   return mask;
+}
+
+
+/**
+ * Scan shader info to return a bitmask of written outputs.
+ */
+uint64_t
+svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
+{
+   unsigned i;
+   uint64_t mask = 0x0;
+
+   for (i = 0; i < info->num_outputs; i++) {
+      switch (info->output_semantic_name[i]) {
+      case TGSI_SEMANTIC_GENERIC:
+         {
+            unsigned j = info->output_semantic_index[i];
+            assert(j < sizeof(mask) * 8);
+            mask |= ((uint64_t) 1) << j;
+         }
+         break;
+      case TGSI_SEMANTIC_FOG:
+         mask |= FOG_GENERIC_BIT;
+         break;
+      }
+   }
+
+   return mask;
+}
+
+
+
+/**
+ * Given a mask of used generic variables (as returned by the above functions)
+ * fill in a table which maps those indexes to small integers.
+ * This table is used by the remap_generic_index() function in
+ * svga_tgsi_decl_sm30.c
+ * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
+ * GENERIC[3] are used.  Since index 0 is reserved for texcoord[0], the
+ * remap_table will contain:
+ *   table[1] = 1;
+ *   table[3] = 2;
+ * The remaining table entries are left at -1 and get assigned the next
+ * unused index on demand by svga_remap_generic_index().
+ */
+void
+svga_remap_generics(uint64_t generics_mask,
+                    int8_t remap_table[MAX_GENERIC_VARYING])
+{
+   /* Note texcoord[0] is reserved so start at 1 */
+   unsigned count = 1, i;
+
+   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
+      remap_table[i] = -1;
+   }
+
+   /* for each bit set in generics_mask */
+   while (generics_mask) {
+      unsigned index = ffsll(generics_mask) - 1;
+      remap_table[index] = count++;
+      generics_mask &= ~((uint64_t) 1 << index);
+   }
+}
+
+
+/**
+ * Use the generic remap table to map a TGSI generic varying variable
+ * index to a small integer.  If the remapping table doesn't have a
+ * valid value for the given index (the table entry is -1), it means
+ * the fragment shader doesn't use that VS output.  Just allocate
+ * the next free value in that case.  Alternately, we could cull
+ * VS instructions that write to that register, or replace the register
+ * with a dummy temp register.
+ * XXX TODO: we should do one of the latter as it would save precious
+ * texcoord registers.
+ */
+int
+svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
+                         int generic_index)
+{
+   assert(generic_index < MAX_GENERIC_VARYING);
+
+   if (generic_index >= MAX_GENERIC_VARYING) {
+      /* just don't return a random/garbage value */
+      generic_index = MAX_GENERIC_VARYING - 1;
+   }
+
+   if (remap_table[generic_index] == -1) {
+      /* This is a VS output that has no matching PS input.  Find a
+       * free index.
+       */
+      int i, max = 0;
+      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
+         max = MAX2(max, remap_table[i]);
+      }
+      remap_table[generic_index] = max + 1;
+   }
+
+   return remap_table[generic_index];
+}
+
+
+/**
+ * Initialize the shader-neutral fields of svga_compile_key from context
+ * state.  This is basically the texture-related state.
+ */
+void
+svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
+                            struct svga_compile_key *key)
+{
+   unsigned i, idx = 0;
+
+   assert(shader < Elements(svga->curr.num_sampler_views));
+
+   for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
+      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
+      if (view) {
+         assert(svga->curr.sampler[shader][i]);
+         assert(view->texture);
+         assert(view->texture->target < (1 << 4)); /* texture_target:4 */
+
+         key->tex[i].texture_target = view->texture->target;
+
+         /* 1D/2D array textures with one slice are treated as non-arrays
+          * by the SVGA3D device.  Convert the texture type here so that
+          * we emit the right TEX/SAMPLE instruction in the shader.
+          */
+         if (view->texture->array_size == 1) {
+            if (view->texture->target == PIPE_TEXTURE_1D_ARRAY) {
+               key->tex[i].texture_target = PIPE_TEXTURE_1D;
+            }
+            else if (view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
+               key->tex[i].texture_target = PIPE_TEXTURE_2D;
+            }
+         }
+
+         key->tex[i].texture_msaa = view->texture->nr_samples > 1;
+         if (!svga->curr.sampler[shader][i]->normalized_coords) {
+            assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
+            key->tex[i].width_height_idx = idx++;
+            key->tex[i].unnormalized = TRUE;
+            ++key->num_unnormalized_coords;
+         }
+
+         key->tex[i].swizzle_r = view->swizzle_r;
+         key->tex[i].swizzle_g = view->swizzle_g;
+         key->tex[i].swizzle_b = view->swizzle_b;
+         key->tex[i].swizzle_a = view->swizzle_a;
+
+         key->tex[i].return_type = svga_get_texture_datatype(view->format);
+      }
+   }
+   key->num_textures = svga->curr.num_sampler_views[shader];
+}
+
+
+/** Search for a compiled shader variant with the same compile key */
+struct svga_shader_variant *
+svga_search_shader_key(const struct svga_shader *shader,
+                       const struct svga_compile_key *key)
+{
+   struct svga_shader_variant *variant = shader->variants;
+
+   assert(key);
+
+   for ( ; variant; variant = variant->next) {
+      if (svga_compile_keys_equal(key, &variant->key))
+         return variant;
+   }
+   return NULL;
+}
+
+/** Search for a shader with the same token key */
+struct svga_shader *
+svga_search_shader_token_key(struct svga_shader *pshader,
+                             const struct svga_token_key *key)
+{
+   struct svga_shader *shader = pshader;
+
+   assert(key);
+
+   for ( ; shader; shader = shader->next) {
+      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
+         return shader;
+   }
+   return NULL;
+}
+
+/**
+ * Helper function to define a gb shader for non-vgpu10 device
+ */
+static enum pipe_error
+define_gb_shader_vgpu9(struct svga_context *svga,
+                       SVGA3dShaderType type,
+                       struct svga_shader_variant *variant,
+                       unsigned codeLen)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error ret;
+
+   /**
+    * Create gb memory for the shader and upload the shader code.
+    * The kernel module will allocate an id for the shader and issue
+    * the DefineGBShader command.
+    */
+   variant->gb_shader = sws->shader_create(sws, type,
+                                           variant->tokens, codeLen);
+
+   if (!variant->gb_shader)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
+
+   return ret;
+}
+
+/**
+ * Helper function to define a gb shader for vgpu10 device
+ */
+static enum pipe_error
+define_gb_shader_vgpu10(struct svga_context *svga,
+                        SVGA3dShaderType type,
+                        struct svga_shader_variant *variant,
+                        unsigned codeLen)
+{
+   struct svga_winsys_context *swc = svga->swc;
+   enum pipe_error ret;
+
+   /**
+    * Shaders in a VGPU10-enabled device reside in the device COTable.
+    * The SVGA driver will allocate an integer ID for the shader and
+    * issue the DXDefineShader and DXBindShader commands.
+    */
+   variant->id = util_bitmask_add(svga->shader_id_bm);
+   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* Create gb memory for the shader and upload the shader code */
+   variant->gb_shader = swc->shader_create(swc,
+                                           variant->id, type,
+                                           variant->tokens, codeLen);
+
+   if (!variant->gb_shader) {
+      /* Free the shader ID */
+      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
+      goto fail_no_allocation;
+   }
+
+   /**
+    * Since we don't want to do any flush within state emission to avoid
+    * partial state in a command buffer, it's important to make sure that
+    * there is enough room to send both the DXDefineShader & DXBindShader
+    * commands in the same command buffer. So let's send both
+    * commands in one command reservation. If it fails, we'll undo
+    * the shader creation and return an error.
+    */
+   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
+                                           variant->id, type, codeLen);
+
+   if (ret != PIPE_OK)
+      goto fail;
+
+   return PIPE_OK;
+
+fail:
+   swc->shader_destroy(swc, variant->gb_shader);
+   variant->gb_shader = NULL;
+
+fail_no_allocation:
+   util_bitmask_clear(svga->shader_id_bm, variant->id);
+   variant->id = UTIL_BITMASK_INVALID_INDEX;
+
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
 
 /**
  * Issue the SVGA3D commands to define a new shader.
- * \param result  contains the shader tokens, etc.  The result->id field will
- *                be set here.
+ * \param variant  contains the shader tokens, etc.  The variant->id field
+ *                 will be set here.
  */
 enum pipe_error
 svga_define_shader(struct svga_context *svga,
@@ -42,27 +346,17 @@ svga_define_shader(struct svga_context *svga,
                    struct svga_shader_variant *variant)
 {
    unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
+   enum pipe_error ret;
 
-   if (svga_have_gb_objects(svga)) {
-      struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
-      enum pipe_error ret;
-
-      variant->gb_shader = sws->shader_create(sws, type,
-                                              variant->tokens, codeLen);
-      if (!variant->gb_shader)
-         return PIPE_ERROR_OUT_OF_MEMORY;
-
-      ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
-      if (ret != PIPE_OK) {
-         sws->shader_destroy(sws, variant->gb_shader);
-         variant->gb_shader = NULL;
-      }
+   variant->id = UTIL_BITMASK_INVALID_INDEX;
 
-      return ret;
+   if (svga_have_gb_objects(svga)) {
+      if (svga_have_vgpu10(svga))
+         return define_gb_shader_vgpu10(svga, type, variant, codeLen);
+      else
+         return define_gb_shader_vgpu9(svga, type, variant, codeLen);
    }
    else {
-      enum pipe_error ret;
-
       /* Allocate an integer ID for the shader */
       variant->id = util_bitmask_add(svga->shader_id_bm);
       if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
@@ -80,14 +374,45 @@ svga_define_shader(struct svga_context *svga,
          assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
          util_bitmask_clear(svga->shader_id_bm, variant->id);
          variant->id = UTIL_BITMASK_INVALID_INDEX;
-         return ret;
       }
    }
 
-   return PIPE_OK;
+   return ret;
 }
 
 
+/**
+ * Issue the SVGA3D commands to set/bind a shader.
+ * \param variant  the shader variant to bind, or NULL to unbind.
+ */
+enum pipe_error
+svga_set_shader(struct svga_context *svga,
+                SVGA3dShaderType type,
+                struct svga_shader_variant *variant)
+{
+   enum pipe_error ret;
+   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;
+
+   assert(type == SVGA3D_SHADERTYPE_VS ||
+          type == SVGA3D_SHADERTYPE_GS ||
+          type == SVGA3D_SHADERTYPE_PS);
+
+   if (svga_have_gb_objects(svga)) {
+      struct svga_winsys_gb_shader *gbshader =
+         variant ? variant->gb_shader : NULL;
+
+      if (svga_have_vgpu10(svga))
+         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
+      else
+         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
+   }
+   else {
+      ret = SVGA3D_SetShader(svga->swc, type, id);
+   }
+
+   return ret;
+}
+
 
 enum pipe_error
 svga_destroy_shader_variant(struct svga_context *svga,
@@ -96,32 +421,92 @@ svga_destroy_shader_variant(struct svga_context *svga,
 {
    enum pipe_error ret = PIPE_OK;
 
-   if (svga_have_gb_objects(svga)) {
-      struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
-
-      sws->shader_destroy(sws, variant->gb_shader);
+   if (svga_have_gb_objects(svga) && variant->gb_shader) {
+      if (svga_have_vgpu10(svga)) {
+         struct svga_winsys_context *swc = svga->swc;
+         swc->shader_destroy(swc, variant->gb_shader);
+         ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
+         if (ret != PIPE_OK) {
+            /* flush and try again */
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
+         }
+         util_bitmask_clear(svga->shader_id_bm, variant->id);
+      }
+      else {
+         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+         sws->shader_destroy(sws, variant->gb_shader);
+      }
       variant->gb_shader = NULL;
-      goto end;
    }
-
-   /* first try */
-   if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
-      ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
-
-      if (ret != PIPE_OK) {
-         /* flush and try again */
-         svga_context_flush(svga, NULL);
-
+   else {
+      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
          ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
-         assert(ret == PIPE_OK);
+         if (ret != PIPE_OK) {
+            /* flush and try again */
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
+            assert(ret == PIPE_OK);
+         }
+         util_bitmask_clear(svga->shader_id_bm, variant->id);
       }
-
-      util_bitmask_clear(svga->shader_id_bm, variant->id);
    }
 
-end:
    FREE((unsigned *)variant->tokens);
    FREE(variant);
 
    return ret;
 }
+
+/*
+ * Rebind shaders.
+ * Called at the beginning of every new command buffer to ensure that
+ * shaders are properly paged in.  Instead of sending the SetShader
+ * command, this function sends a private allocation command to
+ * page in a shader. This avoids emitting redundant state to the device
+ * just to page in a resource.
+ */
+enum pipe_error
+svga_rebind_shaders(struct svga_context *svga)
+{
+   struct svga_winsys_context *swc = svga->swc;
+   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+
+   /**
+    * If the underlying winsys layer does not need resource rebinding,
+    * just clear the rebind flags and return.
+    */
+   if (swc->resource_rebind == NULL) {
+      svga->rebind.flags.vs = 0;
+      svga->rebind.flags.gs = 0;
+      svga->rebind.flags.fs = 0;
+
+      return PIPE_OK;
+   }
+
+   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.vs = 0;
+
+   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.gs = 0;
+
+   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.fs = 0;
+
+   return PIPE_OK;
+}
index 5102159..b0800c1 100644 (file)
 #define SVGA_SHADER_H
 
 #include "svga3d_reg.h"
+#include "svga_context.h"
+#include "svga_streamout.h"
 
-struct svga_shader_variant;
+
+/**
+ * We use a 64-bit mask to keep track of the generic indexes, so the
+ * semantic index of a TGSI GENERIC[i] register can be at most
+ * MAX_GENERIC_VARYING - 1.
+ */
+#define MAX_GENERIC_VARYING 64
+
+
+struct svga_context;
+
+
+struct svga_compile_key
+{
+   /* vertex shader only */
+   struct {
+      uint64_t fs_generic_inputs;
+      unsigned passthrough:1;
+      unsigned need_prescale:1;
+      unsigned undo_viewport:1;
+      unsigned allow_psiz:1;
+      /** The following are all 32-bit bitmasks (per VS input) */
+      unsigned adjust_attrib_range;
+      unsigned attrib_is_pure_int;
+      unsigned adjust_attrib_w_1;
+      unsigned adjust_attrib_itof;
+      unsigned adjust_attrib_utof;
+      unsigned attrib_is_bgra;
+      unsigned attrib_puint_to_snorm;
+      unsigned attrib_puint_to_uscaled;
+      unsigned attrib_puint_to_sscaled;
+   } vs;
+
+   /* geometry shader only */
+   struct {
+      uint64_t vs_generic_outputs;
+      unsigned need_prescale:1;
+      unsigned writes_psize:1;
+      unsigned wide_point:1;
+   } gs;
+
+   /* fragment shader only */
+   struct {
+      uint64_t vs_generic_outputs;
+      uint64_t gs_generic_outputs;
+      unsigned light_twoside:1;
+      unsigned front_ccw:1;
+      unsigned white_fragments:1;
+      unsigned flatshade:1;
+      unsigned pstipple:1;
+      unsigned alpha_func:4;  /**< SVGA3D_CMP_x */
+      unsigned write_color0_to_n_cbufs:4;
+      unsigned aa_point:1;
+      int aa_point_coord_index;
+      float alpha_ref;
+   } fs;
+
+   /* any shader type */
+   int8_t generic_remap_table[MAX_GENERIC_VARYING];
+   unsigned num_textures:8;
+   unsigned num_unnormalized_coords:8;
+   unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES;
+   unsigned sprite_origin_lower_left:1;
+   unsigned sprite_coord_enable;
+   struct {
+      unsigned compare_mode:1;
+      unsigned compare_func:3;
+      unsigned unnormalized:1;
+      unsigned width_height_idx:5; /**< texture unit */
+      unsigned texture_target:4;   /**< PIPE_TEXTURE_x */
+      unsigned texture_msaa:1;    /**< A multisample texture? */
+      unsigned sprite_texgen:1;
+      unsigned swizzle_r:3;
+      unsigned swizzle_g:3;
+      unsigned swizzle_b:3;
+      unsigned swizzle_a:3;
+      unsigned return_type:3;  /**< TGSI_RETURN_TYPE_x */
+   } tex[PIPE_MAX_SAMPLERS];
+   /* Note: svga_compile_keys_equal() depends on the variable-size
+    * tex[] array being at the end of this structure.
+    */
+};
+
+/* A key for a variant of token string of a shader */
+struct svga_token_key {
+   struct {
+      unsigned sprite_coord_enable:24;
+      unsigned sprite_origin_upper_left:1;
+      unsigned point_pos_stream_out:1;
+      unsigned writes_psize:1;
+      unsigned aa_point:1;
+   } gs;
+};
+
+/**
+ * A single TGSI shader may be compiled into different variants of
+ * SVGA3D shaders depending on the compile key.  Each user shader
+ * will have a linked list of these variants.
+ */
+struct svga_shader_variant
+{
+   const struct svga_shader *shader;
+
+   /** Parameters used to generate this variant */
+   struct svga_compile_key key;
+
+   /* Compiled shader tokens:
+    */
+   const unsigned *tokens;
+   unsigned nr_tokens;
+
+   /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE,
+    * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY.
+    */
+   unsigned id;
+
+   /** Start of extra constants (number of float[4] constants) */
+   unsigned extra_const_start;
+
+   /* GB object buffer containing the bytecode */
+   struct svga_winsys_gb_shader *gb_shader;
+
+   boolean uses_flat_interp;   /**< TRUE if flat interpolation qualifier is
+                                *  applied to any of the varyings.
+                                */
+
+   /** For FS-based polygon stipple */
+   unsigned pstipple_sampler_unit;
+
+   /** Next variant */
+   struct svga_shader_variant *next;
+};
+
+
+struct svga_shader
+{
+   const struct tgsi_token *tokens;
+   struct svga_token_key token_key;     /* token key for the token string */
+   struct tgsi_shader_info info;
+
+   /* List of shaders with tokens derived from the same token string */
+   struct svga_shader *next;
+   struct svga_shader *parent;   /* shader with the original token string */
+
+   struct svga_stream_output *stream_output;
+
+   /** Head of linked list of compiled variants */
+   struct svga_shader_variant *variants;
+
+   unsigned id;  /**< for debugging only */
+};
+
+
+struct svga_fragment_shader
+{
+   struct svga_shader base;
+
+   struct draw_fragment_shader *draw_shader;
+
+   /** Mask of which generic varying variables are read by this shader */
+   uint64_t generic_inputs;
+
+   /** Table mapping original TGSI generic indexes to low integers */
+   int8_t generic_remap_table[MAX_GENERIC_VARYING];
+};
+
+
+struct svga_vertex_shader
+{
+   struct svga_shader base;
+
+   struct draw_vertex_shader *draw_shader;
+
+   /** Mask of which generic varying variables are written by this shader */
+   uint64_t generic_outputs;
+
+   /** Generated geometry shader that goes with this vertex shader */
+   struct svga_geometry_shader *gs;
+};
+
+
+struct svga_geometry_shader
+{
+   struct svga_shader base;
+
+   struct draw_geometry_shader *draw_shader;
+
+   /** Table mapping original TGSI generic indexes to low integers */
+   int8_t generic_remap_table[MAX_GENERIC_VARYING];
+   uint64_t generic_outputs;
+
+   unsigned aa_point_coord_index; /* generic index for aa point coord */
+
+   unsigned wide_point:1;      /* set if the shader emulates wide point */
+};
+
+
+static inline boolean
+svga_compile_keys_equal(const struct svga_compile_key *a,
+                        const struct svga_compile_key *b)
+{
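+   /* Compare only up to the end of the used tex[] entries; this relies
+    * on the tex[] array being the last member of svga_compile_key.
+    */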
+   unsigned key_size =
+      (const char *) &a->tex[a->num_textures] - (const char *) a;
+
+   return memcmp(a, b, key_size) == 0;
+}
+
+
+uint64_t
+svga_get_generic_inputs_mask(const struct tgsi_shader_info *info);
+
+uint64_t
+svga_get_generic_outputs_mask(const struct tgsi_shader_info *info);
+
+void
+svga_remap_generics(uint64_t generics_mask,
+                    int8_t remap_table[MAX_GENERIC_VARYING]);
+
+int
+svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
+                         int generic_index);
+
+void
+svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
+                            struct svga_compile_key *key);
+
+struct svga_shader_variant *
+svga_search_shader_key(const struct svga_shader *shader,
+                       const struct svga_compile_key *key);
+
+struct svga_shader *
+svga_search_shader_token_key(struct svga_shader *shader,
+                             const struct svga_token_key *key);
 
 enum pipe_error
 svga_define_shader(struct svga_context *svga,
@@ -36,10 +269,17 @@ svga_define_shader(struct svga_context *svga,
                    struct svga_shader_variant *variant);
 
 enum pipe_error
+svga_set_shader(struct svga_context *svga,
+                SVGA3dShaderType type,
+                struct svga_shader_variant *variant);
+
+enum pipe_error
 svga_destroy_shader_variant(struct svga_context *svga,
                             SVGA3dShaderType type,
                             struct svga_shader_variant *variant);
 
+enum pipe_error
+svga_rebind_shaders(struct svga_context *svga);
 
 /**
  * Check if a shader's bytecode exceeds the device limits.
@@ -62,4 +302,40 @@ svga_shader_too_large(const struct svga_context *svga,
 }
 
 
+/**
+ * Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_*
+ */
+static inline SVGA3dShaderType
+svga_shader_type(unsigned shader)
+{
+   switch (shader) {
+   case PIPE_SHADER_VERTEX:
+      return SVGA3D_SHADERTYPE_VS;
+   case PIPE_SHADER_GEOMETRY:
+      return SVGA3D_SHADERTYPE_GS;
+   case PIPE_SHADER_FRAGMENT:
+      return SVGA3D_SHADERTYPE_PS;
+   default:
+      assert(!"Invalid shader type");
+      return SVGA3D_SHADERTYPE_VS;
+   }
+}
+
+
+/** Does the current VS have stream output? */
+static inline boolean
+svga_have_vs_streamout(const struct svga_context *svga)
+{
+   return svga->curr.vs != NULL && svga->curr.vs->base.stream_output != NULL;
+}
+
+
+/** Does the current GS have stream output? */
+static inline boolean
+svga_have_gs_streamout(const struct svga_context *svga)
+{
+   return svga->curr.gs != NULL && svga->curr.gs->base.stream_output != NULL;
+}
+
+
 #endif /* SVGA_SHADER_H */
index b0bc867..37d16dc 100644 (file)
@@ -23,6 +23,7 @@
  *
  **********************************************************/
 
+#include "util/u_bitmask.h"
 #include "util/u_debug.h"
 #include "pipe/p_defines.h"
 #include "util/u_memory.h"
@@ -63,14 +64,19 @@ static const struct svga_tracked_state *hw_clear_state[] =
  */
 static const struct svga_tracked_state *hw_draw_state[] =
 {
+   &svga_need_tgsi_transform,
    &svga_hw_fs,
+   &svga_hw_gs,
    &svga_hw_vs,
    &svga_hw_rss,
-   &svga_hw_tss,
-   &svga_hw_tss_binding,
+   &svga_hw_sampler,           /* VGPU10 */
+   &svga_hw_sampler_bindings,  /* VGPU10 */
+   &svga_hw_tss,               /* pre-VGPU10 */
+   &svga_hw_tss_binding,       /* pre-VGPU10 */
    &svga_hw_clip_planes,
    &svga_hw_vdecl,
    &svga_hw_fs_constants,
+   &svga_hw_gs_constants,
    &svga_hw_vs_constants,
    NULL
 };
@@ -255,23 +261,55 @@ do {                                            \
  */
 enum pipe_error svga_emit_initial_state( struct svga_context *svga )
 {
-   SVGA3dRenderState *rs;
-   unsigned count = 0;
-   const unsigned COUNT = 2;
-   enum pipe_error ret;
-
-   ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT );
-   if (ret != PIPE_OK)
+   if (svga_have_vgpu10(svga)) {
+      SVGA3dRasterizerStateId id = util_bitmask_add(svga->rast_object_id_bm);
+      enum pipe_error ret;
+
+      /* XXX preliminary code */
+      ret = SVGA3D_vgpu10_DefineRasterizerState(svga->swc,
+                                             id,
+                                             SVGA3D_FILLMODE_FILL,
+                                             SVGA3D_CULL_NONE,
+                                             1, /* frontCounterClockwise */
+                                             0, /* depthBias */
+                                             0.0f, /* depthBiasClamp */
+                                             0.0f, /* slopeScaledDepthBiasClamp */
+                                             0, /* depthClampEnable */
+                                             0, /* scissorEnable */
+                                             0, /* multisampleEnable */
+                                             0, /* aalineEnable */
+                                             1.0f, /* lineWidth */
+                                             0, /* lineStippleEnable */
+                                             0, /* lineStippleFactor */
+                                             0, /* lineStipplePattern */
+                                             0); /* provokingVertexLast */
+
+
+      assert(ret == PIPE_OK);
+
+      ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, id);
       return ret;
+   }
+   else {
+      SVGA3dRenderState *rs;
+      unsigned count = 0;
+      const unsigned COUNT = 2;
+      enum pipe_error ret;
 
-   /* Always use D3D style coordinate space as this is the only one
-    * which is implemented on all backends.
-    */
-   EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, SVGA3D_COORDINATE_LEFTHANDED );
-   EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW );
-   
-   assert( COUNT == count );
-   SVGA_FIFOCommitAll( svga->swc );
+      ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT );
+      if (ret != PIPE_OK)
+         return ret;
 
-   return PIPE_OK;
+      /* Always use D3D style coordinate space as this is the only one
+       * which is implemented on all backends.
+       */
+      EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE,
+              SVGA3D_COORDINATE_LEFTHANDED );
+      EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW );
+
+      assert( COUNT == count );
+      SVGA_FIFOCommitAll( svga->swc );
+
+      return PIPE_OK;
+   }
 }
index 3325626..04b20e1 100644 (file)
@@ -57,14 +57,20 @@ extern struct svga_tracked_state svga_hw_framebuffer;
 
 /* HW_DRAW
  */
+extern struct svga_tracked_state svga_need_tgsi_transform;
 extern struct svga_tracked_state svga_hw_vs;
 extern struct svga_tracked_state svga_hw_fs;
+extern struct svga_tracked_state svga_hw_gs;
 extern struct svga_tracked_state svga_hw_rss;
+extern struct svga_tracked_state svga_hw_pstipple;
+extern struct svga_tracked_state svga_hw_sampler;
+extern struct svga_tracked_state svga_hw_sampler_bindings;
 extern struct svga_tracked_state svga_hw_tss;
 extern struct svga_tracked_state svga_hw_tss_binding;
 extern struct svga_tracked_state svga_hw_clip_planes;
 extern struct svga_tracked_state svga_hw_vdecl;
 extern struct svga_tracked_state svga_hw_fs_constants;
+extern struct svga_tracked_state svga_hw_gs_constants;
 extern struct svga_tracked_state svga_hw_vs_constants;
 
 /* SWTNL_DRAW
@@ -93,10 +99,14 @@ enum pipe_error svga_emit_initial_state( struct svga_context *svga );
 
 enum pipe_error svga_reemit_framebuffer_bindings( struct svga_context *svga );
 
+enum pipe_error svga_rebind_framebuffer_bindings( struct svga_context *svga );
+
 enum pipe_error svga_reemit_tss_bindings( struct svga_context *svga );
 
 enum pipe_error svga_reemit_vs_bindings(struct svga_context *svga);
 
 enum pipe_error svga_reemit_fs_bindings(struct svga_context *svga);
 
+enum pipe_error svga_reemit_gs_bindings(struct svga_context *svga);
+
 #endif
index 1e1fbb0..b6d6de0 100644 (file)
@@ -1,3 +1,4 @@
+
 /**********************************************************
  * Copyright 2008-2009 VMware, Inc.  All rights reserved.
  *
  *
  **********************************************************/
 
+#include "util/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "pipe/p_defines.h"
+#include "util/u_upload_mgr.h"
 
 #include "svga_screen.h"
 #include "svga_context.h"
@@ -34,6 +37,7 @@
 #include "svga_tgsi.h"
 #include "svga_debug.h"
 #include "svga_resource_buffer.h"
+#include "svga_shader.h"
 
 #include "svga_hw_reg.h"
 
 /** Guest-backed surface constant buffers must be this size */
 #define GB_CONSTBUF_SIZE (SVGA3D_CONSTREG_MAX)
 
+
 /**
- * Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_*
+ * Emit any extra shader-type-independent shader constants into the buffer
+ * pointed to by 'dest'.
+ * \return number of float[4] constants put into the 'dest' buffer
  */
 static unsigned
-svga_shader_type(unsigned shader)
+svga_get_extra_constants_common(struct svga_context *svga,
+                                const struct svga_shader_variant *variant,
+                                unsigned shader, float *dest)
 {
-   switch (shader) {
-   case PIPE_SHADER_VERTEX:
-      return SVGA3D_SHADERTYPE_VS;
-   case PIPE_SHADER_FRAGMENT:
-      return SVGA3D_SHADERTYPE_PS;
-   default:
-      assert(!"Unexpected shader type");
-      return SVGA3D_SHADERTYPE_VS;
+   uint32_t *dest_u = (uint32_t *) dest;  /* uint version of dest */
+   unsigned i;
+   unsigned count = 0;
+
+   for (i = 0; i < variant->key.num_textures; i++) {
+      struct pipe_sampler_view *sv = svga->curr.sampler_views[shader][i];
+      if (sv) {
+         struct pipe_resource *tex = sv->texture;
+         /* Scaling factors needed for handling unnormalized texture coordinates
+          * for texture rectangles.
+          */
+         if (variant->key.tex[i].unnormalized) {
+            /* debug/sanity check */
+            assert(variant->key.tex[i].width_height_idx == count);
+
+            *dest++ = 1.0 / (float)tex->width0;
+            *dest++ = 1.0 / (float)tex->height0;
+            *dest++ = 1.0;
+            *dest++ = 1.0;
+
+            count++;
+         }
+
+         /* Store the sizes for texture buffers. */
+         if (tex->target == PIPE_BUFFER) {
+            unsigned bytes_per_element = util_format_get_blocksize(sv->format);
+            *dest_u++ = tex->width0 / bytes_per_element;
+            *dest_u++ = 1;
+            *dest_u++ = 1;
+            *dest_u++ = 1;
+
+            count++;
+         }
+      }
    }
+
+   return count;
 }
 
 
 /**
  * Emit any extra fragment shader constants into the buffer pointed
  * to by 'dest'.
- * In particular, these would be the scaling factors needed for handling
- * unnormalized texture coordinates for texture rectangles.
  * \return number of float[4] constants put into the dest buffer
  */
 static unsigned
 svga_get_extra_fs_constants(struct svga_context *svga, float *dest)
 {
    const struct svga_shader_variant *variant = svga->state.hw_draw.fs;
-   const struct svga_fs_compile_key *key = &variant->key.fkey;
    unsigned count = 0;
 
-   /* SVGA_NEW_VS_VARIANT
-    */
-   if (key->num_unnormalized_coords) {
-      unsigned i;
+   count += svga_get_extra_constants_common(svga, variant,
+                                            PIPE_SHADER_FRAGMENT, dest);
 
-      for (i = 0; i < key->num_textures; i++) {
-         if (key->tex[i].unnormalized) {
-            struct pipe_resource *tex = svga->curr.sampler_views[i]->texture;
+   assert(count <= MAX_EXTRA_CONSTS);
 
-            /* debug/sanity check */
-            assert(key->tex[i].width_height_idx == count);
+   return count;
+}
 
-            *dest++ = 1.0 / (float)tex->width0;
-            *dest++ = 1.0 / (float)tex->height0;
-            *dest++ = 1.0;
-            *dest++ = 1.0;
+/**
+ * Emit extra constants needed for prescale computation into the
+ * buffer pointed to by '*dest'. The updated buffer pointer
+ * will be returned in 'dest'.
+ */
+static unsigned
+svga_get_prescale_constants(struct svga_context *svga, float **dest)
+{
+   memcpy(*dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float));
+   *dest += 4;
 
-            count++;
-         }
-      }
-   }
+   memcpy(*dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float));
+   *dest += 4;
 
-   assert(count <= MAX_EXTRA_CONSTS);
+   return 2;
+}
 
-   return count;
+/**
+ * Emit extra constants needed for point sprite emulation.
+ */
+static unsigned
+svga_get_pt_sprite_constants(struct svga_context *svga, float **dest)
+{
+   struct svga_screen *screen = svga_screen(svga->pipe.screen);
+   float *dst = *dest;
+
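+   /* One float[4] constant: half the inverse viewport scale in x and y,
+    * the current point size, and the device's max point size.
+    */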
+   dst[0] = 1.0 / (svga->curr.viewport.scale[0] * 2);
+   dst[1] = 1.0 / (svga->curr.viewport.scale[1] * 2);
+   dst[2] = svga->curr.rast->pointsize;
+   dst[3] = screen->maxPointSize;
+   *dest = *dest + 4;
+   return 1;
 }
 
+/**
+ * Emit user-defined clip plane coefficients into the buffer pointed to
+ * by '*dest'. The updated buffer pointer will be returned in 'dest'.
+ */
+static unsigned
+svga_get_clip_plane_constants(struct svga_context *svga,
+                              const struct svga_shader_variant *variant,
+                              float **dest)
+{
+   unsigned count = 0;
+
+   /* SVGA_NEW_CLIP */
+   if (svga_have_vgpu10(svga)) {
+      /* append user-defined clip plane coefficients onto constant buffer */
+      unsigned clip_planes = variant->key.clip_plane_enable;
+      while (clip_planes) {
+         int i = u_bit_scan(&clip_planes);
+         COPY_4V(*dest, svga->curr.clip.ucp[i]);
+         *dest += 4;
+         count += 1;
+      }
+   }
+   return count;
+}
 
 /**
  * Emit any extra vertex shader constants into the buffer pointed
@@ -124,26 +198,71 @@ static unsigned
 svga_get_extra_vs_constants(struct svga_context *svga, float *dest)
 {
    const struct svga_shader_variant *variant = svga->state.hw_draw.vs;
-   const struct svga_vs_compile_key *key = &variant->key.vkey;
    unsigned count = 0;
 
    /* SVGA_NEW_VS_VARIANT
     */
-   if (key->need_prescale) {
-      memcpy(dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float));
-      dest += 4;
+   if (variant->key.vs.need_prescale) {
+      count += svga_get_prescale_constants(svga, &dest);
+   }
 
-      memcpy(dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float));
+   if (variant->key.vs.undo_viewport) {
+      /* Used to convert window coords back to NDC coords */
+      dest[0] = 1.0f / svga->curr.viewport.scale[0];
+      dest[1] = 1.0f / svga->curr.viewport.scale[1];
+      dest[2] = -svga->curr.viewport.translate[0];
+      dest[3] = -svga->curr.viewport.translate[1];
       dest += 4;
-
-      count = 2;
+      count += 1;
    }
 
+   /* SVGA_NEW_CLIP */
+   count += svga_get_clip_plane_constants(svga, variant, &dest);
+
+   /* common constants */
+   count += svga_get_extra_constants_common(svga, variant,
+                                            PIPE_SHADER_VERTEX, dest);
+
    assert(count <= MAX_EXTRA_CONSTS);
 
    return count;
 }
 
+/**
+ * Emit any extra geometry shader constants into the buffer pointed
+ * to by 'dest'.
+ */
+static unsigned
+svga_get_extra_gs_constants(struct svga_context *svga, float *dest)
+{
+   const struct svga_shader_variant *variant = svga->state.hw_draw.gs;
+   unsigned count = 0;
+
+   /* SVGA_NEW_GS_VARIANT
+    */
+
+   /* Constants for point sprites.
+    * These are used in the transformed GS that supports point sprites.
+    * They need to be added before the prescale constants.
+    */
+   if (variant->key.gs.wide_point) {
+      count += svga_get_pt_sprite_constants(svga, &dest);
+   }
+
+   if (variant->key.gs.need_prescale) {
+      count += svga_get_prescale_constants(svga, &dest);
+   }
+
+   /* SVGA_NEW_CLIP */
+   count += svga_get_clip_plane_constants(svga, variant, &dest);
+
+   /* common constants */
+   count += svga_get_extra_constants_common(svga, variant,
+                                            PIPE_SHADER_GEOMETRY, dest);
+
+   assert(count <= MAX_EXTRA_CONSTS);
+   return count;
+}
 
 /**
  * Check and emit one shader constant register.
@@ -159,6 +278,7 @@ emit_const(struct svga_context *svga, unsigned shader, unsigned i,
 
    assert(shader < PIPE_SHADER_TYPES);
    assert(i < SVGA3D_CONSTREG_MAX);
+   assert(!svga_have_vgpu10(svga));
 
    if (memcmp(svga->state.hw_draw.cb[shader][i], value,
               4 * sizeof(float)) != 0) {
@@ -202,6 +322,10 @@ emit_const_range(struct svga_context *svga,
    unsigned i, j;
    enum pipe_error ret;
 
+   assert(shader == PIPE_SHADER_VERTEX ||
+          shader == PIPE_SHADER_FRAGMENT);
+   assert(!svga_have_vgpu10(svga));
+
 #ifdef DEBUG
    if (offset + count > SVGA3D_CONSTREG_MAX) {
       debug_printf("svga: too many constants (offset %u + count %u = %u (max = %u))\n",
@@ -307,10 +431,12 @@ emit_const_range(struct svga_context *svga,
 
 /**
  * Emit all the constants in a constant buffer for a shader stage.
+ * On VGPU10, emit_consts_vgpu10 is used instead.
  */
 static enum pipe_error
-emit_consts(struct svga_context *svga, unsigned shader)
+emit_consts_vgpu9(struct svga_context *svga, unsigned shader)
 {
+   const struct pipe_constant_buffer *cbuf;
    struct svga_screen *ss = svga_screen(svga->pipe.screen);
    struct pipe_transfer *transfer = NULL;
    unsigned count;
@@ -320,53 +446,284 @@ emit_consts(struct svga_context *svga, unsigned shader)
    const unsigned offset = 0;
 
    assert(shader < PIPE_SHADER_TYPES);
+   assert(!svga_have_vgpu10(svga));
+   /* Only one constant buffer per shader is supported before VGPU10.
+    * This is only an approximate check against that.
+    */
+   assert(svga->curr.constbufs[shader][1].buffer == NULL);
 
-   if (svga->curr.cbufs[shader].buffer == NULL)
-      goto done;
+   cbuf = &svga->curr.constbufs[shader][0];
 
-   data = (const float (*)[4])pipe_buffer_map(&svga->pipe,
-                                              svga->curr.cbufs[shader].buffer,
-                                              PIPE_TRANSFER_READ,
-                                             &transfer);
-   if (data == NULL) {
-      ret = PIPE_ERROR_OUT_OF_MEMORY;
-      goto done;
-   }
+   if (svga->curr.constbufs[shader][0].buffer) {
+      /* emit user-provided constants */
+      data = (const float (*)[4])
+         pipe_buffer_map(&svga->pipe, svga->curr.constbufs[shader][0].buffer,
+                         PIPE_TRANSFER_READ, &transfer);
+      if (data == NULL) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
 
-   /* sanity check */
-   assert(svga->curr.cbufs[shader].buffer->width0 >=
-          svga->curr.cbufs[shader].buffer_size);
+      /* sanity check */
+      assert(cbuf->buffer->width0 >=
+             cbuf->buffer_size);
 
-   /* Use/apply the constant buffer size and offsets here */
-   count = svga->curr.cbufs[shader].buffer_size / (4 * sizeof(float));
-   data += svga->curr.cbufs[shader].buffer_offset / (4 * sizeof(float));
+      /* Use/apply the constant buffer size and offsets here */
+      count = cbuf->buffer_size / (4 * sizeof(float));
+      data += cbuf->buffer_offset / (4 * sizeof(float));
+
+      if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
+         ret = emit_const_range( svga, shader, offset, count, data );
+      }
+      else {
+         for (i = 0; i < count; i++) {
+            ret = emit_const( svga, shader, offset + i, data[i] );
+            if (ret != PIPE_OK) {
+               break;
+            }
+         }
+      }
+
+      pipe_buffer_unmap(&svga->pipe, transfer);
 
-   if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
-      ret = emit_const_range( svga, shader, offset, count, data );
       if (ret != PIPE_OK) {
-         goto done;
+         return ret;
       }
-   } else {
-      for (i = 0; i < count; i++) {
-         ret = emit_const( svga, shader, offset + i, data[i] );
-         if (ret != PIPE_OK) {
-            goto done;
+   }
+
+   /* emit extra shader constants */
+   {
+      const struct svga_shader_variant *variant = NULL;
+      unsigned offset;
+      float extras[MAX_EXTRA_CONSTS][4];
+      unsigned count, i;
+
+      switch (shader) {
+      case PIPE_SHADER_VERTEX:
+         variant = svga->state.hw_draw.vs;
+         count = svga_get_extra_vs_constants(svga, (float *) extras);
+         break;
+      case PIPE_SHADER_FRAGMENT:
+         variant = svga->state.hw_draw.fs;
+         count = svga_get_extra_fs_constants(svga, (float *) extras);
+         break;
+      default:
+         assert(!"Unexpected shader type");
+         count = 0;
+      }
+
+      assert(variant);
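+      /* The extra constants are placed right after the shader's own
+       * constants (file_max gives the highest declared constant index).
+       */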
+      offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      assert(count <= Elements(extras));
+
+      if (count > 0) {
+         if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
+            ret = emit_const_range(svga, shader, offset, count,
+                                   (const float (*) [4])extras);
+         }
+         else {
+            for (i = 0; i < count; i++) {
+               ret = emit_const(svga, shader, offset + i, extras[i]);
+               if (ret != PIPE_OK)
+                  return ret;
+            }
          }
       }
    }
 
-done:
-   if (data)
-      pipe_buffer_unmap(&svga->pipe, transfer);
+   return ret;
+}
+
+
+
+static enum pipe_error
+emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
+{
+   const struct pipe_constant_buffer *cbuf;
+   struct pipe_resource *dst_buffer = NULL;
+   enum pipe_error ret = PIPE_OK;
+   struct pipe_transfer *src_transfer;
+   struct svga_winsys_surface *dst_handle;
+   float extras[MAX_EXTRA_CONSTS][4];
+   unsigned extra_count, extra_size, extra_offset;
+   unsigned new_buf_size;
+   void *src_map = NULL, *dst_map;
+   unsigned offset;
+   const struct svga_shader_variant *variant;
+
+   assert(shader == PIPE_SHADER_VERTEX ||
+          shader == PIPE_SHADER_GEOMETRY ||
+          shader == PIPE_SHADER_FRAGMENT);
+
+   cbuf = &svga->curr.constbufs[shader][0];
+
+   switch (shader) {
+   case PIPE_SHADER_VERTEX:
+      variant = svga->state.hw_draw.vs;
+      extra_count = svga_get_extra_vs_constants(svga, (float *) extras);
+      break;
+   case PIPE_SHADER_FRAGMENT:
+      variant = svga->state.hw_draw.fs;
+      extra_count = svga_get_extra_fs_constants(svga, (float *) extras);
+      break;
+   case PIPE_SHADER_GEOMETRY:
+      variant = svga->state.hw_draw.gs;
+      extra_count = svga_get_extra_gs_constants(svga, (float *) extras);
+      break;
+   default:
+      assert(!"Unexpected shader type");
+      /* Don't return an error code since we don't want to keep re-trying
+       * this function and getting stuck in an infinite loop.
+       */
+      return PIPE_OK;
+   }
+
+   assert(variant);
+
+   /* Compute extra constants size and offset in bytes */
+   extra_size = extra_count * 4 * sizeof(float);
+   extra_offset = 4 * sizeof(float) * variant->extra_const_start;
+
+   if (cbuf->buffer_size + extra_size == 0)
+      return PIPE_OK;  /* nothing to do */
+
+   /* Typically, the cbuf->buffer here is a user-space buffer so mapping
+    * it is really cheap.  If we ever get real HW buffers for constants
+    * we should avoid mapping and instead use a ResourceCopy command.
+    */
+   if (cbuf->buffer_size > 0) {
+      src_map = pipe_buffer_map_range(&svga->pipe, cbuf->buffer,
+                                      cbuf->buffer_offset, cbuf->buffer_size,
+                                      PIPE_TRANSFER_READ, &src_transfer);
+      assert(src_map);
+      if (!src_map) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+   }
+
+   /* The new/dest buffer's size must be large enough to hold the original,
+    * user-specified constants, plus the extra constants.
+    * The size of the original constant buffer _should_ agree with what the
+    * shader is expecting, but it might not (it's not enforced anywhere by
+    * gallium).
+    */
+   new_buf_size = MAX2(cbuf->buffer_size, extra_offset) + extra_size;
+
+   /* According to the DX10 spec, the constant buffer size must be
+    * in multiples of 16.
+    */
+   new_buf_size = align(new_buf_size, 16);
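+   /* For example (hypothetical sizes): a 100-byte user buffer with
+    * extra_offset = 112 and extra_size = 32 gives
+    * new_buf_size = align(MAX2(100, 112) + 32, 16) = 144 bytes.
+    */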
+
+   u_upload_alloc(svga->const0_upload, 0, new_buf_size, &offset,
+                  &dst_buffer, &dst_map);
+   if (!dst_map) {
+      if (src_map)
+         pipe_buffer_unmap(&svga->pipe, src_transfer);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   if (src_map) {
+      memcpy(dst_map, src_map, cbuf->buffer_size);
+      pipe_buffer_unmap(&svga->pipe, src_transfer);
+   }
+
+   if (extra_size) {
+      assert(extra_offset + extra_size <= new_buf_size);
+      memcpy((char *) dst_map + extra_offset, extras, extra_size);
+   }
+   u_upload_unmap(svga->const0_upload);
+
+   /* Issue the SetSingleConstantBuffer command */
+   dst_handle = svga_buffer_handle(svga, dst_buffer);
+   if (!dst_handle) {
+      pipe_resource_reference(&dst_buffer, NULL);
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   assert(new_buf_size % 16 == 0);
+   ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
+                                               0, /* index */
+                                               svga_shader_type(shader),
+                                               dst_handle,
+                                               offset,
+                                               new_buf_size);
+
+   if (ret != PIPE_OK) {
+      pipe_resource_reference(&dst_buffer, NULL);
+      return ret;
+   }
+
+   /* Save this const buffer until it's replaced in the future.
+    * Otherwise, all references to the buffer will go away after the
+    * command buffer is submitted, it'll get recycled, and we'll end up
+    * with incorrect constant buffer bindings.
+    */
+   pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], dst_buffer);
+
+   svga->state.hw_draw.default_constbuf_size[shader] = new_buf_size;
+
+   pipe_resource_reference(&dst_buffer, NULL);
 
    return ret;
 }
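
The sizing rule above is worth spelling out: the destination buffer must cover both the user constants and the extra constants at their register offset, then round up to a 16-byte multiple per the DX10 rule. A small worked example with hypothetical counts, assuming the MAX2() and align() helpers from util/u_math.h:

   /* Hypothetical: 7 user floats, plus 2 extra vec4 constants starting
    * at register 2 (each register is 4 floats = 16 bytes).
    */
   unsigned buffer_size  = 7 * sizeof(float);       /* 28 bytes */
   unsigned extra_offset = 2 * 4 * sizeof(float);   /* 32 bytes */
   unsigned extra_size   = 2 * 4 * sizeof(float);   /* 32 bytes */
   unsigned new_buf_size = MAX2(buffer_size, extra_offset) + extra_size;
   /* MAX2(28, 32) + 32 = 64; align(64, 16) = 64, already a multiple
    * of 16, so the round-up is a no-op in this case.
    */
   new_buf_size = align(new_buf_size, 16);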
 
 
 static enum pipe_error
+emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
+{
+   enum pipe_error ret;
+   unsigned dirty_constbufs;
+   unsigned enabled_constbufs;
+
+   /* Emit 0th constant buffer (with extra constants) */
+   ret = emit_constbuf_vgpu10(svga, shader);
+   if (ret != PIPE_OK) {
+      return ret;
+   }
+
+   enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u;
+
+   /* Emit other constant buffers (UBOs) */
+   dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u;
+
+   while (dirty_constbufs) {
+      unsigned index = u_bit_scan(&dirty_constbufs);
+      unsigned offset = svga->curr.constbufs[shader][index].buffer_offset;
+      unsigned size = svga->curr.constbufs[shader][index].buffer_size;
+      struct svga_buffer *buffer =
+         svga_buffer(svga->curr.constbufs[shader][index].buffer);
+      struct svga_winsys_surface *handle;
+
+      if (buffer) {
+         handle = svga_buffer_handle(svga, &buffer->b.b);
+         enabled_constbufs |= 1 << index;
+      }
+      else {
+         handle = NULL;
+         enabled_constbufs &= ~(1 << index);
+         assert(offset == 0);
+         assert(size == 0);
+      }
+
+      assert(size % 16 == 0);
+      ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
+                                                  index,
+                                                  svga_shader_type(shader),
+                                                  handle,
+                                                  offset,
+                                                  size);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+
+   svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs;
+   svga->state.dirty_constbufs[shader] = 0;
+
+   return ret;
+}
+
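The loop above walks the dirty mask with u_bit_scan() from util/u_math.h, which returns the index of the lowest set bit and clears it from the mask; bit 0 is stripped first (& ~1u) because the 0th buffer, which carries the extra constants, is emitted separately. The iteration pattern in isolation:

   unsigned dirty = 0x16;               /* buffers 1, 2 and 4 dirty */
   while (dirty) {
      int index = u_bit_scan(&dirty);   /* yields 1, then 2, then 4 */
      /* ... emit constant buffer 'index' ... */
   }
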
+static enum pipe_error
 emit_fs_consts(struct svga_context *svga, unsigned dirty)
 {
-   struct svga_screen *ss = svga_screen(svga->pipe.screen);
    const struct svga_shader_variant *variant = svga->state.hw_draw.fs;
    enum pipe_error ret = PIPE_OK;
 
@@ -377,28 +734,11 @@ emit_fs_consts(struct svga_context *svga, unsigned dirty)
 
    /* SVGA_NEW_FS_CONST_BUFFER
     */
-   ret = emit_consts( svga, PIPE_SHADER_FRAGMENT );
-   if (ret != PIPE_OK)
-      return ret;
-
-   /* emit extra shader constants */
-   {
-      unsigned offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
-      float extras[MAX_EXTRA_CONSTS][4];
-      unsigned count, i;
-
-      count = svga_get_extra_fs_constants(svga, (float *) extras);
-
-      if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
-         ret = emit_const_range(svga, PIPE_SHADER_FRAGMENT, offset, count,
-                                (const float (*) [4])extras);
-      } else {
-         for (i = 0; i < count; i++) {
-            ret = emit_const(svga, PIPE_SHADER_FRAGMENT, offset + i, extras[i]);
-            if (ret != PIPE_OK)
-               return ret;
-         }
-      }
+   if (svga_have_vgpu10(svga)) {
+      ret = emit_consts_vgpu10(svga, PIPE_SHADER_FRAGMENT);
+   }
+   else {
+      ret = emit_consts_vgpu9(svga, PIPE_SHADER_FRAGMENT);
    }
 
    return ret;
@@ -419,7 +759,6 @@ struct svga_tracked_state svga_hw_fs_constants =
 static enum pipe_error
 emit_vs_consts(struct svga_context *svga, unsigned dirty)
 {
-   struct svga_screen *ss = svga_screen(svga->pipe.screen);
    const struct svga_shader_variant *variant = svga->state.hw_draw.vs;
    enum pipe_error ret = PIPE_OK;
 
@@ -430,29 +769,11 @@ emit_vs_consts(struct svga_context *svga, unsigned dirty)
 
    /* SVGA_NEW_VS_CONST_BUFFER
     */
-   ret = emit_consts( svga, PIPE_SHADER_VERTEX );
-   if (ret != PIPE_OK)
-      return ret;
-
-   /* emit extra shader constants */
-   {
-      unsigned offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
-      float extras[MAX_EXTRA_CONSTS][4];
-      unsigned count, i;
-
-      count = svga_get_extra_vs_constants(svga, (float *) extras);
-      assert(count <= Elements(extras));
-
-      if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
-         ret = emit_const_range(svga, PIPE_SHADER_VERTEX, offset, count,
-                                (const float (*) [4]) extras);
-      } else {
-         for (i = 0; i < count; i++) {
-            ret = emit_const(svga, PIPE_SHADER_VERTEX, offset + i, extras[i]);
-            if (ret != PIPE_OK)
-               return ret;
-         }
-      }
+   if (svga_have_vgpu10(svga)) {
+      ret = emit_consts_vgpu10(svga, PIPE_SHADER_VERTEX);
+   }
+   else {
+      ret = emit_consts_vgpu9(svga, PIPE_SHADER_VERTEX);
    }
 
    return ret;
@@ -467,3 +788,42 @@ struct svga_tracked_state svga_hw_vs_constants =
     SVGA_NEW_VS_VARIANT),
    emit_vs_consts
 };
+
+
+static enum pipe_error
+emit_gs_consts(struct svga_context *svga, unsigned dirty)
+{
+   const struct svga_shader_variant *variant = svga->state.hw_draw.gs;
+   enum pipe_error ret = PIPE_OK;
+
+   /* SVGA_NEW_GS_VARIANT
+    */
+   if (variant == NULL)
+      return PIPE_OK;
+
+   /* SVGA_NEW_GS_CONST_BUFFER
+    */
+   if (svga_have_vgpu10(svga)) {
+      /* If only the rasterizer state has changed and the current geometry
+       * shader does not emit wide points, then there is no reason to
+       * re-emit the GS constants, so skip it.
+       */
+      if (dirty == SVGA_NEW_RAST && !variant->key.gs.wide_point)
+         return PIPE_OK;
+
+      ret = emit_consts_vgpu10(svga, PIPE_SHADER_GEOMETRY);
+   }
+
+   return ret;
+}
+
+
+struct svga_tracked_state svga_hw_gs_constants =
+{
+   "hw gs params",
+   (SVGA_NEW_GS_CONST_BUFFER |
+    SVGA_NEW_RAST |
+    SVGA_NEW_GS_VARIANT),
+   emit_gs_consts
+};
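
Each svga_tracked_state above bundles a debug name, a mask of SVGA_NEW_* dirty flags, and an emit callback; the driver's state-update machinery invokes the callback whenever one of the listed flags is set. A hedged sketch of that dispatch shape (illustrative only, not the driver's actual scheduler):

   static enum pipe_error
   update_tracked(struct svga_context *svga,
                  struct svga_tracked_state *const *atoms,
                  unsigned count, unsigned dirty)
   {
      unsigned i;
      for (i = 0; i < count; i++) {
         if (dirty & atoms[i]->dirty) {           /* any listed flag set? */
            enum pipe_error ret = atoms[i]->update(svga, dirty);
            if (ret != PIPE_OK)
               return ret;
         }
      }
      return PIPE_OK;
   }
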
index 1c174da..9abacc9 100644 (file)
 #include "util/u_inlines.h"
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
+#include "util/u_format.h"
 
 #include "svga_context.h"
 #include "svga_state.h"
 #include "svga_cmd.h"
 #include "svga_debug.h"
 #include "svga_screen.h"
+#include "svga_surface.h"
 
 
 /*
 #define MAX_RT_PER_BATCH 8
 
 
-/***********************************************************************
- * Hardware state update
- */
-
 
 static enum pipe_error
-emit_framebuffer( struct svga_context *svga,
-                  unsigned dirty )
+emit_fb_vgpu9(struct svga_context *svga)
 {
    struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
    const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
    struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
-   boolean reemit = svga->rebind.rendertargets;
+   boolean reemit = svga->rebind.flags.rendertargets;
    unsigned i;
    enum pipe_error ret;
 
+   assert(!svga_have_vgpu10(svga));
+
    /*
     * We need to reemit non-null surface bindings, even when they are not
     * dirty, to ensure that the resources are paged in.
     */
 
    for (i = 0; i < svgascreen->max_color_buffers; i++) {
-      if (curr->cbufs[i] != hw->cbufs[i] ||
-          (reemit && hw->cbufs[i])) {
+      if ((curr->cbufs[i] != hw->cbufs[i]) || (reemit && hw->cbufs[i])) {
          if (svga->curr.nr_fbs++ > MAX_RT_PER_BATCH)
             return PIPE_ERROR_OUT_OF_MEMORY;
 
@@ -82,14 +80,13 @@ emit_framebuffer( struct svga_context *svga,
       }
    }
 
-   if (curr->zsbuf != hw->zsbuf ||
-       (reemit && hw->zsbuf)) {
+   if ((curr->zsbuf != hw->zsbuf) || (reemit && hw->zsbuf)) {
       ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, curr->zsbuf);
       if (ret != PIPE_OK)
          return ret;
 
       if (curr->zsbuf &&
-          curr->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
+          util_format_is_depth_and_stencil(curr->zsbuf->format)) {
          ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL,
                                       curr->zsbuf);
          if (ret != PIPE_OK)
@@ -104,8 +101,6 @@ emit_framebuffer( struct svga_context *svga,
       pipe_surface_reference(&hw->zsbuf, curr->zsbuf);
    }
 
-   svga->rebind.rendertargets = FALSE;
-
    return PIPE_OK;
 }
 
@@ -118,15 +113,15 @@ emit_framebuffer( struct svga_context *svga,
  * Called at the beginning of every new command buffer to ensure that
  * non-dirty rendertargets are properly paged-in.
  */
-enum pipe_error
-svga_reemit_framebuffer_bindings(struct svga_context *svga)
+static enum pipe_error
+svga_reemit_framebuffer_bindings_vgpu9(struct svga_context *svga)
 {
    struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
    struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
    unsigned i;
    enum pipe_error ret;
 
-   assert(svga->rebind.rendertargets);
+   assert(!svga_have_vgpu10(svga));
 
    for (i = 0; i < svgascreen->max_color_buffers; i++) {
       if (hw->cbufs[i]) {
@@ -145,7 +140,7 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga)
       }
 
       if (hw->zsbuf &&
-          hw->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
+          util_format_is_depth_and_stencil(hw->zsbuf->format)) {
          ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, hw->zsbuf);
          if (ret != PIPE_OK) {
             return ret;
@@ -159,7 +154,161 @@ svga_reemit_framebuffer_bindings(struct svga_context *svga)
       }
    }
 
-   svga->rebind.rendertargets = FALSE;
+   return PIPE_OK;
+}
+
+
+
+static enum pipe_error
+emit_fb_vgpu10(struct svga_context *svga)
+{
+   const struct svga_screen *ss = svga_screen(svga->pipe.screen);
+   struct pipe_surface *rtv[SVGA3D_MAX_RENDER_TARGETS];
+   struct pipe_surface *dsv;
+   struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
+   struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+   const unsigned num_color = MAX2(curr->nr_cbufs, hw->nr_cbufs);
+   unsigned i;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+
+   /* Setup render targets array.  Note that we loop over the max of the
+    * number of previously bound buffers and the new buffers to unbind
+    * any previously bound buffers when the new number of buffers is less
+    * than the old number of buffers.
+    */
+   for (i = 0; i < num_color; i++) {
+      if (curr->cbufs[i]) {
+         rtv[i] = svga_validate_surface_view(svga,
+                                             svga_surface(curr->cbufs[i]));
+         if (rtv[i] == NULL) {
+            return PIPE_ERROR_OUT_OF_MEMORY;
+         }
+
+         assert(svga_surface(rtv[i])->view_id != SVGA3D_INVALID_ID);
+      }
+      else {
+         rtv[i] = NULL;
+      }
+   }
+
+   /* Setup depth stencil view */
+   if (curr->zsbuf) {
+      dsv = svga_validate_surface_view(svga, svga_surface(curr->zsbuf));
+      if (dsv == NULL) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+   }
+   else {
+      dsv = NULL;
+   }
+
+   ret = SVGA3D_vgpu10_SetRenderTargets(svga->swc, num_color, rtv, dsv);
+   if (ret != PIPE_OK)
+      return ret;
+
+   for (i = 0; i < ss->max_color_buffers; i++) {
+      if (hw->cbufs[i] != curr->cbufs[i]) {
+         /* propagate the backed view surface before unbinding it */
+         if (hw->cbufs[i] && svga_surface(hw->cbufs[i])->backed) {
+            svga_propagate_surface(svga,
+                                   &svga_surface(hw->cbufs[i])->backed->base);
+         }
+         pipe_surface_reference(&hw->cbufs[i], curr->cbufs[i]);
+      }
+   }
+   hw->nr_cbufs = curr->nr_cbufs;
+
+   if (hw->zsbuf != curr->zsbuf) {
+      /* propagate the backed view surface before unbinding it */
+      if (hw->zsbuf && svga_surface(hw->zsbuf)->backed) {
+         svga_propagate_surface(svga, &svga_surface(hw->zsbuf)->backed->base);
+      }
+      pipe_surface_reference(&hw->zsbuf, curr->zsbuf);
+   }
+
+   return ret;
+}
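
Note that the unbinding described in the comment above falls out of the loop bound: iterating to MAX2(curr->nr_cbufs, hw->nr_cbufs) means slots past the new count get a NULL view, which SetRenderTargets then unbinds. The shape of the pattern as a self-contained sketch ('view' is a stand-in type):

   typedef struct view view;            /* stand-in for pipe_surface */

   static void
   fill_rtv(view *rtv[], view *const curr[], unsigned curr_n, unsigned hw_n)
   {
      unsigned n = curr_n > hw_n ? curr_n : hw_n;   /* MAX2() */
      unsigned i;
      for (i = 0; i < n; i++) {
         /* slots beyond the new count are explicitly NULLed so stale
          * targets from the previous state get unbound
          */
         rtv[i] = (i < curr_n) ? curr[i] : NULL;
      }
   }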
+
+
+static enum pipe_error
+emit_framebuffer(struct svga_context *svga, unsigned dirty)
+{
+   if (svga_have_vgpu10(svga)) {
+      return emit_fb_vgpu10(svga);
+   }
+   else {
+      return emit_fb_vgpu9(svga);
+   }
+}
+
+
+/*
+ * Rebind rendertargets.
+ *
+ * Similar to emit_framebuffer, but without any state checking/update.
+ *
+ * Called at the beginning of every new command buffer to ensure that
+ * non-dirty rendertargets are properly paged-in.
+ */
+enum pipe_error
+svga_reemit_framebuffer_bindings(struct svga_context *svga)
+{
+   enum pipe_error ret;
+
+   assert(svga->rebind.flags.rendertargets);
+
+   if (svga_have_vgpu10(svga)) {
+      ret = emit_fb_vgpu10(svga);
+   }
+   else {
+      ret = svga_reemit_framebuffer_bindings_vgpu9(svga);
+   }
+
+   svga->rebind.flags.rendertargets = FALSE;
+
+   return ret;
+}
+
+
+/*
+ * Send a private allocation command to page in rendertargets resource.
+ */
+enum pipe_error
+svga_rebind_framebuffer_bindings(struct svga_context *svga)
+{
+   const struct svga_screen *ss = svga_screen(svga->pipe.screen);
+   struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+   unsigned i;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+
+   if (!svga->rebind.flags.rendertargets)
+      return PIPE_OK;
+
+   for (i = 0; i < ss->max_color_buffers; i++) {
+      if (hw->cbufs[i]) {
+         ret = svga->swc->resource_rebind(svga->swc,
+                                          svga_surface(hw->cbufs[i])->handle,
+                                          NULL,
+                                          SVGA_RELOC_WRITE);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+   }
+
+   if (hw->zsbuf) {
+      ret = svga->swc->resource_rebind(svga->swc,
+                                       svga_surface(hw->zsbuf)->handle,
+                                       NULL,
+                                       SVGA_RELOC_WRITE);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+
+   svga->rebind.flags.rendertargets = 0;
 
    return PIPE_OK;
 }
@@ -202,6 +351,7 @@ emit_viewport( struct svga_context *svga,
    float fy = flip * viewport->scale[1] * -1.0f + viewport->translate[1];
    float fw =        viewport->scale[0] * 2.0f;
    float fh = flip * viewport->scale[1] * 2.0f;
+   boolean emit_vgpu10_viewport = FALSE;
 
    memset( &prescale, 0, sizeof(prescale) );
 
@@ -225,7 +375,16 @@ emit_viewport( struct svga_context *svga,
    prescale.translate[1] = 0;
    prescale.translate[2] = 0;
    prescale.translate[3] = 0;
-   prescale.enabled = TRUE;
+
+   /* Enable prescale to adjust vertex positions to match the VGPU10
+    * convention, but only if rasterization is enabled.
+    */
+   if (svga->curr.rast->templ.rasterizer_discard) {
+      degenerate = TRUE;
+      goto out;
+   } else {
+      prescale.enabled = TRUE;
+   }
 
    if (fw < 0) {
       prescale.scale[0] *= -1.0f;
@@ -235,7 +394,14 @@ emit_viewport( struct svga_context *svga,
    }
 
    if (fh < 0.0) {
-      prescale.translate[1] = fh - 1.0f + fy * 2.0f;
+      if (svga_have_vgpu10(svga)) {
+         /* floating point viewport params below */
+         prescale.translate[1] = fh + fy * 2.0f;
+      }
+      else {
+         /* integer viewport params below */
+         prescale.translate[1] = fh - 1.0f + fy * 2.0f;
+      }
       fh = -fh;
       fy -= fh;
       prescale.scale[1] = -1.0f;
@@ -321,19 +487,31 @@ emit_viewport( struct svga_context *svga,
       float adjust_x = 0.0;
       float adjust_y = 0.0;
 
-      switch (svga->curr.reduced_prim) {
-      case PIPE_PRIM_POINTS:
-         adjust_x = -0.375;
-         adjust_y = -0.75;
-         break;
-      case PIPE_PRIM_LINES:
-         adjust_x = -0.5;
-         adjust_y = 0;
-         break;
-      case PIPE_PRIM_TRIANGLES:
-         adjust_x = -0.5;
-         adjust_y = -0.5;
-         break;
+      if (svga_have_vgpu10(svga)) {
+         /* Normally, we don't have to do any sub-pixel coordinate
+          * adjustments for VGPU10.  But when we draw wide points with
+          * a GS we need an X adjustment in order to be conformant.
+          */
+         if (svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
+             svga->curr.rast->pointsize > 1.0f) {
+            adjust_x = 0.5;
+         }
+      }
+      else {
+         switch (svga->curr.reduced_prim) {
+         case PIPE_PRIM_POINTS:
+            adjust_x = -0.375;
+            adjust_y = -0.75;
+            break;
+         case PIPE_PRIM_LINES:
+            adjust_x = -0.5;
+            adjust_y = 0;
+            break;
+         case PIPE_PRIM_TRIANGLES:
+            adjust_x = -0.5;
+            adjust_y = -0.5;
+            break;
+         }
       }
 
       if (invertY)
@@ -360,6 +538,17 @@ emit_viewport( struct svga_context *svga,
       prescale.scale[2] = -prescale.scale[2];
    }
 
+   /* If zmin is less than 0, clamp zmin to 0 and adjust the prescale.
+    * zmin can be set to -1 when viewport->scale[2] is set to 1 and
+    * viewport->translate[2] is set to 0 in the blit code.
+    */
+   if (range_min < 0.0f) {
+      range_min = -0.5f * viewport->scale[2] + 0.5f + viewport->translate[2];
+      range_max = 0.5f * viewport->scale[2] + 0.5f + viewport->translate[2];
+      prescale.scale[2] *= 2.0f;
+      prescale.translate[2] -= 0.5f;
+   }
+
    if (prescale.enabled) {
       float H[2];
       float J[2];
@@ -428,21 +617,49 @@ out:
       prescale.enabled = FALSE;
    }
 
-   if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
-      ret = SVGA3D_SetViewport(svga->swc, &rect);
-      if(ret != PIPE_OK)
-         return ret;
+   if (!svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) {
+      if (svga_have_vgpu10(svga)) {
+         emit_vgpu10_viewport = TRUE;
+      }
+      else {
+         ret = SVGA3D_SetViewport(svga->swc, &rect);
+         if (ret != PIPE_OK)
+            return ret;
 
-      memcpy(&svga->state.hw_clear.viewport, &rect, sizeof(rect));
-      assert(sizeof(rect) == sizeof(svga->state.hw_clear.viewport));
+         svga->state.hw_clear.viewport = rect;
+      }
    }
 
    if (svga->state.hw_clear.depthrange.zmin != range_min ||
-       svga->state.hw_clear.depthrange.zmax != range_max) {
-      ret = SVGA3D_SetZRange(svga->swc, range_min, range_max );
-      if(ret != PIPE_OK)
+       svga->state.hw_clear.depthrange.zmax != range_max)
+   {
+      if (svga_have_vgpu10(svga)) {
+         emit_vgpu10_viewport = TRUE;
+      }
+      else {
+         ret = SVGA3D_SetZRange(svga->swc, range_min, range_max);
+         if (ret != PIPE_OK)
+            return ret;
+
+         svga->state.hw_clear.depthrange.zmin = range_min;
+         svga->state.hw_clear.depthrange.zmax = range_max;
+      }
+   }
+
+   if (emit_vgpu10_viewport) {
+      SVGA3dViewport vp;
+      vp.x = (float) rect.x;
+      vp.y = (float) rect.y;
+      vp.width = (float) rect.w;
+      vp.height = (float) rect.h;
+      vp.minDepth = range_min;
+      vp.maxDepth = range_max;
+      ret = SVGA3D_vgpu10_SetViewports(svga->swc, 1, &vp);
+      if (ret != PIPE_OK)
          return ret;
 
+      svga->state.hw_clear.viewport = rect;
+
       svga->state.hw_clear.depthrange.zmin = range_min;
       svga->state.hw_clear.depthrange.zmax = range_max;
    }
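
Plugging the blit case from the comment above into the clamp: with viewport->scale[2] == 1 and viewport->translate[2] == 0 (so zmin comes out as -1), the recomputed range is

   range_min = -0.5f * 1.0f + 0.5f + 0.0f;   /* = 0.0f */
   range_max =  0.5f * 1.0f + 0.5f + 0.0f;   /* = 1.0f */

and the prescale compensates (scale[2] doubled, translate[2] shifted by -0.5) so the effective depth mapping matches the original, unclamped viewport.
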
@@ -475,14 +692,27 @@ emit_scissor_rect( struct svga_context *svga,
                    unsigned dirty )
 {
    const struct pipe_scissor_state *scissor = &svga->curr.scissor;
-   SVGA3dRect rect;
 
-   rect.x = scissor->minx;
-   rect.y = scissor->miny;
-   rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */
-   rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */
+   if (svga_have_vgpu10(svga)) {
+      SVGASignedRect rect;
+
+      rect.left = scissor->minx;
+      rect.top = scissor->miny;
+      rect.right = scissor->maxx;
+      rect.bottom = scissor->maxy;
+
+      return SVGA3D_vgpu10_SetScissorRects(svga->swc, 1, &rect);
+   }
+   else {
+      SVGA3dRect rect;
 
-   return SVGA3D_SetScissorRect(svga->swc, &rect);
+      rect.x = scissor->minx;
+      rect.y = scissor->miny;
+      rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */
+      rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */
+
+      return SVGA3D_SetScissorRect(svga->swc, &rect);
+   }
 }
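
The two paths above encode the same Gallium scissor differently: VGPU10 consumes the bounds directly as left/top/right/bottom, while VGPU9 wants an origin plus extent. For, say, minx=10, miny=20, maxx=110, maxy=220 (field names as used in the code above):

   SVGASignedRect r10 = { .left = 10, .top = 20, .right = 110, .bottom = 220 };
   SVGA3dRect     r9  = { .x = 10, .y = 20, .w = 110 - 10, .h = 220 - 20 };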
 
 
@@ -527,9 +757,15 @@ emit_clip_planes( struct svga_context *svga,
       plane[2] = 2.0f * c;
       plane[3] = d - c;
 
-      ret = SVGA3D_SetClipPlane(svga->swc, i, plane);
-      if(ret != PIPE_OK)
-         return ret;
+      if (svga_have_vgpu10(svga)) {
+         //debug_printf("XXX emit DX10 clip plane\n");
+         ret = PIPE_OK;
+      }
+      else {
+         ret = SVGA3D_SetClipPlane(svga->swc, i, plane);
+         if (ret != PIPE_OK)
+            return ret;
+      }
    }
 
    return PIPE_OK;
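
For reference, the plane[2] = 2c, plane[3] = d - c rewrite a few lines up is a depth-range change of variables: if positions have been prescaled so that z' = (z + 1) / 2, then substituting z = 2z' - 1 into the user plane a·x + b·y + c·z + d >= 0 gives

   a·x + b·y + (2c)·z' + (d - c) >= 0

which is exactly the coefficient pair emitted.
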
index 8cdce74..c244d53 100644 (file)
 #include "svga_shader.h"
 #include "svga_resource_texture.h"
 #include "svga_tgsi.h"
+#include "svga_format.h"
 
 #include "svga_hw_reg.h"
 
 
 
-static inline int
-compare_fs_keys(const struct svga_fs_compile_key *a,
-                const struct svga_fs_compile_key *b)
-{
-   unsigned keysize_a = svga_fs_key_size( a );
-   unsigned keysize_b = svga_fs_key_size( b );
-
-   if (keysize_a != keysize_b) {
-      return (int)(keysize_a - keysize_b);
-   }
-   return memcmp( a, b, keysize_a );
-}
-
-
-/** Search for a fragment shader variant */
-static struct svga_shader_variant *
-search_fs_key(const struct svga_fragment_shader *fs,
-              const struct svga_fs_compile_key *key)
-{
-   struct svga_shader_variant *variant = fs->base.variants;
-
-   assert(key);
-
-   for ( ; variant; variant = variant->next) {
-      if (compare_fs_keys( key, &variant->key.fkey ) == 0)
-         return variant;
-   }
-   
-   return NULL;
-}
-
-
 /**
  * If we fail to compile a fragment shader (because it uses too many
  * registers, for example) we'll use a dummy/fallback shader that
@@ -111,13 +80,29 @@ get_dummy_fragment_shader(void)
 }
 
 
+static struct svga_shader_variant *
+translate_fragment_program(struct svga_context *svga,
+                           const struct svga_fragment_shader *fs,
+                           const struct svga_compile_key *key)
+{
+   if (svga_have_vgpu10(svga)) {
+      return svga_tgsi_vgpu10_translate(svga, &fs->base, key,
+                                        PIPE_SHADER_FRAGMENT);
+   }
+   else {
+      return svga_tgsi_vgpu9_translate(&fs->base, key, PIPE_SHADER_FRAGMENT);
+   }
+}
+
+
 /**
  * Replace the given shader's instruction with a simple constant-color
  * shader.  We use this when normal shader translation fails.
  */
 static struct svga_shader_variant *
-get_compiled_dummy_shader(struct svga_fragment_shader *fs,
-                          const struct svga_fs_compile_key *key)
+get_compiled_dummy_shader(struct svga_context *svga,
+                          struct svga_fragment_shader *fs,
+                          const struct svga_compile_key *key)
 {
    const struct tgsi_token *dummy = get_dummy_fragment_shader();
    struct svga_shader_variant *variant;
@@ -129,7 +114,7 @@ get_compiled_dummy_shader(struct svga_fragment_shader *fs,
    FREE((void *) fs->base.tokens);
    fs->base.tokens = dummy;
 
-   variant = svga_translate_fragment_program(fs, key);
+   variant = translate_fragment_program(svga, fs, key);
    return variant;
 }
 
@@ -140,52 +125,47 @@ get_compiled_dummy_shader(struct svga_fragment_shader *fs,
 static enum pipe_error
 compile_fs(struct svga_context *svga,
            struct svga_fragment_shader *fs,
-           const struct svga_fs_compile_key *key,
+           const struct svga_compile_key *key,
            struct svga_shader_variant **out_variant)
 {
    struct svga_shader_variant *variant;
    enum pipe_error ret = PIPE_ERROR;
 
-   variant = svga_translate_fragment_program( fs, key );
+   variant = translate_fragment_program(svga, fs, key);
    if (variant == NULL) {
       debug_printf("Failed to compile fragment shader,"
                    " using dummy shader instead.\n");
-      variant = get_compiled_dummy_shader(fs, key);
-      if (!variant) {
-         ret = PIPE_ERROR;
-         goto fail;
-      }
+      variant = get_compiled_dummy_shader(svga, fs, key);
    }
-
-   if (svga_shader_too_large(svga, variant)) {
+   else if (svga_shader_too_large(svga, variant)) {
       /* too big, use dummy shader */
-      debug_printf("Shader too large (%lu bytes),"
+      debug_printf("Shader too large (%u bytes),"
                    " using dummy shader instead.\n",
-                   (unsigned long ) variant->nr_tokens * sizeof(variant->tokens[0]));
-      variant = get_compiled_dummy_shader(fs, key);
-      if (!variant) {
-         ret = PIPE_ERROR;
-         goto fail;
-      }
+                   (unsigned) (variant->nr_tokens
+                               * sizeof(variant->tokens[0])));
+      /* Free the too-large variant */
+      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+      /* Use simple pass-through shader instead */
+      variant = get_compiled_dummy_shader(svga, fs, key);
+   }
+
+   if (!variant) {
+      return PIPE_ERROR;
    }
 
    ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_PS, variant);
-   if (ret != PIPE_OK)
-      goto fail;
+   if (ret != PIPE_OK) {
+      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+      return ret;
+   }
 
    *out_variant = variant;
 
-   /* insert variants at head of linked list */
+   /* insert variant at head of linked list */
    variant->next = fs->base.variants;
    fs->base.variants = variant;
 
    return PIPE_OK;
-
-fail:
-   if (variant) {
-      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
-   }
-   return ret;
 }
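
Condensed, the error handling in the rewritten compile_fs() above is a two-stage fallback: translate, substitute the dummy shader on translation failure, and if the translated result exceeds the device limit, destroy it and substitute the dummy as well (a control-flow summary using the same calls as the code):

   variant = translate_fragment_program(svga, fs, key);
   if (!variant) {
      variant = get_compiled_dummy_shader(svga, fs, key);
   }
   else if (svga_shader_too_large(svga, variant)) {
      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
      variant = get_compiled_dummy_shader(svga, fs, key);
   }
   if (!variant)
      return PIPE_ERROR;    /* even the dummy failed */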
 
 
@@ -197,23 +177,45 @@ fail:
 static enum pipe_error
 make_fs_key(const struct svga_context *svga,
             struct svga_fragment_shader *fs,
-            struct svga_fs_compile_key *key)
+            struct svga_compile_key *key)
 {
+   const unsigned shader = PIPE_SHADER_FRAGMENT;
    unsigned i;
-   int idx = 0;
 
    memset(key, 0, sizeof *key);
 
+   memcpy(key->generic_remap_table, fs->generic_remap_table,
+          sizeof(fs->generic_remap_table));
+
+   /* SVGA_NEW_GS, SVGA_NEW_VS
+    */
+   if (svga->curr.gs) {
+      key->fs.gs_generic_outputs = svga->curr.gs->generic_outputs;
+   } else {
+      key->fs.vs_generic_outputs = svga->curr.vs->generic_outputs;
+   }
+
    /* Only need fragment shader fixup for twoside lighting if doing
     * hwtnl.  Otherwise the draw module does the whole job for us.
     *
     * SVGA_NEW_SWTNL
     */
    if (!svga->state.sw.need_swtnl) {
-      /* SVGA_NEW_RAST
+      /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE
        */
-      key->light_twoside = svga->curr.rast->templ.light_twoside;
-      key->front_ccw = svga->curr.rast->templ.front_ccw;
+      key->fs.light_twoside = svga->curr.rast->templ.light_twoside;
+      key->fs.front_ccw = svga->curr.rast->templ.front_ccw;
+      key->fs.pstipple = (svga->curr.rast->templ.poly_stipple_enable &&
+                          svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES);
+      key->fs.aa_point = (svga->curr.rast->templ.point_smooth &&
+                          svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
+                          (svga->curr.rast->pointsize > 1.0 ||
+                           svga->curr.vs->base.info.writes_psize));
+      if (key->fs.aa_point) {
+         assert(svga->curr.gs != NULL);
+         assert(svga->curr.gs->aa_point_coord_index != -1);
+         key->fs.aa_point_coord_index = svga->curr.gs->aa_point_coord_index;
+      }
    }
 
    /* The blend workaround for simulating logicop xor behaviour
@@ -231,7 +233,7 @@ make_fs_key(const struct svga_context *svga,
     * SVGA_NEW_BLEND
     */
    if (svga->curr.blend->need_white_fragments) {
-      key->white_fragments = 1;
+      key->fs.white_fragments = 1;
    }
 
 #ifdef DEBUG
@@ -241,22 +243,23 @@ make_fs_key(const struct svga_context *svga,
     */
    {
       static boolean warned = FALSE;
-      unsigned i, n = MAX2(svga->curr.num_sampler_views,
-                           svga->curr.num_samplers);
+      unsigned i, n = MAX2(svga->curr.num_sampler_views[shader],
+                           svga->curr.num_samplers[shader]);
       /* Only warn once to prevent too much debug output */
       if (!warned) {
-         if (svga->curr.num_sampler_views != svga->curr.num_samplers) {
+         if (svga->curr.num_sampler_views[shader] !=
+             svga->curr.num_samplers[shader]) {
             debug_printf("svga: mismatched number of sampler views (%u) "
                          "vs. samplers (%u)\n",
-                         svga->curr.num_sampler_views,
-                         svga->curr.num_samplers);
+                         svga->curr.num_sampler_views[shader],
+                         svga->curr.num_samplers[shader]);
          }
          for (i = 0; i < n; i++) {
-            if ((svga->curr.sampler_views[i] == NULL) !=
-                (svga->curr.sampler[i] == NULL))
+            if ((svga->curr.sampler_views[shader][i] == NULL) !=
+                (svga->curr.sampler[shader][i] == NULL))
                debug_printf("sampler_view[%u] = %p but sampler[%u] = %p\n",
-                            i, svga->curr.sampler_views[i],
-                            i, svga->curr.sampler[i]);
+                            i, svga->curr.sampler_views[shader][i],
+                            i, svga->curr.sampler[shader][i]);
          }
          warned = TRUE;
       }
@@ -268,68 +271,62 @@ make_fs_key(const struct svga_context *svga,
     *
     * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
     */
-   for (i = 0; i < svga->curr.num_sampler_views; i++) {
-      if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) {
-         assert(svga->curr.sampler_views[i]->texture);
-         key->tex[i].texture_target = svga->curr.sampler_views[i]->texture->target;
-         if (!svga->curr.sampler[i]->normalized_coords) {
-            key->tex[i].width_height_idx = idx++;
-            key->tex[i].unnormalized = TRUE;
-            ++key->num_unnormalized_coords;
-         }
-
-         key->tex[i].swizzle_r = svga->curr.sampler_views[i]->swizzle_r;
-         key->tex[i].swizzle_g = svga->curr.sampler_views[i]->swizzle_g;
-         key->tex[i].swizzle_b = svga->curr.sampler_views[i]->swizzle_b;
-         key->tex[i].swizzle_a = svga->curr.sampler_views[i]->swizzle_a;
-      }
-   }
-   key->num_textures = svga->curr.num_sampler_views;
-
-   idx = 0;
-   for (i = 0; i < svga->curr.num_samplers; ++i) {
-      if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) {
-         struct pipe_resource *tex = svga->curr.sampler_views[i]->texture;
-         struct svga_texture *stex = svga_texture(tex);
-         SVGA3dSurfaceFormat format = stex->key.format;
-
-         if (format == SVGA3D_Z_D16 ||
-             format == SVGA3D_Z_D24X8 ||
-             format == SVGA3D_Z_D24S8) {
-            /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8,
-             * or SVGA3D_Z_D24S8 surface, we'll automatically get
-             * shadow comparison.  But we only get LEQUAL mode.
-             * Set TEX_COMPARE_NONE here so we don't emit the extra FS
-             * code for shadow comparison.
-             */
-            key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE;
-            key->tex[i].compare_func = PIPE_FUNC_NEVER;
-            /* These depth formats _only_ support comparison mode and
-             * not ordinary sampling so warn if the later is expected.
-             */
-            if (svga->curr.sampler[i]->compare_mode !=
-                PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               debug_warn_once("Unsupported shadow compare mode");
-            }                   
-            /* The only supported comparison mode is LEQUAL */
-            if (svga->curr.sampler[i]->compare_func != PIPE_FUNC_LEQUAL) {
-               debug_warn_once("Unsupported shadow compare function");
+   svga_init_shader_key_common(svga, shader, key);
+
+   for (i = 0; i < svga->curr.num_samplers[shader]; ++i) {
+      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
+      const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
+      if (view) {
+         struct pipe_resource *tex = view->texture;
+         if (tex->target != PIPE_BUFFER) {
+            struct svga_texture *stex = svga_texture(tex);
+            SVGA3dSurfaceFormat format = stex->key.format;
+
+            if (!svga_have_vgpu10(svga) &&
+                (format == SVGA3D_Z_D16 ||
+                 format == SVGA3D_Z_D24X8 ||
+                 format == SVGA3D_Z_D24S8)) {
+               /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8,
+                * or SVGA3D_Z_D24S8 surface, we'll automatically get
+                * shadow comparison.  But we only get LEQUAL mode.
+                * Set TEX_COMPARE_NONE here so we don't emit the extra FS
+                * code for shadow comparison.
+                */
+               key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE;
+               key->tex[i].compare_func = PIPE_FUNC_NEVER;
+               /* These depth formats _only_ support comparison mode and
+                * not ordinary sampling, so warn if the latter is expected.
+                */
+               if (sampler->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+                  debug_warn_once("Unsupported shadow compare mode");
+               }
+               /* The shader translation code can emit code to
+                * handle ALWAYS and NEVER compare functions
+                */
+               else if (sampler->compare_func == PIPE_FUNC_ALWAYS ||
+                        sampler->compare_func == PIPE_FUNC_NEVER) {
+                  key->tex[i].compare_mode = sampler->compare_mode;
+                  key->tex[i].compare_func = sampler->compare_func;
+               }
+               else if (sampler->compare_func != PIPE_FUNC_LEQUAL) {
+                  debug_warn_once("Unsupported shadow compare function");
+               }
+            }
+            else {
+               /* For other texture formats, just use the compare func/mode
+                * as-is.  Should be no-ops for color textures.  For depth
+                * textures, we do not get automatic depth compare.  We have
+                * to do it ourselves in the shader.  And we don't get PCF.
+                */
+               key->tex[i].compare_mode = sampler->compare_mode;
+               key->tex[i].compare_func = sampler->compare_func;
             }
-         }
-         else {
-            /* For other texture formats, just use the compare func/mode
-             * as-is.  Should be no-ops for color textures.  For depth
-             * textures, we do not get automatic depth compare.  We have
-             * to do it ourselves in the shader.  And we don't get PCF.
-             */
-            key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode;
-            key->tex[i].compare_func = svga->curr.sampler[i]->compare_func;
          }
       }
    }
 
    /* sprite coord gen state */
-   for (i = 0; i < svga->curr.num_samplers; ++i) {
+   for (i = 0; i < svga->curr.num_samplers[shader]; ++i) {
       key->tex[i].sprite_texgen =
          svga->curr.rast->templ.sprite_coord_enable & (1 << i);
    }
@@ -337,10 +334,25 @@ make_fs_key(const struct svga_context *svga,
    key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode
                                     == PIPE_SPRITE_COORD_LOWER_LEFT);
 
+   key->fs.flatshade = svga->curr.rast->templ.flatshade;
+
+   /* SVGA_NEW_DEPTH_STENCIL_ALPHA */
+   if (svga_have_vgpu10(svga)) {
+      /* Alpha testing is not supported in integer-valued render targets. */
+      if (svga_has_any_integer_cbufs(svga)) {
+         key->fs.alpha_func = SVGA3D_CMP_ALWAYS;
+         key->fs.alpha_ref = 0;
+      }
+      else {
+         key->fs.alpha_func = svga->curr.depth->alphafunc;
+         key->fs.alpha_ref = svga->curr.depth->alpharef;
+      }
+   }
+
    /* SVGA_NEW_FRAME_BUFFER */
    if (fs->base.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
       /* Replicate color0 output to N colorbuffers */
-      key->write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
+      key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
    }
 
    return PIPE_OK;
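
The key built above is later matched against cached variants (emit_hw_fs below calls svga_search_shader_key). The lookup presumably follows the same shape as the per-stage search_fs_key() this patch removes — walk the variant list and compare keys bytewise; a hedged sketch:

   static struct svga_shader_variant *
   search_key(struct svga_shader_variant *variants,
              const struct svga_compile_key *key, size_t key_size)
   {
      struct svga_shader_variant *v;
      for (v = variants; v; v = v->next) {
         if (memcmp(&v->key, key, key_size) == 0)
            return v;       /* cache hit: reuse the compiled variant */
      }
      return NULL;          /* miss: caller compiles a new variant */
   }
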
@@ -355,18 +367,32 @@ svga_reemit_fs_bindings(struct svga_context *svga)
 {
    enum pipe_error ret;
 
-   assert(svga->rebind.fs);
+   assert(svga->rebind.flags.fs);
    assert(svga_have_gb_objects(svga));
 
    if (!svga->state.hw_draw.fs)
       return PIPE_OK;
 
-   ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
-                            svga->state.hw_draw.fs->gb_shader);
+   if (!svga_need_to_rebind_resources(svga)) {
+      ret = svga->swc->resource_rebind(svga->swc, NULL,
+                                       svga->state.hw_draw.fs->gb_shader,
+                                       SVGA_RELOC_READ);
+      goto out;
+   }
+
+   if (svga_have_vgpu10(svga))
+      ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS,
+                                    svga->state.hw_draw.fs->gb_shader,
+                                    svga->state.hw_draw.fs->id);
+   else
+      ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
+                               svga->state.hw_draw.fs->gb_shader);
+
+ out:
    if (ret != PIPE_OK)
       return ret;
 
-   svga->rebind.fs = FALSE;
+   svga->rebind.flags.fs = FALSE;
    return PIPE_OK;
 }
 
@@ -378,7 +404,7 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty)
    struct svga_shader_variant *variant = NULL;
    enum pipe_error ret = PIPE_OK;
    struct svga_fragment_shader *fs = svga->curr.fs;
-   struct svga_fs_compile_key key;
+   struct svga_compile_key key;
 
    /* SVGA_NEW_BLEND
     * SVGA_NEW_TEXTURE_BINDING
@@ -386,14 +412,16 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty)
     * SVGA_NEW_NEED_SWTNL
     * SVGA_NEW_SAMPLER
     * SVGA_NEW_FRAME_BUFFER
+    * SVGA_NEW_DEPTH_STENCIL_ALPHA
+    * SVGA_NEW_VS
     */
-   ret = make_fs_key( svga, fs, &key );
+   ret = make_fs_key(svga, fs, &key);
    if (ret != PIPE_OK)
       return ret;
 
-   variant = search_fs_key( fs, &key );
+   variant = svga_search_shader_key(&fs->base, &key);
    if (!variant) {
-      ret = compile_fs( svga, fs, &key, &variant );
+      ret = compile_fs(svga, fs, &key, &variant);
       if (ret != PIPE_OK)
          return ret;
    }
@@ -401,22 +429,14 @@ emit_hw_fs(struct svga_context *svga, unsigned dirty)
    assert(variant);
 
    if (variant != svga->state.hw_draw.fs) {
-      if (svga_have_gb_objects(svga)) {
-         ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
-                                  variant->gb_shader);
-         if (ret != PIPE_OK)
-            return ret;
+      ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, variant);
+      if (ret != PIPE_OK)
+         return ret;
 
-         svga->rebind.fs = FALSE;
-      }
-      else {
-         ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, variant->id);
-         if (ret != PIPE_OK)
-            return ret;
-      }
+      svga->rebind.flags.fs = FALSE;
 
       svga->dirty |= SVGA_NEW_FS_VARIANT;
-      svga->state.hw_draw.fs = variant;      
+      svga->state.hw_draw.fs = variant;
    }
 
    return PIPE_OK;
@@ -426,11 +446,15 @@ struct svga_tracked_state svga_hw_fs =
 {
    "fragment shader (hwtnl)",
    (SVGA_NEW_FS |
+    SVGA_NEW_GS |
+    SVGA_NEW_VS |
     SVGA_NEW_TEXTURE_BINDING |
     SVGA_NEW_NEED_SWTNL |
     SVGA_NEW_RAST |
+    SVGA_NEW_REDUCED_PRIMITIVE |
     SVGA_NEW_SAMPLER |
     SVGA_NEW_FRAME_BUFFER |
+    SVGA_NEW_DEPTH_STENCIL_ALPHA |
     SVGA_NEW_BLEND),
    emit_hw_fs
 };
diff --git a/src/gallium/drivers/svga/svga_state_gs.c b/src/gallium/drivers/svga/svga_state_gs.c
new file mode 100644 (file)
index 0000000..7f75410
--- /dev/null
@@ -0,0 +1,259 @@
+/**********************************************************
+ * Copyright 2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "translate/translate.h"
+#include "tgsi/tgsi_ureg.h"
+
+#include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_shader.h"
+#include "svga_tgsi.h"
+#include "svga_streamout.h"
+#include "svga_format.h"
+
+/**
+ * If we fail to compile a geometry shader we'll use a dummy/fallback shader
+ * that simply emits the incoming vertices.
+ */
+static const struct tgsi_token *
+get_dummy_geometry_shader(void)
+{
+   //XXX
+   return NULL;
+}
+
+
+static struct svga_shader_variant *
+translate_geometry_program(struct svga_context *svga,
+                           const struct svga_geometry_shader *gs,
+                           const struct svga_compile_key *key)
+{
+   if (svga_have_vgpu10(svga)) {
+      return svga_tgsi_vgpu10_translate(svga, &gs->base, key,
+                                        PIPE_SHADER_GEOMETRY);
+   }
+   else {
+      return svga_tgsi_vgpu9_translate(&gs->base, key, PIPE_SHADER_GEOMETRY);
+   }
+}
+
+
+/**
+ * Translate TGSI shader into an svga shader variant.
+ */
+static enum pipe_error
+compile_gs(struct svga_context *svga,
+           struct svga_geometry_shader *gs,
+           const struct svga_compile_key *key,
+           struct svga_shader_variant **out_variant)
+{
+   struct svga_shader_variant *variant;
+   enum pipe_error ret = PIPE_ERROR;
+
+   variant = translate_geometry_program(svga, gs, key);
+   if (variant == NULL) {
+      /* some problem during translation, try the dummy shader */
+      const struct tgsi_token *dummy = get_dummy_geometry_shader();
+      if (!dummy) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+      debug_printf("Failed to compile geometry shader, using dummy shader instead.\n");
+      FREE((void *) gs->base.tokens);
+      gs->base.tokens = dummy;
+      variant = translate_geometry_program(svga, gs, key);
+      if (variant == NULL) {
+         return PIPE_ERROR;
+      }
+   }
+
+   ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_GS, variant);
+   if (ret != PIPE_OK) {
+      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, variant);
+      return ret;
+   }
+
+   *out_variant = variant;
+
+   return PIPE_OK;
+}
+
+
+static void
+make_gs_key(struct svga_context *svga, struct svga_compile_key *key)
+{
+   struct svga_geometry_shader *gs = svga->curr.gs;
+
+   memset(key, 0, sizeof *key);
+
+   /*
+    * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
+    */
+   svga_init_shader_key_common(svga, PIPE_SHADER_GEOMETRY, key);
+
+   memcpy(key->generic_remap_table, gs->generic_remap_table,
+          sizeof(gs->generic_remap_table));
+
+   key->gs.vs_generic_outputs = svga->curr.vs->generic_outputs;
+
+   key->gs.need_prescale = svga->state.hw_clear.prescale.enabled;
+
+   key->gs.writes_psize = gs->base.info.writes_psize;
+   key->gs.wide_point = gs->wide_point;
+   key->sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable;
+   key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode
+                                    == PIPE_SPRITE_COORD_LOWER_LEFT);
+
+   /* SVGA_NEW_RAST */
+   key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable;
+}
+
+
+/**
+ * svga_reemit_gs_bindings - Reemit the geometry shader bindings
+ */
+enum pipe_error
+svga_reemit_gs_bindings(struct svga_context *svga)
+{
+   enum pipe_error ret;
+   struct svga_winsys_gb_shader *gbshader = NULL;
+   SVGA3dShaderId shaderId = SVGA3D_INVALID_ID;
+
+   assert(svga->rebind.flags.gs);
+   assert(svga_have_gb_objects(svga));
+
+   /* Geometry Shader is only supported in vgpu10 */
+   assert(svga_have_vgpu10(svga));
+
+   if (svga->state.hw_draw.gs) {
+      gbshader = svga->state.hw_draw.gs->gb_shader;
+      shaderId = svga->state.hw_draw.gs->id;
+   }
+
+   if (!svga_need_to_rebind_resources(svga)) {
+      ret = svga->swc->resource_rebind(svga->swc, NULL, gbshader,
+                                       SVGA_RELOC_READ);
+      goto out;
+   }
+
+   ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_GS,
+                                 gbshader, shaderId);
+
+ out:
+   if (ret != PIPE_OK)
+      return ret;
+
+   svga->rebind.flags.gs = FALSE;
+   return PIPE_OK;
+}
+
+static enum pipe_error
+emit_hw_gs(struct svga_context *svga, unsigned dirty)
+{
+   struct svga_shader_variant *variant;
+   struct svga_geometry_shader *gs = svga->curr.gs;
+   enum pipe_error ret = PIPE_OK;
+   struct svga_compile_key key;
+
+   /* If there's a user-defined GS, we should have a pointer to a derived
+    * GS.  This should have been resolved in update_tgsi_transform().
+    */
+   if (svga->curr.user_gs)
+      assert(svga->curr.gs);
+
+   if (gs == NULL) {
+      if (svga->state.hw_draw.gs != NULL) {
+
+         /* The previous geometry shader is now inactive, so unbind it
+          * from the hardware state.
+          */
+         ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL);
+         svga->state.hw_draw.gs = NULL;
+      }
+      return ret;
+   }
+
+   /* If there is stream output info for this geometry shader, then use
+    * it instead of the one from the vertex shader.
+    */
+   if (svga_have_gs_streamout(svga)) {
+      svga_set_stream_output(svga, gs->base.stream_output);
+   }
+   else if (!svga_have_vs_streamout(svga)) {
+      /* turn off stream out */
+      svga_set_stream_output(svga, NULL);
+   }
+
+   /* SVGA_NEW_NEED_SWTNL */
+   if (svga->state.sw.need_swtnl && !svga_have_vgpu10(svga)) {
+      /* No geometry shader is needed */
+      variant = NULL;
+   }
+   else {
+      make_gs_key(svga, &key);
+
+      /* See if we already have a GS variant that matches the key */
+      variant = svga_search_shader_key(&gs->base, &key);
+
+      if (!variant) {
+         ret = compile_gs(svga, gs, &key, &variant);
+         if (ret != PIPE_OK)
+            return ret;
+
+         /* insert the new variant at head of linked list */
+         assert(variant);
+         variant->next = gs->base.variants;
+         gs->base.variants = variant;
+      }
+   }
+
+   if (variant != svga->state.hw_draw.gs) {
+      /* Bind the new variant */
+      ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, variant);
+      if (ret != PIPE_OK)
+         return ret;
+
+      svga->rebind.flags.gs = FALSE;
+      svga->dirty |= SVGA_NEW_GS_VARIANT;
+      svga->state.hw_draw.gs = variant;
+   }
+
+   return PIPE_OK;
+}
+
+struct svga_tracked_state svga_hw_gs =
+{
+   "geometry shader (hwtnl)",
+   (SVGA_NEW_VS |
+    SVGA_NEW_FS |
+    SVGA_NEW_GS |
+    SVGA_NEW_TEXTURE_BINDING |
+    SVGA_NEW_SAMPLER |
+    SVGA_NEW_RAST |
+    SVGA_NEW_NEED_SWTNL),
+   emit_hw_gs
+};
index cac39d6..429241e 100644 (file)
@@ -26,6 +26,7 @@
 #include "util/u_inlines.h"
 #include "pipe/p_state.h"
 #include "svga_context.h"
+#include "svga_shader.h"
 #include "svga_state.h"
 #include "svga_debug.h"
 #include "svga_hw_reg.h"
@@ -91,7 +92,7 @@ update_need_pipeline(struct svga_context *svga, unsigned dirty)
       unsigned generic_inputs =
          svga->curr.fs ? svga->curr.fs->generic_inputs : 0;
 
-      if (sprite_coord_gen &&
+      if (!svga_have_vgpu10(svga) && sprite_coord_gen &&
           (generic_inputs & ~sprite_coord_gen)) {
          /* The fragment shader is using some generic inputs that are
           * not being replaced by auto-generated point/sprite coords (and
index ebb9837..d43894d 100644 (file)
  *
  **********************************************************/
 
+#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
 #include "util/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
-#include "pipe/p_defines.h"
 #include "util/u_math.h"
+#include "util/u_memory.h"
 
 #include "svga_context.h"
 #include "svga_screen.h"
 #include "svga_state.h"
 #include "svga_cmd.h"
+#include "svga_format.h"
+#include "svga_shader.h"
 
 
 struct rs_queue {
@@ -77,7 +81,7 @@ svga_queue_rs( struct rs_queue *q,
  * the "to" state.
  */
 static enum pipe_error
-emit_rss(struct svga_context *svga, unsigned dirty)
+emit_rss_vgpu9(struct svga_context *svga, unsigned dirty)
 {
    struct svga_screen *screen = svga_screen(svga->pipe.screen);
    struct rs_queue queue;
@@ -85,7 +89,7 @@ emit_rss(struct svga_context *svga, unsigned dirty)
 
    queue.rs_count = 0;
 
-   if (dirty & SVGA_NEW_BLEND) {
+   if (dirty & (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR)) {
       const struct svga_blend_state *curr = svga->curr.blend;
 
       EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail );
@@ -119,7 +123,7 @@ emit_rss(struct svga_context *svga, unsigned dirty)
       EMIT_RS( svga, color, BLENDCOLOR, fail );
    }
 
-   if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) {
+   if (dirty & (SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_RAST)) {
       const struct svga_depth_stencil_state *curr = svga->curr.depth; 
       const struct svga_rasterizer_state *rast = svga->curr.rast; 
 
@@ -300,6 +304,151 @@ fail:
    return PIPE_ERROR_OUT_OF_MEMORY;
 }
 
+/** Returns a non-culling rasterizer state object to be used with
+ *  point sprites.
+ */
+static struct svga_rasterizer_state *
+get_no_cull_rasterizer_state(struct svga_context *svga)
+{
+   const struct svga_rasterizer_state *r = svga->curr.rast;
+   unsigned int aa_point = r->templ.point_smooth;
+
+   if (!svga->rasterizer_no_cull[aa_point]) {
+      struct pipe_rasterizer_state rast;
+
+      memset(&rast, 0, sizeof(rast));
+      rast.flatshade = 1;
+      rast.front_ccw = 1;
+      rast.point_smooth = r->templ.point_smooth;
+
+      /* All rasterizer states have the same half_pixel_center,
+       * bottom_edge_rule and clip_halfz values since they are
+       * constant for a context. If we ever implement
+       * GL_ARB_clip_control, the clip_halfz field would have to be observed.
+       */
+      rast.half_pixel_center = r->templ.half_pixel_center;
+      rast.bottom_edge_rule = r->templ.bottom_edge_rule;
+      rast.clip_halfz = r->templ.clip_halfz;
+
+      svga->rasterizer_no_cull[aa_point] =
+               svga->pipe.create_rasterizer_state(&svga->pipe, &rast);
+   }
+   return svga->rasterizer_no_cull[aa_point];
+}
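
get_no_cull_rasterizer_state() above memoizes one cso per point_smooth value in the two-entry rasterizer_no_cull array. The same lazy-cache pattern in isolation, with a hypothetical constructor:

   extern void *create_state(unsigned aa_point);   /* hypothetical ctor */

   static void *no_cull_cache[2];        /* indexed by aa_point: 0 or 1 */

   static void *
   get_no_cull_state(unsigned aa_point)
   {
      /* create on first use, then reuse for the context's lifetime */
      if (!no_cull_cache[aa_point])
         no_cull_cache[aa_point] = create_state(aa_point);
      return no_cull_cache[aa_point];
   }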
+
+static enum pipe_error
+emit_rss_vgpu10(struct svga_context *svga, unsigned dirty)
+{
+   enum pipe_error ret = PIPE_OK;
+
+   svga_hwtnl_flush_retry(svga);
+
+   if (dirty & (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR)) {
+      const struct svga_blend_state *curr;
+      float blend_factor[4];
+
+      if (svga_has_any_integer_cbufs(svga)) {
+         /* Blending is not supported in integer-valued render targets. */
+         curr = svga->noop_blend;
+         blend_factor[0] =
+         blend_factor[1] =
+         blend_factor[2] =
+         blend_factor[3] = 0;
+      }
+      else {
+         curr = svga->curr.blend;
+
+         if (curr->blend_color_alpha) {
+            blend_factor[0] =
+            blend_factor[1] =
+            blend_factor[2] =
+            blend_factor[3] = svga->curr.blend_color.color[3];
+         }
+         else {
+            blend_factor[0] = svga->curr.blend_color.color[0];
+            blend_factor[1] = svga->curr.blend_color.color[1];
+            blend_factor[2] = svga->curr.blend_color.color[2];
+            blend_factor[3] = svga->curr.blend_color.color[3];
+         }
+      }
+
+      /* Set/bind the blend state object */
+      if (svga->state.hw_draw.blend_id != curr->id ||
+          svga->state.hw_draw.blend_factor[0] != blend_factor[0] ||
+          svga->state.hw_draw.blend_factor[1] != blend_factor[1] ||
+          svga->state.hw_draw.blend_factor[2] != blend_factor[2] ||
+          svga->state.hw_draw.blend_factor[3] != blend_factor[3] ||
+          svga->state.hw_draw.blend_sample_mask != svga->curr.sample_mask) {
+         ret = SVGA3D_vgpu10_SetBlendState(svga->swc, curr->id,
+                                           blend_factor,
+                                           svga->curr.sample_mask);
+         if (ret != PIPE_OK)
+            return ret;
+
+         svga->state.hw_draw.blend_id = curr->id;
+         svga->state.hw_draw.blend_factor[0] = blend_factor[0];
+         svga->state.hw_draw.blend_factor[1] = blend_factor[1];
+         svga->state.hw_draw.blend_factor[2] = blend_factor[2];
+         svga->state.hw_draw.blend_factor[3] = blend_factor[3];
+         svga->state.hw_draw.blend_sample_mask = svga->curr.sample_mask;
+      }
+   }
+
+   if (dirty & (SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_STENCIL_REF)) {
+      const struct svga_depth_stencil_state *curr = svga->curr.depth;
+      unsigned curr_ref = svga->curr.stencil_ref.ref_value[0];
+
+      if (curr->id != svga->state.hw_draw.depth_stencil_id ||
+          curr_ref != svga->state.hw_draw.stencil_ref) {
+         /* Set/bind the depth/stencil state object */
+         ret = SVGA3D_vgpu10_SetDepthStencilState(svga->swc, curr->id,
+                                                  curr_ref);
+         if (ret != PIPE_OK)
+            return ret;
+
+         svga->state.hw_draw.depth_stencil_id = curr->id;
+         svga->state.hw_draw.stencil_ref = curr_ref;
+      }
+   }
+
+   if (dirty & (SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_RAST)) {
+      const struct svga_rasterizer_state *rast;
+
+      if (svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
+          svga->curr.gs && svga->curr.gs->wide_point) {
+
+         /* If we are drawing a point sprite, we will need to
+          * bind a non-culling rasterizer state object
+          */
+         rast = get_no_cull_rasterizer_state(svga);
+      }
+      else {
+         rast = svga->curr.rast;
+      }
+
+      if (svga->state.hw_draw.rasterizer_id != rast->id) {
+         /* Set/bind the rasterizer state object */
+         ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, rast->id);
+         if (ret != PIPE_OK)
+            return ret;
+         svga->state.hw_draw.rasterizer_id = rast->id;
+      }
+   }
+   return PIPE_OK;
+}
+
+
+static enum pipe_error
+emit_rss(struct svga_context *svga, unsigned dirty)
+{
+   if (svga_have_vgpu10(svga)) {
+      return emit_rss_vgpu10(svga, dirty);
+   }
+   else {
+      return emit_rss_vgpu9(svga, dirty);
+   }
+}
+
 
 struct svga_tracked_state svga_hw_rss = 
 {
@@ -307,11 +456,12 @@ struct svga_tracked_state svga_hw_rss =
 
    (SVGA_NEW_BLEND |
     SVGA_NEW_BLEND_COLOR |
-    SVGA_NEW_DEPTH_STENCIL |
+    SVGA_NEW_DEPTH_STENCIL_ALPHA |
     SVGA_NEW_STENCIL_REF |
     SVGA_NEW_RAST |
     SVGA_NEW_FRAME_BUFFER |
-    SVGA_NEW_NEED_PIPELINE),
+    SVGA_NEW_NEED_PIPELINE |
+    SVGA_NEW_REDUCED_PRIMITIVE),
 
    emit_rss
 };
diff --git a/src/gallium/drivers/svga/svga_state_sampler.c b/src/gallium/drivers/svga/svga_state_sampler.c
new file mode 100644 (file)
index 0000000..611d2c6
--- /dev/null
@@ -0,0 +1,338 @@
+/*
+ * Copyright 2013 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+/**
+ * VGPU10 sampler and sampler view functions.
+ */
+
+
+#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_cmd.h"
+#include "svga_context.h"
+#include "svga_format.h"
+#include "svga_resource_buffer.h"
+#include "svga_resource_texture.h"
+#include "svga_shader.h"
+#include "svga_state.h"
+#include "svga_sampler_view.h"
+
+
+/** Get resource handle for a texture or buffer */
+static inline struct svga_winsys_surface *
+svga_resource_handle(struct pipe_resource *res)
+{
+   if (res->target == PIPE_BUFFER) {
+      return svga_buffer(res)->handle;
+   }
+   else {
+      return svga_texture(res)->handle;
+   }
+}
+
+
+/**
+ * This helper function returns TRUE if the specified resource collides with
+ * any of the resources bound to any of the currently bound sampler views.
+ */
+boolean
+svga_check_sampler_view_resource_collision(struct svga_context *svga,
+                                           struct svga_winsys_surface *res,
+                                           unsigned shader)
+{
+   struct pipe_screen *screen = svga->pipe.screen;
+   unsigned i;
+
+   if (svga_screen(screen)->debug.no_surface_view) {
+      return FALSE;
+   }
+
+   for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
+      struct svga_pipe_sampler_view *sv =
+         svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);
+
+      if (sv && res == svga_resource_handle(sv->base.texture)) {
+         return TRUE;
+      }
+   }
+
+   return FALSE;
+}
+
+
+/**
+ * Create a DX ShaderResourceSamplerView for the given pipe_sampler_view,
+ * if needed.
+ */
+static enum pipe_error
+svga_validate_pipe_sampler_view(struct svga_context *svga,
+                                struct svga_pipe_sampler_view *sv)
+{
+   enum pipe_error ret = PIPE_OK;
+
+   if (sv->id == SVGA3D_INVALID_ID) {
+      struct svga_screen *ss = svga_screen(svga->pipe.screen);
+      struct pipe_resource *texture = sv->base.texture;
+      struct svga_winsys_surface *surface = svga_resource_handle(texture);
+      SVGA3dSurfaceFormat format;
+      SVGA3dResourceType resourceDim;
+      SVGA3dShaderResourceViewDesc viewDesc;
+
+      format = svga_translate_format(ss, sv->base.format,
+                                     PIPE_BIND_SAMPLER_VIEW);
+      assert(format != SVGA3D_FORMAT_INVALID);
+
+      if (texture->target == PIPE_BUFFER) {
+         viewDesc.buffer.firstElement = sv->base.u.buf.first_element;
+         viewDesc.buffer.numElements = (sv->base.u.buf.last_element -
+                                        sv->base.u.buf.first_element + 1);
+      }
+      else {
+         viewDesc.tex.mostDetailedMip = sv->base.u.tex.first_level;
+         viewDesc.tex.firstArraySlice = sv->base.u.tex.first_layer;
+         viewDesc.tex.mipLevels = (sv->base.u.tex.last_level -
+                                   sv->base.u.tex.first_level + 1);
+      }
+
+      /* arraySize in viewDesc specifies the number of array slices in a
+       * texture array. For a 3D texture, last_layer in
+       * pipe_sampler_view specifies the last slice of the texture,
+       * which is different from the last slice in a texture array,
+       * hence we need to set arraySize to 1 explicitly.
+       */
+      viewDesc.tex.arraySize =
+         (texture->target == PIPE_TEXTURE_3D ||
+          texture->target == PIPE_BUFFER) ? 1 :
+            (sv->base.u.tex.last_layer - sv->base.u.tex.first_layer + 1);
+
+      switch (texture->target) {
+      case PIPE_BUFFER:
+         resourceDim = SVGA3D_RESOURCE_BUFFER;
+         break;
+      case PIPE_TEXTURE_1D:
+      case PIPE_TEXTURE_1D_ARRAY:
+         resourceDim = SVGA3D_RESOURCE_TEXTURE1D;
+         break;
+      case PIPE_TEXTURE_RECT:
+      case PIPE_TEXTURE_2D:
+      case PIPE_TEXTURE_2D_ARRAY:
+         resourceDim = SVGA3D_RESOURCE_TEXTURE2D;
+         break;
+      case PIPE_TEXTURE_3D:
+         resourceDim = SVGA3D_RESOURCE_TEXTURE3D;
+         break;
+      case PIPE_TEXTURE_CUBE:
+      case PIPE_TEXTURE_CUBE_ARRAY:
+         resourceDim = SVGA3D_RESOURCE_TEXTURECUBE;
+         break;
+
+      default:
+         assert(!"Unexpected texture type");
+         resourceDim = SVGA3D_RESOURCE_TEXTURE2D;
+      }
+
+      sv->id = util_bitmask_add(svga->sampler_view_id_bm);
+
+      ret = SVGA3D_vgpu10_DefineShaderResourceView(svga->swc,
+                                                   sv->id,
+                                                   surface,
+                                                   format,
+                                                   resourceDim,
+                                                   &viewDesc);
+      if (ret != PIPE_OK) {
+         util_bitmask_clear(svga->sampler_view_id_bm, sv->id);
+         sv->id = SVGA3D_INVALID_ID;
+      }
+   }
+
+   return ret;
+}
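
svga_validate_pipe_sampler_view above is a lazy create-on-first-use routine: the view id stays SVGA3D_INVALID_ID until the first validation, an id is then allocated from a util_bitmask pool, and the allocation is rolled back if the define command fails. A sketch of that idiom; util_bitmask_add/util_bitmask_clear are the real Mesa helpers used above, everything else is illustrative:

#include "util/u_bitmask.h"

#define ID_INVALID (~0u)       /* stand-in for SVGA3D_INVALID_ID */

static int
lazy_define(struct util_bitmask *id_pool, unsigned *id,
            int (*emit_define)(unsigned id))
{
   int ret = 0;                /* PIPE_OK */

   if (*id == ID_INVALID) {
      *id = util_bitmask_add(id_pool);       /* allocate a fresh id */
      ret = emit_define(*id);
      if (ret != 0) {
         util_bitmask_clear(id_pool, *id);   /* undo the allocation */
         *id = ID_INVALID;
      }
   }
   return ret;
}
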
+
+
+static enum pipe_error
+update_sampler_resources(struct svga_context *svga, unsigned dirty)
+{
+   enum pipe_error ret = PIPE_OK;
+   unsigned shader;
+
+   if (!svga_have_vgpu10(svga))
+      return PIPE_OK;
+
+   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+      SVGA3dShaderResourceViewId ids[PIPE_MAX_SAMPLERS];
+      struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS];
+      unsigned count;
+      unsigned nviews;
+      unsigned i;
+
+      count = svga->curr.num_sampler_views[shader];
+      for (i = 0; i < count; i++) {
+         struct svga_pipe_sampler_view *sv =
+            svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);
+         struct svga_winsys_surface *surface;
+
+         if (sv) {
+            surface = svga_resource_handle(sv->base.texture);
+
+            ret = svga_validate_pipe_sampler_view(svga, sv);
+            if (ret != PIPE_OK)
+               return ret;
+
+            assert(sv->id != SVGA3D_INVALID_ID);
+            ids[i] = sv->id;
+         }
+         else {
+            surface = NULL;
+            ids[i] = SVGA3D_INVALID_ID;
+         }
+         surfaces[i] = surface;
+      }
+
+      for (; i < Elements(ids); i++) {
+         ids[i] = SVGA3D_INVALID_ID;
+         surfaces[i] = NULL;
+      }
+
+      if (shader == PIPE_SHADER_FRAGMENT) {
+         /* Handle polygon stipple sampler view */
+         if (svga->curr.rast->templ.poly_stipple_enable) {
+            const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+            struct svga_pipe_sampler_view *sv =
+               svga->polygon_stipple.sampler_view;
+
+            assert(sv);
+            if (!sv) {
+               return PIPE_OK;  /* probably out of memory */
+            }
+
+            ret = svga_validate_pipe_sampler_view(svga, sv);
+            if (ret != PIPE_OK)
+               return ret;
+
+            ids[unit] = sv->id;
+            surfaces[unit] = svga_resource_handle(sv->base.texture);
+            count = MAX2(count, unit+1);
+         }
+      }
+
+      /* Number of ShaderResources that need to be modified. This includes
+       * the ones that need to be unbound.
+       */
+      nviews = MAX2(svga->state.hw_draw.num_sampler_views[shader], count);
+      if (nviews > 0) {
+         ret = SVGA3D_vgpu10_SetShaderResources(svga->swc,
+                                                svga_shader_type(shader),
+                                                0, /* startView */
+                                                nviews,
+                                                ids,
+                                                surfaces);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+
+      /* Number of sampler views enabled in the device */
+      svga->state.hw_draw.num_sampler_views[shader] = count;
+   }
+
+   return ret;
+}
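
Note how the loop above unbinds stale views: ids[] is padded out with SVGA3D_INVALID_ID and the emit count is the maximum of the previously bound count and the current count, so slots left over from the last draw are explicitly cleared. The same idea in isolation (a hypothetical helper, not driver code):

static unsigned
views_to_emit(unsigned ids[], unsigned max_ids,
              unsigned prev_count, unsigned cur_count)
{
   unsigned i;

   /* pad the tail so previously bound slots get unbound */
   for (i = cur_count; i < max_ids; i++)
      ids[i] = ~0u;            /* SVGA3D_INVALID_ID */

   return prev_count > cur_count ? prev_count : cur_count;
}
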
+
+
+struct svga_tracked_state svga_hw_sampler_bindings = {
+   "shader resources emit",
+   SVGA_NEW_STIPPLE |
+   SVGA_NEW_TEXTURE_BINDING,
+   update_sampler_resources
+};
+
+
+
+static enum pipe_error
+update_samplers(struct svga_context *svga, unsigned dirty )
+{
+   enum pipe_error ret = PIPE_OK;
+   unsigned shader;
+
+   if (!svga_have_vgpu10(svga))
+      return PIPE_OK;
+
+   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+      const unsigned count = svga->curr.num_samplers[shader];
+      SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS];
+      unsigned i;
+
+      for (i = 0; i < count; i++) {
+         if (svga->curr.sampler[shader][i]) {
+            ids[i] = svga->curr.sampler[shader][i]->id;
+            assert(ids[i] != SVGA3D_INVALID_ID);
+         }
+         else {
+            ids[i] = SVGA3D_INVALID_ID;
+         }
+      }
+
+      if (count > 0) {
+         ret = SVGA3D_vgpu10_SetSamplers(svga->swc,
+                                         count,
+                                         0,                        /* start */
+                                         svga_shader_type(shader), /* type */
+                                         ids);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+   }
+
+   /* Handle polygon stipple sampler texture */
+   if (svga->curr.rast->templ.poly_stipple_enable) {
+      const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+      struct svga_sampler_state *sampler = svga->polygon_stipple.sampler;
+
+      assert(sampler);
+      if (!sampler) {
+         return PIPE_OK; /* probably out of memory */
+      }
+
+      ret = SVGA3D_vgpu10_SetSamplers(svga->swc,
+                                      1, /* count */
+                                      unit, /* start */
+                                      SVGA3D_SHADERTYPE_PS,
+                                      &sampler->id);
+   }
+
+   return ret;
+}
+
+
+struct svga_tracked_state svga_hw_sampler = {
+   "texture sampler emit",
+   (SVGA_NEW_SAMPLER |
+    SVGA_NEW_STIPPLE |
+    SVGA_NEW_TEXTURE_FLAGS),
+   update_samplers
+};
diff --git a/src/gallium/drivers/svga/svga_state_tgsi_transform.c b/src/gallium/drivers/svga/svga_state_tgsi_transform.c
new file mode 100644 (file)
index 0000000..023c586
--- /dev/null
@@ -0,0 +1,293 @@
+/**********************************************************
+ * Copyright 2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "util/u_simple_shaders.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_point_sprite.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "svga_context.h"
+#include "svga_shader.h"
+#include "svga_tgsi.h"
+
+
+/**
+ * Bind a new GS.  This updates the derived current gs state, not the
+ * user-specified GS state.
+ */
+static void
+bind_gs_state(struct svga_context *svga,
+              struct svga_geometry_shader *gs)
+{
+   svga->curr.gs = gs;
+   svga->dirty |= SVGA_NEW_GS;
+}
+
+
+/**
+ * emulate_point_sprite searches the shader variants list to see if there is
+ * a shader variant with a token string that matches the emulation
+ * requirement. If there isn't, then it will use a tgsi utility
+ * tgsi_add_point_sprite to transform the original token string to support
+ * point sprite. A new geometry shader state will be created with the
+ * transformed token string and added to the shader variants list of the
+ * original geometry shader. The new geometry shader state will then be
+ * bound as the current geometry shader.
+ */
+static struct svga_shader *
+emulate_point_sprite(struct svga_context *svga,
+                     struct svga_shader *shader,
+                     const struct tgsi_token *tokens)
+{
+   struct svga_token_key key;
+   struct tgsi_token *new_tokens;
+   const struct tgsi_token *orig_tokens;
+   struct svga_geometry_shader *orig_gs = (struct svga_geometry_shader *)shader;
+   struct svga_geometry_shader *gs = NULL;
+   struct pipe_shader_state templ;
+   struct svga_stream_output *streamout = NULL;
+   int pos_out_index = -1;
+   int aa_point_coord_index = -1;
+
+   assert(tokens != NULL);
+
+   orig_tokens = tokens;
+
+   /* Create a token key */
+   memset(&key, 0, sizeof key);
+   key.gs.writes_psize = 1;
+   key.gs.sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable;
+
+   key.gs.sprite_origin_upper_left =
+      !(svga->curr.rast->templ.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT);
+
+   key.gs.aa_point = svga->curr.rast->templ.point_smooth;
+
+   if (orig_gs != NULL) {
+
+      /* Check if the original geometry shader has stream output and
+       * if position is one of the outputs.
+       */
+      streamout = orig_gs->base.stream_output;
+      if (streamout != NULL) {
+         pos_out_index = streamout->pos_out_index;
+         key.gs.point_pos_stream_out = pos_out_index != -1;
+      }
+
+      /* Search the shader lists to see if there is a variant that matches
+       * this token key.
+       */
+      gs = (struct svga_geometry_shader *)
+              svga_search_shader_token_key(&orig_gs->base, &key);
+   }
+
+   /* If there isn't, then call the tgsi utility tgsi_add_point_sprite
+    * to transform the original tokens to support point sprite.
+    * Flip the sprite origin as the SVGA3D device only supports an
+    * upper-left origin.
+    */
+   if (!gs) {
+      new_tokens = tgsi_add_point_sprite(orig_tokens,
+                                         key.gs.sprite_coord_enable,
+                                         key.gs.sprite_origin_upper_left,
+                                         key.gs.point_pos_stream_out,
+                                         key.gs.aa_point ?
+                                            &aa_point_coord_index : NULL);
+
+      if (new_tokens == NULL) {
+         /* if no new tokens are generated for whatever reason, just return */
+         return NULL;
+      }
+
+      if (0) {
+         debug_printf("Before tgsi_add_point_sprite ---------------\n");
+         tgsi_dump(orig_tokens, 0);
+         debug_printf("After tgsi_add_point_sprite --------------\n");
+         tgsi_dump(new_tokens, 0);
+      }
+
+      templ.tokens = new_tokens;
+      templ.stream_output.num_outputs = 0;
+
+      if (streamout != NULL) {
+         templ.stream_output = streamout->info;
+         /* The tgsi_add_point_sprite utility adds an extra output
+          * for the original point position for stream output purpose.
+          * We need to replace the position output register index in the
+          * stream output declaration with the new register index.
+          */
+         if (pos_out_index != -1) {
+            assert(orig_gs != NULL);
+            templ.stream_output.output[pos_out_index].register_index =
+               orig_gs->base.info.num_outputs;
+         }
+      }
+
+      /* Create a new geometry shader state with the new tokens */
+      gs = svga->pipe.create_gs_state(&svga->pipe, &templ);
+
+      /* Don't need the token string anymore. There is a local copy
+       * in the shader state.
+       */
+      FREE(new_tokens);
+
+      if (!gs) {
+         return NULL;
+      }
+
+      gs->wide_point = TRUE;
+      gs->aa_point_coord_index = aa_point_coord_index;
+      gs->base.token_key = key;
+      gs->base.parent = &orig_gs->base;
+      gs->base.next = NULL;
+
+      /* Add the new geometry shader to the head of the shader list
+       * pointed to by the original geometry shader.
+       */
+      if (orig_gs != NULL) {
+         gs->base.next = orig_gs->base.next;
+         orig_gs->base.next = &gs->base;
+      }
+   }
+
+   /* Bind the new geometry shader state */
+   bind_gs_state(svga, gs);
+
+   return &gs->base;
+}
+
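emulate_point_sprite is an instance of the shader-variant caching pattern used throughout this series: walk the parent shader's list of variants for a matching key, and on a miss create a new variant and splice it in right behind the parent. A generic sketch with illustrative names:

struct variant {
   struct variant *next;
   unsigned key;               /* stand-in for struct svga_token_key */
};

static struct variant *
find_or_create_variant(struct variant *parent, unsigned key,
                       struct variant *(*create)(unsigned key))
{
   struct variant *v;

   for (v = parent->next; v; v = v->next) {
      if (v->key == key)
         return v;             /* cache hit */
   }

   v = create(key);            /* cache miss: build the new variant */
   if (v) {
      v->next = parent->next;  /* splice in behind the parent */
      parent->next = v;
   }
   return v;
}
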
+/**
+ * Generate a geometry shader that emits a wide point by drawing a quad.
+ * This function first creates a passthrough geometry shader and then
+ * calls emulate_point_sprite() to transform the geometry shader to
+ * support point sprite.
+ */
+static struct svga_shader *
+add_point_sprite_shader(struct svga_context *svga)
+{
+   struct svga_vertex_shader *vs = svga->curr.vs;
+   struct svga_geometry_shader *orig_gs = vs->gs;
+   struct svga_geometry_shader *new_gs;
+   const struct tgsi_token *tokens;
+
+   if (orig_gs == NULL) {
+
+      /* If this is the first time adding a geometry shader to this
+       * vertex shader to support point sprite, then create
+       * a passthrough geometry shader first.
+       */
+      orig_gs = (struct svga_geometry_shader *)
+                   util_make_geometry_passthrough_shader(
+                      &svga->pipe, vs->base.info.num_outputs,
+                      vs->base.info.output_semantic_name,
+                      vs->base.info.output_semantic_index);
+
+      if (orig_gs == NULL)
+         return NULL;
+   }
+   else {
+      if (orig_gs->base.parent)
+         orig_gs = (struct svga_geometry_shader *)orig_gs->base.parent;
+   }
+   tokens = orig_gs->base.tokens;
+
+   /* Call emulate_point_sprite to find or create a transformed
+    * geometry shader for supporting point sprite.
+    */
+   new_gs = (struct svga_geometry_shader *)
+               emulate_point_sprite(svga, &orig_gs->base, tokens);
+
+   /* If this is the first time creating a geometry shader to
+    * support vertex point size, then add the new geometry shader
+    * to the vertex shader.
+    */
+   if (vs->gs == NULL) {
+      vs->gs = new_gs;
+   }
+
+   return &new_gs->base;
+}
+
+/* update_tgsi_transform provides a hook to transform a shader if needed.
+ */
+static enum pipe_error
+update_tgsi_transform(struct svga_context *svga, unsigned dirty)
+{
+   struct svga_geometry_shader *gs = svga->curr.user_gs;   /* current gs */
+   struct svga_vertex_shader *vs = svga->curr.vs;     /* currently bound vs */
+   struct svga_shader *orig_gs;                       /* original gs */
+   struct svga_shader *new_gs;                        /* new gs */
+
+   if (!svga_have_vgpu10(svga))
+      return PIPE_OK;
+
+   if (svga->curr.reduced_prim == PIPE_PRIM_POINTS) {
+      /* If the current prim type is POINTS and the current geometry shader
+       * emits wide points, transform the shader to emulate wide points using
+       * quads.
+       */
+      if (gs != NULL && (gs->base.info.writes_psize || gs->wide_point)) {
+         orig_gs = gs->base.parent ? gs->base.parent : &gs->base;
+         new_gs = emulate_point_sprite(svga, orig_gs, orig_gs->tokens);
+      }
+
+      /* If there is not an active geometry shader and the current vertex
+       * shader emits a wide point, then create a new geometry shader
+       * to emulate wide points.
+       */
+      else if (gs == NULL &&
+               (svga->curr.rast->pointsize > 1.0 ||
+                vs->base.info.writes_psize)) {
+         new_gs = add_point_sprite_shader(svga);
+      }
+      else {
+         /* use the user's GS */
+         bind_gs_state(svga, svga->curr.user_gs);
+      }
+   }
+   else if (svga->curr.gs != svga->curr.user_gs) {
+      /* If the current primitive type is not POINTS, then make sure
+       * we don't bind any of the generated geometry shaders.
+       */
+      bind_gs_state(svga, svga->curr.user_gs);
+   }
+   (void) new_gs;    /* silence the unused var warning */
+
+   return PIPE_OK;
+}
+
+struct svga_tracked_state svga_need_tgsi_transform =
+{
+   "transform shader for optimization",
+   (SVGA_NEW_VS |
+    SVGA_NEW_FS |
+    SVGA_NEW_GS |
+    SVGA_NEW_REDUCED_PRIMITIVE |
+    SVGA_NEW_RAST),
+   update_tgsi_transform
+};
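
svga_tracked_state entries such as the one above are declarative validation hooks: each atom names itself, lists the SVGA_NEW_* dirty bits it depends on, and supplies an update callback. A stripped-down model of how such atoms could be dispatched (the driver's real loop lives elsewhere and may differ):

struct tracked_state_model {
   const char *name;
   unsigned dirty_mask;                        /* SVGA_NEW_* style bits */
   int (*update)(void *ctx, unsigned dirty);   /* 0 == PIPE_OK */
};

static int
validate_state(void *ctx, unsigned dirty,
               const struct tracked_state_model **atoms, unsigned count)
{
   unsigned i;

   for (i = 0; i < count; i++) {
      if (dirty & atoms[i]->dirty_mask) {
         int ret = atoms[i]->update(ctx, dirty);
         if (ret != 0)
            return ret;        /* propagate the first failure */
      }
   }
   return 0;
}
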
index 41334bd..5991da1 100644 (file)
 #include "svga_sampler_view.h"
 #include "svga_winsys.h"
 #include "svga_context.h"
+#include "svga_shader.h"
 #include "svga_state.h"
 #include "svga_cmd.h"
 
 
+/**
+ * Called when tearing down a context to free resources and samplers.
+ */
 void svga_cleanup_tss_binding(struct svga_context *svga)
 {
+   const unsigned shader = PIPE_SHADER_FRAGMENT;
    unsigned i;
-   unsigned count = MAX2( svga->curr.num_sampler_views,
-                          svga->state.hw_draw.num_views );
 
-   for (i = 0; i < count; i++) {
+   for (i = 0; i < Elements(svga->state.hw_draw.views); i++) {
       struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
-
-      svga_sampler_view_reference(&view->v, NULL);
-      pipe_sampler_view_release(&svga->pipe, &svga->curr.sampler_views[i]);
-      pipe_resource_reference( &view->texture, NULL );
-
-      view->dirty = 1;
+      if (view) {
+         svga_sampler_view_reference(&view->v, NULL);
+         pipe_sampler_view_release(&svga->pipe,
+                                   &svga->curr.sampler_views[shader][i]);
+         pipe_resource_reference(&view->texture, NULL);
+         view->dirty = TRUE;
+      }
    }
 }
 
@@ -63,73 +67,113 @@ struct bind_queue {
 };
 
 
+/**
+ * Update the texture binding for one texture unit.
+ */
+static void
+emit_tex_binding_unit(struct svga_context *svga,
+                      unsigned unit,
+                      const struct svga_sampler_state *s,
+                      const struct pipe_sampler_view *sv,
+                      struct svga_hw_view_state *view,
+                      boolean reemit,
+                      struct bind_queue *queue)
+{
+   struct pipe_resource *texture = NULL;
+   unsigned last_level, min_lod, max_lod;
+
+   /* get min max lod */
+   if (sv && s) {
+      if (s->mipfilter == SVGA3D_TEX_FILTER_NONE) {
+         /* just use the base level image */
+         min_lod = max_lod = sv->u.tex.first_level;
+      }
+      else {
+         last_level = MIN2(sv->u.tex.last_level, sv->texture->last_level);
+         min_lod = s->view_min_lod + sv->u.tex.first_level;
+         min_lod = MIN2(min_lod, last_level);
+         max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level, last_level);
+      }
+      texture = sv->texture;
+   }
+   else {
+      min_lod = 0;
+      max_lod = 0;
+   }
+
+   if (view->texture != texture ||
+       view->min_lod != min_lod ||
+       view->max_lod != max_lod) {
+
+      svga_sampler_view_reference(&view->v, NULL);
+      pipe_resource_reference( &view->texture, texture );
+
+      view->dirty = TRUE;
+      view->min_lod = min_lod;
+      view->max_lod = max_lod;
+
+      if (texture) {
+         view->v = svga_get_tex_sampler_view(&svga->pipe,
+                                             texture,
+                                             min_lod,
+                                             max_lod);
+      }
+   }
+
+   /*
+    * We need to reemit non-null texture bindings, even when they are not
+    * dirty, to ensure that the resources are paged in.
+    */
+   if (view->dirty || (reemit && view->v)) {
+      queue->bind[queue->bind_count].unit = unit;
+      queue->bind[queue->bind_count].view = view;
+      queue->bind_count++;
+   }
+
+   if (!view->dirty && view->v) {
+      svga_validate_sampler_view(svga, view->v);
+   }
+}
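
The LOD selection in emit_tex_binding_unit offsets the sampler's [view_min_lod, view_max_lod] window by the view's first mip level and clamps it to the last usable level (itself the minimum of the view's and the texture's last level); with no mip filtering both ends collapse onto the base level. That arithmetic, stated standalone:

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

static void
compute_lod_window(unsigned first_level, unsigned last_level,
                   unsigned view_min_lod, unsigned view_max_lod,
                   int mipfilter_none,
                   unsigned *min_lod, unsigned *max_lod)
{
   if (mipfilter_none) {
      *min_lod = *max_lod = first_level;   /* base level image only */
   }
   else {
      *min_lod = MIN2(view_min_lod + first_level, last_level);
      *max_lod = MIN2(view_max_lod + first_level, last_level);
   }
}
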
+
+
 static enum pipe_error
 update_tss_binding(struct svga_context *svga, 
                    unsigned dirty )
 {
-   boolean reemit = svga->rebind.texture_samplers;
+   const unsigned shader = PIPE_SHADER_FRAGMENT;
+   boolean reemit = svga->rebind.flags.texture_samplers;
    unsigned i;
-   unsigned count = MAX2( svga->curr.num_sampler_views,
+   unsigned count = MAX2( svga->curr.num_sampler_views[shader],
                           svga->state.hw_draw.num_views );
-   unsigned min_lod;
-   unsigned max_lod;
 
    struct bind_queue queue;
 
+   if (svga_have_vgpu10(svga))
+      return PIPE_OK;
+
    queue.bind_count = 0;
    
    for (i = 0; i < count; i++) {
-      const struct svga_sampler_state *s = svga->curr.sampler[i];
-      struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
-      struct pipe_resource *texture = NULL;
-      struct pipe_sampler_view *sv = svga->curr.sampler_views[i];
-
-      /* get min max lod */
-      if (sv && s) {
-         min_lod = MAX2(0, (s->view_min_lod + sv->u.tex.first_level));
-         max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level,
-                        sv->texture->last_level);
-         texture = sv->texture;
-      } else {
-         min_lod = 0;
-         max_lod = 0;
-      }
-
-      if (view->texture != texture ||
-          view->min_lod != min_lod ||
-          view->max_lod != max_lod) {
-
-         svga_sampler_view_reference(&view->v, NULL);
-         pipe_resource_reference( &view->texture, texture );
-
-         view->dirty = TRUE;
-         view->min_lod = min_lod;
-         view->max_lod = max_lod;
-
-         if (texture)
-            view->v = svga_get_tex_sampler_view(&svga->pipe, 
-                                                texture, 
-                                                min_lod,
-                                                max_lod);
-      }
-
-      /*
-       * We need to reemit non-null texture bindings, even when they are not
-       * dirty, to ensure that the resources are paged in.
-       */
-
-      if (view->dirty ||
-          (reemit && view->v)) {
-         queue.bind[queue.bind_count].unit = i;
-         queue.bind[queue.bind_count].view = view;
-         queue.bind_count++;
-      } 
-      if (!view->dirty && view->v) {
-         svga_validate_sampler_view(svga, view->v);
-      }
+      emit_tex_binding_unit(svga, i,
+                            svga->curr.sampler[shader][i],
+                            svga->curr.sampler_views[shader][i],
+                            &svga->state.hw_draw.views[i],
+                            reemit,
+                            &queue);
    }
 
-   svga->state.hw_draw.num_views = svga->curr.num_sampler_views;
+   svga->state.hw_draw.num_views = svga->curr.num_sampler_views[shader];
+
+   /* Polygon stipple */
+   if (svga->curr.rast->templ.poly_stipple_enable) {
+      const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+      emit_tex_binding_unit(svga, unit,
+                            svga->polygon_stipple.sampler,
+                            &svga->polygon_stipple.sampler_view->base,
+                            &svga->state.hw_draw.views[unit],
+                            reemit,
+                            &queue);
+   }
 
    if (queue.bind_count) {
       SVGA3dTextureState *ts;
@@ -163,7 +207,7 @@ update_tss_binding(struct svga_context *svga,
       SVGA_FIFOCommitAll( svga->swc );
    }
 
-   svga->rebind.texture_samplers = FALSE;
+   svga->rebind.flags.texture_samplers = FALSE;
 
    return PIPE_OK;
 
@@ -187,7 +231,8 @@ svga_reemit_tss_bindings(struct svga_context *svga)
    enum pipe_error ret;
    struct bind_queue queue;
 
-   assert(svga->rebind.texture_samplers);
+   assert(!svga_have_vgpu10(svga));
+   assert(svga->rebind.flags.texture_samplers);
 
    queue.bind_count = 0;
 
@@ -201,6 +246,18 @@ svga_reemit_tss_bindings(struct svga_context *svga)
       }
    }
 
+   /* Polygon stipple */
+   if (svga->curr.rast->templ.poly_stipple_enable) {
+      const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+      struct svga_hw_view_state *view = &svga->state.hw_draw.views[unit];
+
+      if (view->v) {
+         queue.bind[queue.bind_count].unit = unit;
+         queue.bind[queue.bind_count].view = view;
+         queue.bind_count++;
+      }
+   }
+
    if (queue.bind_count) {
       SVGA3dTextureState *ts;
 
@@ -229,7 +286,7 @@ svga_reemit_tss_bindings(struct svga_context *svga)
       SVGA_FIFOCommitAll(svga->swc);
    }
 
-   svga->rebind.texture_samplers = FALSE;
+   svga->rebind.flags.texture_samplers = FALSE;
 
    return PIPE_OK;
 }
@@ -238,6 +295,7 @@ svga_reemit_tss_bindings(struct svga_context *svga)
 struct svga_tracked_state svga_hw_tss_binding = {
    "texture binding emit",
    SVGA_NEW_TEXTURE_BINDING |
+   SVGA_NEW_STIPPLE |
    SVGA_NEW_SAMPLER,
    update_tss_binding
 };
@@ -252,78 +310,98 @@ struct ts_queue {
 };
 
 
-#define EMIT_TS(svga, unit, val, token, fail)                           \
+static inline void
+svga_queue_tss( struct ts_queue *q,
+                unsigned unit,
+                unsigned tss,
+                unsigned value )
+{
+   assert(q->ts_count < sizeof(q->ts)/sizeof(q->ts[0]));
+   q->ts[q->ts_count].stage = unit;
+   q->ts[q->ts_count].name = tss;
+   q->ts[q->ts_count].value = value;
+   q->ts_count++;
+}
+
+
+#define EMIT_TS(svga, unit, val, token)                                 \
 do {                                                                    \
    assert(unit < Elements(svga->state.hw_draw.ts));                     \
    assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit]));  \
    if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) {        \
-      svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val );           \
+      svga_queue_tss( queue, unit, SVGA3D_TS_##token, val );            \
       svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val;            \
    }                                                                    \
 } while (0)
 
-#define EMIT_TS_FLOAT(svga, unit, fvalue, token, fail)                  \
+#define EMIT_TS_FLOAT(svga, unit, fvalue, token)                        \
 do {                                                                    \
    unsigned val = fui(fvalue);                                          \
    assert(unit < Elements(svga->state.hw_draw.ts));                     \
    assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit]));  \
    if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) {        \
-      svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val );           \
+      svga_queue_tss( queue, unit, SVGA3D_TS_##token, val );            \
       svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val;            \
    }                                                                    \
 } while (0)
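
EMIT_TS_FLOAT relies on fui() from util/u_math.h to reinterpret a float's bits as an unsigned, so float state can live in the same ts[] shadow array as integer state and be compared with !=. A union-based rendering of that reinterpretation (fui's exact implementation may differ):

static unsigned
float_to_bits(float f)
{
   union { float f; unsigned u; } fu;

   fu.f = f;                   /* reinterpret the bits, don't convert */
   return fu.u;
}
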
 
 
-static inline void 
-svga_queue_tss( struct ts_queue *q,
-                unsigned unit,
-                unsigned tss,
-                unsigned value )
+/**
+ * Emit texture sampler state (tss) for one texture unit.
+ */
+static void
+emit_tss_unit(struct svga_context *svga, unsigned unit,
+              const struct svga_sampler_state *state,
+              struct ts_queue *queue)
 {
-   assert(q->ts_count < sizeof(q->ts)/sizeof(q->ts[0]));
-   q->ts[q->ts_count].stage = unit;
-   q->ts[q->ts_count].name = tss;
-   q->ts[q->ts_count].value = value;
-   q->ts_count++;
+   EMIT_TS(svga, unit, state->mipfilter, MIPFILTER);
+   EMIT_TS(svga, unit, state->min_lod, TEXTURE_MIPMAP_LEVEL);
+   EMIT_TS(svga, unit, state->magfilter, MAGFILTER);
+   EMIT_TS(svga, unit, state->minfilter, MINFILTER);
+   EMIT_TS(svga, unit, state->aniso_level, TEXTURE_ANISOTROPIC_LEVEL);
+   EMIT_TS_FLOAT(svga, unit, state->lod_bias, TEXTURE_LOD_BIAS);
+   EMIT_TS(svga, unit, state->addressu, ADDRESSU);
+   EMIT_TS(svga, unit, state->addressw, ADDRESSW);
+   EMIT_TS(svga, unit, state->bordercolor, BORDERCOLOR);
+   // TEXCOORDINDEX -- hopefully not needed
+
+   if (svga->curr.tex_flags.flag_1d & (1 << unit))
+      EMIT_TS(svga, unit, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV);
+   else
+      EMIT_TS(svga, unit, state->addressv, ADDRESSV);
+
+   if (svga->curr.tex_flags.flag_srgb & (1 << unit))
+      EMIT_TS_FLOAT(svga, unit, 2.2f, GAMMA);
+   else
+      EMIT_TS_FLOAT(svga, unit, 1.0f, GAMMA);
 }
 
-
 static enum pipe_error
 update_tss(struct svga_context *svga, 
            unsigned dirty )
 {
+   const unsigned shader = PIPE_SHADER_FRAGMENT;
    unsigned i;
    struct ts_queue queue;
 
-   queue.ts_count = 0;
-   for (i = 0; i < svga->curr.num_samplers; i++) {
-      if (svga->curr.sampler[i]) {
-         const struct svga_sampler_state *curr = svga->curr.sampler[i];
-
-         EMIT_TS(svga, i, curr->mipfilter, MIPFILTER, fail);
-         EMIT_TS(svga, i, curr->min_lod, TEXTURE_MIPMAP_LEVEL, fail);
-         EMIT_TS(svga, i, curr->magfilter, MAGFILTER, fail);
-         EMIT_TS(svga, i, curr->minfilter, MINFILTER, fail);
-         EMIT_TS(svga, i, curr->aniso_level, TEXTURE_ANISOTROPIC_LEVEL, fail);
-         EMIT_TS_FLOAT(svga, i, curr->lod_bias, TEXTURE_LOD_BIAS, fail);
-         EMIT_TS(svga, i, curr->addressu, ADDRESSU, fail);
-         EMIT_TS(svga, i, curr->addressw, ADDRESSW, fail);
-         EMIT_TS(svga, i, curr->bordercolor, BORDERCOLOR, fail);
-         // TEXCOORDINDEX -- hopefully not needed
-
-         if (svga->curr.tex_flags.flag_1d & (1 << i)) {
-            EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail);
-         }
-         else
-            EMIT_TS(svga, i, curr->addressv, ADDRESSV, fail);
-
-         if (svga->curr.tex_flags.flag_srgb & (1 << i))
-            EMIT_TS_FLOAT(svga, i, 2.2f, GAMMA, fail);
-         else
-            EMIT_TS_FLOAT(svga, i, 1.0f, GAMMA, fail);
+   if (svga_have_vgpu10(svga))
+      return PIPE_OK;
 
+   queue.ts_count = 0;
+   for (i = 0; i < svga->curr.num_samplers[shader]; i++) {
+      if (svga->curr.sampler[shader][i]) {
+         const struct svga_sampler_state *curr = svga->curr.sampler[shader][i];
+         emit_tss_unit(svga, i, curr, &queue);
       }
    }
+
+   /* polygon stipple sampler */
+   if (svga->curr.rast->templ.poly_stipple_enable) {
+      emit_tss_unit(svga,
+                    svga->state.hw_draw.fs->pstipple_sampler_unit,
+                    svga->polygon_stipple.sampler,
+                    &queue);
+   }
  
    if (queue.ts_count) {
       SVGA3dTextureState *ts;
@@ -357,6 +435,7 @@ fail:
 struct svga_tracked_state svga_hw_tss = {
    "texture state emit",
    (SVGA_NEW_SAMPLER |
+    SVGA_NEW_STIPPLE |
     SVGA_NEW_TEXTURE_FLAGS),
    update_tss
 };
index a33eda3..e1b6a1c 100644 (file)
@@ -33,6 +33,7 @@
 #include "svga_draw.h"
 #include "svga_tgsi.h"
 #include "svga_screen.h"
+#include "svga_shader.h"
 #include "svga_resource_buffer.h"
 #include "svga_hw_reg.h"
 
@@ -42,16 +43,14 @@ static enum pipe_error
 emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
 {
    const struct pipe_vertex_element *ve = svga->curr.velems->velem;
+   SVGA3dVertexDecl decls[SVGA3D_INPUTREG_MAX];
+   unsigned buffer_indexes[SVGA3D_INPUTREG_MAX];
    unsigned i;
    unsigned neg_bias = 0;
 
    assert(svga->curr.velems->count >=
           svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
 
-   /* specify number of vertex element declarations to come */
-   svga_hwtnl_reset_vdecl( svga->hwtnl,
-                           svga->curr.velems->count );
-
    /**
     * We can't set the VDECL offset to something negative, so we
     * must calculate a common negative additional index bias, and modify
@@ -70,15 +69,16 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
    for (i = 0; i < svga->curr.velems->count; i++) {
       const struct pipe_vertex_buffer *vb =
          &svga->curr.vb[ve[i].vertex_buffer_index];
-      const struct svga_buffer *buffer;
+      struct svga_buffer *buffer;
       unsigned int offset = vb->buffer_offset + ve[i].src_offset;
+      unsigned tmp_neg_bias = 0;
 
       if (!vb->buffer)
          continue;
 
       buffer = svga_buffer(vb->buffer);
       if (buffer->uploaded.start > offset) {
-         unsigned tmp_neg_bias = buffer->uploaded.start - offset;
+         tmp_neg_bias = buffer->uploaded.start - offset;
          if (vb->stride)
             tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride;
          neg_bias = MAX2(neg_bias, tmp_neg_bias);
@@ -89,8 +89,7 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
       const struct pipe_vertex_buffer *vb =
          &svga->curr.vb[ve[i].vertex_buffer_index];
       unsigned usage, index;
-      const struct svga_buffer *buffer;
-      SVGA3dVertexDecl decl;
+      struct svga_buffer *buffer;
 
       if (!vb->buffer)
          continue;
@@ -100,29 +99,37 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
 
       /* SVGA_NEW_VELEMENT
        */
-      decl.identity.type = svga->curr.velems->decl_type[i];
-      decl.identity.method = SVGA3D_DECLMETHOD_DEFAULT;
-      decl.identity.usage = usage;
-      decl.identity.usageIndex = index;
-      decl.array.stride = vb->stride;
+      decls[i].identity.type = svga->curr.velems->decl_type[i];
+      decls[i].identity.method = SVGA3D_DECLMETHOD_DEFAULT;
+      decls[i].identity.usage = usage;
+      decls[i].identity.usageIndex = index;
+      decls[i].array.stride = vb->stride;
 
       /* Compensate for partially uploaded vbo, and
        * for the negative index bias.
        */
-      decl.array.offset = (vb->buffer_offset
+      decls[i].array.offset = (vb->buffer_offset
                            + ve[i].src_offset
                           + neg_bias * vb->stride
                           - buffer->uploaded.start);
 
-      assert(decl.array.offset >= 0);
+      assert(decls[i].array.offset >= 0);
+
+      buffer_indexes[i] = ve[i].vertex_buffer_index;
 
-      svga_hwtnl_vdecl( svga->hwtnl,
-                        i,
-                        &decl,
-                        buffer->uploaded.buffer ? buffer->uploaded.buffer :
-                        vb->buffer );
+      assert(!buffer->uploaded.buffer);
    }
 
+   svga_hwtnl_vertex_decls(svga->hwtnl,
+                           svga->curr.velems->count,
+                           decls,
+                           buffer_indexes,
+                           svga->curr.velems->id);
+
+   svga_hwtnl_vertex_buffers(svga->hwtnl,
+                             svga->curr.num_vertex_buffers,
+                             svga->curr.vb);
+
    svga_hwtnl_set_index_bias( svga->hwtnl, -(int) neg_bias );
    return PIPE_OK;
 }
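
The neg_bias computed above is a ceiling division: for each vertex element whose data begins before the uploaded region, it is the smallest number of whole strides that covers the shortfall, and the maximum is taken across all elements. One element's bias in isolation, using the same rounding as the hunk:

static unsigned
index_bias_for(unsigned uploaded_start, unsigned offset, unsigned stride)
{
   unsigned bias;

   if (uploaded_start <= offset)
      return 0;                /* element data is fully uploaded */

   bias = uploaded_start - offset;
   if (stride)
      bias = (bias + stride - 1) / stride;   /* round up to whole strides */
   return bias;
}
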
index c2a0f1e..a846b77 100644 (file)
@@ -25,7 +25,6 @@
 
 #include "util/u_inlines.h"
 #include "pipe/p_defines.h"
-#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_bitmask.h"
 #include "svga_hw_reg.h"
 
 
-static inline int
-compare_vs_keys(const struct svga_vs_compile_key *a,
-                const struct svga_vs_compile_key *b)
-{
-   unsigned keysize = svga_vs_key_size( a );
-   return memcmp( a, b, keysize );
-}
-
-
-/** Search for a vertex shader variant */
-static struct svga_shader_variant *
-search_vs_key(const struct svga_vertex_shader *vs,
-              const struct svga_vs_compile_key *key)
-{
-   struct svga_shader_variant *variant = vs->base.variants;
-
-   assert(key);
-
-   for ( ; variant; variant = variant->next) {
-      if (compare_vs_keys( key, &variant->key.vkey ) == 0)
-         return variant;
-   }
-   
-   return NULL;
-}
-
-
 /**
  * If we fail to compile a vertex shader we'll use a dummy/fallback shader
  * that simply emits a (0,0,0,1) vertex position.
@@ -99,13 +71,29 @@ get_dummy_vertex_shader(void)
 }
 
 
+static struct svga_shader_variant *
+translate_vertex_program(struct svga_context *svga,
+                         const struct svga_vertex_shader *vs,
+                         const struct svga_compile_key *key)
+{
+   if (svga_have_vgpu10(svga)) {
+      return svga_tgsi_vgpu10_translate(svga, &vs->base, key,
+                                        PIPE_SHADER_VERTEX);
+   }
+   else {
+      return svga_tgsi_vgpu9_translate(&vs->base, key, PIPE_SHADER_VERTEX);
+   }
+}
+
+
 /**
  * Replace the given shader's instruction with a simple / dummy shader.
  * We use this when normal shader translation fails.
  */
 static struct svga_shader_variant *
-get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs,
-                                 const struct svga_vs_compile_key *key)
+get_compiled_dummy_vertex_shader(struct svga_context *svga,
+                                 struct svga_vertex_shader *vs,
+                                 const struct svga_compile_key *key)
 {
    const struct tgsi_token *dummy = get_dummy_vertex_shader();
    struct svga_shader_variant *variant;
@@ -117,7 +105,7 @@ get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs,
    FREE((void *) vs->base.tokens);
    vs->base.tokens = dummy;
 
-   variant = svga_translate_vertex_program(vs, key);
+   variant = translate_vertex_program(svga, vs, key);
    return variant;
 }
 
@@ -128,69 +116,87 @@ get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs,
 static enum pipe_error
 compile_vs(struct svga_context *svga,
            struct svga_vertex_shader *vs,
-           const struct svga_vs_compile_key *key,
+           const struct svga_compile_key *key,
            struct svga_shader_variant **out_variant)
 {
    struct svga_shader_variant *variant;
    enum pipe_error ret = PIPE_ERROR;
 
-   variant = svga_translate_vertex_program( vs, key );
+   variant = translate_vertex_program(svga, vs, key);
    if (variant == NULL) {
-      /* some problem during translation, try the dummy shader */
-      variant = get_compiled_dummy_vertex_shader(vs, key);
-      if (!variant) {
-         ret = PIPE_ERROR;
-         goto fail;
-      }
+      debug_printf("Failed to compile vertex shader,"
+                   " using dummy shader instead.\n");
+      variant = get_compiled_dummy_vertex_shader(svga, vs, key);
    }
-
-   if (svga_shader_too_large(svga, variant)) {
+   else if (svga_shader_too_large(svga, variant)) {
       /* too big, use dummy shader */
-      debug_printf("Shader too large (%lu bytes),"
+      debug_printf("Shader too large (%u bytes),"
                    " using dummy shader instead.\n",
-                   (unsigned long ) variant->nr_tokens
-                   * sizeof(variant->tokens[0]));
-      variant = get_compiled_dummy_vertex_shader(vs, key);
-      if (!variant) {
-         ret = PIPE_ERROR;
-         goto fail;
-      }
+                   (unsigned) (variant->nr_tokens
+                               * sizeof(variant->tokens[0])));
+      /* Free the too-large variant */
+      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+      /* Use simple pass-through shader instead */
+      variant = get_compiled_dummy_vertex_shader(svga, vs, key);
+   }
+
+   if (!variant) {
+      return PIPE_ERROR;
    }
 
    ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_VS, variant);
-   if (ret != PIPE_OK)
-      goto fail;
+   if (ret != PIPE_OK) {
+      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+      return ret;
+   }
 
    *out_variant = variant;
 
-   /* insert variants at head of linked list */
-   variant->next = vs->base.variants;
-   vs->base.variants = variant;
-
    return PIPE_OK;
-
-fail:
-   if (variant) {
-      svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
-   }
-   return ret;
 }
 
+
 /* SVGA_NEW_PRESCALE, SVGA_NEW_RAST, SVGA_NEW_FS
  */
 static void
-make_vs_key(struct svga_context *svga, struct svga_vs_compile_key *key)
+make_vs_key(struct svga_context *svga, struct svga_compile_key *key)
 {
+   const unsigned shader = PIPE_SHADER_VERTEX;
+
    memset(key, 0, sizeof *key);
-   key->need_prescale = svga->state.hw_clear.prescale.enabled;
-   key->allow_psiz = svga->curr.rast->templ.point_size_per_vertex;
+
+   if (svga->state.sw.need_swtnl && svga_have_vgpu10(svga)) {
+      /* Set both of these flags, to match compile_passthrough_vs() */
+      key->vs.passthrough = 1;
+      key->vs.undo_viewport = 1;
+      return;
+   }
+
+   key->vs.need_prescale = svga->state.hw_clear.prescale.enabled &&
+                           (svga->curr.gs == NULL);
+   key->vs.allow_psiz = svga->curr.rast->templ.point_size_per_vertex;
 
    /* SVGA_NEW_FS */
-   key->fs_generic_inputs = svga->curr.fs->generic_inputs;
+   key->vs.fs_generic_inputs = svga->curr.fs->generic_inputs;
+
+   svga_remap_generics(key->vs.fs_generic_inputs, key->generic_remap_table);
 
    /* SVGA_NEW_VELEMENT */
-   key->adjust_attrib_range = svga->curr.velems->adjust_attrib_range;
-   key->adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1;
+   key->vs.adjust_attrib_range = svga->curr.velems->adjust_attrib_range;
+   key->vs.adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1;
+   key->vs.attrib_is_pure_int = svga->curr.velems->attrib_is_pure_int;
+   key->vs.adjust_attrib_itof = svga->curr.velems->adjust_attrib_itof;
+   key->vs.adjust_attrib_utof = svga->curr.velems->adjust_attrib_utof;
+   key->vs.attrib_is_bgra = svga->curr.velems->attrib_is_bgra;
+   key->vs.attrib_puint_to_snorm = svga->curr.velems->attrib_puint_to_snorm;
+   key->vs.attrib_puint_to_uscaled = svga->curr.velems->attrib_puint_to_uscaled;
+   key->vs.attrib_puint_to_sscaled = svga->curr.velems->attrib_puint_to_sscaled;
+
+   /* SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */
+   svga_init_shader_key_common(svga, shader, key);
+
+   /* SVGA_NEW_RAST */
+   key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable;
 }
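
make_vs_key zeroes the entire key before filling individual fields; that is what makes a memcmp-based variant lookup (as in svga_search_shader_key) exact, since padding and unused fields always compare equal. The discipline in miniature, with an illustrative key layout rather than the real svga_compile_key:

#include <string.h>

struct toy_key {
   unsigned flags;
   unsigned nr_samplers;
   /* ... more fields, possibly with padding ... */
};

static void
make_toy_key(struct toy_key *key, unsigned flags, unsigned nr_samplers)
{
   memset(key, 0, sizeof *key);   /* clears padding bytes too */
   key->flags = flags;
   key->nr_samplers = nr_samplers;
}

static int
toy_keys_equal(const struct toy_key *a, const struct toy_key *b)
{
   return memcmp(a, b, sizeof *a) == 0;
}
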
 
 
@@ -201,17 +207,128 @@ enum pipe_error
 svga_reemit_vs_bindings(struct svga_context *svga)
 {
    enum pipe_error ret;
-   struct svga_winsys_gb_shader *gbshader =
-      svga->state.hw_draw.vs ? svga->state.hw_draw.vs->gb_shader : NULL;
+   struct svga_winsys_gb_shader *gbshader = NULL;
+   SVGA3dShaderId shaderId = SVGA3D_INVALID_ID;
 
-   assert(svga->rebind.vs);
+   assert(svga->rebind.flags.vs);
    assert(svga_have_gb_objects(svga));
 
-   ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader);
+   if (svga->state.hw_draw.vs) {
+      gbshader = svga->state.hw_draw.vs->gb_shader;
+      shaderId = svga->state.hw_draw.vs->id;
+   }
+
+   if (!svga_need_to_rebind_resources(svga)) {
+      ret =  svga->swc->resource_rebind(svga->swc, NULL, gbshader,
+                                        SVGA_RELOC_READ);
+      goto out;
+   }
+
+   if (svga_have_vgpu10(svga))
+      ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS,
+                                    gbshader, shaderId);
+   else
+      ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader);
+
+ out:
+   if (ret != PIPE_OK)
+      return ret;
+
+   svga->rebind.flags.vs = FALSE;
+   return PIPE_OK;
+}
+
+
+/**
+ * Used when the 'draw' (swtnl) module is active: the current vertex
+ * shader has already been executed by 'draw', so we just need to
+ * generate a simple vertex shader that passes through all the VS
+ * outputs that will be consumed by the fragment shader.
+ */
+static enum pipe_error
+compile_passthrough_vs(struct svga_context *svga,
+                       struct svga_vertex_shader *vs,
+                       struct svga_fragment_shader *fs,
+                       struct svga_shader_variant **out_variant)
+{
+   struct svga_shader_variant *variant = NULL;
+   unsigned num_inputs;
+   unsigned i;
+   unsigned num_elements;
+   struct svga_vertex_shader new_vs;
+   struct ureg_src src[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_dst dst[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_program *ureg;
+   unsigned num_tokens;
+   struct svga_compile_key key;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+   assert(fs);
+
+   num_inputs = fs->base.info.num_inputs;
+
+   ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!ureg)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* draw will always add position */
+   dst[0] = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+   src[0] = ureg_DECL_vs_input(ureg, 0);
+   num_elements = 1;
+
+   /**
+    * The swtnl backend redefines the input layout based on the
+    * fragment shader's inputs, so we only need to pass through
+    * those inputs that will be consumed by the fragment shader.
+    * Note: DX10 requires the number of vertex elements
+    * specified in the input layout to be no less than the
+    * number of inputs to the vertex shader.
+    */
+   for (i = 0; i < num_inputs; i++) {
+      switch (fs->base.info.input_semantic_name[i]) {
+      case TGSI_SEMANTIC_COLOR:
+      case TGSI_SEMANTIC_GENERIC:
+      case TGSI_SEMANTIC_FOG:
+         dst[num_elements] = ureg_DECL_output(ureg,
+                                fs->base.info.input_semantic_name[i],
+                                fs->base.info.input_semantic_index[i]);
+         src[num_elements] = ureg_DECL_vs_input(ureg, num_elements);
+         num_elements++;
+         break;
+      default:
+         break;
+      }
+   }
+
+   for (i = 0; i < num_elements; i++) {
+      ureg_MOV(ureg, dst[i], src[i]);
+   }
+
+   ureg_END(ureg);
+
+   memset(&new_vs, 0, sizeof(new_vs));
+   new_vs.base.tokens = ureg_get_tokens(ureg, &num_tokens);
+   tgsi_scan_shader(new_vs.base.tokens, &new_vs.base.info);
+
+   memset(&key, 0, sizeof(key));
+   key.vs.undo_viewport = 1;
+
+   ret = compile_vs(svga, &new_vs, &key, &variant);
    if (ret != PIPE_OK)
       return ret;
 
-   svga->rebind.vs = FALSE;
+   ureg_free_tokens(new_vs.base.tokens);
+   ureg_destroy(ureg);
+
+   /* Overwrite the variant key to indicate it's a pass-through VS */
+   memset(&variant->key, 0, sizeof(variant->key));
+   variant->key.vs.passthrough = 1;
+   variant->key.vs.undo_viewport = 1;
+
+   *out_variant = variant;
+
    return PIPE_OK;
 }
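
For reference, the smallest ureg-built passthrough vertex shader follows the same steps as compile_passthrough_vs: declare an output with a matching input, MOV, END, then harvest the tokens. A sketch using the same tgsi_ureg calls (error paths kept minimal; free the result with ureg_free_tokens()):

#include "tgsi/tgsi_ureg.h"

static const struct tgsi_token *
build_min_passthrough_vs(unsigned *num_tokens)
{
   struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
   struct ureg_dst pos_out;
   struct ureg_src pos_in;
   const struct tgsi_token *tokens;

   if (!ureg)
      return NULL;

   pos_out = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
   pos_in = ureg_DECL_vs_input(ureg, 0);
   ureg_MOV(ureg, pos_out, pos_in);        /* copy position through */
   ureg_END(ureg);

   tokens = ureg_get_tokens(ureg, num_tokens);
   ureg_destroy(ureg);                     /* tokens outlive the builder */
   return tokens;
}
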
 
@@ -219,45 +336,67 @@ svga_reemit_vs_bindings(struct svga_context *svga)
 static enum pipe_error
 emit_hw_vs(struct svga_context *svga, unsigned dirty)
 {
-   struct svga_shader_variant *variant = NULL;
+   struct svga_shader_variant *variant;
+   struct svga_vertex_shader *vs = svga->curr.vs;
+   struct svga_fragment_shader *fs = svga->curr.fs;
    enum pipe_error ret = PIPE_OK;
+   struct svga_compile_key key;
+
+   /* If there is an active geometry shader with stream output defined,
+    * then skip the stream output from the vertex shader.
+    */
+   if (!svga_have_gs_streamout(svga)) {
+      /* No GS stream out */
+      if (svga_have_vs_streamout(svga)) {
+         /* Set VS stream out */
+         svga_set_stream_output(svga, vs->base.stream_output);
+      }
+      else {
+         /* turn off stream out */
+         svga_set_stream_output(svga, NULL);
+      }
+   }
 
    /* SVGA_NEW_NEED_SWTNL */
-   if (!svga->state.sw.need_swtnl) {
-      struct svga_vertex_shader *vs = svga->curr.vs;
-      struct svga_vs_compile_key key;
+   if (svga->state.sw.need_swtnl && !svga_have_vgpu10(svga)) {
+      /* No vertex shader is needed */
+      variant = NULL;
+   }
+   else {
+      make_vs_key(svga, &key);
 
-      make_vs_key( svga, &key );
+      /* See if we already have a VS variant that matches the key */
+      variant = svga_search_shader_key(&vs->base, &key);
 
-      variant = search_vs_key( vs, &key );
       if (!variant) {
-         ret = compile_vs( svga, vs, &key, &variant );
+         /* Create VS variant now */
+         if (key.vs.passthrough) {
+            ret = compile_passthrough_vs(svga, vs, fs, &variant);
+         }
+         else {
+            ret = compile_vs(svga, vs, &key, &variant);
+         }
          if (ret != PIPE_OK)
             return ret;
-      }
 
-      assert(variant);
+         /* insert the new variant at head of linked list */
+         assert(variant);
+         variant->next = vs->base.variants;
+         vs->base.variants = variant;
+      }
    }
 
    if (variant != svga->state.hw_draw.vs) {
-      if (svga_have_gb_objects(svga)) {
-         struct svga_winsys_gb_shader *gbshader =
-            variant ? variant->gb_shader : NULL;
-         ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader);
-         if (ret != PIPE_OK)
-            return ret;
-
-         svga->rebind.vs = FALSE;
-      }
-      else {
-         unsigned id = variant ? variant->id : SVGA_ID_INVALID;
-         ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS, id);
+      /* Bind the new variant */
+      if (variant) {
+         ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, variant);
          if (ret != PIPE_OK)
             return ret;
+         svga->rebind.flags.vs = FALSE;
       }
 
       svga->dirty |= SVGA_NEW_VS_VARIANT;
-      svga->state.hw_draw.vs = variant;      
+      svga->state.hw_draw.vs = variant;
    }
 
    return PIPE_OK;
@@ -268,6 +407,9 @@ struct svga_tracked_state svga_hw_vs =
    "vertex shader (hwtnl)",
    (SVGA_NEW_VS |
     SVGA_NEW_FS |
+    SVGA_NEW_TEXTURE_BINDING |
+    SVGA_NEW_SAMPLER |
+    SVGA_NEW_RAST |
     SVGA_NEW_PRESCALE |
     SVGA_NEW_VELEMENT |
     SVGA_NEW_NEED_SWTNL),
diff --git a/src/gallium/drivers/svga/svga_streamout.h b/src/gallium/drivers/svga/svga_streamout.h
new file mode 100644 (file)
index 0000000..da0c445
--- /dev/null
@@ -0,0 +1,50 @@
+/**********************************************************
+ * Copyright 2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_STREAMOUT_H
+#define SVGA_STREAMOUT_H
+
+struct svga_shader;
+
+struct svga_stream_output {
+   struct pipe_stream_output_info info;
+   unsigned pos_out_index;                  // position output index
+   unsigned id;
+};
+
+struct svga_stream_output *
+svga_create_stream_output(struct svga_context *svga,
+                          struct svga_shader *shader,
+                          const struct pipe_stream_output_info *info);
+
+enum pipe_error
+svga_set_stream_output(struct svga_context *svga,
+                       struct svga_stream_output *streamout);
+
+void
+svga_delete_stream_output(struct svga_context *svga,
+                          struct svga_stream_output *streamout);
+
+#endif /* SVGA_STREAMOUT_H */
index 85d0154..aca5abc 100644 (file)
@@ -29,6 +29,7 @@
 #include "pipe/p_defines.h"
 #include "util/u_inlines.h"
 #include "os/os_thread.h"
+#include "util/u_bitmask.h"
 #include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "svga_format.h"
 #include "svga_screen.h"
 #include "svga_context.h"
+#include "svga_sampler_view.h"
 #include "svga_resource_texture.h"
 #include "svga_surface.h"
 #include "svga_debug.h"
 
+static void svga_mark_surface_dirty(struct pipe_surface *surf);
 
 void
 svga_texture_copy_handle(struct svga_context *svga,
                          struct svga_winsys_surface *src_handle,
                          unsigned src_x, unsigned src_y, unsigned src_z,
-                         unsigned src_level, unsigned src_face,
+                         unsigned src_level, unsigned src_layer,
                          struct svga_winsys_surface *dst_handle,
                          unsigned dst_x, unsigned dst_y, unsigned dst_z,
-                         unsigned dst_level, unsigned dst_face,
+                         unsigned dst_level, unsigned dst_layer,
                          unsigned width, unsigned height, unsigned depth)
 {
    struct svga_surface dst, src;
@@ -59,12 +62,12 @@ svga_texture_copy_handle(struct svga_context *svga,
 
    src.handle = src_handle;
    src.real_level = src_level;
-   src.real_face = src_face;
+   src.real_layer = src_layer;
    src.real_zslice = 0;
 
    dst.handle = dst_handle;
    dst.real_level = dst_level;
-   dst.real_face = dst_face;
+   dst.real_layer = dst_layer;
    dst.real_zslice = 0;
 
    box.x = dst_x;
@@ -103,11 +106,13 @@ svga_texture_copy_handle(struct svga_context *svga,
 struct svga_winsys_surface *
 svga_texture_view_surface(struct svga_context *svga,
                           struct svga_texture *tex,
+                          unsigned bind_flags,
                           SVGA3dSurfaceFlags flags,
                           SVGA3dSurfaceFormat format,
                           unsigned start_mip,
                           unsigned num_mip,
-                          int face_pick,
+                          int layer_pick,
+                          unsigned num_layers,
                           int zslice_pick,
                           struct svga_host_surface_cache_key *key) /* OUT */
 {
@@ -117,8 +122,8 @@ svga_texture_view_surface(struct svga_context *svga,
    unsigned z_offset = 0;
 
    SVGA_DBG(DEBUG_PERF, 
-            "svga: Create surface view: face %d zslice %d mips %d..%d\n",
-            face_pick, zslice_pick, start_mip, start_mip+num_mip-1);
+            "svga: Create surface view: layer %d zslice %d mips %d..%d\n",
+            layer_pick, zslice_pick, start_mip, start_mip+num_mip-1);
 
    key->flags = flags;
    key->format = format;
@@ -127,12 +132,20 @@ svga_texture_view_surface(struct svga_context *svga,
    key->size.height = u_minify(tex->b.b.height0, start_mip);
    key->size.depth = zslice_pick < 0 ? u_minify(tex->b.b.depth0, start_mip) : 1;
    key->cachable = 1;
+   key->arraySize = 1;
+   key->numFaces = 1;
+   key->sampleCount = tex->b.b.nr_samples;
+
+   if (key->sampleCount > 1) {
+      key->flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS;
+   }
    
-   if (tex->b.b.target == PIPE_TEXTURE_CUBE && face_pick < 0) {
+   if (tex->b.b.target == PIPE_TEXTURE_CUBE && layer_pick < 0) {
       key->flags |= SVGA3D_SURFACE_CUBEMAP;
       key->numFaces = 6;
-   } else {
-      key->numFaces = 1;
+   } else if (tex->b.b.target == PIPE_TEXTURE_1D_ARRAY ||
+              tex->b.b.target == PIPE_TEXTURE_2D_ARRAY) {
+      key->arraySize = num_layers;
    }
 
    if (key->format == SVGA3D_FORMAT_INVALID) {
@@ -141,7 +154,7 @@ svga_texture_view_surface(struct svga_context *svga,
    }
 
    SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n");
-   handle = svga_screen_surface_create(ss, key);
+   handle = svga_screen_surface_create(ss, bind_flags, PIPE_USAGE_DEFAULT, key);
    if (!handle) {
       key->cachable = 0;
       return NULL;
@@ -149,15 +162,15 @@ svga_texture_view_surface(struct svga_context *svga,
 
    SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture view)\n", handle);
 
-   if (face_pick < 0)
-      face_pick = 0;
+   if (layer_pick < 0)
+      layer_pick = 0;
 
    if (zslice_pick >= 0)
       z_offset = zslice_pick;
 
    for (i = 0; i < key->numMipLevels; i++) {
-      for (j = 0; j < key->numFaces; j++) {
-         if (svga_is_texture_level_defined(tex, j + face_pick, i + start_mip)) {
+      for (j = 0; j < key->numFaces * key->arraySize; j++) {
+         if (svga_is_texture_level_defined(tex, j + layer_pick, i + start_mip)) {
             unsigned depth = (zslice_pick < 0 ?
                               u_minify(tex->b.b.depth0, i + start_mip) :
                               1);
@@ -166,7 +179,7 @@ svga_texture_view_surface(struct svga_context *svga,
                                      tex->handle, 
                                      0, 0, z_offset, 
                                      i + start_mip, 
-                                     j + face_pick,
+                                     j + layer_pick,
                                      handle, 0, 0, 0, i, j,
                                      u_minify(tex->b.b.width0, i + start_mip),
                                      u_minify(tex->b.b.height0, i + start_mip),
@@ -179,33 +192,43 @@ svga_texture_view_surface(struct svga_context *svga,
 }
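
With the widened signature above, requesting a view of a single layer of a render-target texture would look roughly like this (a sketch; the flag, format, and variable choices are illustrative, not taken from the patch):

   /* Hypothetical call: view one mip level of one array layer. */
   struct svga_host_surface_cache_key key;
   struct svga_winsys_surface *view =
      svga_texture_view_surface(svga, tex,
                                PIPE_BIND_RENDER_TARGET,          /* bind_flags */
                                SVGA3D_SURFACE_HINT_RENDERTARGET, /* flags */
                                tex->key.format,
                                level, 1,    /* start_mip, num_mip */
                                layer, 1,    /* layer_pick, num_layers */
                                -1,          /* zslice_pick: no z slice */
                                &key);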
 
 
+/**
+ * A helper function to create a surface view.
+ * The view boolean flag specifies whether svga_texture_view_surface()
+ * will be called to create a cloned surface and resource for the view.
+ */
 static struct pipe_surface *
-svga_create_surface(struct pipe_context *pipe,
-                    struct pipe_resource *pt,
-                    const struct pipe_surface *surf_tmpl)
+svga_create_surface_view(struct pipe_context *pipe,
+                         struct pipe_resource *pt,
+                         const struct pipe_surface *surf_tmpl,
+                         boolean view)
 {
    struct svga_context *svga = svga_context(pipe);
    struct svga_texture *tex = svga_texture(pt);
    struct pipe_screen *screen = pipe->screen;
    struct svga_screen *ss = svga_screen(screen);
    struct svga_surface *s;
-   unsigned face, zslice;
-   boolean view = FALSE;
-   SVGA3dSurfaceFlags flags;
+   unsigned layer, zslice, bind;
+   unsigned nlayers = 1;
+   SVGA3dSurfaceFlags flags = 0;
    SVGA3dSurfaceFormat format;
 
-   assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
-
    s = CALLOC_STRUCT(svga_surface);
    if (!s)
       return NULL;
 
    if (pt->target == PIPE_TEXTURE_CUBE) {
-      face = surf_tmpl->u.tex.first_layer;
+      layer = surf_tmpl->u.tex.first_layer;
       zslice = 0;
    }
+   else if (pt->target == PIPE_TEXTURE_1D_ARRAY ||
+            pt->target == PIPE_TEXTURE_2D_ARRAY) {
+      layer = surf_tmpl->u.tex.first_layer;
+      zslice = 0;
+      nlayers = surf_tmpl->u.tex.last_layer - surf_tmpl->u.tex.first_layer + 1;
+   }
    else {
-      face = 0;
+      layer = 0;
       zslice = surf_tmpl->u.tex.first_layer;
    }
 
@@ -218,25 +241,98 @@ svga_create_surface(struct pipe_context *pipe,
    s->base.u.tex.level = surf_tmpl->u.tex.level;
    s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
    s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+   s->view_id = SVGA3D_INVALID_ID;
+
+   s->backed = NULL;
 
    if (util_format_is_depth_or_stencil(surf_tmpl->format)) {
-      flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
+      flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL |
+              SVGA3D_SURFACE_BIND_DEPTH_STENCIL;
+      bind = PIPE_BIND_DEPTH_STENCIL;
    }
    else {
-      flags = SVGA3D_SURFACE_HINT_RENDERTARGET;
+      flags = SVGA3D_SURFACE_HINT_RENDERTARGET |
+              SVGA3D_SURFACE_BIND_RENDER_TARGET;
+      bind = PIPE_BIND_RENDER_TARGET;
    }
 
-   format = svga_translate_format(ss, surf_tmpl->format, 0);
+   if (tex->imported)
+      format = tex->key.format;
+   else
+      format = svga_translate_format(ss, surf_tmpl->format, bind);
+
    assert(format != SVGA3D_FORMAT_INVALID);
 
-   if (svga_screen(screen)->debug.force_surface_view)
-      view = TRUE;
+   if (view) {
+      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u layer %u z %u, %p\n",
+               pt, surf_tmpl->u.tex.level, layer, zslice, s);
+
+      if (svga_have_vgpu10(svga)) {
+         switch (pt->target) {
+         case PIPE_TEXTURE_1D:
+            flags |= SVGA3D_SURFACE_1D;
+            break;
+         case PIPE_TEXTURE_1D_ARRAY:
+            flags |= SVGA3D_SURFACE_1D | SVGA3D_SURFACE_ARRAY;
+            break;
+         case PIPE_TEXTURE_2D_ARRAY:
+            flags |= SVGA3D_SURFACE_ARRAY;
+            break;
+         case PIPE_TEXTURE_3D:
+            flags |= SVGA3D_SURFACE_VOLUME;
+            break;
+         case PIPE_TEXTURE_CUBE:
+            if (nlayers == 6)
+               flags |= SVGA3D_SURFACE_CUBEMAP;
+            break;
+         default:
+            break;
+         }
+      }
 
-   /* Currently only used for compressed textures */
-   if (format != svga_translate_format(ss, surf_tmpl->format, 0)) {
-      view = TRUE;
+      /* When we clone the surface view resource, use the format used in
+       * the creation of the original resource.
+       */
+      s->handle = svga_texture_view_surface(svga, tex, bind, flags, tex->key.format,
+                                            surf_tmpl->u.tex.level, 1,
+                                            layer, nlayers, zslice, &s->key);
+      if (!s->handle) {
+         FREE(s);
+         return NULL;
+      }
+
+      s->key.format = format;
+      s->real_layer = 0;
+      s->real_level = 0;
+      s->real_zslice = 0;
+   } else {
+      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, layer %u, z %u, %p\n",
+               pt, surf_tmpl->u.tex.level, layer, zslice, s);
+
+      memset(&s->key, 0, sizeof s->key);
+      s->key.format = format;
+      s->handle = tex->handle;
+      s->real_layer = layer;
+      s->real_zslice = zslice;
+      s->real_level = surf_tmpl->u.tex.level;
    }
 
+   return &s->base;
+}
+
+
+static struct pipe_surface *
+svga_create_surface(struct pipe_context *pipe,
+                    struct pipe_resource *pt,
+                    const struct pipe_surface *surf_tmpl)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct pipe_screen *screen = pipe->screen;
+   boolean view = FALSE;
+
+   if (svga_screen(screen)->debug.force_surface_view)
+      view = TRUE;
+
    if (surf_tmpl->u.tex.level != 0 &&
        svga_screen(screen)->debug.force_level_surface_view)
       view = TRUE;
@@ -244,47 +340,173 @@ svga_create_surface(struct pipe_context *pipe,
    if (pt->target == PIPE_TEXTURE_3D)
       view = TRUE;
 
-   if (svga_screen(screen)->debug.no_surface_view)
+   if (svga_have_vgpu10(svga) || svga_screen(screen)->debug.no_surface_view)
       view = FALSE;
 
-   if (view) {
-      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n",
-               pt, surf_tmpl->u.tex.level, face, zslice, s);
+   return svga_create_surface_view(pipe, pt, surf_tmpl, view);
+}
 
-      s->handle = svga_texture_view_surface(svga, tex, flags, format,
-                                            surf_tmpl->u.tex.level,
-                                            1, face, zslice, &s->key);
-      s->real_face = 0;
-      s->real_level = 0;
-      s->real_zslice = 0;
-   } else {
-      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n",
-               pt, surf_tmpl->u.tex.level, face, zslice, s);
 
-      memset(&s->key, 0, sizeof s->key);
-      s->handle = tex->handle;
-      s->real_face = face;
-      s->real_zslice = zslice;
-      s->real_level = surf_tmpl->u.tex.level;
+/**
+ * Clone the surface view and its associated resource.
+ */
+static struct svga_surface *
+create_backed_surface_view(struct svga_context *svga, struct svga_surface *s)
+{
+   struct svga_surface *bs = s->backed;
+
+   if (bs == NULL) {
+      struct svga_texture *tex = svga_texture(s->base.texture);
+      struct pipe_surface *backed_view;
+
+      backed_view = svga_create_surface_view(&svga->pipe,
+                                             &tex->b.b,
+                                             &s->base,
+                                             TRUE);
+      if (!backed_view)
+         return NULL;
+
+      bs = svga_surface(backed_view);
+      s->backed = bs;
    }
 
+   svga_mark_surface_dirty(&bs->base);
+
+   return bs;
+}
+
+/**
+ * Create a DX RenderTarget/DepthStencil View for the given surface,
+ * if needed.
+ */
+struct pipe_surface *
+svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
+{
+   enum pipe_error ret = PIPE_OK;
+   unsigned shader;
+
+   assert(svga_have_vgpu10(svga));
+
+   /*
+    * The DX spec explicitly states that no resource can be bound to a render
+    * target view and a shader resource view simultaneously.
+    * So first check whether the resource bound to this surface view collides
+    * with a sampler view. If so, clone this surface view and its associated
+    * resource, and use the cloned surface view as the render target.
+    */
+   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+      if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) {
+         SVGA_DBG(DEBUG_VIEWS,
+                  "same resource used in shaderResource and renderTarget 0x%x\n",
+                  s->handle);
+         s = create_backed_surface_view(svga, s);
+         if (!s)
+            return NULL;
+
+         break;
+      }
+   }
+
+   if (s->view_id == SVGA3D_INVALID_ID) {
+      SVGA3dResourceType resType;
+      SVGA3dRenderTargetViewDesc desc;
+
+      desc.tex.mipSlice = s->real_level;
+      desc.tex.firstArraySlice = s->real_layer + s->real_zslice;
+      desc.tex.arraySize =
+         s->base.u.tex.last_layer - s->base.u.tex.first_layer + 1;
+
+      s->view_id = util_bitmask_add(svga->surface_view_id_bm);
+
+      switch (s->base.texture->target) {
+      case PIPE_TEXTURE_1D:
+      case PIPE_TEXTURE_1D_ARRAY:
+         resType = SVGA3D_RESOURCE_TEXTURE1D;
+         break;
+      case PIPE_TEXTURE_RECT:
+      case PIPE_TEXTURE_2D:
+      case PIPE_TEXTURE_2D_ARRAY:
+      case PIPE_TEXTURE_CUBE:
+         /* drawing to a cube map is treated as drawing to a 2D array */
+         resType = SVGA3D_RESOURCE_TEXTURE2D;
+         break;
+      case PIPE_TEXTURE_3D:
+         resType = SVGA3D_RESOURCE_TEXTURE3D;
+         break;
+      default:
+         assert(!"Unexpected texture target");
+         resType = SVGA3D_RESOURCE_TEXTURE2D;
+      }
+
+      if (util_format_is_depth_or_stencil(s->base.format)) {
+         ret = SVGA3D_vgpu10_DefineDepthStencilView(svga->swc,
+                                                    s->view_id,
+                                                    s->handle,
+                                                    s->key.format,
+                                                    resType,
+                                                    &desc);
+      }
+      else {
+         ret = SVGA3D_vgpu10_DefineRenderTargetView(svga->swc,
+                                                    s->view_id,
+                                                    s->handle,
+                                                    s->key.format,
+                                                    resType,
+                                                    &desc);
+      }
+
+      if (ret != PIPE_OK) {
+         util_bitmask_clear(svga->surface_view_id_bm, s->view_id);
+         s->view_id = SVGA3D_INVALID_ID;
+         return NULL;
+      }
+   }
    return &s->base;
 }
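
The collision check above is the enforcement point for the D3D10 rule quoted in the comment: if the surface's resource is simultaneously bound as a shader resource, rendering silently switches to a backed clone. A hypothetical caller sketch (framebuffer-emit path; the names and the final binding step are assumptions):

   /* Assumed validation step before emitting VGPU10 draws: */
   struct pipe_surface *rt =
      svga_validate_surface_view(svga, svga_surface(fb->cbufs[0]));
   if (!rt)
      return PIPE_ERROR_OUT_OF_MEMORY;
   /* svga_surface(rt)->view_id now identifies a defined RTV/DSV that
    * the emit code can bind with the VGPU10 SetRenderTargets command.
    */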
 
 
+
 static void
 svga_surface_destroy(struct pipe_context *pipe,
                      struct pipe_surface *surf)
 {
+   struct svga_context *svga = svga_context(pipe);
    struct svga_surface *s = svga_surface(surf);
    struct svga_texture *t = svga_texture(surf->texture);
    struct svga_screen *ss = svga_screen(surf->texture->screen);
+   enum pipe_error ret = PIPE_OK;
+
+   /* Destroy the backed view surface if it exists */
+   if (s->backed) {
+      svga_surface_destroy(pipe, &s->backed->base);
+      s->backed = NULL;
+   }
 
    if (s->handle != t->handle) {
       SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle);
       svga_screen_surface_destroy(ss, &s->key, &s->handle);
    }
 
+   if (s->view_id != SVGA3D_INVALID_ID) {
+      unsigned try;
+
+      assert(svga_have_vgpu10(svga));
+      for (try = 0; try < 2; try++) {
+         if (util_format_is_depth_or_stencil(s->base.format)) {
+            ret = SVGA3D_vgpu10_DestroyDepthStencilView(svga->swc, s->view_id);
+         }
+         else {
+            ret = SVGA3D_vgpu10_DestroyRenderTargetView(svga->swc, s->view_id);
+         }
+         if (ret == PIPE_OK)
+            break;
+         svga_context_flush(svga, NULL);
+      }
+      assert(ret == PIPE_OK);
+      util_bitmask_clear(svga->surface_view_id_bm, s->view_id);
+   }
+
    pipe_resource_reference(&surf->texture, NULL);
    FREE(surf);
 }
@@ -294,29 +516,25 @@ static void
 svga_mark_surface_dirty(struct pipe_surface *surf)
 {
    struct svga_surface *s = svga_surface(surf);
+   struct svga_texture *tex = svga_texture(surf->texture);
 
    if (!s->dirty) {
-      struct svga_texture *tex = svga_texture(surf->texture);
-
       s->dirty = TRUE;
 
       if (s->handle == tex->handle) {
          /* hmm so 3d textures always have all their slices marked ? */
-         if (surf->texture->target == PIPE_TEXTURE_CUBE)
-            svga_define_texture_level(tex, surf->u.tex.first_layer,
-                                      surf->u.tex.level);
-         else
-            svga_define_texture_level(tex, 0, surf->u.tex.level);
+         svga_define_texture_level(tex, surf->u.tex.first_layer,
+                                   surf->u.tex.level);
       }
       else {
          /* this will happen later in svga_propagate_surface */
       }
-
-      /* Increment the view_age and texture age for this surface's mipmap
-       * level so that any sampler views into the texture are re-validated too.
-       */
-      svga_age_texture_view(tex, surf->u.tex.level);
    }
+
+   /* Increment the view_age and texture age for this surface's mipmap
+    * level so that any sampler views into the texture are re-validated too.
+    */
+   svga_age_texture_view(tex, surf->u.tex.level);
 }
 
 
@@ -345,18 +563,26 @@ svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf)
    struct svga_surface *s = svga_surface(surf);
    struct svga_texture *tex = svga_texture(surf->texture);
    struct svga_screen *ss = svga_screen(surf->texture->screen);
-   unsigned zslice, face;
+   unsigned zslice, layer;
+   unsigned nlayers = 1;
+   unsigned i;
 
    if (!s->dirty)
       return;
 
    if (surf->texture->target == PIPE_TEXTURE_CUBE) {
       zslice = 0;
-      face = surf->u.tex.first_layer;
+      layer = surf->u.tex.first_layer;
+   }
+   else if (surf->texture->target == PIPE_TEXTURE_1D_ARRAY ||
+            surf->texture->target == PIPE_TEXTURE_2D_ARRAY) {
+      zslice = 0;
+      layer = surf->u.tex.first_layer;
+      nlayers = surf->u.tex.last_layer - surf->u.tex.first_layer + 1;
    }
    else {
       zslice = surf->u.tex.first_layer;
-      face = 0;
+      layer = 0;
    }
 
    s->dirty = FALSE;
@@ -367,12 +593,14 @@ svga_propagate_surface(struct svga_context *svga, struct pipe_surface *surf)
       SVGA_DBG(DEBUG_VIEWS,
                "svga: Surface propagate: tex %p, level %u, from %p\n",
                tex, surf->u.tex.level, surf);
-      svga_texture_copy_handle(svga,
-                               s->handle, 0, 0, 0, s->real_level, s->real_face,
-                               tex->handle, 0, 0, zslice, surf->u.tex.level, face,
-                               u_minify(tex->b.b.width0, surf->u.tex.level),
-                               u_minify(tex->b.b.height0, surf->u.tex.level), 1);
-      svga_define_texture_level(tex, face, surf->u.tex.level);
+      for (i = 0; i < nlayers; i++) {
+         svga_texture_copy_handle(svga,
+                                  s->handle, 0, 0, 0, s->real_level, s->real_layer + i,
+                                  tex->handle, 0, 0, zslice, surf->u.tex.level, layer + i,
+                                  u_minify(tex->b.b.width0, surf->u.tex.level),
+                                  u_minify(tex->b.b.height0, surf->u.tex.level), 1);
+         svga_define_texture_level(tex, layer + i, surf->u.tex.level);
+      }
    }
 }
 
@@ -390,10 +618,76 @@ svga_surface_needs_propagation(const struct pipe_surface *surf)
 }
 
 
+static void
+svga_get_sample_position(struct pipe_context *context,
+                         unsigned sample_count, unsigned sample_index,
+                         float *pos_out)
+{
+   /* We can't actually query the device to learn the sample positions,
+    * so these were grabbed from NVIDIA's driver.
+    */
+   static const float pos1[1][2] = {
+      { 0.5, 0.5 }
+   };
+   static const float pos4[4][2] = {
+      { 0.375000, 0.125000 },
+      { 0.875000, 0.375000 },
+      { 0.125000, 0.625000 },
+      { 0.625000, 0.875000 }
+   };
+   static const float pos8[8][2] = {
+      { 0.562500, 0.312500 },
+      { 0.437500, 0.687500 },
+      { 0.812500, 0.562500 },
+      { 0.312500, 0.187500 },
+      { 0.187500, 0.812500 },
+      { 0.062500, 0.437500 },
+      { 0.687500, 0.937500 },
+      { 0.937500, 0.062500 }
+   };
+   static const float pos16[16][2] = {
+      { 0.187500, 0.062500 },
+      { 0.437500, 0.187500 },
+      { 0.062500, 0.312500 },
+      { 0.312500, 0.437500 },
+      { 0.687500, 0.062500 },
+      { 0.937500, 0.187500 },
+      { 0.562500, 0.312500 },
+      { 0.812500, 0.437500 },
+      { 0.187500, 0.562500 },
+      { 0.437500, 0.687500 },
+      { 0.062500, 0.812500 },
+      { 0.312500, 0.937500 },
+      { 0.687500, 0.562500 },
+      { 0.937500, 0.687500 },
+      { 0.562500, 0.812500 },
+      { 0.812500, 0.937500 }
+   };
+   const float (*positions)[2];
+
+   switch (sample_count) {
+   case 4:
+      positions = pos4;
+      break;
+   case 8:
+      positions = pos8;
+      break;
+   case 16:
+      positions = pos16;
+      break;
+   default:
+      positions = pos1;
+   }
+
+   pos_out[0] = positions[sample_index][0];
+   pos_out[1] = positions[sample_index][1];
+}
+
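
Because the lookup above is a fixed table, the hook costs almost nothing per call. A usage sketch through the generic gallium hook this function is registered as (illustrative loop, not patch code):

   /* Print all four 4x-MSAA sample positions via pipe_context: */
   float pos[2];
   unsigned i;
   for (i = 0; i < 4; i++) {
      pipe->get_sample_position(pipe, 4, i, pos);
      debug_printf("sample %u at (%f, %f)\n", i, pos[0], pos[1]);
   }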
 
 void
 svga_init_surface_functions(struct svga_context *svga)
 {
    svga->pipe.create_surface = svga_create_surface;
    svga->pipe.surface_destroy = svga_surface_destroy;
+   svga->pipe.get_sample_position = svga_get_sample_position;
 }
index 2fa72a1..0e5794b 100644 (file)
@@ -47,11 +47,15 @@ struct svga_surface
    struct svga_host_surface_cache_key key;
    struct svga_winsys_surface *handle;
 
-   unsigned real_face;
+   unsigned real_layer;
    unsigned real_level;
    unsigned real_zslice;
 
    boolean dirty;
+
+   /* VGPU10 */
+   SVGA3dRenderTargetViewId view_id;
+   struct svga_surface *backed;
 };
 
 
@@ -64,11 +68,13 @@ svga_surface_needs_propagation(const struct pipe_surface *surf);
 struct svga_winsys_surface *
 svga_texture_view_surface(struct svga_context *svga,
                           struct svga_texture *tex,
+                          unsigned bind_flags,
                           SVGA3dSurfaceFlags flags,
                           SVGA3dSurfaceFormat format,
                           unsigned start_mip,
                           unsigned num_mip,
-                          int face_pick,
+                          int layer_pick,
+                          unsigned num_layers,
                           int zslice_pick,
                           struct svga_host_surface_cache_key *key); /* OUT */
 
@@ -99,4 +105,8 @@ svga_surface_const(const struct pipe_surface *surface)
    return (const struct svga_surface *)surface;
 }
 
+struct pipe_surface *
+svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s);
+
+
 #endif
index ded8bcb..4bdb21a 100644 (file)
@@ -40,6 +40,7 @@
 #include "svga_reg.h"
 #include "svga3d_reg.h"
 #include "svga_draw.h"
+#include "svga_shader.h"
 #include "svga_swtnl_private.h"
 
 
@@ -129,9 +130,12 @@ svga_vbuf_render_map_vertices( struct vbuf_render *render )
                                          PIPE_TRANSFER_DISCARD_RANGE |
                                          PIPE_TRANSFER_UNSYNCHRONIZED,
                                          &svga_render->vbuf_transfer);
-      if (ptr)
+      if (ptr) {
+         svga_render->vbuf_ptr = ptr;
          return ptr + svga_render->vbuf_offset;
+      }
       else {
+         svga_render->vbuf_ptr = NULL;
          svga_render->vbuf_transfer = NULL;
          return NULL;
       }
@@ -154,6 +158,18 @@ svga_vbuf_render_unmap_vertices( struct vbuf_render *render,
 
    offset = svga_render->vbuf_offset + svga_render->vertex_size * min_index;
    length = svga_render->vertex_size * (max_index + 1 - min_index);
+
+   if (0) {
+      /* dump vertex data */
+      const float *f = (const float *) ((char *) svga_render->vbuf_ptr +
+                                        svga_render->vbuf_offset);
+      unsigned i;
+      debug_printf("swtnl vertex data:\n");
+      for (i = 0; i < length / 4; i += 4) {
+         debug_printf("%u: %f %f %f %f\n", i, f[i], f[i+1], f[i+2], f[i+3]);
+      }
+   }
+
    pipe_buffer_flush_mapped_range(&svga->pipe,
                                  svga_render->vbuf_transfer,
                                  offset, length);
@@ -178,6 +194,7 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
    SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
    enum pipe_error ret;
    unsigned i;
+   static const unsigned zero[PIPE_MAX_ATTRIBS] = {0};
 
    /* if the vdecl or vbuf hasn't changed do nothing */
    if (!svga->swtnl.new_vdecl)
@@ -192,18 +209,27 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
       ret = svga_hwtnl_flush(svga->hwtnl);
       /* if we hit this path we might become synced with hw */
       svga->swtnl.new_vbuf = TRUE;
-      assert(ret == 0);
+      assert(ret == PIPE_OK);
    }
 
-   svga_hwtnl_reset_vdecl(svga->hwtnl, svga_render->vdecl_count);
-
    for (i = 0; i < svga_render->vdecl_count; i++) {
       vdecl[i].array.offset += svga_render->vdecl_offset;
+   }
 
-      svga_hwtnl_vdecl( svga->hwtnl,
-                        i,
-                        &vdecl[i],
-                        svga_render->vbuf );
+   svga_hwtnl_vertex_decls(svga->hwtnl,
+                           svga_render->vdecl_count,
+                           vdecl,
+                           zero,
+                           svga_render->layout_id);
+
+   /* Specify the vertex buffer (there's only ever one) */
+   {
+      struct pipe_vertex_buffer vb;
+      vb.buffer = svga_render->vbuf;
+      vb.buffer_offset = svga_render->vdecl_offset;
+      vb.stride = vdecl[0].array.stride;
+      vb.user_buffer = NULL;
+      svga_hwtnl_vertex_buffers(svga->hwtnl, 1, &vb);
    }
 
    /* We have already taken care of flatshading, so let the hwtnl
@@ -211,15 +237,15 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
     */
    if (svga->state.sw.need_pipeline) {
       svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE);
-      svga_hwtnl_set_unfilled(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
+      svga_hwtnl_set_fillmode(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
    }
    else {
       svga_hwtnl_set_flatshade( svga->hwtnl,
-                                svga->curr.rast->templ.flatshade,
+                                svga->curr.rast->templ.flatshade ||
+                                svga->state.hw_draw.fs->uses_flat_interp,
                                 svga->curr.rast->templ.flatshade_first );
 
-      svga_hwtnl_set_unfilled( svga->hwtnl,
-                               svga->curr.rast->hw_unfilled );
+      svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
    }
 
    svga->swtnl.new_vdecl = FALSE;
@@ -227,13 +253,15 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
 
 static void
 svga_vbuf_render_draw_arrays( struct vbuf_render *render,
-                              unsigned start,
-                              uint nr )
+                              unsigned start, uint nr )
 {
    struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
    struct svga_context *svga = svga_render->svga;
    unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
    enum pipe_error ret = PIPE_OK;
+   /* instancing will already have been resolved at this point by 'draw' */
+   const unsigned start_instance = 0;
+   const unsigned instance_count = 1;
 
    /* off to hardware */
    svga_vbuf_submit_state(svga_render);
@@ -244,10 +272,13 @@ svga_vbuf_render_draw_arrays( struct vbuf_render *render,
     */
    svga_update_state_retry( svga, SVGA_STATE_HW_DRAW );
 
-   ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+   ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr,
+                                start_instance, instance_count);
    if (ret != PIPE_OK) {
       svga_context_flush(svga, NULL);
-      ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+      ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim,
+                                   start + bias, nr,
+                                   start_instance, instance_count);
       svga->swtnl.new_vbuf = TRUE;
       assert(ret == PIPE_OK);
    }
@@ -265,6 +296,9 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
    int bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
    boolean ret;
    size_t size = 2 * nr_indices;
+   /* instancing will already have been resolved at this point by 'draw' */
+   const unsigned start_instance = 0;
+   const unsigned instance_count = 1;
 
    assert(( svga_render->vbuf_offset - svga_render->vdecl_offset) % svga_render->vertex_size == 0);
    
@@ -299,7 +333,8 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
                                         svga_render->min_index,
                                         svga_render->max_index,
                                         svga_render->prim,
-                                        svga_render->ibuf_offset / 2, nr_indices);
+                                        svga_render->ibuf_offset / 2, nr_indices,
+                                        start_instance, instance_count);
    if(ret != PIPE_OK) {
       svga_context_flush(svga, NULL);
       ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
@@ -309,7 +344,9 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
                                            svga_render->min_index,
                                            svga_render->max_index,
                                            svga_render->prim,
-                                           svga_render->ibuf_offset / 2, nr_indices);
+                                           svga_render->ibuf_offset / 2,
+                                           nr_indices,
+                                           start_instance, instance_count);
       svga->swtnl.new_vbuf = TRUE;
       assert(ret == PIPE_OK);
    }
@@ -349,6 +386,7 @@ svga_vbuf_render_create( struct svga_context *svga )
    svga_render->vbuf_size = 0;
    svga_render->ibuf_alloc_size = 4*1024;
    svga_render->vbuf_alloc_size = 64*1024;
+   svga_render->layout_id = SVGA3D_INVALID_ID;
    svga_render->base.max_vertex_buffer_bytes = 64*1024/10;
    svga_render->base.max_indices = 65536;
    svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info;
index 8322495..6a8e857 100644 (file)
@@ -42,9 +42,9 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
 {
    struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = { 0 };
    struct pipe_transfer *ib_transfer = NULL;
-   struct pipe_transfer *cb_transfer = NULL;
+   struct pipe_transfer *cb_transfer[SVGA_MAX_CONST_BUFS] = { 0 };
    struct draw_context *draw = svga->swtnl.draw;
-   unsigned i;
+   unsigned i, old_num_vertex_buffers;
    const void *map;
    enum pipe_error ret;
 
@@ -76,6 +76,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
          draw_set_mapped_vertex_buffer(draw, i, map, ~0);
       }
    }
+   old_num_vertex_buffers = svga->curr.num_vertex_buffers;
 
    /* Map index buffer, if present */
    map = NULL;
@@ -88,16 +89,21 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
                        svga->curr.ib.index_size, ~0);
    }
 
-   if (svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer) {
+   /* Map constant buffers */
+   for (i = 0; i < Elements(svga->curr.constbufs[PIPE_SHADER_VERTEX]); ++i) {
+      if (svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer == NULL) {
+         continue;
+      }
+
       map = pipe_buffer_map(&svga->pipe,
-                            svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer,
+                            svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer,
                             PIPE_TRANSFER_READ,
-                           &cb_transfer);
+                            &cb_transfer[i]);
       assert(map);
       draw_set_mapped_constant_buffer(
-         draw, PIPE_SHADER_VERTEX, 0,
+         draw, PIPE_SHADER_VERTEX, i,
          map,
-         svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer->width0);
+         svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer->width0);
    }
 
    draw_vbo(draw, info);
@@ -105,8 +111,8 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
    draw_flush(svga->swtnl.draw);
 
    /* Ensure the draw module didn't touch this */
-   assert(i == svga->curr.num_vertex_buffers);
-   
+   assert(old_num_vertex_buffers == svga->curr.num_vertex_buffers);
+
    /*
     * unmap vertex/index buffers
     */
@@ -122,8 +128,10 @@ svga_swtnl_draw_vbo(struct svga_context *svga,
       draw_set_indexes(draw, NULL, 0, 0);
    }
 
-   if (svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer) {
-      pipe_buffer_unmap(&svga->pipe, cb_transfer);
+   for (i = 0; i < Elements(svga->curr.constbufs[PIPE_SHADER_VERTEX]); ++i) {
+      if (svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer) {
+         pipe_buffer_unmap(&svga->pipe, cb_transfer[i]);
+      }
    }
 
    /* Now safe to remove the need_swtnl flag in any update_state call */
@@ -167,9 +175,6 @@ boolean svga_init_swtnl( struct svga_context *svga )
    if (!screen->haveLineSmooth)
       draw_install_aaline_stage(svga->swtnl.draw, &svga->pipe);
 
-   /* always install polygon stipple stage */
-   draw_install_pstipple_stage(svga->swtnl.draw, &svga->pipe);
-
    /* enable/disable line stipple stage depending on device caps */
    draw_enable_line_stipple(svga->swtnl.draw, !screen->haveLineStipple);
 
index e2106e1..0a226ab 100644 (file)
@@ -43,6 +43,8 @@ struct svga_vbuf_render {
 
    unsigned vertex_size;
 
+   SVGA3dElementLayoutId layout_id; /**< current element layout id */
+
    unsigned prim;
 
    struct pipe_resource *vbuf;
@@ -50,6 +52,8 @@ struct svga_vbuf_render {
    struct pipe_transfer *vbuf_transfer;
    struct pipe_transfer *ibuf_transfer;
 
+   void *vbuf_ptr;
+
    /* current size of buffer */
    size_t vbuf_size;
    size_t ibuf_size;
index e62698e..79dc0bf 100644 (file)
 
 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
+#include "util/u_bitmask.h"
 #include "util/u_inlines.h"
 #include "pipe/p_state.h"
 
+#include "svga_cmd.h"
 #include "svga_context.h"
+#include "svga_shader.h"
 #include "svga_swtnl.h"
 #include "svga_state.h"
 #include "svga_tgsi.h"
@@ -51,30 +54,37 @@ static void set_draw_viewport( struct svga_context *svga )
    float adjx = 0.0f;
    float adjy = 0.0f;
 
-   switch (svga->curr.reduced_prim) {
-   case PIPE_PRIM_POINTS:
-      adjx = SVGA_POINT_ADJ_X;
-      adjy = SVGA_POINT_ADJ_Y;
-      break;
-   case PIPE_PRIM_LINES:
-      /* XXX: This is to compensate for the fact that wide lines are
-       * going to be drawn with triangles, but we're not catching all
-       * cases where that will happen.
-       */
-      if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)
-      {
-         adjx = SVGA_LINE_ADJ_X + 0.175f;
-         adjy = SVGA_LINE_ADJ_Y - 0.175f;
+   if (svga_have_vgpu10(svga)) {
+      if (svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES) {
+         adjy = 0.25;
       }
-      else {
-         adjx = SVGA_LINE_ADJ_X;
-         adjy = SVGA_LINE_ADJ_Y;
+   }
+   else {
+      switch (svga->curr.reduced_prim) {
+      case PIPE_PRIM_POINTS:
+         adjx = SVGA_POINT_ADJ_X;
+         adjy = SVGA_POINT_ADJ_Y;
+         break;
+      case PIPE_PRIM_LINES:
+         /* XXX: This is to compensate for the fact that wide lines are
+          * going to be drawn with triangles, but we're not catching all
+          * cases where that will happen.
+          */
+         if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)
+         {
+            adjx = SVGA_LINE_ADJ_X + 0.175f;
+            adjy = SVGA_LINE_ADJ_Y - 0.175f;
+         }
+         else {
+            adjx = SVGA_LINE_ADJ_X;
+            adjy = SVGA_LINE_ADJ_Y;
+         }
+         break;
+      case PIPE_PRIM_TRIANGLES:
+         adjx += SVGA_TRIANGLE_ADJ_X;
+         adjy += SVGA_TRIANGLE_ADJ_Y;
+         break;
       }
-      break;
-   case PIPE_PRIM_TRIANGLES:
-      adjx += SVGA_TRIANGLE_ADJ_X;
-      adjy += SVGA_TRIANGLE_ADJ_Y;
-      break;
    }
 
    vp.translate[0] += adjx;
@@ -150,6 +160,59 @@ struct svga_tracked_state svga_update_swtnl_draw =
 };
 
 
+static SVGA3dSurfaceFormat
+translate_vertex_format(SVGA3dDeclType format)
+{
+   switch (format) {
+   case SVGA3D_DECLTYPE_FLOAT1:
+      return SVGA3D_R32_FLOAT;
+   case SVGA3D_DECLTYPE_FLOAT2:
+      return SVGA3D_R32G32_FLOAT;
+   case SVGA3D_DECLTYPE_FLOAT3:
+      return SVGA3D_R32G32B32_FLOAT;
+   case SVGA3D_DECLTYPE_FLOAT4:
+      return SVGA3D_R32G32B32A32_FLOAT;
+   default:
+      assert(!"Unexpected format in translate_vertex_format()");
+      return SVGA3D_R32G32B32A32_FLOAT;
+   }
+}
+
+
+static SVGA3dElementLayoutId
+svga_vdecl_to_input_element(struct svga_context *svga,
+                            const SVGA3dVertexDecl *vdecl, unsigned num_decls)
+{
+   SVGA3dElementLayoutId id;
+   SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS];
+   enum pipe_error ret;
+   unsigned i;
+
+   assert(num_decls <= PIPE_MAX_ATTRIBS);
+   assert(svga_have_vgpu10(svga));
+
+   for (i = 0; i < num_decls; i++) {
+      elements[i].inputSlot = 0; /* vertex buffer index */
+      elements[i].alignedByteOffset = vdecl[i].array.offset;
+      elements[i].format = translate_vertex_format(vdecl[i].identity.type);
+      elements[i].inputSlotClass = SVGA3D_INPUT_PER_VERTEX_DATA;
+      elements[i].instanceDataStepRate = 0;
+      elements[i].inputRegister = i;
+   }
+
+   id = util_bitmask_add(svga->input_element_object_id_bm);
+
+   ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, id, elements);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, id, elements);
+      assert(ret == PIPE_OK);
+   }
+
+   return id;
+}
+
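
To make the translation concrete: for a typical software-TNL vertex of a float4 position followed by a float2 texcoord, the helper above would be fed something like this (hypothetical values; the real caller builds vdecl in svga_swtnl_update_vdecl below):

   /* Hypothetical two-attribute layout in one vertex buffer. */
   SVGA3dVertexDecl vdecl[2];
   SVGA3dElementLayoutId id;
   memset(vdecl, 0, sizeof(vdecl));
   vdecl[0].identity.type = SVGA3D_DECLTYPE_FLOAT4;  /* position */
   vdecl[0].array.offset = 0;
   vdecl[1].identity.type = SVGA3D_DECLTYPE_FLOAT2;  /* texcoord */
   vdecl[1].array.offset = 16;                       /* after the float4 */
   id = svga_vdecl_to_input_element(svga, vdecl, 2);
   /* Yields SVGA3D_R32G32B32A32_FLOAT and SVGA3D_R32G32_FLOAT elements,
    * both in inputSlot 0 with per-vertex stepping.
    */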
+
 enum pipe_error
 svga_swtnl_update_vdecl( struct svga_context *svga )
 {
@@ -164,16 +227,19 @@ svga_swtnl_update_vdecl( struct svga_context *svga )
    int nr_decls = 0;
    int src;
    unsigned i;
+   int any_change;
 
    memset(vinfo, 0, sizeof(*vinfo));
    memset(vdecl, 0, sizeof(vdecl));
 
    draw_prepare_shader_outputs(draw);
+
    /* always add position */
    src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0);
    draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
    vinfo->attrib[0].emit = EMIT_4F;
    vdecl[0].array.offset = offset;
+   vdecl[0].identity.method = SVGA3D_DECLMETHOD_DEFAULT;
    vdecl[0].identity.type = SVGA3D_DECLTYPE_FLOAT4;
    vdecl[0].identity.usage = SVGA3D_DECLUSAGE_POSITIONT;
    vdecl[0].identity.usageIndex = 0;
@@ -225,16 +291,67 @@ svga_swtnl_update_vdecl( struct svga_context *svga )
    draw_compute_vertex_size(vinfo);
 
    svga_render->vdecl_count = nr_decls;
-   for (i = 0; i < svga_render->vdecl_count; i++)
+   for (i = 0; i < svga_render->vdecl_count; i++) {
       vdecl[i].array.stride = offset;
+   }
 
-   if (memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)) == 0)
-      return PIPE_OK;
+   any_change = memcmp(svga_render->vdecl, vdecl, sizeof(vdecl));
+
+   if (svga_have_vgpu10(svga)) {
+      enum pipe_error ret;
+
+      if (!any_change && svga_render->layout_id != SVGA3D_INVALID_ID) {
+         return PIPE_OK;
+      }
+
+      if (svga_render->layout_id != SVGA3D_INVALID_ID) {
+         /* destroy old */
+         ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc,
+                                                  svga_render->layout_id);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc,
+                                                     svga_render->layout_id);
+            assert(ret == PIPE_OK);
+         }
+
+         /* Reset the current layout id state after the element layout is
+          * destroyed, so that if a new layout has the same layout id, we
+          * will know to re-issue the SetInputLayout command.
+          */
+         if (svga->state.hw_draw.layout_id == svga_render->layout_id)
+            svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID;
+
+         util_bitmask_clear(svga->input_element_object_id_bm,
+                            svga_render->layout_id);
+      }
+
+      svga_render->layout_id =
+         svga_vdecl_to_input_element(svga, vdecl, nr_decls);
+
+      /* bind new */
+      if (svga->state.hw_draw.layout_id != svga_render->layout_id) {
+         ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, svga_render->layout_id);
+         if (ret != PIPE_OK) {
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
+                                               svga_render->layout_id);
+            assert(ret == PIPE_OK);
+         }
+
+         svga->state.hw_draw.layout_id = svga_render->layout_id;
+      }
+   }
+   else {
+      if (!any_change)
+         return PIPE_OK;
+   }
 
    memcpy(svga_render->vdecl, vdecl, sizeof(vdecl));
    svga->swtnl.new_vdecl = TRUE;
 
-   return PIPE_OK;
+   return 0;
 }
 
 
index 2e2ff5e..9a6fb46 100644 (file)
@@ -37,6 +37,7 @@
 #include "svgadump/svga_shader_dump.h"
 
 #include "svga_context.h"
+#include "svga_shader.h"
 #include "svga_tgsi.h"
 #include "svga_tgsi_emit.h"
 #include "svga_debug.h"
@@ -166,97 +167,6 @@ svga_shader_emit_header(struct svga_shader_emitter *emit)
 
 
 /**
- * Use the shader info to generate a bitmask indicating which generic
- * inputs are used by the shader.  A set bit indicates that GENERIC[i]
- * is used.
- */
-unsigned
-svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
-{
-   unsigned i, mask = 0x0;
-
-   for (i = 0; i < info->num_inputs; i++) {
-      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
-         unsigned j = info->input_semantic_index[i];
-         assert(j < sizeof(mask) * 8);
-         mask |= 1 << j;
-      }
-   }
-
-   return mask;
-}
-
-
-/**
- * Given a mask of used generic variables (as returned by the above functions)
- * fill in a table which maps those indexes to small integers.
- * This table is used by the remap_generic_index() function in
- * svga_tgsi_decl_sm30.c
- * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
- * GENERIC[3] are used.  The remap_table will contain:
- *   table[1] = 0;
- *   table[3] = 1;
- * The remaining table entries will be filled in with the next unused
- * generic index (in this example, 2).
- */
-void
-svga_remap_generics(unsigned generics_mask,
-                    int8_t remap_table[MAX_GENERIC_VARYING])
-{
-   /* Note texcoord[0] is reserved so start at 1 */
-   unsigned count = 1, i;
-
-   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
-      remap_table[i] = -1;
-   }
-
-   /* for each bit set in generic_mask */
-   while (generics_mask) {
-      unsigned index = ffs(generics_mask) - 1;
-      remap_table[index] = count++;
-      generics_mask &= ~(1 << index);
-   }
-}
-
-
-/**
- * Use the generic remap table to map a TGSI generic varying variable
- * index to a small integer.  If the remapping table doesn't have a
- * valid value for the given index (the table entry is -1) it means
- * the fragment shader doesn't use that VS output.  Just allocate
- * the next free value in that case.  Alternately, we could cull
- * VS instructions that write to register, or replace the register
- * with a dummy temp register.
- * XXX TODO: we should do one of the later as it would save precious
- * texcoord registers.
- */
-int
-svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
-                         int generic_index)
-{
-   assert(generic_index < MAX_GENERIC_VARYING);
-
-   if (generic_index >= MAX_GENERIC_VARYING) {
-      /* just don't return a random/garbage value */
-      generic_index = MAX_GENERIC_VARYING - 1;
-   }
-
-   if (remap_table[generic_index] == -1) {
-      /* This is a VS output that has no matching PS input.  Find a
-       * free index.
-       */
-      int i, max = 0;
-      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
-         max = MAX2(max, remap_table[i]);
-      }
-      remap_table[generic_index] = max + 1;
-   }
-
-   return remap_table[generic_index];
-}
-
-
-/**
  * Parse TGSI shader and translate to SVGA/DX9 serialized
  * representation.
  *
@@ -264,9 +174,9 @@ svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
  * can be dynamically grown.  Once we've finished and know how large
  * it is, it will be copied to a hardware buffer for upload.
  */
-static struct svga_shader_variant *
-svga_tgsi_translate(const struct svga_shader *shader,
-                    const struct svga_compile_key *key, unsigned unit)
+struct svga_shader_variant *
+svga_tgsi_vgpu9_translate(const struct svga_shader *shader,
+                          const struct svga_compile_key *key, unsigned unit)
 {
    struct svga_shader_variant *variant = NULL;
    struct svga_shader_emitter emit;
@@ -288,10 +198,10 @@ svga_tgsi_translate(const struct svga_shader *shader,
    emit.imm_start = emit.info.file_max[TGSI_FILE_CONSTANT] + 1;
 
    if (unit == PIPE_SHADER_FRAGMENT)
-      emit.imm_start += key->fkey.num_unnormalized_coords;
+      emit.imm_start += key->num_unnormalized_coords;
 
    if (unit == PIPE_SHADER_VERTEX) {
-      emit.imm_start += key->vkey.need_prescale ? 2 : 0;
+      emit.imm_start += key->vs.need_prescale ? 2 : 0;
    }
 
    emit.nr_hw_float_const =
@@ -327,7 +237,11 @@ svga_tgsi_translate(const struct svga_shader *shader,
    memcpy(&variant->key, key, sizeof(*key));
    variant->id = UTIL_BITMASK_INVALID_INDEX;
 
-   if (SVGA_DEBUG & DEBUG_TGSI) {
+   variant->pstipple_sampler_unit = emit.pstipple_sampler_unit;
+
+#if 0
+   if (!svga_shader_verify(variant->tokens, variant->nr_tokens) ||
+       SVGA_DEBUG & DEBUG_TGSI) {
       debug_printf("#####################################\n");
       debug_printf("Shader %u below\n", shader->id);
       tgsi_dump(shader->tokens, 0);
@@ -337,6 +251,7 @@ svga_tgsi_translate(const struct svga_shader *shader,
       }
       debug_printf("#####################################\n");
    }
+#endif
 
    return variant;
 
@@ -345,39 +260,3 @@ svga_tgsi_translate(const struct svga_shader *shader,
    FREE(emit.buf);
    return NULL;
 }
-
-
-struct svga_shader_variant *
-svga_translate_fragment_program(const struct svga_fragment_shader *fs,
-                                const struct svga_fs_compile_key *fkey)
-{
-   struct svga_compile_key key;
-
-   memset(&key, 0, sizeof(key));
-
-   memcpy(&key.fkey, fkey, sizeof *fkey);
-
-   memcpy(key.generic_remap_table, fs->generic_remap_table,
-          sizeof(fs->generic_remap_table));
-
-   return svga_tgsi_translate(&fs->base, &key, PIPE_SHADER_FRAGMENT);
-}
-
-
-struct svga_shader_variant *
-svga_translate_vertex_program(const struct svga_vertex_shader *vs,
-                              const struct svga_vs_compile_key *vkey)
-{
-   struct svga_compile_key key;
-
-   memset(&key, 0, sizeof(key));
-
-   memcpy(&key.vkey, vkey, sizeof *vkey);
-
-   /* Note: we could alternately store the remap table in the vkey but
-    * that would make it larger.  We just regenerate it here instead.
-    */
-   svga_remap_generics(vkey->fs_generic_inputs, key.generic_remap_table);
-
-   return svga_tgsi_translate(&vs->base, &key, PIPE_SHADER_VERTEX);
-}
index 5c47a4a..207a3f0 100644 (file)
 #ifndef SVGA_TGSI_H
 #define SVGA_TGSI_H
 
-#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+#include "svga3d_reg.h"
 
-#include "svga_hw_reg.h"
 
+#define MAX_VGPU10_ADDR_REGS 2
 
-/**
- * We use a 32-bit mask to keep track of the generic indexes.
- */
-#define MAX_GENERIC_VARYING 32
-
-
-struct svga_fragment_shader;
-struct svga_vertex_shader;
+struct svga_compile_key;
+struct svga_context;
 struct svga_shader;
-struct tgsi_shader_info;
-struct tgsi_token;
-
-
-struct svga_vs_compile_key
-{
-   unsigned fs_generic_inputs;
-   unsigned need_prescale:1;
-   unsigned allow_psiz:1;
-   unsigned adjust_attrib_range:16;
-   unsigned adjust_attrib_w_1:16;
-};
-
-struct svga_fs_compile_key
-{
-   unsigned light_twoside:1;
-   unsigned front_ccw:1;
-   unsigned white_fragments:1;
-   unsigned write_color0_to_n_cbufs:3;
-   unsigned num_textures:8;
-   unsigned num_unnormalized_coords:8;
-   unsigned sprite_origin_lower_left:1;
-   struct {
-      unsigned compare_mode:1;
-      unsigned compare_func:3;
-      unsigned unnormalized:1;
-      unsigned width_height_idx:7;
-      unsigned texture_target:8;
-      unsigned sprite_texgen:1;
-      unsigned swizzle_r:3;
-      unsigned swizzle_g:3;
-      unsigned swizzle_b:3;
-      unsigned swizzle_a:3;
-   } tex[PIPE_MAX_SAMPLERS];
-};
-
-/**
- * Key/index for identifying shader variants.
- */
-struct svga_compile_key {
-   struct svga_vs_compile_key vkey;
-   struct svga_fs_compile_key fkey;
-   int8_t generic_remap_table[MAX_GENERIC_VARYING];
-};
-
-
-/**
- * A single TGSI shader may be compiled into different variants of
- * SVGA3D shaders depending on the compile key.  Each user shader
- * will have a linked list of these variants.
- */
-struct svga_shader_variant
-{
-   const struct svga_shader *shader;
-
-   /** Parameters used to generate this variant */
-   struct svga_compile_key key;
-
-   /* Compiled shader tokens:
-    */
-   const unsigned *tokens;
-   unsigned nr_tokens;
-
-   /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE,
-    * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY.
-    */
-   unsigned id;
-   
-   /* GB object buffer containing the bytecode */
-   struct svga_winsys_gb_shader *gb_shader;
-
-   /** Next variant */
-   struct svga_shader_variant *next;
-};
+struct svga_shader_variant;
 
 
 /* TGSI doesn't provide us with VS input semantics (they're actually
@@ -140,37 +62,16 @@ static inline void svga_generate_vdecl_semantics( unsigned idx,
 
 
 
-static inline unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
-{
-   return sizeof *key;
-}
-
-static inline unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
-{
-   return (const char *)&key->tex[key->num_textures] - (const char *)key;
-}
-
 struct svga_shader_variant *
-svga_translate_fragment_program( const struct svga_fragment_shader *fs,
-                                 const struct svga_fs_compile_key *fkey );
+svga_tgsi_vgpu9_translate(const struct svga_shader *shader,
+                          const struct svga_compile_key *key, unsigned unit);
 
 struct svga_shader_variant *
-svga_translate_vertex_program( const struct svga_vertex_shader *fs,
-                               const struct svga_vs_compile_key *vkey );
-
-
-unsigned
-svga_get_generic_inputs_mask(const struct tgsi_shader_info *info);
-
-unsigned
-svga_get_generic_outputs_mask(const struct tgsi_shader_info *info);
-
-void
-svga_remap_generics(unsigned generics_mask,
-                    int8_t remap_table[MAX_GENERIC_VARYING]);
+svga_tgsi_vgpu10_translate(struct svga_context *svga,
+                           const struct svga_shader *shader,
+                           const struct svga_compile_key *key,
+                           unsigned unit);
 
-int
-svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
-                         int generic_index);
+boolean svga_shader_verify(const uint32_t *tokens, unsigned nr_tokens);
 
 #endif
index 42d6f48..ca4009b 100644 (file)
@@ -216,7 +216,7 @@ ps30_input(struct svga_shader_emitter *emit,
 
       return emit_decl( emit, reg, 0, 0 );
    }
-   else if (emit->key.fkey.light_twoside &&
+   else if (emit->key.fs.light_twoside &&
             (semantic.Name == TGSI_SEMANTIC_COLOR)) {
 
       if (!translate_vs_ps_semantic( emit, semantic, &usage, &index ))
@@ -285,9 +285,9 @@ ps30_input(struct svga_shader_emitter *emit,
          return FALSE;
 
       if (semantic.Name == TGSI_SEMANTIC_GENERIC &&
-          emit->key.fkey.sprite_origin_lower_left &&
+          emit->key.sprite_origin_lower_left &&
           index >= 1 &&
-          emit->key.fkey.tex[index - 1].sprite_texgen) {
+          emit->key.tex[index - 1].sprite_texgen) {
          /* This is a sprite texture coord with lower-left origin.
           * We need to invert the texture T coordinate since the SVGA3D
           * device only supports an upper-left origin.
@@ -329,7 +329,7 @@ ps30_output(struct svga_shader_emitter *emit,
    switch (semantic.Name) {
    case TGSI_SEMANTIC_COLOR:
       if (emit->unit == PIPE_SHADER_FRAGMENT) {
-         if (emit->key.fkey.white_fragments) {
+         if (emit->key.fs.white_fragments) {
             /* Used for XOR logicop mode */
             emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
                                                   emit->nr_hw_temp++ );
@@ -337,14 +337,14 @@ ps30_output(struct svga_shader_emitter *emit,
             emit->true_color_output[idx] = dst_register(SVGA3DREG_COLOROUT, 
                                                         semantic.Index);
          }
-         else if (emit->key.fkey.write_color0_to_n_cbufs) {
+         else if (emit->key.fs.write_color0_to_n_cbufs) {
             /* We'll write color output [0] to all render targets.
              * Prepare all the output registers here, but only when the
              * semantic.Index == 0 so we don't do this more than once.
              */
             if (semantic.Index == 0) {
                unsigned i;
-               for (i = 0; i < emit->key.fkey.write_color0_to_n_cbufs; i++) {
+               for (i = 0; i < emit->key.fs.write_color0_to_n_cbufs; i++) {
                   emit->output_map[idx+i] = dst_register(SVGA3DREG_TEMP,
                                                      emit->nr_hw_temp++);
                   emit->temp_color_output[i] = emit->output_map[idx+i];
@@ -487,7 +487,7 @@ vs30_output(struct svga_shader_emitter *emit,
       /* This has the effect of not declaring psiz (below) and not 
        * emitting the final MOV to true_psiz in the postamble.
        */
-      if (!emit->key.vkey.allow_psiz)
+      if (!emit->key.vs.allow_psiz)
          return TRUE;
 
       emit->true_psiz = dcl.dst;
@@ -517,7 +517,7 @@ vs30_output(struct svga_shader_emitter *emit,
 static ubyte
 svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx)
 {
-   switch (emit->key.fkey.tex[idx].texture_target) {
+   switch (emit->key.tex[idx].texture_target) {
    case PIPE_TEXTURE_1D:
       return SVGA3DSAMP_2D;
    case PIPE_TEXTURE_2D:
index 1a1dac2..0b82483 100644 (file)
@@ -28,6 +28,7 @@
 
 #include "tgsi/tgsi_scan.h"
 #include "svga_hw_reg.h"
+#include "svga_shader.h"
 #include "svga_tgsi.h"
 #include "svga3d_shaderdefs.h"
 
@@ -130,6 +131,8 @@ struct svga_shader_emitter
    struct svga_arl_consts arl_consts[12];
    int num_arl_consts;
    int current_arl;
+
+   unsigned pstipple_sampler_unit;
 };
 
 
index bac9560..00c91a4 100644 (file)
@@ -29,6 +29,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/u_pstipple.h"
 
 #include "svga_tgsi_emit.h"
 #include "svga_context.h"
@@ -862,7 +863,7 @@ create_common_immediate( struct svga_shader_emitter *emit )
    idx++;
 
    /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
-   if (emit->key.vkey.adjust_attrib_range) {
+   if (emit->key.vs.adjust_attrib_range) {
       if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
                            idx, 2.0f, 0.0f, 0.0f, 0.0f ))
          return FALSE;
@@ -1015,7 +1016,7 @@ get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
    struct src_register reg;
 
    /* the width/height indexes start right after constants */
-   idx = emit->key.fkey.tex[sampler_num].width_height_idx +
+   idx = emit->key.tex[sampler_num].width_height_idx +
          emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
 
    reg = src_register( SVGA3DREG_CONST, idx );
@@ -1723,7 +1724,7 @@ emit_tex2(struct svga_shader_emitter *emit,
    texcoord = translate_src_register( emit, &insn->Src[0] );
    sampler = translate_src_register( emit, &insn->Src[1] );
 
-   if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
+   if (emit->key.tex[sampler.base.num].unnormalized ||
        emit->dynamic_branching_level > 0)
       tmp = get_temp( emit );
 
@@ -1755,7 +1756,7 @@ emit_tex2(struct svga_shader_emitter *emit,
 
    /* Explicit normalization of texcoords:
     */
-   if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
+   if (emit->key.tex[sampler.base.num].unnormalized) {
       struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
 
       /* MUL  tmp, SRC0, WH */
@@ -1891,14 +1892,14 @@ emit_tex(struct svga_shader_emitter *emit,
    const unsigned unit = src1.base.num;
 
    /* check for shadow samplers */
-   boolean compare = (emit->key.fkey.tex[unit].compare_mode ==
+   boolean compare = (emit->key.tex[unit].compare_mode ==
                       PIPE_TEX_COMPARE_R_TO_TEXTURE);
 
    /* texture swizzle */
-   boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
-                      emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
-                      emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
-                      emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
+   boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
+                      emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
+                      emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
+                      emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
 
    boolean saturate = insn->Instruction.Saturate;
 
@@ -1965,7 +1966,7 @@ emit_tex(struct svga_shader_emitter *emit,
 
          /* Compare texture sample value against R component of texcoord */
          if (!emit_select(emit,
-                          emit->key.fkey.tex[unit].compare_func,
+                          emit->key.tex[unit].compare_func,
                           writemask( dst2, TGSI_WRITEMASK_XYZ ),
                           r_coord,
                           tex_src_x))
@@ -1991,10 +1992,10 @@ emit_tex(struct svga_shader_emitter *emit,
       /* swizzle from tex_result to dst (handles saturation too, if any) */
       emit_tex_swizzle(emit,
                        dst, src(tex_result),
-                       emit->key.fkey.tex[unit].swizzle_r,
-                       emit->key.fkey.tex[unit].swizzle_g,
-                       emit->key.fkey.tex[unit].swizzle_b,
-                       emit->key.fkey.tex[unit].swizzle_a);
+                       emit->key.tex[unit].swizzle_r,
+                       emit->key.tex[unit].swizzle_g,
+                       emit->key.tex[unit].swizzle_b,
+                       emit->key.tex[unit].swizzle_a);
    }
 
    return TRUE;
@@ -3113,7 +3114,7 @@ make_immediate(struct svga_shader_emitter *emit,
 static boolean
 emit_vs_preamble(struct svga_shader_emitter *emit)
 {
-   if (!emit->key.vkey.need_prescale) {
+   if (!emit->key.vs.need_prescale) {
       if (!make_immediate( emit, 0, 0, .5, .5,
                            &emit->imm_0055))
          return FALSE;
@@ -3190,7 +3191,7 @@ emit_ps_postamble(struct svga_shader_emitter *emit)
           * logicop workaround.
           */
          if (emit->unit == PIPE_SHADER_FRAGMENT &&
-             emit->key.fkey.white_fragments) {
+             emit->key.fs.white_fragments) {
             struct src_register one = get_one_immediate(emit);
 
             if (!submit_op1( emit,
@@ -3200,7 +3201,7 @@ emit_ps_postamble(struct svga_shader_emitter *emit)
                return FALSE;
          }
          else if (emit->unit == PIPE_SHADER_FRAGMENT &&
-                  i < emit->key.fkey.write_color0_to_n_cbufs) {
+                  i < emit->key.fs.write_color0_to_n_cbufs) {
             /* Write temp color output [0] to true output [i] */
             if (!submit_op1(emit, inst_token(SVGA3DOP_MOV),
                             emit->true_color_output[i],
@@ -3244,7 +3245,7 @@ emit_vs_postamble(struct svga_shader_emitter *emit)
    /* Need to perform various manipulations on vertex position to cope
     * with the different GL and D3D clip spaces.
     */
-   if (emit->key.vkey.need_prescale) {
+   if (emit->key.vs.need_prescale) {
       SVGA3dShaderDestToken temp_pos = emit->temp_pos;
       SVGA3dShaderDestToken depth = emit->depth_pos;
       SVGA3dShaderDestToken pos = emit->true_pos;
@@ -3372,7 +3373,7 @@ emit_light_twoside(struct svga_shader_emitter *emit)
 
    if_token = inst_token( SVGA3DOP_IFC );
 
-   if (emit->key.fkey.front_ccw)
+   if (emit->key.fs.front_ccw)
       if_token.control = SVGA3DOPCOMP_LT;
    else
       if_token.control = SVGA3DOPCOMP_GT;
@@ -3423,7 +3424,7 @@ emit_frontface(struct svga_shader_emitter *emit)
    temp = dst_register( SVGA3DREG_TEMP,
                         emit->nr_hw_temp++ );
 
-   if (emit->key.fkey.front_ccw) {
+   if (emit->key.fs.front_ccw) {
       pass = get_zero_immediate(emit);
       fail = get_one_immediate(emit);
    } else {
@@ -3494,8 +3495,8 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit)
 static boolean
 emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
 {
-   unsigned adjust_mask = (emit->key.vkey.adjust_attrib_range |
-                           emit->key.vkey.adjust_attrib_w_1);
+   unsigned adjust_mask = (emit->key.vs.adjust_attrib_range |
+                           emit->key.vs.adjust_attrib_w_1);
  
    while (adjust_mask) {
       /* Adjust vertex attrib range and/or set W component = 1 */
@@ -3506,7 +3507,7 @@ emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
       tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
       emit->nr_hw_temp++;
 
-      if (emit->key.vkey.adjust_attrib_range & (1 << index)) {
+      if (emit->key.vs.adjust_attrib_range & (1 << index)) {
          /* The vertex input/attribute is supposed to be a signed value in
           * the range [-1,1] but we actually fetched/converted it to the
           * range [0,1].  This most likely happens when the app specifies a
@@ -3558,7 +3559,7 @@ emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
             return FALSE;
       }
 
-      if (emit->key.vkey.adjust_attrib_w_1 & (1 << index)) {
+      if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) {
          /* move 1 into W position of tmp */
          if (!submit_op1(emit,
                          inst_token(SVGA3DOP_MOV),
@@ -3588,10 +3589,10 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
    unsigned i;
 
    if (emit->unit == PIPE_SHADER_FRAGMENT) {
-      if (emit->key.fkey.light_twoside)
+      if (emit->key.fs.light_twoside)
          return TRUE;
 
-      if (emit->key.fkey.white_fragments)
+      if (emit->key.fs.white_fragments)
          return TRUE;
 
       if (emit->emit_frontface)
@@ -3606,16 +3607,16 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
          return TRUE;
 
       /* look for any PIPE_SWIZZLE_ZERO/ONE terms */
-      for (i = 0; i < emit->key.fkey.num_textures; i++) {
-         if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
-             emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
-             emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
-             emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
+      for (i = 0; i < emit->key.num_textures; i++) {
+         if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
+             emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
+             emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
+             emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
             return TRUE;
       }
 
-      for (i = 0; i < emit->key.fkey.num_textures; i++) {
-         if (emit->key.fkey.tex[i].compare_mode
+      for (i = 0; i < emit->key.num_textures; i++) {
+         if (emit->key.tex[i].compare_mode
              == PIPE_TEX_COMPARE_R_TO_TEXTURE)
             return TRUE;
       }
@@ -3623,8 +3624,8 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
    else if (emit->unit == PIPE_SHADER_VERTEX) {
       if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
          return TRUE;
-      if (emit->key.vkey.adjust_attrib_range ||
-          emit->key.vkey.adjust_attrib_w_1)
+      if (emit->key.vs.adjust_attrib_range ||
+          emit->key.vs.adjust_attrib_w_1)
          return TRUE;
    }
 
@@ -3772,7 +3773,7 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit)
       if (!emit_ps_preamble( emit ))
          return FALSE;
 
-      if (emit->key.fkey.light_twoside) {
+      if (emit->key.fs.light_twoside) {
          if (!emit_light_twoside( emit ))
             return FALSE;
       }
@@ -3787,14 +3788,13 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit)
    }
    else {
       assert(emit->unit == PIPE_SHADER_VERTEX);
-      if (emit->key.vkey.adjust_attrib_range ||
-          emit->key.vkey.adjust_attrib_w_1) {
-         if (!emit_adjusted_vertex_attribs(emit))
+      if (emit->key.vs.adjust_attrib_range ||
+          emit->key.vs.adjust_attrib_w_1) {
+         if (!emit_adjusted_vertex_attribs(emit))
            return FALSE;
       }
    }
 
-
    return TRUE;
 }
 
@@ -3808,10 +3809,30 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit,
                               const struct tgsi_token *tokens)
 {
    struct tgsi_parse_context parse;
+   const struct tgsi_token *new_tokens = NULL;
    boolean ret = TRUE;
    boolean helpers_emitted = FALSE;
    unsigned line_nr = 0;
 
+   if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
+      unsigned unit;
+
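+      /* Note (descriptive, per util/u_pstipple.h): the helper returns a
+       * modified copy of the shader that samples a stipple pattern
+       * texture and kills fragments accordingly; 'unit' is set to the
+       * sampler unit it allocated for that texture.
+       */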
+      new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0);
+
+      if (new_tokens) {
+         /* Setup texture state for stipple */
+         emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
+         emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
+         emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
+         emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
+         emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
+
+         emit->pstipple_sampler_unit = unit;
+
+         tokens = new_tokens;
+      }
+   }
+
    tgsi_parse_init( &parse, tokens );
    emit->internal_imm_count = 0;
 
@@ -3878,5 +3899,9 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit,
 
 done:
    tgsi_parse_free( &parse );
+   if (new_tokens) {
+      tgsi_free_tokens(new_tokens);
+   }
+
    return ret;
 }
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
new file mode 100644
index 0000000..e4f027b
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -0,0 +1,6778 @@
+/**********************************************************
+ * Copyright 1998-2013 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file svga_tgsi_vgpu10.c
+ *
+ * TGSI -> VGPU10 shader translation.
+ *
+ * \author Mingcheng Chen
+ * \author Brian Paul
+ */
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_two_side.h"
+#include "tgsi/tgsi_aa_point.h"
+#include "tgsi/tgsi_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "util/u_debug.h"
+#include "util/u_pstipple.h"
+
+#include "svga_context.h"
+#include "svga_debug.h"
+#include "svga_link.h"
+#include "svga_shader.h"
+#include "svga_tgsi.h"
+
+#include "VGPU10ShaderTokens.h"
+
+
+#define INVALID_INDEX 99999
+#define MAX_INTERNAL_TEMPS 3
+#define MAX_SYSTEM_VALUES 4
+#define MAX_IMMEDIATE_COUNT \
+        (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
+#define MAX_TEMP_ARRAYS 64  /* Enough? */
+
+
+/**
+ * Clipping is complicated.  There are four different cases that we
+ * handle during VS/GS shader translation:
+ */
+enum clipping_mode
+{
+   CLIP_NONE,     /**< No clipping enabled */
+   CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
+                   * one or more user-defined clip planes are enabled.  We
+                   * generate extra code to emit clip distances.
+                   */
+   CLIP_DISTANCE, /**< The shader already declares clip distance output
+                   * registers and has code to write to them.
+                   */
+   CLIP_VERTEX    /**< The shader declares a clip vertex output register and
+                  * has code that writes to the register.  We convert the
+                  * clipvertex position into one or more clip distances.
+                  */
+};
+
+
+struct svga_shader_emitter_v10
+{
+   /* The token output buffer */
+   unsigned size;
+   char *buf;
+   char *ptr;
+
+   /* Information about the shader and state (does not change) */
+   struct svga_compile_key key;
+   struct tgsi_shader_info info;
+   unsigned unit;
+
+   unsigned inst_start_token;
+   boolean discard_instruction; /**< throw away current instruction? */
+
+   union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
+   unsigned num_immediates;      /**< Number of immediates emitted */
+   unsigned common_immediate_pos[8];  /**< literals for common immediates */
+   unsigned num_common_immediates;
+   boolean immediates_emitted;
+
+   unsigned num_outputs;      /**< includes any extra outputs.
+                               *   The first extra output is reserved for
+                               *   the non-adjusted vertex position for
+                               *   stream output purposes.
+                               */
+
+   /* Temporary Registers */
+   unsigned num_shader_temps; /**< num of temps used by original shader */
+   unsigned internal_temp_count;  /**< currently allocated internal temps */
+   struct {
+      unsigned start, size;
+   } temp_arrays[MAX_TEMP_ARRAYS];
+   unsigned num_temp_arrays;
+
+   /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
+   struct {
+      unsigned arrayId, index;
+   } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
+
+   /** Number of constants used by original shader for each constant buffer.
+    * The size should probably always match that of svga_state.constbufs.
+    */
+   unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
+
+   /* Samplers */
+   unsigned num_samplers;
+
+   /* Address regs (really implemented with temps) */
+   unsigned num_address_regs;
+   unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
+
+   /* Output register usage masks */
+   ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
+
+   /* To map TGSI system value index to VGPU shader input indexes */
+   ubyte system_value_indexes[MAX_SYSTEM_VALUES];
+
+   struct {
+      /* vertex position scale/translation */
+      unsigned out_index;  /**< the real position output reg */
+      unsigned tmp_index;  /**< the fake/temp position output reg */
+      unsigned so_index;   /**< the non-adjusted position output reg */
+      unsigned prescale_scale_index, prescale_trans_index;
+      boolean  need_prescale;
+   } vposition;
+
+   /* For vertex shaders only */
+   struct {
+      /* viewport constant */
+      unsigned viewport_index;
+
+      /* temp index of adjusted vertex attributes */
+      unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
+   } vs;
+
+   /* For fragment shaders only */
+   struct {
+      /* alpha test */
+      unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
+      unsigned color_tmp_index;  /**< fake/temp color output reg */
+      unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
+
+      /* front-face */
+      unsigned face_input_index; /**< real fragment shader face reg (bool) */
+      unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */
+
+      unsigned pstipple_sampler_unit;
+
+      unsigned fragcoord_input_index;  /**< real fragment position input reg */
+      unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
+   } fs;
+
+   /* For geometry shaders only */
+   struct {
+      VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
+      VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
+      unsigned input_size;       /**< size of input arrays */
+      unsigned prim_id_index;    /**< primitive id register index */
+      unsigned max_out_vertices; /**< maximum number of output vertices */
+   } gs;
+
+   /* For vertex or geometry shaders */
+   enum clipping_mode clip_mode;
+   unsigned clip_dist_out_index; /**< clip distance output register index */
+   unsigned clip_dist_tmp_index; /**< clip distance temporary register */
+   unsigned clip_dist_so_index;  /**< clip distance shadow copy */
+
+   /** Index of temporary holding the clipvertex coordinate */
+   unsigned clip_vertex_out_index; /**< clip vertex output register index */
+   unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
+
+   /* user clip plane constant slot indexes */
+   unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
+
+   boolean uses_flat_interp;
+
+   /* For all shaders: const reg index for RECT coord scaling */
+   unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
+
+   /* For all shaders: const reg index for texture buffer size */
+   unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
+
+   /* VS/GS/FS Linkage info */
+   struct shader_linkage linkage;
+
+   bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */
+};
+
+
+static boolean
+emit_post_helpers(struct svga_shader_emitter_v10 *emit);
+
+static boolean
+emit_vertex(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst);
+
+static char err_buf[128];
+
+static boolean
+expand(struct svga_shader_emitter_v10 *emit)
+{
+   char *new_buf;
+   unsigned newsize = emit->size * 2;
+
+   if (emit->buf != err_buf)
+      new_buf = REALLOC(emit->buf, emit->size, newsize);
+   else
+      new_buf = NULL;
+
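+   /* If reallocation fails (or we had already fallen back), redirect all
+    * further writes to a small static scratch buffer so that subsequent
+    * emit_dword() calls stay memory-safe, and report the error.
+    */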
+   if (new_buf == NULL) {
+      emit->ptr = err_buf;
+      emit->buf = err_buf;
+      emit->size = sizeof(err_buf);
+      return FALSE;
+   }
+
+   emit->size = newsize;
+   emit->ptr = new_buf + (emit->ptr - emit->buf);
+   emit->buf = new_buf;
+   return TRUE;
+}
+
+/**
+ * Create and initialize a new svga_shader_emitter_v10 object.
+ */
+static struct svga_shader_emitter_v10 *
+alloc_emitter(void)
+{
+   struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
+
+   if (!emit)
+      return NULL;
+
+   /* to initialize the output buffer */
+   emit->size = 512;
+   if (!expand(emit)) {
+      FREE(emit);
+      return NULL;
+   }
+   return emit;
+}
+
+/**
+ * Free an svga_shader_emitter_v10 object.
+ */
+static void
+free_emitter(struct svga_shader_emitter_v10 *emit)
+{
+   assert(emit);
+   FREE(emit->buf);    /* will be NULL if translation succeeded */
+   FREE(emit);
+}
+
+static inline boolean
+reserve(struct svga_shader_emitter_v10 *emit,
+        unsigned nr_dwords)
+{
+   while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
+      if (!expand(emit))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+static boolean
+emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
+{
+   if (!reserve(emit, 1))
+      return FALSE;
+
+   *(uint32 *)emit->ptr = dword;
+   emit->ptr += sizeof dword;
+   return TRUE;
+}
+
+static boolean
+emit_dwords(struct svga_shader_emitter_v10 *emit,
+            const uint32 *dwords,
+            unsigned nr)
+{
+   if (!reserve(emit, nr))
+      return FALSE;
+
+   memcpy(emit->ptr, dwords, nr * sizeof *dwords);
+   emit->ptr += nr * sizeof *dwords;
+   return TRUE;
+}
+
+/** Return the number of tokens in the emitter's buffer */
+static unsigned
+emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
+{
+   return (emit->ptr - emit->buf) / sizeof(unsigned);
+}
+
+
+/**
+ * Check for register overflow.  If we overflow we'll set an
+ * error flag.  This function can be called for register declarations
+ * or use as src/dst instruction operands.
+ * \param operandType  register type.  One of VGPU10_OPERAND_TYPE_x
+ *                     or VGPU10_OPCODE_DCL_x
+ * \param index  the register index
+ */
+static void
+check_register_index(struct svga_shader_emitter_v10 *emit,
+                     unsigned operandType, unsigned index)
+{
+   bool overflow_before = emit->register_overflow;
+
+   switch (operandType) {
+   case VGPU10_OPERAND_TYPE_TEMP:
+   case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
+   case VGPU10_OPCODE_DCL_TEMPS:
+      if (index >= VGPU10_MAX_TEMPS) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
+   case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
+      if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   case VGPU10_OPERAND_TYPE_INPUT:
+   case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
+   case VGPU10_OPCODE_DCL_INPUT:
+   case VGPU10_OPCODE_DCL_INPUT_SGV:
+   case VGPU10_OPCODE_DCL_INPUT_SIV:
+   case VGPU10_OPCODE_DCL_INPUT_PS:
+   case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
+   case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
+      if ((emit->unit == PIPE_SHADER_VERTEX &&
+           index >= VGPU10_MAX_VS_INPUTS) ||
+          (emit->unit == PIPE_SHADER_GEOMETRY &&
+           index >= VGPU10_MAX_GS_INPUTS) ||
+          (emit->unit == PIPE_SHADER_FRAGMENT &&
+           index >= VGPU10_MAX_FS_INPUTS)) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   case VGPU10_OPERAND_TYPE_OUTPUT:
+   case VGPU10_OPCODE_DCL_OUTPUT:
+   case VGPU10_OPCODE_DCL_OUTPUT_SGV:
+   case VGPU10_OPCODE_DCL_OUTPUT_SIV:
+      if ((emit->unit == PIPE_SHADER_VERTEX &&
+           index >= VGPU10_MAX_VS_OUTPUTS) ||
+          (emit->unit == PIPE_SHADER_GEOMETRY &&
+           index >= VGPU10_MAX_GS_OUTPUTS) ||
+          (emit->unit == PIPE_SHADER_FRAGMENT &&
+           index >= VGPU10_MAX_FS_OUTPUTS)) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   case VGPU10_OPERAND_TYPE_SAMPLER:
+   case VGPU10_OPCODE_DCL_SAMPLER:
+      if (index >= VGPU10_MAX_SAMPLERS) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   case VGPU10_OPERAND_TYPE_RESOURCE:
+   case VGPU10_OPCODE_DCL_RESOURCE:
+      if (index >= VGPU10_MAX_RESOURCES) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
+      if (index >= MAX_IMMEDIATE_COUNT) {
+         emit->register_overflow = TRUE;
+      }
+      break;
+   default:
+      assert(0);
+      ; /* nothing */
+   }
+
+   if (emit->register_overflow && !overflow_before) {
+      debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
+                   operandType, index);
+   }
+}
+
+
+/**
+ * Examine misc state to determine the clipping mode.
+ */
+static void
+determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
+{
+   if (emit->info.num_written_clipdistance > 0) {
+      emit->clip_mode = CLIP_DISTANCE;
+   }
+   else if (emit->info.writes_clipvertex) {
+      emit->clip_mode = CLIP_VERTEX;
+   }
+   else if (emit->key.clip_plane_enable) {
+      emit->clip_mode = CLIP_LEGACY;
+   }
+   else {
+      emit->clip_mode = CLIP_NONE;
+   }
+}
+
+
+/**
+ * For clip distance register declarations and clip distance register
+ * writes we need to mask the declaration usage or instruction writemask
+ * (respectively) against the set of the really-enabled clipping planes.
+ *
+ * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
+ * has a VS that writes to all 8 clip distance registers, but the plane enable
+ * flags are a subset of that.
+ *
+ * This function is used to apply the plane enable flags to the register
+ * declaration or instruction writemask.
+ *
+ * \param writemask  the declaration usage mask or instruction writemask
+ * \param clip_reg_index  which clip plane register is being declared/written.
+ *                        The legal values are 0 and 1 (four clip planes per
+ *                        register, for a total of 8 clip planes)
+ */
+static unsigned
+apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
+                      unsigned writemask, unsigned clip_reg_index)
+{
+   unsigned shift;
+
+   assert(clip_reg_index < 2);
+
+   /* four clip planes per clip register: */
+   shift = clip_reg_index * 4;
+   writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
+
+   return writemask;
+}
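+
+/* Worked example (illustrative only): if emit->key.clip_plane_enable is
+ * 0x3f (user clip planes 0..5 enabled) and we write clip register 1 with
+ * writemask 0xf, then shift = 4 and the result is (0x3f >> 4) & 0xf = 0x3:
+ * only the X/Y components (planes 4 and 5) are actually written.
+ */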
+
+
+/**
+ * Translate gallium shader type into VGPU10 type.
+ */
+static VGPU10_PROGRAM_TYPE
+translate_shader_type(unsigned type)
+{
+   switch (type) {
+   case PIPE_SHADER_VERTEX:
+      return VGPU10_VERTEX_SHADER;
+   case PIPE_SHADER_GEOMETRY:
+      return VGPU10_GEOMETRY_SHADER;
+   case PIPE_SHADER_FRAGMENT:
+      return VGPU10_PIXEL_SHADER;
+   default:
+      assert(!"Unexpected shader type");
+      return VGPU10_VERTEX_SHADER;
+   }
+}
+
+
+/**
+ * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
+ * Note: we only need to translate the opcodes for "simple" instructions,
+ * as seen below.  All other opcodes are handled/translated specially.
+ */
+static VGPU10_OPCODE_TYPE
+translate_opcode(unsigned opcode)
+{
+   switch (opcode) {
+   case TGSI_OPCODE_MOV:
+      return VGPU10_OPCODE_MOV;
+   case TGSI_OPCODE_MUL:
+      return VGPU10_OPCODE_MUL;
+   case TGSI_OPCODE_ADD:
+      return VGPU10_OPCODE_ADD;
+   case TGSI_OPCODE_DP3:
+      return VGPU10_OPCODE_DP3;
+   case TGSI_OPCODE_DP4:
+      return VGPU10_OPCODE_DP4;
+   case TGSI_OPCODE_MIN:
+      return VGPU10_OPCODE_MIN;
+   case TGSI_OPCODE_MAX:
+      return VGPU10_OPCODE_MAX;
+   case TGSI_OPCODE_MAD:
+      return VGPU10_OPCODE_MAD;
+   case TGSI_OPCODE_SQRT:
+      return VGPU10_OPCODE_SQRT;
+   case TGSI_OPCODE_FRC:
+      return VGPU10_OPCODE_FRC;
+   case TGSI_OPCODE_FLR:
+      return VGPU10_OPCODE_ROUND_NI;
+   case TGSI_OPCODE_FSEQ:
+      return VGPU10_OPCODE_EQ;
+   case TGSI_OPCODE_FSGE:
+      return VGPU10_OPCODE_GE;
+   case TGSI_OPCODE_FSNE:
+      return VGPU10_OPCODE_NE;
+   case TGSI_OPCODE_DDX:
+      return VGPU10_OPCODE_DERIV_RTX;
+   case TGSI_OPCODE_DDY:
+      return VGPU10_OPCODE_DERIV_RTY;
+   case TGSI_OPCODE_RET:
+      return VGPU10_OPCODE_RET;
+   case TGSI_OPCODE_DIV:
+      return VGPU10_OPCODE_DIV;
+   case TGSI_OPCODE_IDIV:
+      return VGPU10_OPCODE_IDIV;
+   case TGSI_OPCODE_DP2:
+      return VGPU10_OPCODE_DP2;
+   case TGSI_OPCODE_BRK:
+      return VGPU10_OPCODE_BREAK;
+   case TGSI_OPCODE_IF:
+      return VGPU10_OPCODE_IF;
+   case TGSI_OPCODE_ELSE:
+      return VGPU10_OPCODE_ELSE;
+   case TGSI_OPCODE_ENDIF:
+      return VGPU10_OPCODE_ENDIF;
+   case TGSI_OPCODE_CEIL:
+      return VGPU10_OPCODE_ROUND_PI;
+   case TGSI_OPCODE_I2F:
+      return VGPU10_OPCODE_ITOF;
+   case TGSI_OPCODE_NOT:
+      return VGPU10_OPCODE_NOT;
+   case TGSI_OPCODE_TRUNC:
+      return VGPU10_OPCODE_ROUND_Z;
+   case TGSI_OPCODE_SHL:
+      return VGPU10_OPCODE_ISHL;
+   case TGSI_OPCODE_AND:
+      return VGPU10_OPCODE_AND;
+   case TGSI_OPCODE_OR:
+      return VGPU10_OPCODE_OR;
+   case TGSI_OPCODE_XOR:
+      return VGPU10_OPCODE_XOR;
+   case TGSI_OPCODE_CONT:
+      return VGPU10_OPCODE_CONTINUE;
+   case TGSI_OPCODE_EMIT:
+      return VGPU10_OPCODE_EMIT;
+   case TGSI_OPCODE_ENDPRIM:
+      return VGPU10_OPCODE_CUT;
+   case TGSI_OPCODE_BGNLOOP:
+      return VGPU10_OPCODE_LOOP;
+   case TGSI_OPCODE_ENDLOOP:
+      return VGPU10_OPCODE_ENDLOOP;
+   case TGSI_OPCODE_ENDSUB:
+      return VGPU10_OPCODE_RET;
+   case TGSI_OPCODE_NOP:
+      return VGPU10_OPCODE_NOP;
+   case TGSI_OPCODE_BREAKC:
+      return VGPU10_OPCODE_BREAKC;
+   case TGSI_OPCODE_END:
+      return VGPU10_OPCODE_RET;
+   case TGSI_OPCODE_F2I:
+      return VGPU10_OPCODE_FTOI;
+   case TGSI_OPCODE_IMAX:
+      return VGPU10_OPCODE_IMAX;
+   case TGSI_OPCODE_IMIN:
+      return VGPU10_OPCODE_IMIN;
+   case TGSI_OPCODE_UDIV:
+   case TGSI_OPCODE_UMOD:
+   case TGSI_OPCODE_MOD:
+      return VGPU10_OPCODE_UDIV;
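+      /* Note: the VGPU10 UDIV instruction produces both a quotient and a
+       * remainder destination, so TGSI UDIV, UMOD and MOD can all share
+       * this one opcode.
+       */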
+   case TGSI_OPCODE_IMUL_HI:
+      return VGPU10_OPCODE_IMUL;
+   case TGSI_OPCODE_INEG:
+      return VGPU10_OPCODE_INEG;
+   case TGSI_OPCODE_ISHR:
+      return VGPU10_OPCODE_ISHR;
+   case TGSI_OPCODE_ISGE:
+      return VGPU10_OPCODE_IGE;
+   case TGSI_OPCODE_ISLT:
+      return VGPU10_OPCODE_ILT;
+   case TGSI_OPCODE_F2U:
+      return VGPU10_OPCODE_FTOU;
+   case TGSI_OPCODE_UADD:
+      return VGPU10_OPCODE_IADD;
+   case TGSI_OPCODE_U2F:
+      return VGPU10_OPCODE_UTOF;
+   case TGSI_OPCODE_UCMP:
+      return VGPU10_OPCODE_MOVC;
+   case TGSI_OPCODE_UMAD:
+      return VGPU10_OPCODE_UMAD;
+   case TGSI_OPCODE_UMAX:
+      return VGPU10_OPCODE_UMAX;
+   case TGSI_OPCODE_UMIN:
+      return VGPU10_OPCODE_UMIN;
+   case TGSI_OPCODE_UMUL:
+   case TGSI_OPCODE_UMUL_HI:
+      return VGPU10_OPCODE_UMUL;
+   case TGSI_OPCODE_USEQ:
+      return VGPU10_OPCODE_IEQ;
+   case TGSI_OPCODE_USGE:
+      return VGPU10_OPCODE_UGE;
+   case TGSI_OPCODE_USHR:
+      return VGPU10_OPCODE_USHR;
+   case TGSI_OPCODE_USLT:
+      return VGPU10_OPCODE_ULT;
+   case TGSI_OPCODE_USNE:
+      return VGPU10_OPCODE_INE;
+   case TGSI_OPCODE_SWITCH:
+      return VGPU10_OPCODE_SWITCH;
+   case TGSI_OPCODE_CASE:
+      return VGPU10_OPCODE_CASE;
+   case TGSI_OPCODE_DEFAULT:
+      return VGPU10_OPCODE_DEFAULT;
+   case TGSI_OPCODE_ENDSWITCH:
+      return VGPU10_OPCODE_ENDSWITCH;
+   case TGSI_OPCODE_FSLT:
+      return VGPU10_OPCODE_LT;
+   case TGSI_OPCODE_ROUND:
+      return VGPU10_OPCODE_ROUND_NE;
+   default:
+      assert(!"Unexpected TGSI opcode in translate_opcode()");
+      return VGPU10_OPCODE_NOP;
+   }
+}
+
+
+/**
+ * Translate a TGSI register file type into a VGPU10 operand type.
+ * \param array  is the TGSI_FILE_TEMPORARY register an array?
+ */
+static VGPU10_OPERAND_TYPE
+translate_register_file(enum tgsi_file_type file, boolean array)
+{
+   switch (file) {
+   case TGSI_FILE_CONSTANT:
+      return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
+   case TGSI_FILE_INPUT:
+      return VGPU10_OPERAND_TYPE_INPUT;
+   case TGSI_FILE_OUTPUT:
+      return VGPU10_OPERAND_TYPE_OUTPUT;
+   case TGSI_FILE_TEMPORARY:
+      return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
+                   : VGPU10_OPERAND_TYPE_TEMP;
+   case TGSI_FILE_IMMEDIATE:
+      /* All immediates are 32-bit values at this time, so
+       * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not yet possible.
+       */
+      return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
+   case TGSI_FILE_SAMPLER:
+      return VGPU10_OPERAND_TYPE_SAMPLER;
+   case TGSI_FILE_SYSTEM_VALUE:
+      return VGPU10_OPERAND_TYPE_INPUT;
+
+   /* XXX TODO more cases to finish */
+
+   default:
+      assert(!"Bad tgsi register file!");
+      return VGPU10_OPERAND_TYPE_NULL;
+   }
+}
+
+
+/**
+ * Emit a null dst register
+ */
+static void
+emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OperandToken0 operand;
+
+   operand.value = 0;
+   operand.operandType = VGPU10_OPERAND_TYPE_NULL;
+   operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
+
+   emit_dword(emit, operand.value);
+}
+
+
+/**
+ * If the given register is a temporary, return the array ID.
+ * Else return zero.
+ */
+static unsigned
+get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
+                  unsigned file, unsigned index)
+{
+   if (file == TGSI_FILE_TEMPORARY) {
+      return emit->temp_map[index].arrayId;
+   }
+   else {
+      return 0;
+   }
+}
+
+
+/**
+ * If the given register is a temporary, convert the index from a TGSI
+ * TEMPORARY index to a VGPU10 temp index.
+ */
+static unsigned
+remap_temp_index(const struct svga_shader_emitter_v10 *emit,
+                 unsigned file, unsigned index)
+{
+   if (file == TGSI_FILE_TEMPORARY) {
+      return emit->temp_map[index].index;
+   }
+   else {
+      return index;
+   }
+}
+
+
+/**
+ * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
+ * Note: the operandType field must already be initialized.
+ */
+static VGPU10OperandToken0
+setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
+                        VGPU10OperandToken0 operand0,
+                        unsigned file,
+                        boolean indirect, boolean index2D,
+                        unsigned tempArrayID)
+{
+   unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_0D;
+
+   /*
+    * Compute index dimensions
+    */
+   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
+       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
+      /* no register index is needed for in-line immediates or the primitive ID */
+      indexDim = VGPU10_OPERAND_INDEX_0D;
+      assert(operand0.selectionMode == 0);
+   }
+   else {
+      if (index2D ||
+          tempArrayID > 0 ||
+          operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
+         indexDim = VGPU10_OPERAND_INDEX_2D;
+      }
+      else {
+         indexDim = VGPU10_OPERAND_INDEX_1D;
+      }
+   }
+
+   /*
+    * Compute index representations (immediate, relative, etc).
+    */
+   if (tempArrayID > 0) {
+      assert(file == TGSI_FILE_TEMPORARY);
+      /* First index is the array ID, second index is the array element */
+      index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+      if (indirect) {
+         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
+      }
+      else {
+         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+      }
+   }
+   else if (indirect) {
+      if (file == TGSI_FILE_CONSTANT) {
+         /* index[0] indicates which constant buffer while index[1] indicates
+          * the position in the constant buffer.
+          */
+         index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
+      }
+      else {
+         /* All other register files are 1-dimensional */
+         index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
+      }
+   }
+   else {
+      index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+      index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   }
+
+   operand0.indexDimension = indexDim;
+   operand0.index0Representation = index0Rep;
+   operand0.index1Representation = index1Rep;
+
+   return operand0;
+}
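+
+/* Examples (illustrative only): a plain TEMP[n] access yields 1D indexing
+ * with an immediate index.  An indirect constant access CONST[1][ADDR+5]
+ * yields 2D indexing where index 0 is the immediate constant buffer slot
+ * and index 1 is immediate-plus-relative.  An array temporary also yields
+ * 2D indexing, with index 0 holding the array ID.
+ */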
+
+
+/**
+ * Emit the operand for expressing an address register for indirect indexing.
+ * Note that the address register is really just a temp register.
+ * \param addr_reg_index  which address register to use
+ */
+static void
+emit_indirect_register(struct svga_shader_emitter_v10 *emit,
+                       unsigned addr_reg_index)
+{
+   unsigned tmp_reg_index;
+   VGPU10OperandToken0 operand0;
+
+   assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
+
+   tmp_reg_index = emit->address_reg_index[addr_reg_index];
+
+   /* operand0 is a simple temporary register, selecting one component */
+   operand0.value = 0;
+   operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
+   operand0.swizzleX = 0;
+   operand0.swizzleY = 1;
+   operand0.swizzleZ = 2;
+   operand0.swizzleW = 3;
+
+   emit_dword(emit, operand0.value);
+   emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
+}
+
+
+/**
+ * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
+ * \param emit  the emitter context
+ * \param reg  the TGSI dst register to translate
+ */
+static void
+emit_dst_register(struct svga_shader_emitter_v10 *emit,
+                  const struct tgsi_full_dst_register *reg)
+{
+   unsigned file = reg->Register.File;
+   unsigned index = reg->Register.Index;
+   const unsigned sem_name = emit->info.output_semantic_name[index];
+   const unsigned sem_index = emit->info.output_semantic_index[index];
+   unsigned writemask = reg->Register.WriteMask;
+   const unsigned indirect = reg->Register.Indirect;
+   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
+   const unsigned index2d = reg->Register.Dimension;
+   VGPU10OperandToken0 operand0;
+
+   if (file == TGSI_FILE_OUTPUT) {
+      if (emit->unit == PIPE_SHADER_VERTEX ||
+          emit->unit == PIPE_SHADER_GEOMETRY) {
+         if (index == emit->vposition.out_index &&
+             emit->vposition.tmp_index != INVALID_INDEX) {
+            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
+             * vertex position result in a temporary so that we can modify
+             * it in the post_helper() code.
+             */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->vposition.tmp_index;
+         }
+         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
+                  emit->clip_dist_tmp_index != INVALID_INDEX) {
+            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
+             * We store the clip distance in a temporary first, then
+             * we'll copy it to the shadow copy and to CLIPDIST with the
+             * enabled planes mask in emit_clip_distance_instructions().
+             */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->clip_dist_tmp_index + sem_index;
+         }
+         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
+                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
+            /* replace the CLIPVERTEX output register with a temporary */
+            assert(emit->clip_mode == CLIP_VERTEX);
+            assert(sem_index == 0);
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->clip_vertex_tmp_index;
+         }
+      }
+      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
+         if (sem_name == TGSI_SEMANTIC_POSITION) {
+            /* Fragment depth output register */
+            operand0.value = 0;
+            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
+            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
+            emit_dword(emit, operand0.value);
+            return;
+         }
+         else if (index == emit->fs.color_out_index[0] &&
+             emit->fs.color_tmp_index != INVALID_INDEX) {
+            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
+             * fragment color result in a temporary so that we can read it
+             * in the post_helper() code.
+             */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->fs.color_tmp_index;
+         }
+         else {
+            /* Typically, for fragment shaders, the output register index
+             * matches the color semantic index.  But not when we write to
+             * the fragment depth register.  In that case, OUT[0] will be
+             * fragdepth and OUT[1] will be the 0th color output.  We need
+             * to use the semantic index for color outputs.
+             */
+            assert(sem_name == TGSI_SEMANTIC_COLOR);
+            index = emit->info.output_semantic_index[index];
+         }
+      }
+   }
+
+   /* init operand tokens to all zero */
+   operand0.value = 0;
+
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+
+   /* the operand has a writemask */
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+
+   /* Which of the four dest components to write to. Note that we can use a
+    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
+    */
+   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
+   operand0.mask = writemask;
+
+   /* translate TGSI register file type to VGPU10 operand type */
+   operand0.operandType = translate_register_file(file, tempArrayId > 0);
+
+   check_register_index(emit, operand0.operandType, index);
+
+   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
+                                      index2d, tempArrayId);
+
+   /* Emit tokens */
+   emit_dword(emit, operand0.value);
+   if (tempArrayId > 0) {
+      emit_dword(emit, tempArrayId);
+   }
+
+   emit_dword(emit, remap_temp_index(emit, file, index));
+
+   if (indirect) {
+      emit_indirect_register(emit, reg->Indirect.Index);
+   }
+}
+
+
+/**
+ * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
+ */
+static void
+emit_src_register(struct svga_shader_emitter_v10 *emit,
+                  const struct tgsi_full_src_register *reg)
+{
+   unsigned file = reg->Register.File;
+   unsigned index = reg->Register.Index;
+   const unsigned indirect = reg->Register.Indirect;
+   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
+   const unsigned index2d = reg->Register.Dimension;
+   const unsigned swizzleX = reg->Register.SwizzleX;
+   const unsigned swizzleY = reg->Register.SwizzleY;
+   const unsigned swizzleZ = reg->Register.SwizzleZ;
+   const unsigned swizzleW = reg->Register.SwizzleW;
+   const unsigned absolute = reg->Register.Absolute;
+   const unsigned negate = reg->Register.Negate;
+   bool is_prim_id = FALSE;
+
+   VGPU10OperandToken0 operand0;
+   VGPU10OperandToken1 operand1;
+
+   if (emit->unit == PIPE_SHADER_FRAGMENT &&
+      file == TGSI_FILE_INPUT) {
+      if (index == emit->fs.face_input_index) {
+         /* Replace INPUT[FACE] with TEMP[FACE] */
+         file = TGSI_FILE_TEMPORARY;
+         index = emit->fs.face_tmp_index;
+      }
+      else if (index == emit->fs.fragcoord_input_index) {
+         /* Replace INPUT[POSITION] with TEMP[POSITION] */
+         file = TGSI_FILE_TEMPORARY;
+         index = emit->fs.fragcoord_tmp_index;
+      }
+      else {
+         /* We remap fragment shader inputs so that FS input indexes
+          * match up with VS/GS output indexes.
+          */
+         index = emit->linkage.input_map[index];
+      }
+   }
+   else if (emit->unit == PIPE_SHADER_GEOMETRY &&
+            file == TGSI_FILE_INPUT) {
+      is_prim_id = (index == emit->gs.prim_id_index);
+      index = emit->linkage.input_map[index];
+   }
+   else if (emit->unit == PIPE_SHADER_VERTEX) {
+      if (file == TGSI_FILE_INPUT) {
+         /* if input is adjusted... */
+         if ((emit->key.vs.adjust_attrib_w_1 |
+              emit->key.vs.adjust_attrib_itof |
+              emit->key.vs.adjust_attrib_utof |
+              emit->key.vs.attrib_is_bgra |
+              emit->key.vs.attrib_puint_to_snorm |
+              emit->key.vs.attrib_puint_to_uscaled |
+              emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->vs.adjusted_input[index];
+         }
+      }
+      else if (file == TGSI_FILE_SYSTEM_VALUE) {
+         assert(index < Elements(emit->system_value_indexes));
+         index = emit->system_value_indexes[index];
+      }
+   }
+
+   operand0.value = operand1.value = 0;
+
+   if (is_prim_id) {
+      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+      operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
+   }
+   else {
+      operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+      operand0.operandType = translate_register_file(file, tempArrayId > 0);
+   }
+
+   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
+                                      index2d, tempArrayId);
+
+   if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
+       operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
+      /* there's no swizzle for in-line immediates */
+      if (swizzleX == swizzleY &&
+          swizzleX == swizzleZ &&
+          swizzleX == swizzleW) {
+         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
+      }
+      else {
+         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+      }
+
+      operand0.swizzleX = swizzleX;
+      operand0.swizzleY = swizzleY;
+      operand0.swizzleZ = swizzleZ;
+      operand0.swizzleW = swizzleW;
+
+      if (absolute || negate) {
+         operand0.extended = 1;
+         operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
+         if (absolute && !negate)
+            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
+         if (!absolute && negate)
+            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
+         if (absolute && negate)
+            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
+      }
+   }
+
+   /* Emit the operand tokens */
+   emit_dword(emit, operand0.value);
+   if (operand0.extended)
+      emit_dword(emit, operand1.value);
+
+   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
+      /* Emit the four float/int in-line immediate values */
+      unsigned *c;
+      assert(index < Elements(emit->immediates));
+      assert(file == TGSI_FILE_IMMEDIATE);
+      assert(swizzleX < 4);
+      assert(swizzleY < 4);
+      assert(swizzleZ < 4);
+      assert(swizzleW < 4);
+      c = (unsigned *) emit->immediates[index];
+      emit_dword(emit, c[swizzleX]);
+      emit_dword(emit, c[swizzleY]);
+      emit_dword(emit, c[swizzleZ]);
+      emit_dword(emit, c[swizzleW]);
+   }
+   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
+      /* Emit the register index(es) */
+      if (index2d ||
+          operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
+         emit_dword(emit, reg->Dimension.Index);
+      }
+
+      if (tempArrayId > 0) {
+         emit_dword(emit, tempArrayId);
+      }
+
+      emit_dword(emit, remap_temp_index(emit, file, index));
+
+      if (indirect) {
+         emit_indirect_register(emit, reg->Indirect.Index);
+      }
+   }
+}
+
+
+/**
+ * Emit a resource operand (for use with a SAMPLE instruction).
+ */
+static void
+emit_resource_register(struct svga_shader_emitter_v10 *emit,
+                       unsigned resource_number)
+{
+   VGPU10OperandToken0 operand0;
+
+   check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
+
+   /* init */
+   operand0.value = 0;
+
+   operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+   operand0.swizzleX = VGPU10_COMPONENT_X;
+   operand0.swizzleY = VGPU10_COMPONENT_Y;
+   operand0.swizzleZ = VGPU10_COMPONENT_Z;
+   operand0.swizzleW = VGPU10_COMPONENT_W;
+
+   emit_dword(emit, operand0.value);
+   emit_dword(emit, resource_number);
+}
+
+
+/**
+ * Emit a sampler operand (for use with a SAMPLE instruction).
+ */
+static void
+emit_sampler_register(struct svga_shader_emitter_v10 *emit,
+                      unsigned sampler_number)
+{
+   VGPU10OperandToken0 operand0;
+
+   check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
+
+   /* init */
+   operand0.value = 0;
+
+   operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+
+   emit_dword(emit, operand0.value);
+   emit_dword(emit, sampler_number);
+}
+
+
+/**
+ * Emit an operand which reads the IS_FRONT_FACING register.
+ */
+static void
+emit_face_register(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OperandToken0 operand0;
+   unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
+
+   /* init */
+   operand0.value = 0;
+
+   operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+
+   operand0.swizzleX = VGPU10_COMPONENT_X;
+   operand0.swizzleY = VGPU10_COMPONENT_X;
+   operand0.swizzleZ = VGPU10_COMPONENT_X;
+   operand0.swizzleW = VGPU10_COMPONENT_X;
+
+   emit_dword(emit, operand0.value);
+   emit_dword(emit, index);
+}
+
+
+/**
+ * Emit the token for a VGPU10 opcode.
+ * \param saturate   clamp result to [0,1]?
+ */
+static void
+emit_opcode(struct svga_shader_emitter_v10 *emit,
+            unsigned vgpu10_opcode, boolean saturate)
+{
+   VGPU10OpcodeToken0 token0;
+
+   token0.value = 0;  /* init all fields to zero */
+   token0.opcodeType = vgpu10_opcode;
+   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
+   token0.saturate = saturate;
+
+   emit_dword(emit, token0.value);
+}
+
+
+/**
+ * Emit the token for a VGPU10 resinfo instruction.
+ * \param modifier   return type modifier, _uint or _rcpFloat.
+ *                   TODO: We may want to remove this parameter if it will
+ *                   only ever be used as _uint.
+ */
+static void
+emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
+                    VGPU10_RESINFO_RETURN_TYPE modifier)
+{
+   VGPU10OpcodeToken0 token0;
+
+   token0.value = 0;  /* init all fields to zero */
+   token0.opcodeType = VGPU10_OPCODE_RESINFO;
+   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
+   token0.resinfoReturnType = modifier;
+
+   emit_dword(emit, token0.value);
+}
+
+
+/**
+ * Emit opcode tokens for a texture sample instruction.  Texture instructions
+ * can be rather complicated (texel offsets, etc) so we have this specialized
+ * function.
+ */
+static void
+emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
+                   unsigned vgpu10_opcode, boolean saturate,
+                   const int offsets[3])
+{
+   VGPU10OpcodeToken0 token0;
+   VGPU10OpcodeToken1 token1;
+
+   token0.value = 0;  /* init all fields to zero */
+   token0.opcodeType = vgpu10_opcode;
+   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
+   token0.saturate = saturate;
+
+   if (offsets[0] || offsets[1] || offsets[2]) {
+      assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
+      assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
+      assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
+      assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
+      assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
+      assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
+
+      token0.extended = 1;
+      token1.value = 0;
+      token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
+      token1.offsetU = offsets[0];
+      token1.offsetV = offsets[1];
+      token1.offsetW = offsets[2];
+   }
+
+   emit_dword(emit, token0.value);
+   if (token0.extended) {
+      emit_dword(emit, token1.value);
+   }
+}
+
+
+/**
+ * Emit a DISCARD opcode token.
+ * If nonzero is set, we'll discard the fragment if the X component is not 0.
+ * Otherwise, we'll discard the fragment if the X component is 0.
+ */
+static void
+emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
+{
+   VGPU10OpcodeToken0 opcode0;
+
+   opcode0.value = 0;
+   opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
+   if (nonzero)
+      opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
+
+   emit_dword(emit, opcode0.value);
+}
+
+
+/**
+ * We need to call this before we begin emitting a VGPU10 instruction.
+ */
+static void
+begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
+{
+   assert(emit->inst_start_token == 0);
+   /* Save location of the instruction's VGPU10OpcodeToken0 token.
+    * Note, we can't save a pointer because it would become invalid if
+    * we have to realloc the output buffer.
+    */
+   emit->inst_start_token = emit_get_num_tokens(emit);
+}
+
+
+/**
+ * We need to call this after we emit the last token of a VGPU10 instruction.
+ * This function patches in the opcode token's instructionLength field.
+ */
+static void
+end_emit_instruction(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
+   unsigned inst_length;
+
+   assert(emit->inst_start_token > 0);
+
+   if (emit->discard_instruction) {
+      /* Back up the emit->ptr to where this instruction started so
+       * that we discard the current instruction.
+       */
+      emit->ptr = (char *) (tokens + emit->inst_start_token);
+   }
+   else {
+      /* Compute instruction length and patch that into the start of
+       * the instruction.
+       */
+      inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
+
+      assert(inst_length > 0);
+
+      tokens[emit->inst_start_token].instructionLength = inst_length;
+   }
+
+   emit->inst_start_token = 0; /* reset to zero for error checking */
+   emit->discard_instruction = FALSE;
+}
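+
+/* Typical usage (sketch): a simple MOV instruction would be emitted as
+ *    begin_emit_instruction(emit);
+ *    emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+ *    emit_dst_register(emit, &dst);
+ *    emit_src_register(emit, &src);
+ *    end_emit_instruction(emit);
+ * where end_emit_instruction() patches the total token count into the
+ * VGPU10OpcodeToken0 that emit_opcode() wrote first.
+ */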
+
+
+/**
+ * Return index for a free temporary register.
+ */
+static unsigned
+get_temp_index(struct svga_shader_emitter_v10 *emit)
+{
+   assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
+   return emit->num_shader_temps + emit->internal_temp_count++;
+}
+
+
+/**
+ * Release the temporaries which were generated by get_temp_index().
+ */
+static void
+free_temp_indexes(struct svga_shader_emitter_v10 *emit)
+{
+   emit->internal_temp_count = 0;
+}
+
+
+/**
+ * Create a tgsi_full_src_register.
+ */
+static struct tgsi_full_src_register
+make_src_reg(unsigned file, unsigned index)
+{
+   struct tgsi_full_src_register reg;
+
+   memset(&reg, 0, sizeof(reg));
+   reg.Register.File = file;
+   reg.Register.Index = index;
+   reg.Register.SwizzleX = TGSI_SWIZZLE_X;
+   reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
+   reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
+   reg.Register.SwizzleW = TGSI_SWIZZLE_W;
+   return reg;
+}
+
+
+/**
+ * Create a tgsi_full_src_register for a temporary.
+ */
+static struct tgsi_full_src_register
+make_src_temp_reg(unsigned index)
+{
+   return make_src_reg(TGSI_FILE_TEMPORARY, index);
+}
+
+
+/**
+ * Create a tgsi_full_src_register for a constant.
+ */
+static struct tgsi_full_src_register
+make_src_const_reg(unsigned index)
+{
+   return make_src_reg(TGSI_FILE_CONSTANT, index);
+}
+
+
+/**
+ * Create a tgsi_full_src_register for an immediate constant.
+ */
+static struct tgsi_full_src_register
+make_src_immediate_reg(unsigned index)
+{
+   return make_src_reg(TGSI_FILE_IMMEDIATE, index);
+}
+
+
+/**
+ * Create a tgsi_full_dst_register.
+ */
+static struct tgsi_full_dst_register
+make_dst_reg(unsigned file, unsigned index)
+{
+   struct tgsi_full_dst_register reg;
+
+   memset(&reg, 0, sizeof(reg));
+   reg.Register.File = file;
+   reg.Register.Index = index;
+   reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
+   return reg;
+}
+
+
+/**
+ * Create a tgsi_full_dst_register for a temporary.
+ */
+static struct tgsi_full_dst_register
+make_dst_temp_reg(unsigned index)
+{
+   return make_dst_reg(TGSI_FILE_TEMPORARY, index);
+}
+
+
+/**
+ * Create a tgsi_full_dst_register for an output.
+ */
+static struct tgsi_full_dst_register
+make_dst_output_reg(unsigned index)
+{
+   return make_dst_reg(TGSI_FILE_OUTPUT, index);
+}
+
+
+/**
+ * Create negated tgsi_full_src_register.
+ */
+static struct tgsi_full_src_register
+negate_src(const struct tgsi_full_src_register *reg)
+{
+   struct tgsi_full_src_register neg = *reg;
+   neg.Register.Negate = !reg->Register.Negate;
+   return neg;
+}
+
+/**
+ * Create absolute value of a tgsi_full_src_register.
+ */
+static struct tgsi_full_src_register
+absolute_src(const struct tgsi_full_src_register *reg)
+{
+   struct tgsi_full_src_register absolute = *reg;
+   absolute.Register.Absolute = 1;
+   return absolute;
+}
+
+
+/** Return the named swizzle term from the src register */
+static inline unsigned
+get_swizzle(const struct tgsi_full_src_register *reg, unsigned term)
+{
+   switch (term) {
+   case TGSI_SWIZZLE_X:
+      return reg->Register.SwizzleX;
+   case TGSI_SWIZZLE_Y:
+      return reg->Register.SwizzleY;
+   case TGSI_SWIZZLE_Z:
+      return reg->Register.SwizzleZ;
+   case TGSI_SWIZZLE_W:
+      return reg->Register.SwizzleW;
+   default:
+      assert(!"Bad swizzle");
+      return TGSI_SWIZZLE_X;
+   }
+}
+
+
+/**
+ * Create swizzled tgsi_full_src_register.
+ */
+static struct tgsi_full_src_register
+swizzle_src(const struct tgsi_full_src_register *reg,
+            unsigned swizzleX, unsigned swizzleY,
+            unsigned swizzleZ, unsigned swizzleW)
+{
+   struct tgsi_full_src_register swizzled = *reg;
+   /* Note: we swizzle the current swizzle */
+   swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
+   swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
+   swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
+   swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
+   return swizzled;
+}
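+
+/* Worked example (illustrative): swizzles compose.  If reg currently has
+ * swizzle .wzyx, then swizzle_src(&reg, X, X, Y, Y) selects the current
+ * X and Y terms and yields .wwzz, not .xxyy.
+ */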
+
+
+/**
+ * Create swizzled tgsi_full_src_register where all the swizzle
+ * terms are the same.
+ */
+static struct tgsi_full_src_register
+scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle)
+{
+   struct tgsi_full_src_register swizzled = *reg;
+   /* Note: we swizzle the current swizzle */
+   swizzled.Register.SwizzleX =
+   swizzled.Register.SwizzleY =
+   swizzled.Register.SwizzleZ =
+   swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
+   return swizzled;
+}
+
+
+/**
+ * Create new tgsi_full_dst_register with writemask.
+ * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
+ */
+static struct tgsi_full_dst_register
+writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
+{
+   struct tgsi_full_dst_register masked = *reg;
+   masked.Register.WriteMask = mask;
+   return masked;
+}
+
+
+/**
+ * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
+ */
+static boolean
+same_swizzle_terms(const struct tgsi_full_src_register *reg)
+{
+   return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
+           reg->Register.SwizzleY == reg->Register.SwizzleZ &&
+           reg->Register.SwizzleZ == reg->Register.SwizzleW);
+}
+
+
+/**
+ * Search the vector for the value 'x' and return its position.
+ */
+static int
+find_imm_in_vec4(const union tgsi_immediate_data vec[4],
+                 union tgsi_immediate_data x)
+{
+   unsigned i;
+   for (i = 0; i < 4; i++) {
+      if (vec[i].Int == x.Int)
+         return i;
+   }
+   return -1;
+}
+
+
+/**
+ * Helper used by make_immediate_reg(), make_immediate_reg_4().
+ */
+static int
+find_immediate(struct svga_shader_emitter_v10 *emit,
+               union tgsi_immediate_data x, unsigned startIndex)
+{
+   const unsigned endIndex = emit->num_immediates;
+   unsigned i;
+
+   assert(emit->immediates_emitted);
+
+   /* Search immediates for x, y, z, w */
+   for (i = startIndex; i < endIndex; i++) {
+      if (x.Int == emit->immediates[i][0].Int ||
+          x.Int == emit->immediates[i][1].Int ||
+          x.Int == emit->immediates[i][2].Int ||
+          x.Int == emit->immediates[i][3].Int) {
+         return i;
+      }
+   }
+   /* Should never try to use an immediate value that wasn't pre-declared */
+   assert(!"find_immediate() failed!");
+   return -1;
+}
+
+
+/**
+ * Return a tgsi_full_src_register for an immediate/literal
+ * union tgsi_immediate_data[4] value.
+ * Note: the values must have been previously declared/allocated in
+ * emit_pre_helpers().  Also, all of x,y,z,w must be located in the same
+ * vec4 immediate.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
+                     const union tgsi_immediate_data imm[4])
+{
+   struct tgsi_full_src_register reg;
+   unsigned i;
+
+   for (i = 0; i < emit->num_common_immediates; i++) {
+      /* search for first component value */
+      int immpos = find_immediate(emit, imm[0], i);
+      int x, y, z, w;
+
+      assert(immpos >= 0);
+
+      /* find remaining components within the immediate vector */
+      x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
+      y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
+      z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
+      w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
+
+      if (x >= 0 && y >= 0 && z >= 0 && w >= 0) {
+         /* found them all */
+         memset(&reg, 0, sizeof(reg));
+         reg.Register.File = TGSI_FILE_IMMEDIATE;
+         reg.Register.Index = immpos;
+         reg.Register.SwizzleX = x;
+         reg.Register.SwizzleY = y;
+         reg.Register.SwizzleZ = z;
+         reg.Register.SwizzleW = w;
+         return reg;
+      }
+      /* else, keep searching */
+   }
+
+   assert(!"Failed to find immediate register!");
+
+   /* Just return IMM[0].xxxx */
+   memset(&reg, 0, sizeof(reg));
+   reg.Register.File = TGSI_FILE_IMMEDIATE;
+   return reg;
+}
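+
+/* Worked example (hypothetical immediates): if emit->immediates[2] holds
+ * {2.0, 1.0, 0.5, 0.0} and the caller asks for {0.5, 0.5, 1.0, 2.0}, the
+ * search above finds all four components in that vec4 and returns
+ * IMM[2].zzyx.
+ */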
+
+
+/**
+ * Return a tgsi_full_src_register for an immediate/literal
+ * union tgsi_immediate_data value of the form {value, value, value, value}.
+ * \sa make_immediate_reg_4() regarding allowed values.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg(struct svga_shader_emitter_v10 *emit,
+                   union tgsi_immediate_data value)
+{
+   struct tgsi_full_src_register reg;
+   int immpos = find_immediate(emit, value, 0);
+
+   assert(immpos >= 0);
+
+   memset(&reg, 0, sizeof(reg));
+   reg.Register.File = TGSI_FILE_IMMEDIATE;
+   reg.Register.Index = immpos;
+   reg.Register.SwizzleX =
+   reg.Register.SwizzleY =
+   reg.Register.SwizzleZ =
+   reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
+
+   return reg;
+}
+
+
+/**
+ * Return a tgsi_full_src_register for an immediate/literal float[4] value.
+ * \sa make_immediate_reg_4() regarding allowed values.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
+                          float x, float y, float z, float w)
+{
+   union tgsi_immediate_data imm[4];
+   imm[0].Float = x;
+   imm[1].Float = y;
+   imm[2].Float = z;
+   imm[3].Float = w;
+   return make_immediate_reg_4(emit, imm);
+}
+
+
+/**
+ * Return a tgsi_full_src_register for an immediate/literal float value
+ * of the form {value, value, value, value}.
+ * \sa make_immediate_reg_4() regarding allowed values.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
+{
+   union tgsi_immediate_data imm;
+   imm.Float = value;
+   return make_immediate_reg(emit, imm);
+}
+
+
+/**
+ * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
+                        int x, int y, int z, int w)
+{
+   union tgsi_immediate_data imm[4];
+   imm[0].Int = x;
+   imm[1].Int = y;
+   imm[2].Int = z;
+   imm[3].Int = w;
+   return make_immediate_reg_4(emit, imm);
+}
+
+
+/**
+ * Return a tgsi_full_src_register for an immediate/literal int value
+ * of the form {value, value, value, value}.
+ * \sa make_immediate_reg_4() regarding allowed values.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
+{
+   union tgsi_immediate_data imm;
+   imm.Int = value;
+   return make_immediate_reg(emit, imm);
+}
+
+
+/**
+ * Allocate space for a union tgsi_immediate_data[4] immediate.
+ * \return  the index/position of the immediate.
+ */
+static unsigned
+alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
+                  const union tgsi_immediate_data imm[4])
+{
+   unsigned n = emit->num_immediates++;
+   assert(!emit->immediates_emitted);
+   assert(n < Elements(emit->immediates));
+   emit->immediates[n][0] = imm[0];
+   emit->immediates[n][1] = imm[1];
+   emit->immediates[n][2] = imm[2];
+   emit->immediates[n][3] = imm[3];
+   return n;
+}
+
+
+/**
+ * Allocate space for a float[4] immediate.
+ * \return  the index/position of the immediate.
+ */
+static unsigned
+alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
+                       float x, float y, float z, float w)
+{
+   union tgsi_immediate_data imm[4];
+   imm[0].Float = x;
+   imm[1].Float = y;
+   imm[2].Float = z;
+   imm[3].Float = w;
+   return alloc_immediate_4(emit, imm);
+}
+
+
+/**
+ * Allocate space for an int[4] immediate.
+ * \return  the index/position of the immediate.
+ */
+static unsigned
+alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
+                     int x, int y, int z, int w)
+{
+   union tgsi_immediate_data imm[4];
+   imm[0].Int = x;
+   imm[1].Int = y;
+   imm[2].Int = z;
+   imm[3].Int = w;
+   return alloc_immediate_4(emit, imm);
+}
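+
+/* Typical usage sketch (illustrative, not from this patch): immediates are
+ * allocated in a pre-pass, before emit_vgpu10_immediates_block(), and only
+ * referenced afterwards:
+ *
+ *    alloc_immediate_float4(emit, 0.0f, 0.5f, 1.0f, 2.0f);   // pre-pass
+ *    ...
+ *    struct tgsi_full_src_register half =
+ *       make_immediate_reg_float(emit, 0.5f);        // during emission
+ */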
+
+
+/**
+ * Allocate a shader input to store a system value.
+ */
+static unsigned
+alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
+{
+   const unsigned n = emit->info.num_inputs + index;
+   assert(index < Elements(emit->system_value_indexes));
+   emit->system_value_indexes[index] = n;
+   return n;
+}
+
+
+/**
+ * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
+ */
+static boolean
+emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_full_immediate *imm)
+{
+   /* We don't actually emit any code here.  We just save the
+    * immediate values and emit them later.
+    */
+   alloc_immediate_4(emit, imm->u);
+   return TRUE;
+}
+
+
+/**
+ * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
+ * containing all the immediate values previously allocated
+ * with alloc_immediate_4().
+ */
+static boolean
+emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 token;
+
+   assert(!emit->immediates_emitted);
+
+   token.value = 0;
+   token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
+   token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
+
+   /* Note: no begin/end_emit_instruction() calls */
+   emit_dword(emit, token.value);
+   emit_dword(emit, 2 + 4 * emit->num_immediates);
+   emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
+
+   emit->immediates_emitted = TRUE;
+
+   return TRUE;
+}
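+
+/* The resulting DWORD layout, for two immediates (illustrative):
+ *
+ *    [CUSTOMDATA token][length = 2 + 4*2 = 10][imm0.xyzw][imm1.xyzw]
+ *
+ * where the length counts all DWORDs in the block, including the two
+ * header tokens.
+ */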
+
+
+/**
+ * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
+ * interpolation mode.
+ * \return a VGPU10_INTERPOLATION_x value
+ */
+static unsigned
+translate_interpolation(const struct svga_shader_emitter_v10 *emit,
+                        unsigned interp, unsigned interpolate_loc)
+{
+   if (interp == TGSI_INTERPOLATE_COLOR) {
+      interp = emit->key.fs.flatshade ?
+         TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
+   }
+
+   switch (interp) {
+   case TGSI_INTERPOLATE_CONSTANT:
+      return VGPU10_INTERPOLATION_CONSTANT;
+   case TGSI_INTERPOLATE_LINEAR:
+      return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
+             VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID :
+             VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
+   case TGSI_INTERPOLATE_PERSPECTIVE:
+      return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
+             VGPU10_INTERPOLATION_LINEAR_CENTROID :
+             VGPU10_INTERPOLATION_LINEAR;
+   default:
+      assert(!"Unexpected interpolation mode");
+      return VGPU10_INTERPOLATION_CONSTANT;
+   }
+}
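+
+/* For example, TGSI_INTERPOLATE_LINEAR at centroid maps to
+ * VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID; note that TGSI
+ * "LINEAR" is noperspective in VGPU10 terms, while TGSI "PERSPECTIVE"
+ * maps to plain VGPU10 "LINEAR".
+ */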
+
+
+/**
+ * Translate a TGSI property to VGPU10.
+ * We don't emit any instructions here; we just gather the primitive
+ * property information.  The output primitive topology might be changed
+ * later, so the final property instructions are emitted as part of the
+ * pre-helper code.
+ */
+static boolean
+emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
+                     const struct tgsi_full_property *prop)
+{
+   static const VGPU10_PRIMITIVE primType[] = {
+      VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
+      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
+      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
+      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
+      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
+      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
+      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
+      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
+      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
+      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
+      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
+      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
+      VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
+      VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
+   };
+
+   static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
+      VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
+      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
+      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
+      VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
+      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
+      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
+      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
+      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
+      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
+      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
+      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
+      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
+      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
+      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
+   };
+
+   static const unsigned inputArraySize[] = {
+      0,       /* VGPU10_PRIMITIVE_UNDEFINED */
+      1,       /* VGPU10_PRIMITIVE_POINT */
+      2,       /* VGPU10_PRIMITIVE_LINE */
+      3,       /* VGPU10_PRIMITIVE_TRIANGLE */
+      0,
+      0,
+      4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
+      6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
+   };
+
+   switch (prop->Property.PropertyName) {
+   case TGSI_PROPERTY_GS_INPUT_PRIM:
+      assert(prop->u[0].Data < Elements(primType));
+      emit->gs.prim_type = primType[prop->u[0].Data];
+      assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
+      emit->gs.input_size = inputArraySize[emit->gs.prim_type];
+      break;
+
+   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+      assert(prop->u[0].Data < Elements(primTopology));
+      emit->gs.prim_topology = primTopology[prop->u[0].Data];
+      assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
+      break;
+
+   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+      emit->gs.max_out_vertices = prop->u[0].Data;
+      break;
+
+   default:
+      break;
+   }
+
+   return TRUE;
+}
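+
+/* Worked example: a geometry shader whose TGSI_PROPERTY_GS_INPUT_PRIM is
+ * PIPE_PRIM_TRIANGLES_ADJACENCY gets prim_type =
+ * VGPU10_PRIMITIVE_TRIANGLE_ADJ and input_size = 6 (six vertices per
+ * input primitive).
+ */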
+
+
+static void
+emit_property_instruction(struct svga_shader_emitter_v10 *emit,
+                          VGPU10OpcodeToken0 opcode0, unsigned nData,
+                          unsigned data)
+{
+   begin_emit_instruction(emit);
+   emit_dword(emit, opcode0.value);
+   if (nData)
+      emit_dword(emit, data);
+   end_emit_instruction(emit);
+}
+
+
+/**
+ * Emit property instructions
+ */
+static void
+emit_property_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 opcode0;
+
+   assert(emit->unit == PIPE_SHADER_GEOMETRY);
+
+   /* emit input primitive type declaration */
+   opcode0.value = 0;
+   opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
+   opcode0.primitive = emit->gs.prim_type;
+   emit_property_instruction(emit, opcode0, 0, 0);
+
+   /* emit output primitive topology declaration */
+   opcode0.value = 0;
+   opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
+   opcode0.primitiveTopology = emit->gs.prim_topology;
+   emit_property_instruction(emit, opcode0, 0, 0);
+
+   /* emit max output vertices */
+   opcode0.value = 0;
+   opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
+   emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
+}
+
+
+/**
+ * Emit a vgpu10 declaration "instruction".
+ * \param index  the register index
+ * \param size   array size of the operand. In most cases, it is 1,
+ *               but for inputs to geometry shader, the array size varies
+ *               depending on the primitive type.
+ */
+static void
+emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
+                      VGPU10OpcodeToken0 opcode0,
+                      VGPU10OperandToken0 operand0,
+                      VGPU10NameToken name_token,
+                      unsigned index, unsigned size)
+{
+   assert(opcode0.opcodeType);
+   assert(operand0.mask);
+
+   begin_emit_instruction(emit);
+   emit_dword(emit, opcode0.value);
+
+   emit_dword(emit, operand0.value);
+
+   if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
+      /* Next token is the index of the register to declare */
+      emit_dword(emit, index);
+   }
+   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
+      /* Next token is the size of the register */
+      emit_dword(emit, size);
+
+      /* Followed by the index of the register */
+      emit_dword(emit, index);
+   }
+
+   if (name_token.value) {
+      emit_dword(emit, name_token.value);
+   }
+
+   end_emit_instruction(emit);
+}
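+
+/* Illustrative token sequences produced above:
+ *    1D index: [opcode0][operand0][index]
+ *    2D index: [opcode0][operand0][size][index]   (e.g. GS inputs)
+ * with an optional trailing name token for SIV/SGV declarations.
+ */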
+
+
+/**
+ * Emit the declaration for a shader input.
+ * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
+ * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
+ * \param dim         index dimension
+ * \param index       the input register index
+ * \param size        array size of the operand. In most cases, it is 1,
+ *                    but for inputs to geometry shader, the array size varies
+ *                    depending on the primitive type.
+ * \param name        one of VGPU10_NAME_x
+ * \param numComp     number of components
+ * \param selMode     component selection mode
+ * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
+ * \param interpMode  interpolation mode
+ */
+static void
+emit_input_declaration(struct svga_shader_emitter_v10 *emit,
+                       unsigned opcodeType, unsigned operandType,
+                       unsigned dim, unsigned index, unsigned size,
+                       unsigned name, unsigned numComp,
+                       unsigned selMode, unsigned usageMask,
+                       unsigned interpMode)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   VGPU10NameToken name_token;
+
+   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
+   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
+          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
+   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
+   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
+   assert(dim <= VGPU10_OPERAND_INDEX_3D);
+   assert(name == VGPU10_NAME_UNDEFINED ||
+          name == VGPU10_NAME_POSITION ||
+          name == VGPU10_NAME_INSTANCE_ID ||
+          name == VGPU10_NAME_VERTEX_ID ||
+          name == VGPU10_NAME_PRIMITIVE_ID ||
+          name == VGPU10_NAME_IS_FRONT_FACE);
+   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
+          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
+
+   check_register_index(emit, opcodeType, index);
+
+   opcode0.value = operand0.value = name_token.value = 0;
+
+   opcode0.opcodeType = opcodeType;
+   opcode0.interpolationMode = interpMode;
+
+   operand0.operandType = operandType;
+   operand0.numComponents = numComp;
+   operand0.selectionMode = selMode;
+   operand0.mask = usageMask;
+   operand0.indexDimension = dim;
+   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   if (dim == VGPU10_OPERAND_INDEX_2D)
+      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+   name_token.name = name;
+
+   emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
+}
+
+
+/**
+ * Emit the declaration for a shader output.
+ * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
+ * \param index  the output register index
+ * \param name  one of VGPU10_NAME_x
+ * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
+ */
+static void
+emit_output_declaration(struct svga_shader_emitter_v10 *emit,
+                        unsigned type, unsigned index,
+                        unsigned name, unsigned usageMask)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   VGPU10NameToken name_token;
+
+   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+   assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
+          type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
+          type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
+   assert(name == VGPU10_NAME_UNDEFINED ||
+          name == VGPU10_NAME_POSITION ||
+          name == VGPU10_NAME_PRIMITIVE_ID ||
+          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
+          name == VGPU10_NAME_CLIP_DISTANCE);
+
+   check_register_index(emit, type, index);
+
+   opcode0.value = operand0.value = name_token.value = 0;
+
+   opcode0.opcodeType = type;
+   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+   operand0.mask = usageMask;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+   name_token.name = name;
+
+   emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
+}
+
+
+/**
+ * Emit the declaration for the fragment depth output.
+ */
+static void
+emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   VGPU10NameToken name_token;
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   opcode0.value = operand0.value = name_token.value = 0;
+
+   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
+   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
+   operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+   operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+
+   emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
+}
+
+
+/**
+ * Emit the declaration for a system value input/output.
+ */
+static void
+emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
+                              unsigned semantic_name, unsigned index)
+{
+   switch (semantic_name) {
+   case TGSI_SEMANTIC_INSTANCEID:
+      index = alloc_system_value_index(emit, index);
+      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
+                             VGPU10_OPERAND_TYPE_INPUT,
+                             VGPU10_OPERAND_INDEX_1D,
+                             index, 1,
+                             VGPU10_NAME_INSTANCE_ID,
+                             VGPU10_OPERAND_4_COMPONENT,
+                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
+                             VGPU10_INTERPOLATION_UNDEFINED);
+      break;
+   case TGSI_SEMANTIC_VERTEXID:
+      index = alloc_system_value_index(emit, index);
+      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
+                             VGPU10_OPERAND_TYPE_INPUT,
+                             VGPU10_OPERAND_INDEX_1D,
+                             index, 1,
+                             VGPU10_NAME_VERTEX_ID,
+                             VGPU10_OPERAND_4_COMPONENT,
+                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
+                             VGPU10_INTERPOLATION_UNDEFINED);
+      break;
+   default:
+      ; /* XXX */
+   }
+}
+
+/**
+ * Translate a TGSI declaration to VGPU10.
+ */
+static boolean
+emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
+                        const struct tgsi_full_declaration *decl)
+{
+   switch (decl->Declaration.File) {
+   case TGSI_FILE_INPUT:
+      /* do nothing - see emit_input_declarations() */
+      return TRUE;
+
+   case TGSI_FILE_OUTPUT:
+      assert(decl->Range.First == decl->Range.Last);
+      emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
+      return TRUE;
+
+   case TGSI_FILE_TEMPORARY:
+      /* Don't declare the temps here.  Just keep track of how many
+       * and emit the declaration later.
+       */
+      if (decl->Declaration.Array) {
+         /* Indexed temporary array.  Save the start index of the array
+          * and the size of the array.
+          */
+         const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
+         unsigned i;
+
+         assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
+
+         /* Save this array so we can emit the declaration for it later */
+         emit->temp_arrays[arrayID].start = decl->Range.First;
+         emit->temp_arrays[arrayID].size =
+            decl->Range.Last - decl->Range.First + 1;
+
+         emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
+         assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
+         emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
+
+         /* Fill in the temp_map entries for this array */
+         for (i = decl->Range.First; i <= decl->Range.Last; i++) {
+            emit->temp_map[i].arrayId = arrayID;
+            emit->temp_map[i].index = i - decl->Range.First;
+         }
+      }
+
+      /* for all temps, indexed or not, keep track of highest index */
+      emit->num_shader_temps = MAX2(emit->num_shader_temps,
+                                    decl->Range.Last + 1);
+      return TRUE;
+
+   case TGSI_FILE_CONSTANT:
+      /* Don't declare constants here.  Just keep track and emit later. */
+      {
+         unsigned constbuf = 0, num_consts;
+         if (decl->Declaration.Dimension) {
+            constbuf = decl->Dim.Index2D;
+         }
+         /* An out-of-bounds constbuf index means the shader should never
+          * have linked in the first place, so we should not reach this
+          * point; assert to catch it anyway.
+          */
+         assert(constbuf < Elements(emit->num_shader_consts));
+
+         num_consts = MAX2(emit->num_shader_consts[constbuf],
+                           decl->Range.Last + 1);
+
+         if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
+            debug_printf("Warning: constant buffer is declared to size [%u]"
+                         " but [%u] is the limit.\n",
+                         num_consts,
+                         VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
+         }
+         /* The linker doesn't enforce the max UBO size so we clamp here */
+         emit->num_shader_consts[constbuf] =
+            MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
+      }
+      return TRUE;
+
+   case TGSI_FILE_IMMEDIATE:
+      assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
+      return FALSE;
+
+   case TGSI_FILE_SYSTEM_VALUE:
+      emit_system_value_declaration(emit, decl->Semantic.Name,
+                                    decl->Range.First);
+      return TRUE;
+
+   case TGSI_FILE_SAMPLER:
+      /* Don't declare samplers here.  Just keep track and emit later. */
+      emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
+      return TRUE;
+
+   case TGSI_FILE_RESOURCE:
+      /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
+      /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
+      assert(!"TGSI_FILE_RESOURCE not handled yet");
+      return FALSE;
+
+   case TGSI_FILE_ADDRESS:
+      emit->num_address_regs = MAX2(emit->num_address_regs,
+                                    decl->Range.Last + 1);
+      return TRUE;
+
+   case TGSI_FILE_SAMPLER_VIEW:
+      /* Not used at this time, but maybe in the future.
+       * See emit_resource_declarations().
+       */
+      return TRUE;
+
+   default:
+      assert(!"Unexpected type of declaration");
+      return FALSE;
+   }
+}
+
+
+/**
+ * Emit all input declarations.
+ */
+static boolean
+emit_input_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned i;
+
+   if (emit->unit == PIPE_SHADER_FRAGMENT) {
+
+      for (i = 0; i < emit->linkage.num_inputs; i++) {
+         unsigned semantic_name = emit->info.input_semantic_name[i];
+         unsigned usage_mask = emit->info.input_usage_mask[i];
+         unsigned index = emit->linkage.input_map[i];
+         unsigned type, interpolationMode, name;
+
+         if (usage_mask == 0)
+            continue;  /* register is not actually used */
+
+         if (semantic_name == TGSI_SEMANTIC_POSITION) {
+            /* fragment position input */
+            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+            interpolationMode = VGPU10_INTERPOLATION_LINEAR;
+            name = VGPU10_NAME_POSITION;
+            if (usage_mask & TGSI_WRITEMASK_W) {
+               /* we need to replace use of 'w' with '1/w' */
+               emit->fs.fragcoord_input_index = i;
+            }
+         }
+         else if (semantic_name == TGSI_SEMANTIC_FACE) {
+            /* fragment front-facing input */
+            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+            name = VGPU10_NAME_IS_FRONT_FACE;
+            emit->fs.face_input_index = i;
+         }
+         else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
+            /* primitive ID */
+            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+            name = VGPU10_NAME_PRIMITIVE_ID;
+         }
+         else {
+            /* general fragment input */
+            type = VGPU10_OPCODE_DCL_INPUT_PS;
+            interpolationMode =
+               translate_interpolation(emit,
+                                       emit->info.input_interpolate[i],
+                                       emit->info.input_interpolate_loc[i]);
+
+            /* keep track of whether flat interpolation mode is used */
+            emit->uses_flat_interp = emit->uses_flat_interp ||
+               (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
+
+            name = VGPU10_NAME_UNDEFINED;
+         }
+
+         emit_input_declaration(emit, type,
+                                VGPU10_OPERAND_TYPE_INPUT,
+                                VGPU10_OPERAND_INDEX_1D, index, 1,
+                                name,
+                                VGPU10_OPERAND_4_COMPONENT,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+                                interpolationMode);
+      }
+   }
+   else if (emit->unit == PIPE_SHADER_GEOMETRY) {
+
+      for (i = 0; i < emit->info.num_inputs; i++) {
+         unsigned semantic_name = emit->info.input_semantic_name[i];
+         unsigned usage_mask = emit->info.input_usage_mask[i];
+         unsigned index = emit->linkage.input_map[i];
+         unsigned opcodeType, operandType;
+         unsigned numComp, selMode;
+         unsigned name;
+         unsigned dim;
+
+         if (usage_mask == 0)
+            continue;  /* register is not actually used */
+
+         opcodeType = VGPU10_OPCODE_DCL_INPUT;
+         operandType = VGPU10_OPERAND_TYPE_INPUT;
+         numComp = VGPU10_OPERAND_4_COMPONENT;
+         selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+         name = VGPU10_NAME_UNDEFINED;
+
+         /* All geometry shader inputs are two-dimensional except gl_PrimitiveID */
+         dim = VGPU10_OPERAND_INDEX_2D;
+
+         if (semantic_name == TGSI_SEMANTIC_PRIMID) {
+            /* Primitive ID */
+            operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
+            dim = VGPU10_OPERAND_INDEX_0D;
+            numComp = VGPU10_OPERAND_0_COMPONENT;
+            selMode = 0;
+
+            /* Also save the register index so we can check for the
+             * primitive id when emitting a src register.  We need to
+             * modify the operand type and index dimension when emitting
+             * the primitive id src reg.
+             */
+            emit->gs.prim_id_index = i;
+         }
+         else if (semantic_name == TGSI_SEMANTIC_POSITION) {
+            /* vertex position input */
+            opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
+            name = VGPU10_NAME_POSITION;
+         }
+
+         emit_input_declaration(emit, opcodeType, operandType,
+                                dim, index,
+                                emit->gs.input_size,
+                                name,
+                                numComp, selMode,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+                                VGPU10_INTERPOLATION_UNDEFINED);
+      }
+   }
+   else {
+      assert(emit->unit == PIPE_SHADER_VERTEX);
+
+      for (i = 0; i < emit->info.num_inputs; i++) {
+         unsigned usage_mask = emit->info.input_usage_mask[i];
+         unsigned index = i;
+
+         if (usage_mask == 0)
+            continue;  /* register is not actually used */
+
+         emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+                                VGPU10_OPERAND_TYPE_INPUT,
+                                VGPU10_OPERAND_INDEX_1D, index, 1,
+                                VGPU10_NAME_UNDEFINED,
+                                VGPU10_OPERAND_4_COMPONENT,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+                                VGPU10_INTERPOLATION_UNDEFINED);
+      }
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Emit all output declarations.
+ */
+static boolean
+emit_output_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned i;
+
+   for (i = 0; i < emit->info.num_outputs; i++) {
+      /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
+      const unsigned semantic_name = emit->info.output_semantic_name[i];
+      const unsigned semantic_index = emit->info.output_semantic_index[i];
+      unsigned index = i;
+
+      if (emit->unit == PIPE_SHADER_FRAGMENT) {
+         if (semantic_name == TGSI_SEMANTIC_COLOR) {
+            assert(semantic_index < Elements(emit->fs.color_out_index));
+
+            emit->fs.color_out_index[semantic_index] = index;
+
+            /* The semantic index is the shader's color output/buffer index */
+            emit_output_declaration(emit,
+                                    VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
+                                    VGPU10_NAME_UNDEFINED,
+                                    VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+
+            if (semantic_index == 0) {
+               if (emit->key.fs.write_color0_to_n_cbufs > 1) {
+                  /* Emit declarations for the additional color outputs
+                   * for broadcasting.
+                   */
+                  unsigned j;
+                  for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
+                     /* Allocate a new output index */
+                     unsigned idx = emit->info.num_outputs + j - 1;
+                     emit->fs.color_out_index[j] = idx;
+                     emit_output_declaration(emit,
+                                        VGPU10_OPCODE_DCL_OUTPUT, idx,
+                                        VGPU10_NAME_UNDEFINED,
+                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+                     emit->info.output_semantic_index[idx] = j;
+                  }
+               }
+            }
+            else {
+               assert(!emit->key.fs.write_color0_to_n_cbufs);
+            }
+         }
+         else if (semantic_name == TGSI_SEMANTIC_POSITION) {
+            /* Fragment depth output */
+            emit_fragdepth_output_declaration(emit);
+         }
+         else {
+            assert(!"Bad output semantic name");
+         }
+      }
+      else {
+         /* VS or GS */
+         unsigned name, type;
+         unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+
+         switch (semantic_name) {
+         case TGSI_SEMANTIC_POSITION:
+            assert(emit->unit != PIPE_SHADER_FRAGMENT);
+            type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
+            name = VGPU10_NAME_POSITION;
+            /* Save the index of the vertex position output register */
+            emit->vposition.out_index = index;
+            break;
+         case TGSI_SEMANTIC_CLIPDIST:
+            type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
+            name = VGPU10_NAME_CLIP_DISTANCE;
+            /* save the starting index of the clip distance output register */
+            if (semantic_index == 0)
+               emit->clip_dist_out_index = index;
+            writemask = emit->output_usage_mask[index];
+            writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
+            if (writemask == 0x0) {
+               continue; /* discard this do-nothing declaration */
+            }
+            break;
+         case TGSI_SEMANTIC_PRIMID:
+            assert(emit->unit == PIPE_SHADER_GEOMETRY);
+            type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
+            name = VGPU10_NAME_PRIMITIVE_ID;
+            break;
+         case TGSI_SEMANTIC_LAYER:
+            assert(emit->unit == PIPE_SHADER_GEOMETRY);
+            type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
+            name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
+            break;
+         case TGSI_SEMANTIC_CLIPVERTEX:
+            type = VGPU10_OPCODE_DCL_OUTPUT;
+            name = VGPU10_NAME_UNDEFINED;
+            emit->clip_vertex_out_index = index;
+            break;
+         default:
+            /* generic output */
+            type = VGPU10_OPCODE_DCL_OUTPUT;
+            name = VGPU10_NAME_UNDEFINED;
+         }
+
+         emit_output_declaration(emit, type, index, name, writemask);
+      }
+   }
+
+   if (emit->vposition.so_index != INVALID_INDEX &&
+       emit->vposition.out_index != INVALID_INDEX) {
+
+      assert(emit->unit != PIPE_SHADER_FRAGMENT);
+
+      /* Emit the declaration for the non-adjusted vertex position
+       * for stream output purposes.
+       */
+      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
+                              emit->vposition.so_index,
+                              VGPU10_NAME_UNDEFINED,
+                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+   }
+
+   if (emit->clip_dist_so_index != INVALID_INDEX &&
+       emit->clip_dist_out_index != INVALID_INDEX) {
+
+      assert(emit->unit != PIPE_SHADER_FRAGMENT);
+
+      /* Emit the declaration for the clip distance shadow copy which
+       * will be used for stream output purposes and for the clip
+       * distance varying variable.
+       */
+      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
+                              emit->clip_dist_so_index,
+                              VGPU10_NAME_UNDEFINED,
+                              emit->output_usage_mask[emit->clip_dist_out_index]);
+
+      if (emit->info.num_written_clipdistance > 4) {
+         /* for the second clip distance register, each handles 4 planes */
+         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
+                                 emit->clip_dist_so_index + 1,
+                                 VGPU10_NAME_UNDEFINED,
+                                 emit->output_usage_mask[emit->clip_dist_out_index+1]);
+      }
+   }
+
+   return TRUE;
+}
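+
+/* Worked example for the color-broadcast path above: with
+ * write_color0_to_n_cbufs = 3 and a single TGSI color output, three
+ * outputs are declared; color_out_index[0..2] = {0, 1, 2} and the extra
+ * declarations get semantic indexes 1 and 2.
+ */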
+
+
+/**
+ * Emit the declarations for the temporary registers.
+ */
+static boolean
+emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned total_temps, reg, i;
+
+   total_temps = emit->num_shader_temps;
+
+   /* Allocate extra temps for specially-implemented instructions,
+    * such as LIT.
+    */
+   total_temps += MAX_INTERNAL_TEMPS;
+
+   if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
+      if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
+          emit->key.clip_plane_enable ||
+          emit->vposition.so_index != INVALID_INDEX) {
+         emit->vposition.tmp_index = total_temps;
+         total_temps += 1;
+      }
+
+      if (emit->unit == PIPE_SHADER_VERTEX) {
+         unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
+                                 emit->key.vs.adjust_attrib_itof |
+                                 emit->key.vs.adjust_attrib_utof |
+                                 emit->key.vs.attrib_is_bgra |
+                                 emit->key.vs.attrib_puint_to_snorm |
+                                 emit->key.vs.attrib_puint_to_uscaled |
+                                 emit->key.vs.attrib_puint_to_sscaled);
+         while (attrib_mask) {
+            unsigned index = u_bit_scan(&attrib_mask);
+            emit->vs.adjusted_input[index] = total_temps++;
+         }
+      }
+
+      if (emit->clip_mode == CLIP_DISTANCE) {
+         /* We need to write the clip distance to a temporary register
+          * first.  Then it will be copied to the shadow copy for the
+          * clip distance varying variable and for stream output purposes.
+          * It will also be copied to the actual CLIPDIST register
+          * according to the enabled clip planes.
+          */
+         emit->clip_dist_tmp_index = total_temps++;
+         if (emit->info.num_written_clipdistance > 4)
+            total_temps++; /* second clip register */
+      }
+      else if (emit->clip_mode == CLIP_VERTEX) {
+         /* We need to convert the TGSI CLIPVERTEX output to one or more
+          * clip distances.  Allocate a temp reg for the clipvertex here.
+          */
+         assert(emit->info.writes_clipvertex > 0);
+         emit->clip_vertex_tmp_index = total_temps;
+         total_temps++;
+      }
+   }
+   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
+      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
+          emit->key.fs.write_color0_to_n_cbufs > 1) {
+         /* Allocate a temp to hold the output color */
+         emit->fs.color_tmp_index = total_temps;
+         total_temps += 1;
+      }
+
+      if (emit->fs.face_input_index != INVALID_INDEX) {
+         /* Allocate a temp for the +/-1 face register */
+         emit->fs.face_tmp_index = total_temps;
+         total_temps += 1;
+      }
+
+      if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
+         /* Allocate a temp for modified fragment position register */
+         emit->fs.fragcoord_tmp_index = total_temps;
+         total_temps += 1;
+      }
+   }
+
+   for (i = 0; i < emit->num_address_regs; i++) {
+      emit->address_reg_index[i] = total_temps++;
+   }
+
+   /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
+    * temp indexes.  Basically, we compact all the non-array temp register
+    * indexes into a consecutive series.
+    *
+    * Before, we may have some TGSI declarations like:
+    *   DCL TEMP[0..1], LOCAL
+    *   DCL TEMP[2..4], ARRAY(1), LOCAL
+    *   DCL TEMP[5..7], ARRAY(2), LOCAL
+    *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
+    *
+    * After, we'll have a map like this:
+    *   temp_map[0] = { array 0, index 0 }
+    *   temp_map[1] = { array 0, index 1 }
+    *   temp_map[2] = { array 1, index 0 }
+    *   temp_map[3] = { array 1, index 1 }
+    *   temp_map[4] = { array 1, index 2 }
+    *   temp_map[5] = { array 2, index 0 }
+    *   temp_map[6] = { array 2, index 1 }
+    *   temp_map[7] = { array 2, index 2 }
+    *   temp_map[8] = { array 0, index 2 }
+    *   temp_map[9] = { array 0, index 3 }
+    *
+    * We'll declare two arrays of 3 elements, plus a set of four non-indexed
+    * temps numbered 0..3
+    *
+    * Any time we emit a temporary register index, we'll have to use the
+    * temp_map[] table to convert the TGSI index to the VGPU10 index.
+    *
+    * Finally, we recompute the total_temps value here.
+    */
+   reg = 0;
+   for (i = 0; i < total_temps; i++) {
+      if (emit->temp_map[i].arrayId == 0) {
+         emit->temp_map[i].index = reg++;
+      }
+   }
+   total_temps = reg;
+
+   if (0) {
+      debug_printf("total_temps %u\n", total_temps);
+      for (i = 0; i < 30; i++) {
+         debug_printf("temp %u ->  array %u  index %u\n",
+                      i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
+      }
+   }
+
+   /* Emit declaration of ordinary temp registers */
+   if (total_temps > 0) {
+      VGPU10OpcodeToken0 opcode0;
+
+      opcode0.value = 0;
+      opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
+
+      begin_emit_instruction(emit);
+      emit_dword(emit, opcode0.value);
+      emit_dword(emit, total_temps);
+      end_emit_instruction(emit);
+   }
+
+   /* Emit declarations for indexable temp arrays.  Skip 0th entry since
+    * it's unused.
+    */
+   for (i = 1; i < emit->num_temp_arrays; i++) {
+      unsigned num_temps = emit->temp_arrays[i].size;
+
+      if (num_temps > 0) {
+         VGPU10OpcodeToken0 opcode0;
+
+         opcode0.value = 0;
+         opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
+
+         begin_emit_instruction(emit);
+         emit_dword(emit, opcode0.value);
+         emit_dword(emit, i); /* which array */
+         emit_dword(emit, num_temps);
+         emit_dword(emit, 4); /* num components */
+         end_emit_instruction(emit);
+
+         total_temps += num_temps;
+      }
+   }
+
+   /* Check that the grand total of all regular and indexed temps is
+    * under the limit.
+    */
+   check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
+
+   return TRUE;
+}
+
+
+static boolean
+emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   unsigned total_consts, i;
+
+   opcode0.value = 0;
+   opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
+   opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
+   /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
+
+   operand0.value = 0;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
+   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+   operand0.swizzleX = 0;
+   operand0.swizzleY = 1;
+   operand0.swizzleZ = 2;
+   operand0.swizzleW = 3;
+
+   /**
+    * Emit declaration for constant buffer [0].  We also allocate
+    * room for the extra constants here.
+    */
+   total_consts = emit->num_shader_consts[0];
+
+   /* Now, allocate constant slots for the "extra" constants */
+
+   /* Vertex position scale/translation */
+   if (emit->vposition.need_prescale) {
+      emit->vposition.prescale_scale_index = total_consts++;
+      emit->vposition.prescale_trans_index = total_consts++;
+   }
+
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      if (emit->key.vs.undo_viewport) {
+         emit->vs.viewport_index = total_consts++;
+      }
+   }
+
+   /* user-defined clip planes */
+   if (emit->key.clip_plane_enable) {
+      unsigned n = util_bitcount(emit->key.clip_plane_enable);
+      assert(emit->unit == PIPE_SHADER_VERTEX ||
+             emit->unit == PIPE_SHADER_GEOMETRY);
+      for (i = 0; i < n; i++) {
+         emit->clip_plane_const[i] = total_consts++;
+      }
+   }
+
+   /* Texcoord scale factors for RECT textures */
+   {
+      for (i = 0; i < emit->num_samplers; i++) {
+         if (emit->key.tex[i].unnormalized) {
+            emit->texcoord_scale_index[i] = total_consts++;
+         }
+      }
+   }
+
+   /* Texture buffer sizes */
+   for (i = 0; i < emit->num_samplers; i++) {
+      if (emit->key.tex[i].texture_target == PIPE_BUFFER) {
+         emit->texture_buffer_size_index[i] = total_consts++;
+      }
+   }
+
+   if (total_consts > 0) {
+      begin_emit_instruction(emit);
+      emit_dword(emit, opcode0.value);
+      emit_dword(emit, operand0.value);
+      emit_dword(emit, 0);  /* which const buffer slot */
+      emit_dword(emit, total_consts);
+      end_emit_instruction(emit);
+   }
+
+   /* Declare remaining constant buffers (UBOs) */
+   for (i = 1; i < Elements(emit->num_shader_consts); i++) {
+      if (emit->num_shader_consts[i] > 0) {
+         begin_emit_instruction(emit);
+         emit_dword(emit, opcode0.value);
+         emit_dword(emit, operand0.value);
+         emit_dword(emit, i);  /* which const buffer slot */
+         emit_dword(emit, emit->num_shader_consts[i]);
+         end_emit_instruction(emit);
+      }
+   }
+
+   return TRUE;
+}
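+
+/* Worked example (hypothetical key state): a vertex shader with 4 TGSI
+ * constants, prescale enabled and one user clip plane lays out constant
+ * buffer 0 as:
+ *    [0..3] shader constants
+ *    [4]    prescale scale       [5] prescale translation
+ *    [6]    clip plane 0
+ * so total_consts = 7.
+ */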
+
+
+/**
+ * Emit declarations for samplers.
+ */
+static boolean
+emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned i;
+
+   for (i = 0; i < emit->num_samplers; i++) {
+      VGPU10OpcodeToken0 opcode0;
+      VGPU10OperandToken0 operand0;
+
+      opcode0.value = 0;
+      opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
+      opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
+
+      operand0.value = 0;
+      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+      operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
+      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+      begin_emit_instruction(emit);
+      emit_dword(emit, opcode0.value);
+      emit_dword(emit, operand0.value);
+      emit_dword(emit, i);
+      end_emit_instruction(emit);
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
+ */
+static unsigned
+pipe_texture_to_resource_dimension(unsigned target, bool msaa)
+{
+   switch (target) {
+   case PIPE_BUFFER:
+      return VGPU10_RESOURCE_DIMENSION_BUFFER;
+   case PIPE_TEXTURE_1D:
+      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+      return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS
+         : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+   case PIPE_TEXTURE_3D:
+      return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
+   case PIPE_TEXTURE_CUBE:
+      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
+   case PIPE_TEXTURE_1D_ARRAY:
+      return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY;
+   case PIPE_TEXTURE_2D_ARRAY:
+      return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
+         : VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
+   default:
+      assert(!"Unexpected resource type");
+      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+   }
+}
+
+
+/**
+ * Given a tgsi_return_type, return true iff it is an integer type.
+ */
+static boolean
+is_integer_type(enum tgsi_return_type type)
+{
+   switch (type) {
+      case TGSI_RETURN_TYPE_SINT:
+      case TGSI_RETURN_TYPE_UINT:
+         return TRUE;
+      case TGSI_RETURN_TYPE_FLOAT:
+      case TGSI_RETURN_TYPE_UNORM:
+      case TGSI_RETURN_TYPE_SNORM:
+         return FALSE;
+      case TGSI_RETURN_TYPE_COUNT:
+      default:
+         assert(!"is_integer_type: Unknown tgsi_return_type");
+         return FALSE;
+   }
+}
+
+
+/**
+ * Emit declarations for resources.
+ * XXX When we're sure that all TGSI shaders will be generated with
+ * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
+ * rework this code.
+ */
+static boolean
+emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned i;
+
+   /* Emit resource decl for each sampler */
+   for (i = 0; i < emit->num_samplers; i++) {
+      VGPU10OpcodeToken0 opcode0;
+      VGPU10OperandToken0 operand0;
+      VGPU10ResourceReturnTypeToken return_type;
+      VGPU10_RESOURCE_RETURN_TYPE rt;
+
+      opcode0.value = 0;
+      opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
+      opcode0.resourceDimension =
+         pipe_texture_to_resource_dimension(emit->key.tex[i].texture_target,
+                                            emit->key.tex[i].texture_msaa);
+      operand0.value = 0;
+      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+      operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
+      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+#if 1
+      /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
+      assert(emit->key.tex[i].return_type <= TGSI_RETURN_TYPE_FLOAT);
+      rt = emit->key.tex[i].return_type + 1;
+#else
+      switch (emit->key.tex[i].return_type) {
+         case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
+         case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
+         case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
+         case TGSI_RETURN_TYPE_UINT:  rt = VGPU10_RETURN_TYPE_UINT;  break;
+         case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
+         case TGSI_RETURN_TYPE_COUNT:
+         default:
+            rt = VGPU10_RETURN_TYPE_FLOAT;
+            assert(!"emit_resource_declarations: Unknown tgsi_return_type");
+      }
+#endif
+
+      return_type.value = 0;
+      return_type.component0 = rt;
+      return_type.component1 = rt;
+      return_type.component2 = rt;
+      return_type.component3 = rt;
+
+      begin_emit_instruction(emit);
+      emit_dword(emit, opcode0.value);
+      emit_dword(emit, operand0.value);
+      emit_dword(emit, i);
+      emit_dword(emit, return_type.value);
+      end_emit_instruction(emit);
+   }
+
+   return TRUE;
+}
+
+static void
+emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
+                     unsigned opcode,
+                     const struct tgsi_full_dst_register *dst,
+                     const struct tgsi_full_src_register *src,
+                     boolean saturate)
+{
+   begin_emit_instruction(emit);
+   emit_opcode(emit, opcode, saturate);
+   emit_dst_register(emit, dst);
+   emit_src_register(emit, src);
+   end_emit_instruction(emit);
+}
+
+static void
+emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
+                     unsigned opcode,
+                     const struct tgsi_full_dst_register *dst,
+                     const struct tgsi_full_src_register *src1,
+                     const struct tgsi_full_src_register *src2,
+                     boolean saturate)
+{
+   begin_emit_instruction(emit);
+   emit_opcode(emit, opcode, saturate);
+   emit_dst_register(emit, dst);
+   emit_src_register(emit, src1);
+   emit_src_register(emit, src2);
+   end_emit_instruction(emit);
+}
+
+static void
+emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
+                     unsigned opcode,
+                     const struct tgsi_full_dst_register *dst,
+                     const struct tgsi_full_src_register *src1,
+                     const struct tgsi_full_src_register *src2,
+                     const struct tgsi_full_src_register *src3,
+                     boolean saturate)
+{
+   begin_emit_instruction(emit);
+   emit_opcode(emit, opcode, saturate);
+   emit_dst_register(emit, dst);
+   emit_src_register(emit, src1);
+   emit_src_register(emit, src2);
+   emit_src_register(emit, src3);
+   end_emit_instruction(emit);
+}
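+
+/* Usage sketch (illustrative): these helpers cover the common
+ * "opcode dst, src..." patterns, e.g.
+ *
+ *    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst, &a, &b, FALSE);
+ *
+ * emits "add dst, a, b" with no saturation.
+ */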
+
+/**
+ * Emit the actual clip distance instructions to be used for clipping
+ * by copying the clip distance from the temporary registers to the
+ * CLIPDIST registers written with the enabled planes mask.
+ * Also copy the clip distance from the temporary to the clip distance
+ * shadow copy register, which will be referenced by the next shader as input.
+ */
+static void
+emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   struct tgsi_full_src_register tmp_clip_dist_src;
+   struct tgsi_full_dst_register clip_dist_dst;
+
+   unsigned i;
+   unsigned clip_plane_enable = emit->key.clip_plane_enable;
+   unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
+   unsigned num_written_clipdist = emit->info.num_written_clipdistance;
+
+   assert(emit->clip_dist_out_index != INVALID_INDEX);
+   assert(emit->clip_dist_tmp_index != INVALID_INDEX);
+
+   /**
+    * Temporarily reset the temporary clip dist register index so
+    * that the copy to the real clip dist register will not
+    * attempt to copy to the temporary register again.
+    */
+   emit->clip_dist_tmp_index = INVALID_INDEX;
+
+   for (i = 0; i < 2 && num_written_clipdist; i++, num_written_clipdist -= 4) {
+
+      tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
+
+      /**
+       * Copy to the shadow copy for use by varying variables and
+       * stream output.  All clip distances will be written regardless
+       * of the enabled clipping planes.
+       */
+      clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
+                                   emit->clip_dist_so_index + i);
+
+      /* MOV clip_dist_so, tmp_clip_dist */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
+                           &tmp_clip_dist_src, FALSE);
+
+      /**
+       * Copy the clip distances for the enabled clipping planes
+       * to the CLIPDIST registers used for clipping.
+       */
+      if (clip_plane_enable & 0xf) {
+         clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
+                                      emit->clip_dist_out_index + i);
+         clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
+
+         /* MOV CLIPDIST, tmp_clip_dist */
+         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
+                              &tmp_clip_dist_src, FALSE);
+      }
+      /* four clip planes per clip register */
+      clip_plane_enable >>= 4;
+   }
+   /**
+    * set the temporary clip dist register index back to the
+    * temporary index for the next vertex
+    */
+   emit->clip_dist_tmp_index = clip_dist_tmp_index;
+}
+
+/**
+ * Declare clip distance output registers for user-defined clip planes
+ * or the TGSI_CLIPVERTEX output.
+ */
+static void
+emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
+   unsigned index = emit->num_outputs;
+   unsigned plane_mask;
+
+   assert(emit->unit == PIPE_SHADER_VERTEX ||
+          emit->unit == PIPE_SHADER_GEOMETRY);
+   assert(num_clip_planes <= 8);
+
+   if (emit->clip_mode != CLIP_LEGACY &&
+       emit->clip_mode != CLIP_VERTEX) {
+      return;
+   }
+
+   if (num_clip_planes == 0)
+      return;
+
+   /* Declare one or two clip output registers.  The number of components
+    * in the mask reflects the number of clip planes.  For example, if 5
+    * clip planes are needed, we'll declare outputs similar to:
+    * dcl_output_siv o2.xyzw, clip_distance
+    * dcl_output_siv o3.x, clip_distance
+    */
+   emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
+
+   plane_mask = (1 << num_clip_planes) - 1;
+   if (plane_mask & 0xf) {
+      unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
+                              VGPU10_NAME_CLIP_DISTANCE, cmask);
+      emit->num_outputs++;
+   }
+   if (plane_mask & 0xf0) {
+      unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
+                              VGPU10_NAME_CLIP_DISTANCE, cmask);
+      emit->num_outputs++;
+   }
+}
+
+
+/**
+ * Emit the instructions for writing to the clip distance registers
+ * to handle legacy/automatic clip planes.
+ * For each clip plane, the distance is the dot product of the vertex
+ * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
+ * This is not used when the shader already declares explicit CLIPVERTEX
+ * or CLIPDISTANCE output registers.
+ */
+static void
+emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
+                             unsigned vpos_tmp_index)
+{
+   unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
+
+   assert(emit->clip_mode == CLIP_LEGACY);
+   assert(num_clip_planes <= 8);
+
+   assert(emit->unit == PIPE_SHADER_VERTEX ||
+          emit->unit == PIPE_SHADER_GEOMETRY);
+
+   for (i = 0; i < num_clip_planes; i++) {
+      struct tgsi_full_dst_register dst;
+      struct tgsi_full_src_register plane_src, vpos_src;
+      unsigned reg_index = emit->clip_dist_out_index + i / 4;
+      unsigned comp = i % 4;
+      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
+
+      /* create dst, src regs */
+      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
+      dst = writemask_dst(&dst, writemask);
+
+      plane_src = make_src_const_reg(emit->clip_plane_const[i]);
+      vpos_src = make_src_temp_reg(vpos_tmp_index);
+
+      /* DP4 clip_dist, plane, vpos */
+      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
+                           &plane_src, &vpos_src, FALSE);
+   }
+}
+
+
+/**
+ * Emit the instructions for computing the clip distance results from
+ * the clip vertex temporary.
+ * For each clip plane, the distance is the dot product of the clip vertex
+ * position (found in a temp reg) and the clip plane coefficients.
+ */
+static void
+emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
+   unsigned i;
+   struct tgsi_full_dst_register dst;
+   struct tgsi_full_src_register clipvert_src;
+   const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
+
+   assert(emit->unit == PIPE_SHADER_VERTEX ||
+          emit->unit == PIPE_SHADER_GEOMETRY);
+
+   assert(emit->clip_mode == CLIP_VERTEX);
+
+   clipvert_src = make_src_temp_reg(clip_vertex_tmp);
+
+   for (i = 0; i < num_clip; i++) {
+      struct tgsi_full_src_register plane_src;
+      unsigned reg_index = emit->clip_dist_out_index + i / 4;
+      unsigned comp = i % 4;
+      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
+
+      /* create dst, src regs */
+      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
+      dst = writemask_dst(&dst, writemask);
+
+      plane_src = make_src_const_reg(emit->clip_plane_const[i]);
+
+      /* DP4 clip_dist, plane, clipvert */
+      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
+                           &plane_src, &clipvert_src, FALSE);
+   }
+
+   /* copy temporary clip vertex register to the clip vertex register */
+
+   assert(emit->clip_vertex_out_index != INVALID_INDEX);
+
+   /**
+    * Temporarily reset the temporary clip vertex register index so
+    * that the copy to the clip vertex register will not attempt
+    * to copy to the temporary register again.
+    */
+   emit->clip_vertex_tmp_index = INVALID_INDEX;
+
+   /* MOV clip_vertex, clip_vertex_tmp */
+   dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                        &dst, &clipvert_src, FALSE);
+
+   /**
+    * Set the temporary clip vertex register index back to the
+    * temporary index for the next vertex.
+    */
+   emit->clip_vertex_tmp_index = clip_vertex_tmp;
+}
+
+/**
+ * Emit code to convert RGBA to BGRA
+ */
+static void
+emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
+              const struct tgsi_full_dst_register *dst,
+              const struct tgsi_full_src_register *src)
+{
+   struct tgsi_full_src_register bgra_src =
+      swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
+
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+   emit_dst_register(emit, dst);
+   emit_src_register(emit, &bgra_src);
+   end_emit_instruction(emit);
+}
+
+
+/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
+static void
+emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
+                    const struct tgsi_full_dst_register *dst,
+                    const struct tgsi_full_src_register *src)
+{
+   struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
+   struct tgsi_full_src_register two =
+      make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
+   struct tgsi_full_src_register neg_two =
+      make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
+
+   unsigned val_tmp = get_temp_index(emit);
+   struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
+   struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
+
+   unsigned bias_tmp = get_temp_index(emit);
+   struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
+   struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
+
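+   /* Per component this computes dst = src * 2 + ((src > 0.5) ? -2 : 0).
+    * GE writes a 0 / 0xffffffff mask and ANDing that mask with -2.0
+    * yields -2.0 or 0.0 without a branch.  The w components of the
+    * immediates differ to handle the 2-bit alpha channel.
+    */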
+   /* val = src * 2.0 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst,
+                        src, &two, FALSE);
+
+   /* bias = src > 0.5 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst,
+                        src, &half, FALSE);
+
+   /* bias = bias & -2.0 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
+                        &bias_src, &neg_two, FALSE);
+
+   /* dst = val + bias */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
+                        &val_src, &bias_src, FALSE);
+
+   free_temp_indexes(emit);
+}
+
+
+/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
+static void
+emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_full_dst_register *dst,
+                      const struct tgsi_full_src_register *src)
+{
+   struct tgsi_full_src_register scale =
+      make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
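+   /* 1023 = 2^10 - 1 undoes the unorm normalization of the 10-bit RGB
+    * channels; 3 = 2^2 - 1 does the same for the 2-bit alpha channel.
+    */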
+
+   /* dst = src * scale */
+   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE);
+}
+
+
+/** Convert from R32_UINT to 10_10_10_2_sscaled */
+static void
+emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_full_dst_register *dst,
+                      const struct tgsi_full_src_register *src)
+{
+   struct tgsi_full_src_register lshift =
+      make_immediate_reg_int4(emit, 22, 12, 2, 0);
+   struct tgsi_full_src_register rshift =
+      make_immediate_reg_int4(emit, 22, 22, 22, 30);
+
+   struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
+
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+
+   /*
+    * r = (pixel << 22) >> 22;   # signed int in [-512, 511]
+    * g = (pixel << 12) >> 22;   # signed int in [-512, 511]
+    * b = (pixel <<  2) >> 22;   # signed int in [-512, 511]
+    * a = (pixel <<  0) >> 30;   # signed int in [-2, 1]
+    * dst = i_to_f(r,g,b,a);     # convert to float
+    */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
+                        &src_xxxx, &lshift, FALSE);
+   emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
+                        &tmp_src, &rshift, FALSE);
+   emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE);
+
+   free_temp_indexes(emit);
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_ABS instruction.
+ */
+static boolean
+emit_abs(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = ABS(s0):
+    *   dst = abs(s0)
+    * Translates into:
+    *   MOV dst, abs(s0)
+    */
+   struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]);
+
+   /* MOV dst, abs(s0) */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &abs_src0, inst->Instruction.Saturate);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
+ */
+static boolean
+emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
+              const struct tgsi_full_instruction *inst)
+{
+   unsigned index = inst->Dst[0].Register.Index;
+   struct tgsi_full_dst_register dst;
+   unsigned opcode;
+
+   assert(index < MAX_VGPU10_ADDR_REGS);
+   dst = make_dst_temp_reg(emit->address_reg_index[index]);
+
+   /* ARL dst, s0
+    * Translates into:
+    * FTOI address_tmp, s0
+    *
+    * UARL dst, s0
+    * Translates into:
+    * MOV address_tmp, s0
+    */
+   if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
+      opcode = VGPU10_OPCODE_FTOI;
+   else
+      opcode = VGPU10_OPCODE_MOV;
+
+   emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_CAL instruction.
+ */
+static boolean
+emit_cal(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   unsigned label = inst->Label.Label;
+   VGPU10OperandToken0 operand;
+   operand.value = 0;
+   operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
+
+   begin_emit_instruction(emit);
+   /* a CALL instruction is an opcode token followed by a label operand */
+   emit_opcode(emit, VGPU10_OPCODE_CALL, FALSE);
+   emit_dword(emit, operand.value);
+   emit_dword(emit, label);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_IABS instruction.
+ */
+static boolean
+emit_iabs(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = (src0.x < 0) ? -src0.x : src0.x
+    * dst.y = (src0.y < 0) ? -src0.y : src0.y
+    * dst.z = (src0.z < 0) ? -src0.z : src0.z
+    * dst.w = (src0.w < 0) ? -src0.w : src0.w
+    *
+    * Translates into
+    *   IMAX dst, src, neg(src)
+    */
+   struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
+   emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
+                        &inst->Src[0], &neg_src, FALSE);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_CMP instruction.
+ */
+static boolean
+emit_cmp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = (src0.x < 0) ? src1.x : src2.x
+    * dst.y = (src0.y < 0) ? src1.y : src2.y
+    * dst.z = (src0.z < 0) ? src1.z : src2.z
+    * dst.w = (src0.w < 0) ? src1.w : src2.w
+    *
+    * Translates into
+    *   LT tmp, src0, 0.0
+    *   MOVC dst, tmp, src1, src2
+    */
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst,
+                        &inst->Src[0], &zero, FALSE);
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
+                        &tmp_src, &inst->Src[1], &inst->Src[2],
+                        inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_DP2A instruction.
+ */
+static boolean
+emit_dp2a(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
+    * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
+    * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
+    * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
+    * Translate into
+    *   MAD tmp.x, s0.y, s1.y, s2.x
+    *   MAD tmp.x, s0.x, s1.x, tmp.x
+    *   MOV dst.xyzw, tmp.xxxx
+    */
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+
+   struct tgsi_full_src_register tmp_src_xxxx =
+      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+   struct tgsi_full_dst_register tmp_dst_x =
+      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
+
+   struct tgsi_full_src_register src0_xxxx =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register src0_yyyy =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register src1_xxxx =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register src1_yyyy =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register src2_xxxx =
+      scalar_src(&inst->Src[2], TGSI_SWIZZLE_X);
+
+   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy,
+                        &src1_yyyy, &src2_xxxx, FALSE);
+   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx,
+                        &src1_xxxx, &tmp_src_xxxx, FALSE);
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &tmp_src_xxxx, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_DPH instruction.
+ */
+static boolean
+emit_dph(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /*
+    * DP3 tmp, s0, s1
+    * ADD dst, tmp, s1.wwww
+    */
+
+   struct tgsi_full_src_register s1_wwww =
+      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
+                  TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
+
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+
+   /* DP3 tmp, s0, s1 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0],
+                        &inst->Src[1], FALSE);
+
+   /* ADD dst, tmp, s1.wwww */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src,
+                        &s1_wwww, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_DST instruction.
+ */
+static boolean
+emit_dst(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /*
+    * dst.x = 1
+    * dst.y = src0.y * src1.y
+    * dst.z = src0.z
+    * dst.w = src1.w
+    */
+
+   struct tgsi_full_src_register s0_yyyy =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s0_zzzz =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
+   struct tgsi_full_src_register s1_yyyy =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s1_wwww =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
+
+   /*
+    * If dst is the same register as src0 or src1 we need
+    * to write to a temporary and insert an extra move.
+    */
+   unsigned tmp_move = get_temp_index(emit);
+   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
+   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
+
+   /* MOV dst.x, 1.0 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+      struct tgsi_full_dst_register dst_x =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
+      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
+   }
+
+   /* MUL dst.y, s0.y, s1.y */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+      struct tgsi_full_dst_register dst_y =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
+
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
+                           &s1_yyyy, inst->Instruction.Saturate);
+   }
+
+   /* MOV dst.z, s0.z */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+      struct tgsi_full_dst_register dst_z =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz,
+                           inst->Instruction.Saturate);
+   }
+
+   /* MOV dst.w, s1.w */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+      struct tgsi_full_dst_register dst_w =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww,
+                           inst->Instruction.Saturate);
+   }
+
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
+                        FALSE);
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
+ */
+static boolean
+emit_endprim(struct svga_shader_emitter_v10 *emit,
+             const struct tgsi_full_instruction *inst)
+{
+   assert(emit->unit == PIPE_SHADER_GEOMETRY);
+
+   /* We can't use emit_simple() because the TGSI instruction has one
+    * operand (vertex stream number) which we must ignore for VGPU10.
+    */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
+   end_emit_instruction(emit);
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
+ */
+static boolean
+emit_ex2(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
+    * while VGPU10 computes four values.
+    *
+    * dst = EX2(src):
+    *   dst.xyzw = 2.0 ^ src.x
+    */
+
+   struct tgsi_full_src_register src_xxxx =
+      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+   /* EXP dst, s0.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
+                        inst->Instruction.Saturate);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_EXP instruction.
+ */
+static boolean
+emit_exp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /*
+    * dst.x = 2 ^ floor(s0.x)
+    * dst.y = s0.x - floor(s0.x)
+    * dst.z = 2 ^ s0.x
+    * dst.w = 1.0
+    */
+
+   struct tgsi_full_src_register src_xxxx =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+
+   /*
+    * If dst is the same register as src we need to write
+    * to a temporary and insert an extra move.
+    */
+   unsigned tmp_move = get_temp_index(emit);
+   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
+   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
+
+   /* only use X component of temp reg */
+   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
+   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+
+   /* ROUND_NI tmp.x, s0.x */
+   emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
+                        &src_xxxx, FALSE); /* round to -infinity */
+
+   /* EXP dst.x, tmp.x */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+      struct tgsi_full_dst_register dst_x =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
+                           inst->Instruction.Saturate);
+   }
+
+   /* ADD dst.y, s0.x, -tmp */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+      struct tgsi_full_dst_register dst_y =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
+      struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
+
+      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
+                           &neg_tmp_src, inst->Instruction.Saturate);
+   }
+
+   /* EXP dst.z, s0.x */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+      struct tgsi_full_dst_register dst_z =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
+                           inst->Instruction.Saturate);
+   }
+
+   /* MOV dst.w, 1.0 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+      struct tgsi_full_dst_register dst_w =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
+      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one,
+                           FALSE);
+   }
+
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
+                        FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_IF instruction.
+ */
+static boolean
+emit_if(struct svga_shader_emitter_v10 *emit,
+        const struct tgsi_full_instruction *inst)
+{
+   VGPU10OpcodeToken0 opcode0;
+
+   /* The src register should be a scalar */
+   assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY &&
+          inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ &&
+          inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW);
+
+   /* The only special thing here is that we need to set the
+    * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
+    * src.x is non-zero.
+    */
+   opcode0.value = 0;
+   opcode0.opcodeType = VGPU10_OPCODE_IF;
+   opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
+
+   begin_emit_instruction(emit);
+   emit_dword(emit, opcode0.value);
+   emit_src_register(emit, &inst->Src[0]);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
+ * the register components are negative).
+ */
+static boolean
+emit_kill_if(struct svga_shader_emitter_v10 *emit,
+             const struct tgsi_full_instruction *inst)
+{
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+
+   struct tgsi_full_dst_register tmp_dst_x =
+      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_src_register tmp_src_xxxx =
+      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+
+   /* tmp = src[0] < 0.0 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
+                        &zero, FALSE);
+
+   if (!same_swizzle_terms(&inst->Src[0])) {
+      /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
+       * logically OR the swizzle terms.  Most uses of KILL_IF only
+       * test one channel so it's good to avoid these extra steps.
+       */
+      struct tgsi_full_src_register tmp_src_yyyy =
+         scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
+      struct tgsi_full_src_register tmp_src_zzzz =
+         scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
+      struct tgsi_full_src_register tmp_src_wwww =
+         scalar_src(&tmp_src, TGSI_SWIZZLE_W);
+
+      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
+                           &tmp_src_yyyy, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
+                           &tmp_src_zzzz, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
+                           &tmp_src_wwww, FALSE);
+   }
+
+   begin_emit_instruction(emit);
+   emit_discard_opcode(emit, TRUE); /* discard if tmp.x is non-zero */
+   emit_src_register(emit, &tmp_src_xxxx);
+   end_emit_instruction(emit);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
+ */
+static boolean
+emit_kill(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+
+   /* DISCARD if 0.0 is zero, i.e. unconditionally */
+   begin_emit_instruction(emit);
+   emit_discard_opcode(emit, FALSE);
+   emit_src_register(emit, &zero);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_LG2 instruction.
+ */
+static boolean
+emit_lg2(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
+    * while VGPU10 computes four values.
+    *
+    * dst = LG2(src):
+    *   dst.xyzw = log2(src.x)
+    */
+
+   struct tgsi_full_src_register src_xxxx =
+      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+   /* LOG dst, s0.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx,
+                        inst->Instruction.Saturate);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_LIT instruction.
+ */
+static boolean
+emit_lit(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /*
+    * If dst is the same register as src we need to write
+    * to a temporary and insert an extra move.
+    */
+   unsigned tmp_move = get_temp_index(emit);
+   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
+   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
+
+   /*
+    * dst.x = 1
+    * dst.y = max(src.x, 0)
+    * dst.z = (src.x > 0) ? max(src.y, 0)^clamp(src.w, -128, 128) : 0
+    * dst.w = 1
+    */
+
+   /* MOV dst.x, 1.0 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+      struct tgsi_full_dst_register dst_x =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
+   }
+
+   /* MOV dst.w, 1.0 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+      struct tgsi_full_dst_register dst_w =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
+   }
+
+   /* MAX dst.y, src.x, 0.0 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+      struct tgsi_full_dst_register dst_y =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
+      struct tgsi_full_src_register zero =
+         make_immediate_reg_float(emit, 0.0f);
+      struct tgsi_full_src_register src_xxxx =
+         swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                     TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
+                           &zero, inst->Instruction.Saturate);
+   }
+
+   /*
+    * tmp1 = clamp(src.w, -128, 128);
+    *   MAX tmp1, src.w, -128
+    *   MIN tmp1, tmp1, 128
+    *
+    * tmp2 = max(src.y, 0);
+    *   MAX tmp2, src.y, 0
+    *
+    * tmp1 = pow(tmp2, tmp1);
+    *   LOG tmp2, tmp2
+    *   MUL tmp1, tmp2, tmp1
+    *   EXP tmp1, tmp1
+    *
+    * tmp1 = (src.w == 0) ? 1 : tmp1;
+    *   EQ tmp2, 0, src.w
+    *   MOVC tmp1, tmp2, 1.0, tmp1
+    *
+    * dst.z = (0 < src.x) ? tmp1 : 0;
+    *   LT tmp2, 0, src.x
+    *   MOVC dst.z, tmp2, tmp1, 0.0
+    */
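+   /* The LOG/MUL/EXP sequence below computes pow(x, y) as
+    * exp2(y * log2(x)), since VGPU10's LOG and EXP are base 2.
+    */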
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+      struct tgsi_full_dst_register dst_z =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
+
+      unsigned tmp1 = get_temp_index(emit);
+      struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
+      struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
+      unsigned tmp2 = get_temp_index(emit);
+      struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
+      struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
+
+      struct tgsi_full_src_register src_xxxx =
+         scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+      struct tgsi_full_src_register src_yyyy =
+         scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+      struct tgsi_full_src_register src_wwww =
+         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
+
+      struct tgsi_full_src_register zero =
+         make_immediate_reg_float(emit, 0.0f);
+      struct tgsi_full_src_register lowerbound =
+         make_immediate_reg_float(emit, -128.0f);
+      struct tgsi_full_src_register upperbound =
+         make_immediate_reg_float(emit, 128.0f);
+
+      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
+                           &lowerbound, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
+                           &upperbound, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
+                           &zero, FALSE);
+
+      /* POW tmp1, tmp2, tmp1 */
+      /* LOG tmp2, tmp2 */
+      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src,
+                           FALSE);
+
+      /* MUL tmp1, tmp2, tmp1 */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
+                           &tmp1_src, FALSE);
+
+      /* EXP tmp1, tmp1 */
+      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src,
+                           FALSE);
+
+      /* EQ tmp2, 0, src.w */
+      emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero,
+                           &src_wwww, FALSE);
+      /* MOVC tmp1, tmp2, 1.0, tmp1 */
+      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
+                           &tmp2_src, &one, &tmp1_src, FALSE);
+
+      /* LT tmp2, 0, src.x */
+      emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero,
+                           &src_xxxx, FALSE);
+      /* MOVC dst.z, tmp2, tmp1, 0.0 */
+      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
+                           &tmp2_src, &tmp1_src, &zero, FALSE);
+   }
+
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
+                        FALSE);
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_LOG instruction.
+ */
+static boolean
+emit_log(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /*
+    * dst.x = floor(lg2(abs(s0.x)))
+    * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
+    * dst.z = lg2(abs(s0.x))
+    * dst.w = 1.0
+    */
+
+   struct tgsi_full_src_register src_xxxx =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
+
+   /* only use X component of temp reg */
+   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
+   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+
+   /* LOG tmp.x, abs(s0.x) */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
+      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst,
+                          &abs_src_xxxx, FALSE);
+   }
+
+   /* MOV dst.z, tmp.x */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+      struct tgsi_full_dst_register dst_z =
+         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z,
+                           &tmp_src, inst->Instruction.Saturate);
+   }
+
+   /* ROUND_NI tmp.x, tmp.x  (i.e. floor) */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
+      emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
+                           &tmp_src, FALSE);
+   }
+
+   /* MOV dst.x, tmp.x */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+      struct tgsi_full_dst_register dst_x =
+         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src,
+                           inst->Instruction.Saturate);
+   }
+
+   /* EXP tmp.x, tmp.x */
+   /* DIV dst.y, abs(s0.x), tmp.x */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+      struct tgsi_full_dst_register dst_y =
+         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src,
+                           FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
+                           &tmp_src, inst->Instruction.Saturate);
+   }
+
+   /* MOV dst.w, 1.0 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+      struct tgsi_full_dst_register dst_w =
+         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
+   }
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_LRP instruction.
+ */
+static boolean
+emit_lrp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = LRP(s0, s1, s2):
+    *   dst = s0 * (s1 - s2) + s2
+    * Translates into:
+    *   ADD tmp, s1, -s2;       tmp = s1 - s2
+    *   MAD dst, s0, tmp, s2;   dst = s0 * tmp + s2
+    */
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
+
+   /* ADD tmp, s1, -s2 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp,
+                        &inst->Src[1], &neg_src2, FALSE);
+
+   /* MAD dst, s0, tmp, s2 */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
+                        &inst->Src[0], &src_tmp, &inst->Src[2],
+                        inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_POW instruction.
+ */
+static boolean
+emit_pow(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
+    * src1.x while VGPU10 computes four values.
+    *
+    * dst = POW(src0, src1):
+    *   dst.xyzw = src0.x ^ src1.x
+    */
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register src0_xxxx =
+      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register src1_xxxx =
+      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+   /* LOG tmp, s0.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx,
+                        FALSE);
+
+   /* MUL tmp, tmp, s1.xxxx */
+   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src,
+                        &src1_xxxx, FALSE);
+
+   /* EXP dst, tmp */
+   emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
+                        &tmp_src, inst->Instruction.Saturate);
+
+   /* free tmp */
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
+ */
+static boolean
+emit_rcp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
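+   /* dst = RCP(src):
+    *   dst.xyzw = 1.0 / src.x
+    * Translates into:
+    *   DIV tmp.x, 1.0, src.x
+    *   MOV dst, tmp.xxxx
+    */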
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+
+   struct tgsi_full_dst_register tmp_dst_x =
+      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_src_register tmp_src_xxxx =
+      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+
+   /* DIV tmp.x, 1.0, s0 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one,
+                        &inst->Src[0], FALSE);
+
+   /* MOV dst, tmp.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &tmp_src_xxxx, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_RSQ instruction.
+ */
+static boolean
+emit_rsq(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = RSQ(src):
+    *   dst.xyzw = 1 / sqrt(src.x)
+    * Translates into:
+    *   RSQ tmp, src.x
+    *   MOV dst, tmp.xxxx
+    */
+
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+
+   struct tgsi_full_dst_register tmp_dst_x =
+      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_src_register tmp_src_xxxx =
+      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+
+   /* RSQ tmp, src.x */
+   emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x,
+                        &inst->Src[0], FALSE);
+
+   /* MOV dst, tmp.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &tmp_src_xxxx, inst->Instruction.Saturate);
+
+   /* free tmp */
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SCS instruction.
+ */
+static boolean
+emit_scs(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = cos(src.x)
+    * dst.y = sin(src.x)
+    * dst.z = 0.0
+    * dst.w = 1.0
+    */
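+   /* VGPU10's SINCOS writes sine to its first destination register and
+    * cosine to its second, so emit it once with dst.y and dst.x, then
+    * fill zw with (0, 1) via a MOV.
+    */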
+   struct tgsi_full_dst_register dst_x =
+      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
+   struct tgsi_full_dst_register dst_y =
+      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
+   struct tgsi_full_dst_register dst_zw =
+      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);
+
+   struct tgsi_full_src_register zero_one =
+      make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);
+
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
+   emit_dst_register(emit, &dst_y);
+   emit_dst_register(emit, &dst_x);
+   emit_src_register(emit, &inst->Src[0]);
+   end_emit_instruction(emit);
+
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                        &dst_zw, &zero_one, inst->Instruction.Saturate);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
+ */
+static boolean
+emit_seq(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SEQ(s0, s1):
+    *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
+    * Translates into:
+    *   EQ tmp, s0, s1;           tmp = s0 == s1 ? 0xffffffff : 0 (per comp)
+    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
+    */
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* EQ tmp, s0, s1 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
+                        &inst->Src[1], FALSE);
+
+   /* MOVC dst, tmp, one, zero */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
+                        &one, &zero, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
+ */
+static boolean
+emit_sge(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SGE(s0, s1):
+    *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
+    * Translates into:
+    *   GE tmp, s0, s1;           tmp = s0 >= s1 ? 0xffffffff : 0 (per comp)
+    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
+    */
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* GE tmp, s0, s1 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
+                        &inst->Src[1], FALSE);
+
+   /* MOVC dst, tmp, one, zero */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
+                        &one, &zero, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
+ */
+static boolean
+emit_sgt(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SGT(s0, s1):
+    *   dst = s0 > s1 ? 1.0 : 0.0  (per component)
+    * Translates into:
+    *   LT tmp, s1, s0;           tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
+    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
+    */
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* LT tmp, s1, s0 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
+                        &inst->Src[0], FALSE);
+
+   /* MOVC dst, tmp, one, zero */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
+                        &one, &zero, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
+ */
+static boolean
+emit_sincos(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst)
+{
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+
+   struct tgsi_full_src_register tmp_src_xxxx =
+      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+   struct tgsi_full_dst_register tmp_dst_x =
+      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
+
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
+
+   if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
+      emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
+      emit_null_dst_register(emit);  /* second destination register */
+   }
+   else {
+      emit_null_dst_register(emit);
+      emit_dst_register(emit, &tmp_dst_x);
+   }
+
+   emit_src_register(emit, &inst->Src[0]);
+   end_emit_instruction(emit);
+
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &tmp_src_xxxx, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
+ */
+static boolean
+emit_sle(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SLE(s0, s1):
+    *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
+    * Translates into:
+    *   GE tmp, s1, s0;           tmp = s1 >= s0 ? 0xffffffff : 0 (per comp)
+    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
+    */
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* GE tmp, s1, s0 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
+                        &inst->Src[0], FALSE);
+
+   /* MOVC dst, tmp, one, zero */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
+                        &one, &zero, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
+ */
+static boolean
+emit_slt(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SLT(s0, s1):
+    *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
+    * Translates into:
+    *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
+    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
+    */
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* LT tmp, s0, s1 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
+                        &inst->Src[1], FALSE);
+
+   /* MOVC dst, tmp, one, zero */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
+                        &one, &zero, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
+ */
+static boolean
+emit_sne(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SNE(s0, s1):
+    *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
+    * Translates into:
+    *   NE tmp, s0, s1;           tmp = s0 != s1 ? 0xffffffff : 0 (per comp)
+    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
+    */
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   /* NE tmp, s0, s1 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
+                        &inst->Src[1], FALSE);
+
+   /* MOVC dst, tmp, one, zero */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
+                        &one, &zero, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
+ */
+static boolean
+emit_ssg(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
+    * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
+    * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
+    * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
+    * Translates into:
+    *   LT tmp1, src, zero;           tmp1 = src < zero ? 0xffffffff : 0 (per comp)
+    *   MOVC tmp2, tmp1, -1.0, 0.0;   tmp2 = tmp1 ? -1.0 : 0.0 (per component)
+    *   LT tmp1, zero, src;           tmp1 = zero < src ? 0xffffffff : 0 (per comp)
+    *   MOVC dst, tmp1, 1.0, tmp2;    dst = tmp1 ? 1.0 : tmp2 (per component)
+    */
+   struct tgsi_full_src_register zero =
+      make_immediate_reg_float(emit, 0.0f);
+   struct tgsi_full_src_register one =
+      make_immediate_reg_float(emit, 1.0f);
+   struct tgsi_full_src_register neg_one =
+      make_immediate_reg_float(emit, -1.0f);
+
+   unsigned tmp1 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
+   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
+
+   unsigned tmp2 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
+   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
+
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
+                        &zero, FALSE);
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
+                        &neg_one, &zero, FALSE);
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
+                        &inst->Src[0], FALSE);
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
+                        &one, &tmp2_src, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
+ */
+static boolean
+emit_issg(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
+    * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
+    * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
+    * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
+    * Translates into:
+    *   ILT tmp1, src, 0              tmp1 = src < 0 ? -1 : 0 (per component)
+    *   ILT tmp2, 0, src              tmp2 = 0 < src ? -1 : 0 (per component)
+    *   IADD dst, tmp1, neg(tmp2)     dst  = tmp1 - tmp2      (per component)
+    */
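+   /* Note: a float immediate of 0.0f is safe for the integer ILT
+    * comparisons below since 0.0f and integer 0 share the same bit
+    * pattern.
+    */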
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+
+   unsigned tmp1 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
+   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
+
+   unsigned tmp2 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
+   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
+
+   struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
+
+   emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
+                        &inst->Src[0], &zero, FALSE);
+   emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
+                        &zero, &inst->Src[0], FALSE);
+   emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
+                        &tmp1_src, &neg_tmp2, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SUB instruction.
+ */
+static boolean
+emit_sub(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst = SUB(s0, s1):
+    *   dst = s0 - s1
+    * Translates into:
+    *   ADD dst, s0, neg(s1)
+    */
+   struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]);
+
+   /* ADD dst, s0, neg(s1) */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0],
+                        &inst->Src[0], &neg_src1,
+                        inst->Instruction.Saturate);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit a comparison instruction.  The dest register will get
+ * 0 or ~0 values depending on the outcome of comparing src0 to src1.
+ */
+static void
+emit_comparison(struct svga_shader_emitter_v10 *emit,
+                SVGA3dCmpFunc func,
+                const struct tgsi_full_dst_register *dst,
+                const struct tgsi_full_src_register *src0,
+                const struct tgsi_full_src_register *src1)
+{
+   struct tgsi_full_src_register immediate;
+   VGPU10OpcodeToken0 opcode0;
+   boolean swapSrc = FALSE;
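+   /* VGPU10 has no LE or GT comparison opcodes; LESSEQUAL and GREATER
+    * are implemented by swapping the source operands of GE and LT,
+    * respectively.
+    */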
+
+   /* Sanity checks for svga vs. gallium enums */
+   STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
+   STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
+
+   opcode0.value = 0;
+
+   switch (func) {
+   case SVGA3D_CMP_NEVER:
+      immediate = make_immediate_reg_int(emit, 0);
+      /* MOV dst, {0} */
+      begin_emit_instruction(emit);
+      emit_dword(emit, VGPU10_OPCODE_MOV);
+      emit_dst_register(emit, dst);
+      emit_src_register(emit, &immediate);
+      end_emit_instruction(emit);
+      return;
+   case SVGA3D_CMP_ALWAYS:
+      immediate = make_immediate_reg_int(emit, -1);
+      /* MOV dst, {-1} */
+      begin_emit_instruction(emit);
+      emit_dword(emit, VGPU10_OPCODE_MOV);
+      emit_dst_register(emit, dst);
+      emit_src_register(emit, &immediate);
+      end_emit_instruction(emit);
+      return;
+   case SVGA3D_CMP_LESS:
+      opcode0.opcodeType = VGPU10_OPCODE_LT;
+      break;
+   case SVGA3D_CMP_EQUAL:
+      opcode0.opcodeType = VGPU10_OPCODE_EQ;
+      break;
+   case SVGA3D_CMP_LESSEQUAL:
+      opcode0.opcodeType = VGPU10_OPCODE_GE;
+      swapSrc = TRUE;
+      break;
+   case SVGA3D_CMP_GREATER:
+      opcode0.opcodeType = VGPU10_OPCODE_LT;
+      swapSrc = TRUE;
+      break;
+   case SVGA3D_CMP_NOTEQUAL:
+      opcode0.opcodeType = VGPU10_OPCODE_NE;
+      break;
+   case SVGA3D_CMP_GREATEREQUAL:
+      opcode0.opcodeType = VGPU10_OPCODE_GE;
+      break;
+   default:
+      assert(!"Unexpected comparison mode");
+      opcode0.opcodeType = VGPU10_OPCODE_EQ;
+   }
+
+   begin_emit_instruction(emit);
+   emit_dword(emit, opcode0.value);
+   emit_dst_register(emit, dst);
+   if (swapSrc) {
+      emit_src_register(emit, src1);
+      emit_src_register(emit, src0);
+   }
+   else {
+      emit_src_register(emit, src0);
+      emit_src_register(emit, src1);
+   }
+   end_emit_instruction(emit);
+}
+
+
+/**
+ * Get texel/address offsets for a texture instruction.
+ */
+static void
+get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
+                  const struct tgsi_full_instruction *inst, int offsets[3])
+{
+   if (inst->Texture.NumOffsets == 1) {
+      /* According to the OpenGL Shading Language spec, the offsets are only
+       * fetched from a previously-declared immediate/literal.
+       */
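+      /* Illustrative example (hypothetical shader): a GLSL
+       * textureOffset(s, tc, ivec2(1, -2)) call would arrive here as an
+       * immediate holding {1, -2}, referenced by index plus
+       * per-component swizzles.
+       */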
+      const struct tgsi_texture_offset *off = inst->TexOffsets;
+      const unsigned index = off[0].Index;
+      const unsigned swizzleX = off[0].SwizzleX;
+      const unsigned swizzleY = off[0].SwizzleY;
+      const unsigned swizzleZ = off[0].SwizzleZ;
+      const union tgsi_immediate_data *imm = emit->immediates[index];
+
+      assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
+
+      offsets[0] = imm[swizzleX].Int;
+      offsets[1] = imm[swizzleY].Int;
+      offsets[2] = imm[swizzleZ].Int;
+   }
+   else {
+      offsets[0] = offsets[1] = offsets[2] = 0;
+   }
+}
+
+
+/**
+ * Set up the coordinate register for texture sampling.
+ * When we're sampling from a RECT texture we have to scale the
+ * unnormalized coordinate to a normalized coordinate.
+ * We do that by multiplying the coordinate by an "extra" constant.
+ * An alternative would be to use the RESINFO instruction to query the
+ * texture's size.
+ */
+static struct tgsi_full_src_register
+setup_texcoord(struct svga_shader_emitter_v10 *emit,
+               unsigned unit,
+               const struct tgsi_full_src_register *coord)
+{
+   if (emit->key.tex[unit].unnormalized) {
+      unsigned scale_index = emit->texcoord_scale_index[unit];
+      unsigned tmp = get_temp_index(emit);
+      struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+      struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+      struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
+
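+      /* The extra constant presumably holds the reciprocal texture size,
+       * e.g. {1/width, 1/height, 1, 1}: for a 128x64 RECT texture, a
+       * coord of (96, 32) normalizes to (0.75, 0.5).
+       */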
+      /* MUL tmp, coord, const[] */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
+                           coord, &scale_src, FALSE);
+      return tmp_src;
+   }
+   else {
+      /* use texcoord as-is */
+      return *coord;
+   }
+}
+
+
+/**
+ * For SAMPLE_C instructions, emit the extra src register which indicates
+ * the reference/comparison value.
+ */
+static void
+emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
+                          unsigned target,
+                          const struct tgsi_full_src_register *coord)
+{
+   struct tgsi_full_src_register coord_src_ref;
+   unsigned component;
+
+   assert(tgsi_is_shadow_target(target));
+
+   assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */
+   if (target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
+       target == TGSI_TEXTURE_SHADOWCUBE)
+      component = TGSI_SWIZZLE_W;
+   else
+      component = TGSI_SWIZZLE_Z;
+
+   coord_src_ref = scalar_src(coord, component);
+
+   emit_src_register(emit, &coord_src_ref);
+}
+
+
+/**
+ * Info for implementing texture swizzles.
+ * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
+ * functions use this to encapsulate the extra steps needed to perform
+ * a texture swizzle, or shadow/depth comparisons.
+ * The shadow/depth comparison is only done here if for the cases where
+ * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
+ */
+struct tex_swizzle_info
+{
+   boolean swizzled;
+   boolean shadow_compare;
+   unsigned unit;
+   unsigned texture_target;  /**< TGSI_TEXTURE_x */
+   struct tgsi_full_src_register tmp_src;
+   struct tgsi_full_dst_register tmp_dst;
+   const struct tgsi_full_dst_register *inst_dst;
+   const struct tgsi_full_src_register *coord_src;
+};
+
+
+/**
+ * Do setup for handling texture swizzles or shadow compares.
+ * \param unit  the texture unit
+ * \param inst  the TGSI texture instruction
+ * \param shadow_compare  do shadow/depth comparison?
+ * \param swz  returns the swizzle info
+ */
+static void
+begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
+                  unsigned unit,
+                  const struct tgsi_full_instruction *inst,
+                  boolean shadow_compare,
+                  struct tex_swizzle_info *swz)
+{
+   swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
+                    emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
+                    emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
+                    emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
+
+   swz->shadow_compare = shadow_compare;
+   swz->texture_target = inst->Texture.Texture;
+
+   if (swz->swizzled || shadow_compare) {
+      /* Allocate temp register for the result of the SAMPLE instruction
+       * and the source of the MOV/compare/swizzle instructions.
+       */
+      unsigned tmp = get_temp_index(emit);
+      swz->tmp_src = make_src_temp_reg(tmp);
+      swz->tmp_dst = make_dst_temp_reg(tmp);
+
+      swz->unit = unit;
+   }
+   swz->inst_dst = &inst->Dst[0];
+   swz->coord_src = &inst->Src[0];
+}
+
+
+/**
+ * Returns the register to put the SAMPLE instruction results into.
+ * This will either be the original instruction dst reg (if no swizzle
+ * and no shadow comparison) or a temporary reg if there is a swizzle.
+ */
+static const struct tgsi_full_dst_register *
+get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
+{
+   return (swz->swizzled || swz->shadow_compare)
+      ? &swz->tmp_dst : swz->inst_dst;
+}
+
+
+/**
+ * This emits the MOV instruction that actually implements a texture swizzle
+ * and/or shadow comparison.
+ */
+static void
+end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
+                const struct tex_swizzle_info *swz)
+{
+   if (swz->shadow_compare) {
+      /* Emit extra instructions to compare the fetched texel value against
+       * a texture coordinate component.  The result of the comparison
+       * is 0.0 or 1.0.
+       */
+      struct tgsi_full_src_register coord_src;
+      struct tgsi_full_src_register texel_src =
+         scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+      /* convert gallium comparison func to SVGA comparison func */
+      SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
+
+      assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+      switch (swz->texture_target) {
+      case TGSI_TEXTURE_SHADOW2D:
+      case TGSI_TEXTURE_SHADOWRECT:
+      case TGSI_TEXTURE_SHADOW1D_ARRAY:
+         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
+         break;
+      case TGSI_TEXTURE_SHADOW1D:
+         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y);
+         break;
+      case TGSI_TEXTURE_SHADOWCUBE:
+      case TGSI_TEXTURE_SHADOW2D_ARRAY:
+         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W);
+         break;
+      default:
+         assert(!"Unexpected texture target in end_tex_swizzle()");
+         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
+      }
+
+      /* COMPARE tmp, coord, texel */
+      /* XXX it would seem that the texel and coord arguments should
+       * be transposed here, but piglit tests indicate otherwise.
+       */
+      emit_comparison(emit, compare_func,
+                      &swz->tmp_dst, &texel_src, &coord_src);
+
+      /* AND dest, tmp, {1.0} */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
+      if (swz->swizzled) {
+         emit_dst_register(emit, &swz->tmp_dst);
+      }
+      else {
+         emit_dst_register(emit, swz->inst_dst);
+      }
+      emit_src_register(emit, &swz->tmp_src);
+      emit_src_register(emit, &one);
+      end_emit_instruction(emit);
+   }
+
+   if (swz->swizzled) {
+      unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
+      unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
+      unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
+      unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
+      unsigned writemask_0 = 0, writemask_1 = 0;
+      boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type);
+
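+      /* Worked example (assumed key state): a swizzle of {Z, ONE, X,
+       * ZERO} becomes MOV dst, tmp.zyxw (the ONE/ZERO slots keep an
+       * identity channel), followed by MOV dst.w, {0} and MOV dst.y, {1}
+       * via the writemasks computed below.
+       */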
+      /* Swizzle without the zero/one terms (those are patched in below) */
+      struct tgsi_full_src_register src_swizzled =
+         swizzle_src(&swz->tmp_src,
+                     swz_r < PIPE_SWIZZLE_ZERO ? swz_r : PIPE_SWIZZLE_RED,
+                     swz_g < PIPE_SWIZZLE_ZERO ? swz_g : PIPE_SWIZZLE_GREEN,
+                     swz_b < PIPE_SWIZZLE_ZERO ? swz_b : PIPE_SWIZZLE_BLUE,
+                     swz_a < PIPE_SWIZZLE_ZERO ? swz_a : PIPE_SWIZZLE_ALPHA);
+
+      /* MOV dst, color(tmp).<swizzle> */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                           swz->inst_dst, &src_swizzled, FALSE);
+
+      /* handle swizzle zero terms */
+      writemask_0 = (((swz_r == PIPE_SWIZZLE_ZERO) << 0) |
+                     ((swz_g == PIPE_SWIZZLE_ZERO) << 1) |
+                     ((swz_b == PIPE_SWIZZLE_ZERO) << 2) |
+                     ((swz_a == PIPE_SWIZZLE_ZERO) << 3));
+
+      if (writemask_0) {
+         struct tgsi_full_src_register zero = int_tex ?
+            make_immediate_reg_int(emit, 0) :
+            make_immediate_reg_float(emit, 0.0f);
+         struct tgsi_full_dst_register dst =
+            writemask_dst(swz->inst_dst, writemask_0);
+
+         /* MOV dst.writemask_0, {0,0,0,0} */
+         emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                              &dst, &zero, FALSE);
+      }
+
+      /* handle swizzle one terms */
+      writemask_1 = (((swz_r == PIPE_SWIZZLE_ONE) << 0) |
+                     ((swz_g == PIPE_SWIZZLE_ONE) << 1) |
+                     ((swz_b == PIPE_SWIZZLE_ONE) << 2) |
+                     ((swz_a == PIPE_SWIZZLE_ONE) << 3));
+
+      if (writemask_1) {
+         struct tgsi_full_src_register one = int_tex ?
+            make_immediate_reg_int(emit, 1) :
+            make_immediate_reg_float(emit, 1.0f);
+         struct tgsi_full_dst_register dst =
+            writemask_dst(swz->inst_dst, writemask_1);
+
+         /* MOV dst.writemask_1, {1,1,1,1} */
+         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
+      }
+   }
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SAMPLE instruction.
+ */
+static boolean
+emit_sample(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst)
+{
+   const unsigned resource_unit = inst->Src[1].Register.Index;
+   const unsigned sampler_unit = inst->Src[2].Register.Index;
+   struct tgsi_full_src_register coord;
+   int offsets[3];
+   struct tex_swizzle_info swz_info;
+
+   begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
+
+   get_texel_offsets(emit, inst, offsets);
+
+   coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
+
+   /* SAMPLE dst, coord(s0), resource, sampler */
+   begin_emit_instruction(emit);
+
+   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
+                      inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+   emit_src_register(emit, &coord);
+   emit_resource_register(emit, resource_unit);
+   emit_sampler_register(emit, sampler_unit);
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swz_info);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Check if a texture instruction is valid.
+ * An example of an invalid texture instruction is doing shadow comparison
+ * with an integer-valued texture.
+ * If we detect an invalid texture instruction, we replace it with:
+ *   MOV dst, {1,1,1,1};
+ * \return TRUE if valid, FALSE if invalid.
+ */
+static boolean
+is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
+                         const struct tgsi_full_instruction *inst)
+{
+   const unsigned unit = inst->Src[1].Register.Index;
+   const unsigned target = inst->Texture.Texture;
+   boolean valid = TRUE;
+
+   if (tgsi_is_shadow_target(target) &&
+       is_integer_type(emit->key.tex[unit].return_type)) {
+      debug_printf("Invalid SAMPLE_C with an integer texture!\n");
+      valid = FALSE;
+   }
+   /* XXX might check for other conditions in the future here */
+
+   if (!valid) {
+      /* emit a MOV dst, {1,1,1,1} instruction. */
+      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+      emit_dst_register(emit, &inst->Dst[0]);
+      emit_src_register(emit, &one);
+      end_emit_instruction(emit);
+   }
+
+   return valid;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
+ */
+static boolean
+emit_tex(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[1].Register.Index;
+   unsigned target = inst->Texture.Texture;
+   unsigned opcode;
+   struct tgsi_full_src_register coord;
+   int offsets[3];
+   struct tex_swizzle_info swz_info;
+
+   /* check that the sampler returns a float */
+   if (!is_valid_tex_instruction(emit, inst))
+      return TRUE;
+
+   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
+
+   get_texel_offsets(emit, inst, offsets);
+
+   coord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+   /* SAMPLE dst, coord(s0), resource, sampler */
+   begin_emit_instruction(emit);
+
+   if (tgsi_is_shadow_target(target))
+      opcode = VGPU10_OPCODE_SAMPLE_C;
+   else
+      opcode = VGPU10_OPCODE_SAMPLE;
+
+   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+   emit_src_register(emit, &coord);
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
+      emit_tex_compare_refcoord(emit, target, &coord);
+   }
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swz_info);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXP (projective texture)
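+ * The coordinate is divided by its own .w before sampling, so a coord
+ * of (s, t, r, q) effectively samples at (s/q, t/q, r/q).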
+ */
+static boolean
+emit_txp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[1].Register.Index;
+   unsigned target = inst->Texture.Texture;
+   unsigned opcode;
+   int offsets[3];
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register src0_wwww =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
+   struct tgsi_full_src_register coord;
+   struct tex_swizzle_info swz_info;
+
+   /* check that the sampler returns a float */
+   if (!is_valid_tex_instruction(emit, inst))
+      return TRUE;
+
+   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
+
+   get_texel_offsets(emit, inst, offsets);
+
+   coord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+   /* DIV tmp, coord, coord.wwww */
+   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
+                        &coord, &src0_wwww, FALSE);
+
+   /* SAMPLE dst, coord(tmp), resource, sampler */
+   begin_emit_instruction(emit);
+
+   if (tgsi_is_shadow_target(target))
+      opcode = VGPU10_OPCODE_SAMPLE_C;
+   else
+      opcode = VGPU10_OPCODE_SAMPLE;
+
+   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+   emit_src_register(emit, &tmp_src);  /* projected coord */
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
+      emit_tex_compare_refcoord(emit, target, &tmp_src);
+   }
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swz_info);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/*
+ * Emit code for TGSI_OPCODE_XPD instruction.
+ */
+static boolean
+emit_xpd(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   /* dst.x = src0.y * src1.z - src1.y * src0.z
+    * dst.y = src0.z * src1.x - src1.z * src0.x
+    * dst.z = src0.x * src1.y - src1.x * src0.y
+    * dst.w = 1
+    */
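+   /* Sanity example: (1,0,0) x (0,1,0) = (0,0,1), with w = 1. */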
+   struct tgsi_full_src_register s0_xxxx =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register s0_yyyy =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s0_zzzz =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
+
+   struct tgsi_full_src_register s1_xxxx =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register s1_yyyy =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s1_zzzz =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
+
+   unsigned tmp1 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
+   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
+
+   unsigned tmp2 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
+   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
+   struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
+
+   unsigned tmp3 = get_temp_index(emit);
+   struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
+   struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
+   struct tgsi_full_dst_register tmp3_dst_x =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_dst_register tmp3_dst_y =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
+   struct tgsi_full_dst_register tmp3_dst_z =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
+   struct tgsi_full_dst_register tmp3_dst_w =
+      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
+
+   /* Note: we put all the intermediate computations into tmp3 in case
+    * the XPD dest register is the same as one of the src regs (in which
+    * case we could clobber a src reg before we're done with it).
+    *
+    * Note: we could get by with just one temp register instead of three
+    * since we're doing scalar operations and there's enough room in one
+    * temp for everything.
+    */
+
+   /* MUL tmp1, src0.y, src1.z */
+   /* MUL tmp2, src1.y, src0.z */
+   /* ADD tmp3.x, tmp1, -tmp2 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
+                           &s0_yyyy, &s1_zzzz, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
+                           &s1_yyyy, &s0_zzzz, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
+                           &tmp1_src, &neg_tmp2_src, FALSE);
+   }
+
+   /* MUL tmp1, src0.z, src1.x */
+   /* MUL tmp2, src1.z, src0.x */
+   /* ADD tmp3.y, tmp1, -tmp2 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
+                           &s1_xxxx, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
+                           &s0_xxxx, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
+                           &tmp1_src, &neg_tmp2_src, FALSE);
+   }
+
+   /* MUL tmp1, src0.x, src1.y */
+   /* MUL tmp2, src1.x, src0.y */
+   /* ADD tmp3.z, tmp1, -tmp2 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
+                           &s1_yyyy, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
+                           &s0_yyyy, FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
+                           &tmp1_src, &neg_tmp2_src, FALSE);
+   }
+
+   /* MOV tmp3.w, 1.0 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
+   }
+
+   /* MOV dst, tmp3 */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
+                        inst->Instruction.Saturate);
+
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
+ */
+static boolean
+emit_txd(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[3].Register.Index;
+   unsigned target = inst->Texture.Texture;
+   int offsets[3];
+   struct tgsi_full_src_register coord;
+   struct tex_swizzle_info swz_info;
+
+   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
+                     &swz_info);
+
+   get_texel_offsets(emit, inst, offsets);
+
+   coord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+   /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
+   begin_emit_instruction(emit);
+   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
+                      inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+   emit_src_register(emit, &coord);
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
+   emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swz_info);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXF (texel fetch)
+ */
+static boolean
+emit_txf(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[1].Register.Index;
+   const unsigned msaa = emit->key.tex[unit].texture_msaa;
+   int offsets[3];
+   struct tex_swizzle_info swz_info;
+
+   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
+
+   get_texel_offsets(emit, inst, offsets);
+
+   if (msaa) {
+      /* Fetch one sample from an MSAA texture */
+      struct tgsi_full_src_register sampleIndex =
+         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
+      /* LD_MS dst, coord(s0), resource, sampleIndex */
+      begin_emit_instruction(emit);
+      emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
+                         inst->Instruction.Saturate, offsets);
+      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+      emit_src_register(emit, &inst->Src[0]);
+      emit_resource_register(emit, unit);
+      emit_src_register(emit, &sampleIndex);
+      end_emit_instruction(emit);
+   }
+   else {
+      /* Fetch one texel specified by integer coordinate */
+      /* LD dst, coord(s0), resource */
+      begin_emit_instruction(emit);
+      emit_sample_opcode(emit, VGPU10_OPCODE_LD,
+                         inst->Instruction.Saturate, offsets);
+      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+      emit_src_register(emit, &inst->Src[0]);
+      emit_resource_register(emit, unit);
+      end_emit_instruction(emit);
+   }
+
+   end_tex_swizzle(emit, &swz_info);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
+ * or TGSI_OPCODE_TXB2 (for cube shadow maps).
+ */
+static boolean
+emit_txl_txb(struct svga_shader_emitter_v10 *emit,
+             const struct tgsi_full_instruction *inst)
+{
+   unsigned target = inst->Texture.Texture;
+   unsigned opcode, unit;
+   int offsets[3];
+   struct tgsi_full_src_register coord, lod_bias;
+   struct tex_swizzle_info swz_info;
+
+   assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
+          inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
+          inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
+
+   if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
+      lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
+      unit = inst->Src[2].Register.Index;
+   }
+   else {
+      lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
+      unit = inst->Src[1].Register.Index;
+   }
+
+   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
+                     &swz_info);
+
+   get_texel_offsets(emit, inst, offsets);
+
+   coord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+   /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod_bias */
+   begin_emit_instruction(emit);
+   if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
+      opcode = VGPU10_OPCODE_SAMPLE_L;
+   }
+   else {
+      opcode = VGPU10_OPCODE_SAMPLE_B;
+   }
+   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+   emit_src_register(emit, &coord);
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   emit_src_register(emit, &lod_bias);
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swz_info);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
+ */
+static boolean
+emit_txq(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[1].Register.Index;
+
+   if (emit->key.tex[unit].texture_target == PIPE_BUFFER) {
+      /* RESINFO does not support querying texture buffers, so we instead
+       * store texture buffer sizes in shader constants, then copy them to
+       * implement TXQ instead of emitting RESINFO.
+       * MOV dst, const[texture_buffer_size_index[unit]]
+       */
+      struct tgsi_full_src_register size_src =
+         make_src_const_reg(emit->texture_buffer_size_index[unit]);
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src,
+                           FALSE);
+   } else {
+      /* RESINFO dst, srcMipLevel, resource */
+      begin_emit_instruction(emit);
+      emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
+      emit_dst_register(emit, &inst->Dst[0]);
+      emit_src_register(emit, &inst->Src[0]);
+      emit_resource_register(emit, unit);
+      end_emit_instruction(emit);
+   }
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit a simple instruction (like ADD, MUL, MIN, etc).
+ */
+static boolean
+emit_simple(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst)
+{
+   const unsigned opcode = inst->Instruction.Opcode;
+   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
+   unsigned i;
+
+   begin_emit_instruction(emit);
+   emit_opcode(emit, translate_opcode(opcode),
+               inst->Instruction.Saturate);
+   for (i = 0; i < op->num_dst; i++) {
+      emit_dst_register(emit, &inst->Dst[i]);
+   }
+   for (i = 0; i < op->num_src; i++) {
+      emit_src_register(emit, &inst->Src[i]);
+   }
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit a simple VGPU10 instruction which writes to multiple dest registers,
+ * where TGSI only uses one dest register.
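+ * For example (per the dispatch cases in emit_vgpu10_instruction()),
+ * VGPU10's UDIV produces both a quotient and a remainder while TGSI's
+ * UDIV wants only the quotient, so the remainder slot gets a null dest.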
+ */
+static boolean
+emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
+                 const struct tgsi_full_instruction *inst,
+                 unsigned dst_count,
+                 unsigned dst_index)
+{
+   const unsigned opcode = inst->Instruction.Opcode;
+   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
+   unsigned i;
+
+   begin_emit_instruction(emit);
+   emit_opcode(emit, translate_opcode(opcode),
+               inst->Instruction.Saturate);
+
+   for (i = 0; i < dst_count; i++) {
+      if (i == dst_index) {
+         emit_dst_register(emit, &inst->Dst[0]);
+      } else {
+         emit_null_dst_register(emit);
+      }
+   }
+
+   for (i = 0; i < op->num_src; i++) {
+      emit_src_register(emit, &inst->Src[i]);
+   }
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate a single TGSI instruction to VGPU10.
+ */
+static boolean
+emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
+                        unsigned inst_number,
+                        const struct tgsi_full_instruction *inst)
+{
+   const unsigned opcode = inst->Instruction.Opcode;
+
+   switch (opcode) {
+   case TGSI_OPCODE_ADD:
+   case TGSI_OPCODE_AND:
+   case TGSI_OPCODE_BGNLOOP:
+   case TGSI_OPCODE_BRK:
+   case TGSI_OPCODE_CEIL:
+   case TGSI_OPCODE_CONT:
+   case TGSI_OPCODE_DDX:
+   case TGSI_OPCODE_DDY:
+   case TGSI_OPCODE_DIV:
+   case TGSI_OPCODE_DP2:
+   case TGSI_OPCODE_DP3:
+   case TGSI_OPCODE_DP4:
+   case TGSI_OPCODE_ELSE:
+   case TGSI_OPCODE_ENDIF:
+   case TGSI_OPCODE_ENDLOOP:
+   case TGSI_OPCODE_ENDSUB:
+   case TGSI_OPCODE_F2I:
+   case TGSI_OPCODE_F2U:
+   case TGSI_OPCODE_FLR:
+   case TGSI_OPCODE_FRC:
+   case TGSI_OPCODE_FSEQ:
+   case TGSI_OPCODE_FSGE:
+   case TGSI_OPCODE_FSLT:
+   case TGSI_OPCODE_FSNE:
+   case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_IMAX:
+   case TGSI_OPCODE_IMIN:
+   case TGSI_OPCODE_INEG:
+   case TGSI_OPCODE_ISGE:
+   case TGSI_OPCODE_ISHR:
+   case TGSI_OPCODE_ISLT:
+   case TGSI_OPCODE_MAD:
+   case TGSI_OPCODE_MAX:
+   case TGSI_OPCODE_MIN:
+   case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_MUL:
+   case TGSI_OPCODE_NOP:
+   case TGSI_OPCODE_NOT:
+   case TGSI_OPCODE_OR:
+   case TGSI_OPCODE_RET:
+   case TGSI_OPCODE_UADD:
+   case TGSI_OPCODE_USEQ:
+   case TGSI_OPCODE_USGE:
+   case TGSI_OPCODE_USLT:
+   case TGSI_OPCODE_UMIN:
+   case TGSI_OPCODE_UMAD:
+   case TGSI_OPCODE_UMAX:
+   case TGSI_OPCODE_ROUND:
+   case TGSI_OPCODE_SQRT:
+   case TGSI_OPCODE_SHL:
+   case TGSI_OPCODE_TRUNC:
+   case TGSI_OPCODE_U2F:
+   case TGSI_OPCODE_UCMP:
+   case TGSI_OPCODE_USHR:
+   case TGSI_OPCODE_USNE:
+   case TGSI_OPCODE_XOR:
+      /* simple instructions */
+      return emit_simple(emit, inst);
+
+
+   case TGSI_OPCODE_EMIT:
+      return emit_vertex(emit, inst);
+   case TGSI_OPCODE_ENDPRIM:
+      return emit_endprim(emit, inst);
+   case TGSI_OPCODE_ABS:
+      return emit_abs(emit, inst);
+   case TGSI_OPCODE_IABS:
+      return emit_iabs(emit, inst);
+   case TGSI_OPCODE_ARL:
+      /* fall-through */
+   case TGSI_OPCODE_UARL:
+      return emit_arl_uarl(emit, inst);
+   case TGSI_OPCODE_BGNSUB:
+      /* no-op */
+      return TRUE;
+   case TGSI_OPCODE_CAL:
+      return emit_cal(emit, inst);
+   case TGSI_OPCODE_CMP:
+      return emit_cmp(emit, inst);
+   case TGSI_OPCODE_COS:
+      return emit_sincos(emit, inst);
+   case TGSI_OPCODE_DP2A:
+      return emit_dp2a(emit, inst);
+   case TGSI_OPCODE_DPH:
+      return emit_dph(emit, inst);
+   case TGSI_OPCODE_DST:
+      return emit_dst(emit, inst);
+   case TGSI_OPCODE_EX2:
+      return emit_ex2(emit, inst);
+   case TGSI_OPCODE_EXP:
+      return emit_exp(emit, inst);
+   case TGSI_OPCODE_IF:
+      return emit_if(emit, inst);
+   case TGSI_OPCODE_KILL:
+      return emit_kill(emit, inst);
+   case TGSI_OPCODE_KILL_IF:
+      return emit_kill_if(emit, inst);
+   case TGSI_OPCODE_LG2:
+      return emit_lg2(emit, inst);
+   case TGSI_OPCODE_LIT:
+      return emit_lit(emit, inst);
+   case TGSI_OPCODE_LOG:
+      return emit_log(emit, inst);
+   case TGSI_OPCODE_LRP:
+      return emit_lrp(emit, inst);
+   case TGSI_OPCODE_POW:
+      return emit_pow(emit, inst);
+   case TGSI_OPCODE_RCP:
+      return emit_rcp(emit, inst);
+   case TGSI_OPCODE_RSQ:
+      return emit_rsq(emit, inst);
+   case TGSI_OPCODE_SAMPLE:
+      return emit_sample(emit, inst);
+   case TGSI_OPCODE_SCS:
+      return emit_scs(emit, inst);
+   case TGSI_OPCODE_SEQ:
+      return emit_seq(emit, inst);
+   case TGSI_OPCODE_SGE:
+      return emit_sge(emit, inst);
+   case TGSI_OPCODE_SGT:
+      return emit_sgt(emit, inst);
+   case TGSI_OPCODE_SIN:
+      return emit_sincos(emit, inst);
+   case TGSI_OPCODE_SLE:
+      return emit_sle(emit, inst);
+   case TGSI_OPCODE_SLT:
+      return emit_slt(emit, inst);
+   case TGSI_OPCODE_SNE:
+      return emit_sne(emit, inst);
+   case TGSI_OPCODE_SSG:
+      return emit_ssg(emit, inst);
+   case TGSI_OPCODE_ISSG:
+      return emit_issg(emit, inst);
+   case TGSI_OPCODE_SUB:
+      return emit_sub(emit, inst);
+   case TGSI_OPCODE_TEX:
+      return emit_tex(emit, inst);
+   case TGSI_OPCODE_TXP:
+      return emit_txp(emit, inst);
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXB2:
+   case TGSI_OPCODE_TXL:
+      return emit_txl_txb(emit, inst);
+   case TGSI_OPCODE_TXD:
+      return emit_txd(emit, inst);
+   case TGSI_OPCODE_TXF:
+      return emit_txf(emit, inst);
+   case TGSI_OPCODE_TXQ:
+      return emit_txq(emit, inst);
+   case TGSI_OPCODE_UIF:
+      return emit_if(emit, inst);
+   case TGSI_OPCODE_XPD:
+      return emit_xpd(emit, inst);
+   case TGSI_OPCODE_UMUL_HI:
+   case TGSI_OPCODE_IMUL_HI:
+   case TGSI_OPCODE_UDIV:
+   case TGSI_OPCODE_IDIV:
+      /* These cases use only the FIRST of two destination registers */
+      return emit_simple_1dst(emit, inst, 2, 0);
+   case TGSI_OPCODE_UMUL:
+   case TGSI_OPCODE_UMOD:
+   case TGSI_OPCODE_MOD:
+      /* These cases use only the SECOND of two destination registers */
+      return emit_simple_1dst(emit, inst, 2, 1);
+   case TGSI_OPCODE_END:
+      if (!emit_post_helpers(emit))
+         return FALSE;
+      return emit_simple(emit, inst);
+
+   default:
+      debug_printf("Unimplemented tgsi instruction %s\n",
+                   tgsi_get_opcode_name(opcode));
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Emit the extra instructions to adjust the vertex position.
+ * There are two possible adjustments:
+ * 1. Converting from Gallium to VGPU10 coordinate space by applying the
+ *    "prescale" and "pretranslate" values.
+ * 2. Undoing the viewport transformation when we use the swtnl/draw path.
+ * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
+ */
+static void
+emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
+                       unsigned vs_pos_tmp_index)
+{
+   struct tgsi_full_src_register tmp_pos_src;
+   struct tgsi_full_dst_register pos_dst;
+
+   /* Don't bother to emit any extra vertex instructions if vertex position is
+    * not written out
+    */
+   if (emit->vposition.out_index == INVALID_INDEX)
+      return;
+
+   tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
+   pos_dst = make_dst_output_reg(emit->vposition.out_index);
+
+   /* If non-adjusted vertex position register index
+    * is valid, copy the vertex position from the temporary
+    * vertex position register before it is modified by the
+    * prescale computation.
+    */
+   if (emit->vposition.so_index != INVALID_INDEX) {
+      struct tgsi_full_dst_register pos_so_dst =
+         make_dst_output_reg(emit->vposition.so_index);
+
+      /* MOV pos_so, tmp_pos */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst,
+                           &tmp_pos_src, FALSE);
+   }
+
+   if (emit->vposition.need_prescale) {
+      /* This code adjusts the vertex position to match the VGPU10 convention.
+       * If p is the position computed by the shader (usually by applying the
+       * modelview and projection matrices), the new position q is computed by:
+       *
+       * q.x = p.w * trans.x + p.x * scale.x
+       * q.y = p.w * trans.y + p.y * scale.y
+       * q.z = p.w * trans.z + p.z * scale.z
+       * q.w = p.w * trans.w + p.w
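+       *
+       * For instance (an illustrative assumption, not taken from this
+       * patch), scale = (1, -1, 0.5, 0) and trans = (0, 0, 0.5, 0) would
+       * flip Y and remap Z from [-1,1] to [0,1].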
+       */
+      struct tgsi_full_src_register tmp_pos_src_w =
+         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
+      struct tgsi_full_dst_register tmp_pos_dst =
+         make_dst_temp_reg(vs_pos_tmp_index);
+      struct tgsi_full_dst_register tmp_pos_dst_xyz =
+         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
+
+      struct tgsi_full_src_register prescale_scale =
+         make_src_const_reg(emit->vposition.prescale_scale_index);
+      struct tgsi_full_src_register prescale_trans =
+         make_src_const_reg(emit->vposition.prescale_trans_index);
+
+      /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
+                           &tmp_pos_src, &prescale_scale, FALSE);
+
+      /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
+      emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
+                           &prescale_trans, &tmp_pos_src, FALSE);
+   }
+   else if (emit->key.vs.undo_viewport) {
+      /* This code computes the final vertex position from the temporary
+       * vertex position by undoing the viewport transformation and the
+       * divide-by-W operation (we convert window coords back to clip coords).
+       * This is needed when we use the 'draw' module for fallbacks.
+       * If p is the temp pos in window coords, then the NDC coord q is:
+       *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
+       *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
+       *   q.z = p.z * p.w
+       *   q.w = p.w
+       * CONST[vs_viewport_index] contains:
+       *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
+       */
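+      /* Mapping the steps below onto that formula: the ADD subtracts the
+       * translate (viewport.zw holds -vp.x_trans, -vp.y_trans), the first
+       * MUL applies 1/scale (viewport.xy), and the final MUL/MOV fold in
+       * the p.w factor.
+       */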
+      struct tgsi_full_dst_register tmp_pos_dst =
+         make_dst_temp_reg(vs_pos_tmp_index);
+      struct tgsi_full_dst_register tmp_pos_dst_xy =
+         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
+      struct tgsi_full_src_register tmp_pos_src_wwww =
+         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
+
+      struct tgsi_full_dst_register pos_dst_xyz =
+         writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
+      struct tgsi_full_dst_register pos_dst_w =
+         writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
+
+      struct tgsi_full_src_register vp_xyzw =
+         make_src_const_reg(emit->vs.viewport_index);
+      struct tgsi_full_src_register vp_zwww =
+         swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+                     TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
+
+      /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
+      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
+                           &tmp_pos_src, &vp_zwww, FALSE);
+
+      /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzw */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
+                           &tmp_pos_src, &vp_xyzw, FALSE);
+
+      /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
+                           &tmp_pos_src, &tmp_pos_src_wwww, FALSE);
+
+      /* MOV pos.w, tmp_pos.w */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w,
+                           &tmp_pos_src, FALSE);
+   }
+   else if (vs_pos_tmp_index != INVALID_INDEX) {
+      /* Handle the case where the temporary vertex position register is
+       * created when the vertex shader has stream output and prescale is
+       * disabled because rasterization is to be discarded; pos_dst (set
+       * up above) is the real vertex position output.
+       */
+
+      /* MOV pos, tmp_pos */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+      emit_dst_register(emit, &pos_dst);
+      emit_src_register(emit, &tmp_pos_src);
+      end_emit_instruction(emit);
+   }
+}
+
+static void
+emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   if (emit->clip_mode == CLIP_DISTANCE) {
+      /* Copy from the clip distance temporary to CLIPDIST & the shadow copy */
+      emit_clip_distance_instructions(emit);
+
+   } else if (emit->clip_mode == CLIP_VERTEX) {
+      /* Convert TGSI CLIPVERTEX to CLIPDIST */
+      emit_clip_vertex_instructions(emit);
+   }
+
+   /* Emit the vertex position and take care of legacy user clip planes
+    * only if there is a valid vertex position register index; if the
+    * shader doesn't output a vertex position, don't bother emitting any
+    * more vertex instructions.
+    */
+   if (emit->vposition.out_index == INVALID_INDEX)
+      return;
+
+   /* Emit per-vertex clipping instructions for legacy user-defined clip
+    * planes.  NOTE: we must emit the clip distance instructions before
+    * the emit_vpos_instructions() call since that function will change
+    * the TEMP[vs_pos_tmp_index] value.
+    */
+   if (emit->clip_mode == CLIP_LEGACY) {
+      /* Emit CLIPDIST for legacy user defined clip planes */
+      emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
+   }
+}
+
+
+/**
+ * Emit extra per-vertex instructions.  This includes clip-coordinate
+ * space conversion and computing clip distances.  This is called for
+ * each GS emit-vertex instruction and at the end of VS translation.
+ */
+static void
+emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
+
+   /* Emit clipping instructions based on clipping mode */
+   emit_clipping_instructions(emit);
+
+   /* Reset the temporary vertex position register index
+    * so that emit_dst_register() will use the real vertex position output.
+    */
+   emit->vposition.tmp_index = INVALID_INDEX;
+
+   /* Emit vertex position instructions */
+   emit_vpos_instructions(emit, vs_pos_tmp_index);
+
+   /* Restore original vposition.tmp_index value for the next GS vertex.
+    * It doesn't matter for VS.
+    */
+   emit->vposition.tmp_index = vs_pos_tmp_index;
+}
+
+/**
+ * Translate the TGSI_OPCODE_EMIT GS instruction.
+ */
+static boolean
+emit_vertex(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst)
+{
+   boolean ret = TRUE;
+
+   assert(emit->unit == PIPE_SHADER_GEOMETRY);
+
+   emit_vertex_instructions(emit);
+
+   /* We can't use emit_simple() because the TGSI instruction has one
+    * operand (vertex stream number) which we must ignore for VGPU10.
+    */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
+   end_emit_instruction(emit);
+
+   return ret;
+}
+
+
+/**
+ * Emit the extra code to convert from VGPU10's boolean front-face
+ * register to TGSI's signed front-face register.
+ *
+ * TODO: Make temporary front-face register a scalar.
+ */
+static void
+emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   if (emit->fs.face_input_index != INVALID_INDEX) {
+      /* convert vgpu10 boolean face register to gallium +/-1 value */
+      struct tgsi_full_dst_register tmp_dst =
+         make_dst_temp_reg(emit->fs.face_tmp_index);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+      struct tgsi_full_src_register neg_one =
+         make_immediate_reg_float(emit, -1.0f);
+
+      /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
+      emit_dst_register(emit, &tmp_dst);
+      emit_face_register(emit);
+      emit_src_register(emit, &one);
+      emit_src_register(emit, &neg_one);
+      end_emit_instruction(emit);
+   }
+}
+
+
+/**
+ * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
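+ * (Gallium's fragment-shader POSITION convention expects 1/w in the w
+ * channel, while VGPU10 supplies w itself -- hence the DIV below.)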
+ */
+static void
+emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
+      struct tgsi_full_dst_register tmp_dst =
+         make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
+      struct tgsi_full_dst_register tmp_dst_xyz =
+         writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
+      struct tgsi_full_dst_register tmp_dst_w =
+         writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+      struct tgsi_full_src_register fragcoord =
+         make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
+
+      /* save the input index */
+      unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
+      /* set to invalid to prevent substitution in emit_src_register() */
+      emit->fs.fragcoord_input_index = INVALID_INDEX;
+
+      /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+      emit_dst_register(emit, &tmp_dst_xyz);
+      emit_src_register(emit, &fragcoord);
+      end_emit_instruction(emit);
+
+      /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
+      emit_dst_register(emit, &tmp_dst_w);
+      emit_src_register(emit, &one);
+      emit_src_register(emit, &fragcoord);
+      end_emit_instruction(emit);
+
+      /* restore saved value */
+      emit->fs.fragcoord_input_index = fragcoord_input_index;
+   }
+}
+
+
+/**
+ * Emit extra instructions to adjust VS inputs/attributes.  This can
+ * mean casting a vertex attribute from int to float or setting the
+ * W component to 1, or both.
+ */
+static void
+emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
+   const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
+   const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
+   const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
+   const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
+   const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
+   const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
+
+   unsigned adjust_mask = (save_w_1_mask |
+                           save_itof_mask |
+                           save_utof_mask |
+                           save_is_bgra_mask |
+                           save_puint_to_snorm_mask |
+                           save_puint_to_uscaled_mask |
+                           save_puint_to_sscaled_mask);
+
+   assert(emit->unit == PIPE_SHADER_VERTEX);
+
+   if (adjust_mask) {
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+
+      struct tgsi_full_src_register one_int =
+         make_immediate_reg_int(emit, 1);
+
+      /* We need to turn off these bitmasks while emitting the
+       * instructions below, then restore them afterward.
+       */
+      emit->key.vs.adjust_attrib_w_1 = 0;
+      emit->key.vs.adjust_attrib_itof = 0;
+      emit->key.vs.adjust_attrib_utof = 0;
+      emit->key.vs.attrib_is_bgra = 0;
+      emit->key.vs.attrib_puint_to_snorm = 0;
+      emit->key.vs.attrib_puint_to_uscaled = 0;
+      emit->key.vs.attrib_puint_to_sscaled = 0;
+
+      while (adjust_mask) {
+         unsigned index = u_bit_scan(&adjust_mask);
+         unsigned tmp = emit->vs.adjusted_input[index];
+         struct tgsi_full_src_register input_src =
+            make_src_reg(TGSI_FILE_INPUT, index);
+
+         struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+         struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+         struct tgsi_full_dst_register tmp_dst_w =
+            writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
+
+         /* ITOF/UTOF/MOV tmp, input[index] */
+         if (save_itof_mask & (1 << index)) {
+            emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
+                                 &tmp_dst, &input_src, FALSE);
+         }
+         else if (save_utof_mask & (1 << index)) {
+            emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
+                                 &tmp_dst, &input_src, FALSE);
+         }
+         else if (save_puint_to_snorm_mask & (1 << index)) {
+            emit_puint_to_snorm(emit, &tmp_dst, &input_src);
+         }
+         else if (save_puint_to_uscaled_mask & (1 << index)) {
+            emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
+         }
+         else if (save_puint_to_sscaled_mask & (1 << index)) {
+            emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
+         }
+         else {
+            assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
+            emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                                 &tmp_dst, &input_src, FALSE);
+         }
+
+         if (save_is_bgra_mask & (1 << index)) {
+            emit_swap_r_b(emit, &tmp_dst, &tmp_src);
+         }
+
+         if (save_w_1_mask & (1 << index)) {
+            /* MOV tmp.w, 1.0 */
+            if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
+               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                                    &tmp_dst_w, &one_int, FALSE);
+            }
+            else {
+               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                                    &tmp_dst_w, &one, FALSE);
+            }
+         }
+      }
+
+      emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
+      emit->key.vs.adjust_attrib_itof = save_itof_mask;
+      emit->key.vs.adjust_attrib_utof = save_utof_mask;
+      emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
+      emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
+      emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
+      emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
+   }
+}
+
+
+/**
+ * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
+ * to implement some instructions.  We pre-allocate those values here
+ * in the immediate constant buffer.
+ */
+static void
+alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned n = 0;
+
+   emit->common_immediate_pos[n++] =
+      alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
+
+   emit->common_immediate_pos[n++] =
+      alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f);
+
+   emit->common_immediate_pos[n++] =
+      alloc_immediate_int4(emit, 0, 1, 0, -1);
+
+   if (emit->key.vs.attrib_puint_to_snorm) {
+      emit->common_immediate_pos[n++] =
+         alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
+   }
+
+   if (emit->key.vs.attrib_puint_to_uscaled) {
+      emit->common_immediate_pos[n++] =
+         alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
+   }
+
+   if (emit->key.vs.attrib_puint_to_sscaled) {
+      emit->common_immediate_pos[n++] =
+         alloc_immediate_int4(emit, 22, 12, 2, 0);
+
+      emit->common_immediate_pos[n++] =
+         alloc_immediate_int4(emit, 22, 30, 0, 0);
+   }
+
+   assert(n <= Elements(emit->common_immediate_pos));
+   emit->num_common_immediates = n;
+}
+
+
+/**
+ * Emit any extra/helper declarations/code that we might need between
+ * the declaration section and code section.
+ */
+static boolean
+emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
+{
+   /* Properties */
+   if (emit->unit == PIPE_SHADER_GEOMETRY)
+      emit_property_instructions(emit);
+
+   /* Declare inputs */
+   if (!emit_input_declarations(emit))
+      return FALSE;
+
+   /* Declare outputs */
+   if (!emit_output_declarations(emit))
+      return FALSE;
+
+   /* Declare temporary registers */
+   emit_temporaries_declaration(emit);
+
+   /* Declare constant registers */
+   emit_constant_declaration(emit);
+
+   /* Declare samplers and resources */
+   emit_sampler_declarations(emit);
+   emit_resource_declarations(emit);
+
+   /* Declare clip distance output registers */
+   if (emit->unit == PIPE_SHADER_VERTEX ||
+       emit->unit == PIPE_SHADER_GEOMETRY) {
+      emit_clip_distance_declarations(emit);
+   }
+
+   alloc_common_immediates(emit);
+
+   if (emit->unit == PIPE_SHADER_FRAGMENT &&
+       emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
+      float alpha = emit->key.fs.alpha_ref;
+      emit->fs.alpha_ref_index =
+         alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
+   }
+
+   /* Now, emit the constant block containing all the immediates
+    * declared by shader, as well as the extra ones seen above.
+    */
+   emit_vgpu10_immediates_block(emit);
+
+   if (emit->unit == PIPE_SHADER_FRAGMENT) {
+      emit_frontface_instructions(emit);
+      emit_fragcoord_instructions(emit);
+   }
+   else if (emit->unit == PIPE_SHADER_VERTEX) {
+      emit_vertex_attrib_instructions(emit);
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
+ * against the alpha reference value and discards the fragment if the
+ * comparison fails.
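+ * E.g. with alpha_func = SVGA3D_CMP_GREATER and ref = 0.5, a fragment
+ * whose alpha is 0.3 fails the comparison (result 0) and is discarded.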
+ */
+static void
+emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
+                             unsigned fs_color_tmp_index)
+{
+   /* compare output color's alpha to alpha ref and kill */
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_src_register tmp_src_x =
+      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register color_src =
+      make_src_temp_reg(fs_color_tmp_index);
+   struct tgsi_full_src_register color_src_w =
+      scalar_src(&color_src, TGSI_SWIZZLE_W);
+   struct tgsi_full_src_register ref_src =
+      make_src_immediate_reg(emit->fs.alpha_ref_index);
+   struct tgsi_full_dst_register color_dst =
+      make_dst_output_reg(emit->fs.color_out_index[0]);
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   /* dst = src0 'alpha_func' src1 */
+   emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
+                   &color_src_w, &ref_src);
+
+   /* DISCARD if dst.x == 0 */
+   begin_emit_instruction(emit);
+   emit_discard_opcode(emit, FALSE);  /* discard if src0.x is zero */
+   emit_src_register(emit, &tmp_src_x);
+   end_emit_instruction(emit);
+
+   /* If we don't need to broadcast the color below, emit final color here */
+   if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
+      /* MOV output.color, tempcolor */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
+                           &color_src, FALSE);     /* XXX saturate? */
+   }
+
+   free_temp_indexes(emit);
+}
+
+
+/**
+ * Emit instructions for writing a single color output to multiple
+ * color buffers.
+ * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
+ * property is set and the number of render targets is greater than one.
+ * \param fs_color_tmp_index  index of the temp register that holds the
+ *                            color to broadcast.
+ */
+static void
+emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
+                                 unsigned fs_color_tmp_index)
+{
+   const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
+   unsigned i;
+   struct tgsi_full_src_register color_src =
+      make_src_temp_reg(fs_color_tmp_index);
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+   assert(n > 1);
+
+   for (i = 0; i < n; i++) {
+      unsigned output_reg = emit->fs.color_out_index[i];
+      struct tgsi_full_dst_register color_dst =
+         make_dst_output_reg(output_reg);
+
+      /* Fill in this semantic here since we'll use it later in
+       * emit_dst_register().
+       */
+      emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
+
+      /* MOV output.color[i], tempcolor */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
+                           &color_src, FALSE);     /* XXX saturate? */
+   }
+}
+
+
+/**
+ * Emit extra helper code after the original shader code, but before the
+ * last END/RET instruction.
+ * For vertex shaders this means emitting the extra code to apply the
+ * prescale scale/translation.
+ */
+static boolean
+emit_post_helpers(struct svga_shader_emitter_v10 *emit)
+{
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      emit_vertex_instructions(emit);
+   }
+   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
+      const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
+
+      /* We no longer want emit_dst_register() to substitute the
+       * temporary fragment color register for the real color output.
+       */
+      emit->fs.color_tmp_index = INVALID_INDEX;
+
+      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
+         emit_alpha_test_instructions(emit, fs_color_tmp_index);
+      }
+      if (emit->key.fs.write_color0_to_n_cbufs > 1) {
+         emit_broadcast_color_instructions(emit, fs_color_tmp_index);
+      }
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Translate the TGSI tokens into VGPU10 tokens.
+ */
+static boolean
+emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
+                         const struct tgsi_token *tokens)
+{
+   struct tgsi_parse_context parse;
+   boolean ret = TRUE;
+   boolean pre_helpers_emitted = FALSE;
+   unsigned inst_number = 0;
+
+   tgsi_parse_init(&parse, tokens);
+
+   while (!tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
+         if (!ret)
+            goto done;
+         break;
+
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
+         if (!ret)
+            goto done;
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (!pre_helpers_emitted) {
+            ret = emit_pre_helpers(emit);
+            if (!ret)
+               goto done;
+            pre_helpers_emitted = TRUE;
+         }
+         ret = emit_vgpu10_instruction(emit, inst_number++,
+                                       &parse.FullToken.FullInstruction);
+         if (!ret)
+            goto done;
+         break;
+
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
+         if (!ret)
+            goto done;
+         break;
+
+      default:
+         break;
+      }
+   }
+
+done:
+   tgsi_parse_free(&parse);
+   return ret;
+}
+
+
+/**
+ * Emit the first VGPU10 shader tokens.
+ */
+static boolean
+emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10ProgramToken ptoken;
+
+   /* First token: VGPU10ProgramToken (version info, program type (VS, GS, PS)) */
+   ptoken.majorVersion = 4;
+   ptoken.minorVersion = 0;
+   ptoken.programType = translate_shader_type(emit->unit);
+   if (!emit_dword(emit, ptoken.value))
+      return FALSE;
+
+   /* Second token: total length of shader, in tokens.  We can't fill this
+    * in until we're all done.  Emit zero for now.
+    */
+   return emit_dword(emit, 0);
+}
+
+
+static boolean
+emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10ProgramToken *tokens;
+
+   /* Replace the second token with total shader length */
+   tokens = (VGPU10ProgramToken *) emit->buf;
+   tokens[1].value = emit_get_num_tokens(emit);
+
+   return TRUE;
+}
+
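Together, emit_vgpu10_header() and emit_vgpu10_tail() implement a simple
two-dword container protocol: a version/type token, then a length token that
is patched once the body size is known. A minimal sketch, assuming only the
VGPU10ProgramToken layout used above and a caller-supplied uint32 buffer
(the VGPU10_PIXEL_SHADER value is illustrative):

static void
write_shader_container(uint32 *buf, unsigned body_tokens)
{
   VGPU10ProgramToken ptoken;

   ptoken.value = 0;
   ptoken.majorVersion = 4;                   /* Shader Model 4.0 */
   ptoken.minorVersion = 0;
   ptoken.programType = VGPU10_PIXEL_SHADER;  /* illustrative */

   buf[0] = ptoken.value;  /* version/type token */
   buf[1] = 0;             /* length placeholder */

   /* ...append body_tokens dwords of instructions... */

   buf[1] = 2 + body_tokens;  /* patch in the total length, in tokens */
}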
+
+/**
+ * Modify the FS to read the BCOLORs and use the FACE register
+ * to choose between the front/back colors.
+ */
+static const struct tgsi_token *
+transform_fs_twoside(const struct tgsi_token *tokens)
+{
+   if (0) {
+      debug_printf("Before tgsi_add_two_side ------------------\n");
+      tgsi_dump(tokens, 0);
+   }
+   tokens = tgsi_add_two_side(tokens);
+   if (0) {
+      debug_printf("After tgsi_add_two_side ------------------\n");
+      tgsi_dump(tokens, 0);
+   }
+   return tokens;
+}
+
+
+/**
+ * Modify the FS to do polygon stipple.
+ */
+static const struct tgsi_token *
+transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_token *tokens)
+{
+   const struct tgsi_token *new_tokens;
+   unsigned unit;
+
+   if (0) {
+      debug_printf("Before pstipple ------------------\n");
+      tgsi_dump(tokens, 0);
+   }
+
+   new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0);
+
+   emit->fs.pstipple_sampler_unit = unit;
+
+   /* Set up texture state for stipple */
+   emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
+   emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
+   emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
+   emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
+   emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
+
+   if (0) {
+      debug_printf("After pstipple ------------------\n");
+      tgsi_dump(new_tokens, 0);
+   }
+
+   return new_tokens;
+}
+
+/**
+ * Modify the FS to support anti-aliased points.
+ */
+static const struct tgsi_token *
+transform_fs_aapoint(const struct tgsi_token *tokens,
+                     int aa_coord_index)
+{
+   if (0) {
+      debug_printf("Before tgsi_add_aa_point ------------------\n");
+      tgsi_dump(tokens, 0);
+   }
+   tokens = tgsi_add_aa_point(tokens, aa_coord_index);
+   if (0) {
+      debug_printf("After tgsi_add_aa_point ------------------\n");
+      tgsi_dump(tokens, 0);
+   }
+   return tokens;
+}
+
+/**
+ * This is the main entry point for the TGSI -> VGPU10 translator.
+ */
+struct svga_shader_variant *
+svga_tgsi_vgpu10_translate(struct svga_context *svga,
+                           const struct svga_shader *shader,
+                           const struct svga_compile_key *key,
+                           unsigned unit)
+{
+   struct svga_shader_variant *variant = NULL;
+   struct svga_shader_emitter_v10 *emit;
+   const struct tgsi_token *tokens = shader->tokens;
+   struct svga_vertex_shader *vs = svga->curr.vs;
+   struct svga_geometry_shader *gs = svga->curr.gs;
+
+   assert(unit == PIPE_SHADER_VERTEX ||
+          unit == PIPE_SHADER_GEOMETRY ||
+          unit == PIPE_SHADER_FRAGMENT);
+
+   /* These two flags cannot be used together */
+   assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
+
+   /*
+    * Set up the code emitter
+    */
+   emit = alloc_emitter();
+   if (!emit)
+      return NULL;
+
+   emit->unit = unit;
+   emit->key = *key;
+
+   emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
+                                   emit->key.gs.need_prescale);
+   emit->vposition.tmp_index = INVALID_INDEX;
+   emit->vposition.so_index = INVALID_INDEX;
+   emit->vposition.out_index = INVALID_INDEX;
+
+   emit->fs.color_tmp_index = INVALID_INDEX;
+   emit->fs.face_input_index = INVALID_INDEX;
+   emit->fs.fragcoord_input_index = INVALID_INDEX;
+
+   emit->gs.prim_id_index = INVALID_INDEX;
+
+   emit->clip_dist_out_index = INVALID_INDEX;
+   emit->clip_dist_tmp_index = INVALID_INDEX;
+   emit->clip_dist_so_index = INVALID_INDEX;
+   emit->clip_vertex_out_index = INVALID_INDEX;
+
+   if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
+      emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
+   }
+
+   if (unit == PIPE_SHADER_FRAGMENT) {
+      if (key->fs.light_twoside) {
+         tokens = transform_fs_twoside(tokens);
+      }
+      if (key->fs.pstipple) {
+         const struct tgsi_token *new_tokens =
+            transform_fs_pstipple(emit, tokens);
+         if (tokens != shader->tokens) {
+            /* free the two-sided shader tokens */
+            tgsi_free_tokens(tokens);
+         }
+         tokens = new_tokens;
+      }
+      if (key->fs.aa_point) {
+         tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
+      }
+   }
+
+   if (SVGA_DEBUG & DEBUG_TGSI) {
+      debug_printf("#####################################\n");
+      debug_printf("### TGSI Shader %u\n", shader->id);
+      tgsi_dump(tokens, 0);
+   }
+
+   /**
+    * Rescan the shader if the token string is different from the one
+    * stored in the shader; otherwise, the scan info is already up to date.
+    */
+   if (tokens != shader->tokens) {
+      tgsi_scan_shader(tokens, &emit->info);
+   } else {
+      emit->info = shader->info;
+   }
+
+   emit->num_outputs = emit->info.num_outputs;
+
+   if (unit == PIPE_SHADER_FRAGMENT) {
+      /* Compute FS input remapping to match the output from VS/GS */
+      if (gs) {
+         svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage);
+      } else {
+         assert(vs);
+         svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
+      }
+   } else if (unit == PIPE_SHADER_GEOMETRY) {
+      assert(vs);
+      svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
+   }
+
+   determine_clipping_mode(emit);
+
+   if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
+      if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
+         /* If there are stream output declarations associated
+          * with this shader, or the shader writes to ClipDistance,
+          * then reserve extra registers for the non-adjusted vertex position
+          * and the ClipDistance shadow copy.
+          */
+         emit->vposition.so_index = emit->num_outputs++;
+
+         if (emit->clip_mode == CLIP_DISTANCE) {
+            emit->clip_dist_so_index = emit->num_outputs++;
+            if (emit->info.num_written_clipdistance > 4)
+               emit->num_outputs++;
+         }
+      }
+   }
+
+   /*
+    * Do the actual shader translation.
+    */
+   if (!emit_vgpu10_header(emit)) {
+      debug_printf("svga: emit VGPU10 header failed\n");
+      goto cleanup;
+   }
+
+   if (!emit_vgpu10_instructions(emit, tokens)) {
+      debug_printf("svga: emit VGPU10 instructions failed\n");
+      goto cleanup;
+   }
+
+   if (!emit_vgpu10_tail(emit)) {
+      debug_printf("svga: emit VGPU10 tail failed\n");
+      goto cleanup;
+   }
+
+   if (emit->register_overflow) {
+      goto cleanup;
+   }
+
+   /*
+    * Create and initialize the 'variant' object.
+    */
+   variant = CALLOC_STRUCT(svga_shader_variant);
+   if (!variant)
+      goto cleanup;
+
+   variant->shader = shader;
+   variant->nr_tokens = emit_get_num_tokens(emit);
+   variant->tokens = (const unsigned *)emit->buf;
+   emit->buf = NULL;  /* buffer is no longer owned by emitter context */
+   memcpy(&variant->key, key, sizeof(*key));
+   variant->id = UTIL_BITMASK_INVALID_INDEX;
+
+   /* The starting offset for extra constants is the number of
+    * shader constants declared in the shader.
+    */
+   variant->extra_const_start = emit->num_shader_consts[0];
+   if (key->gs.wide_point) {
+      /**
+       * The extra constant added to the transformed shader
+       * for the inverse viewport scale is supplied by the driver,
+       * so the extra constant starting offset needs to be reduced by 1.
+       */
+      assert(variant->extra_const_start > 0);
+      variant->extra_const_start--;
+   }
+
+   variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
+
+   /** Keep track in the variant of whether flat interpolation is used
+    *  for any of the varyings.
+    */
+   variant->uses_flat_interp = emit->uses_flat_interp;
+
+   if (tokens != shader->tokens) {
+      tgsi_free_tokens(tokens);
+   }
+
+cleanup:
+   free_emitter(emit);
+
+   return variant;
+}
index 19d074f..c750603 100644 (file)
@@ -79,15 +79,18 @@ struct winsys_handle;
 #define SVGA_FENCE_FLAG_EXEC      (1 << 0)
 #define SVGA_FENCE_FLAG_QUERY     (1 << 1)
 
-#define SVGA_SURFACE_USAGE_SHARED (1 << 0)
+#define SVGA_SURFACE_USAGE_SHARED  (1 << 0)
+#define SVGA_SURFACE_USAGE_SCANOUT (1 << 1)
+
+#define SVGA_QUERY_FLAG_SET        (1 << 0)
+#define SVGA_QUERY_FLAG_REF        (1 << 1)
 
 /** Opaque surface handle */
 struct svga_winsys_surface;
 
-
 /** Opaque guest-backed objects */
 struct svga_winsys_gb_shader;
-
+struct svga_winsys_gb_query;
 
 
 /**
@@ -143,7 +146,8 @@ struct svga_winsys_context
                        uint32 *shid,
                        uint32 *mobid,
                        uint32 *offset,
-                       struct svga_winsys_gb_shader *shader);
+                       struct svga_winsys_gb_shader *shader,
+                        unsigned flags);
 
    /**
     * Emit a relocation for a guest-backed context.
@@ -173,6 +177,26 @@ struct svga_winsys_context
                     uint32 offset,
                     unsigned flags);
 
+   /**
+    * Emit a relocation for a guest-backed query object.
+    *
+    * NOTE: The order of these calls matters: it must match the order
+    * in which the relocations appear in the command buffer.
+    */
+   void
+   (*query_relocation)(struct svga_winsys_context *swc,
+                      SVGAMobId *id,
+                      struct svga_winsys_gb_query *query);
+
+   /**
+    * Bind queries to context.
+    * \param flags  exactly one of SVGA_QUERY_FLAG_SET/REF
+    */
+   enum pipe_error
+   (*query_bind)(struct svga_winsys_context *sws,
+                 struct svga_winsys_gb_query *query,
+                 unsigned flags);
+
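A hedged sketch of how a command emitter is expected to use these two hooks
together (helper name hypothetical): the relocation is emitted exactly where
the MobId lands in the command buffer, which preserves the required ordering,
and the query is then referenced on the context:

static enum pipe_error
emit_query_mob(struct svga_winsys_context *swc,
               SVGAMobId *id_in_cmd,  /* MobId slot inside the command */
               struct svga_winsys_gb_query *query)
{
   swc->query_relocation(swc, id_in_cmd, query);
   return swc->query_bind(swc, query, SVGA_QUERY_FLAG_REF);
}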
    void
    (*commit)(struct svga_winsys_context *swc);
    
@@ -219,6 +243,36 @@ struct svga_winsys_context
                     struct svga_winsys_surface *surface,
                     boolean *rebind);
 
+   /**
+    * Create and define a DX GB shader that resides in the device COTable.
+    * The caller of this function will issue the DXDefineShader command.
+    */
+   struct svga_winsys_gb_shader *
+   (*shader_create)(struct svga_winsys_context *swc,
+                    uint32 shaderId,
+                    SVGA3dShaderType shaderType,
+                    const uint32 *bytecode,
+                    uint32 bytecodeLen);
+
+   /**
+    * Destroy a DX GB shader.
+    * This function will issue the DXDestroyShader command.
+    */
+   void
+   (*shader_destroy)(struct svga_winsys_context *swc,
+                     struct svga_winsys_gb_shader *shader);
+
+   /**
+    * Rebind a DX GB resource to a context.
+    * This is called to reference a DX GB resource in the command stream,
+    * so that the associated resource is paged back in if its memory has
+    * been paged out, and fenced if necessary after command submission.
+    */
+   enum pipe_error
+   (*resource_rebind)(struct svga_winsys_context *swc,
+                      struct svga_winsys_surface *surface,
+                      struct svga_winsys_gb_shader *shader,
+                      unsigned flags);
 };
 
 
@@ -260,7 +314,7 @@ struct svga_winsys_screen
     * \param format Format Device surface format
     * \param usage Winsys usage: bitmask of SVGA_SURFACE_USAGE_x flags
     * \param size Surface size given in device format
-    * \param numFaces Number of faces of the surface (1 or 6)
+    * \param numLayers Number of layers of the surface (or cube faces)
     * \param numMipLevels Number of mipmap levels for each face
     *
     * Returns the surface ID (sid). Surfaces are generic
@@ -274,7 +328,7 @@ struct svga_winsys_screen
     * - Each face has a list of mipmap levels
     *
     * - Each mipmap image may have multiple volume
-    *   slices, if the image is three dimensional.
+    *   slices for a 3D image, or multiple 2D slices for a texture array.
     *
     * - Each slice is a 2D array of 'blocks'
     *
@@ -296,8 +350,9 @@ struct svga_winsys_screen
                      SVGA3dSurfaceFormat format,
                      unsigned usage,
                      SVGA3dSize size,
-                     uint32 numFaces,
-                     uint32 numMipLevels);
+                     uint32 numLayers,
+                     uint32 numMipLevels,
+                     unsigned sampleCount);
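
With the numFaces -> numLayers generalization, the per-surface image count no
longer depends on whether the layers are cube faces or array slices. A small
sketch of the implied bookkeeping, following the layout described above
(layers outermost, each owning its run of mip levels):

static uint32
surface_subimage_count(uint32 numLayers, uint32 numMipLevels)
{
   return numLayers * numMipLevels;
}

static uint32
surface_subimage_index(uint32 layer, uint32 mipLevel, uint32 numMipLevels)
{
   return layer * numMipLevels + mipLevel;
}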
 
    /**
     * Creates a surface from a winsys handle.
@@ -343,7 +398,7 @@ struct svga_winsys_screen
    (*surface_can_create)(struct svga_winsys_screen *sws,
                          SVGA3dSurfaceFormat format,
                          SVGA3dSize size,
-                         uint32 numFaces,
+                         uint32 numLayers,
                          uint32 numMipLevels);
 
    /**
@@ -420,7 +475,7 @@ struct svga_winsys_screen
     */
    struct svga_winsys_gb_shader *
    (*shader_create)(struct svga_winsys_screen *sws,
-                   SVGA3dShaderType type,
+                   SVGA3dShaderType shaderType,
                    const uint32 *bytecode,
                    uint32 bytecodeLen);
 
@@ -432,6 +487,46 @@ struct svga_winsys_screen
    (*shader_destroy)(struct svga_winsys_screen *sws,
                     struct svga_winsys_gb_shader *shader);
 
+   /**
+    * Create and define a GB query.
+    */
+   struct svga_winsys_gb_query *
+   (*query_create)(struct svga_winsys_screen *sws, uint32 len);
+
+   /**
+    * Destroy a GB query.
+    */
+   void
+   (*query_destroy)(struct svga_winsys_screen *sws,
+                   struct svga_winsys_gb_query *query);
+
+   /**
+    * Initialize the query state of the query that resides in the slot
+    * specified by offset.
+    * \return zero on success.
+    */
+   int
+   (*query_init)(struct svga_winsys_screen *sws,
+                       struct svga_winsys_gb_query *query,
+                       unsigned offset,
+                       SVGA3dQueryState queryState);
+
+   /**
+    * Inquire for the query state and result of the query that resides
+    * in the slot specified in offset
+    */
+   void
+   (*query_get_result)(struct svga_winsys_screen *sws,
+                       struct svga_winsys_gb_query *query,
+                       unsigned offset,
+                       SVGA3dQueryState *queryState,
+                       void *result, uint32 resultLen);
+
+   /** Have VGPU v10 hardware? */
+   boolean have_vgpu10;
+
+   /** To rebind resources at the beginning of a new command buffer */
+   boolean need_to_rebind_resources;
 };
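
A hedged usage sketch for the query hooks above (the uint64 and
SVGA3D_QUERYSTATE_PENDING names are assumed from the SVGA type headers): a
slot is initialized to the pending state before submission, then polled for
state and result:

static boolean
query_slot_ready(struct svga_winsys_screen *sws,
                 struct svga_winsys_gb_query *query,
                 unsigned offset, uint64 *result)
{
   SVGA3dQueryState state;

   /* Earlier: sws->query_init(sws, query, offset,
    *                          SVGA3D_QUERYSTATE_PENDING);
    */
   sws->query_get_result(sws, query, offset, &state,
                         result, sizeof(*result));
   return state != SVGA3D_QUERYSTATE_PENDING;
}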
 
 
index 0874d23..252e0d6 100644 (file)
@@ -1369,12 +1369,6 @@ dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd)
    case SVGA3D_BUMPL6V5U5:
       _debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n");
       break;
-   case SVGA3D_BUMPX8L8V8U8:
-      _debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n");
-      break;
-   case SVGA3D_BUMPL8V8U8:
-      _debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n");
-      break;
    case SVGA3D_ARGB_S10E5:
       _debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n");
       break;
@@ -1528,15 +1522,6 @@ dump_SVGA3dCmdDestroyGBShader(const SVGA3dCmdDestroyGBShader *cmd)
 }
 
 static void
-dump_SVGA3dCmdBindGBShaderConsts(const SVGA3dCmdBindGBShaderConsts *cmd)
-{
-   _debug_printf("\t\t.cid = %u\n", cmd->cid);
-   _debug_printf("\t\t.shaderType = %u\n", cmd->shaderType);
-   _debug_printf("\t\t.shaderConstType = %u\n", cmd->shaderConstType);
-   _debug_printf("\t\t.sid = %u\n", cmd->sid);
-}
-
-static void
 dump_SVGA3dCmdBindGBSurface(const SVGA3dCmdBindGBSurface *cmd)
 {
    _debug_printf("\t\t.sid = %u\n", cmd->sid);
@@ -1929,14 +1914,6 @@ svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size)
          body = (const uint8_t *)&cmd[1];
       }
       break;
-   case SVGA_3D_CMD_BIND_SHADERCONSTS:
-      _debug_printf("\tSVGA_3D_CMD_BIND_SHADERCONSTS\n");
-      {
-         const SVGA3dCmdBindGBShaderConsts *cmd = (const SVGA3dCmdBindGBShaderConsts *) body;
-         dump_SVGA3dCmdBindGBShaderConsts(cmd);
-         body = (const uint8_t *)&cmd[1];
-      }
-      break;
    case SVGA_3D_CMD_BIND_GB_SURFACE:
       _debug_printf("\tSVGA_3D_CMD_BIND_GB_SURFACE\n");
       {
index 33b6ec2..7502293 100644 (file)
@@ -158,18 +158,17 @@ struct vc4_compiled_shader {
          * It doesn't include those that aren't part of the VPM, like
          * point/line coordinates.
          */
-        struct vc4_varying_semantic *input_semantics;
+        struct vc4_varying_slot *input_slots;
 };
 
 struct vc4_program_stateobj {
         struct vc4_uncompiled_shader *bind_vs, *bind_fs;
         struct vc4_compiled_shader *cs, *vs, *fs;
         uint8_t num_exports;
-        /* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index
-         * for TGSI_SEMANTIC_GENERIC.  Special vs exports (position and point-
-         * size) are not included in this
+        /* Indexed by slot.  Special vs exports (position and pointsize) are
+         * not included in this array.
          */
-        uint8_t export_linkage[63];
+        uint8_t export_linkage[VARYING_SLOT_VAR0 + 8];
 };
 
 struct vc4_constbuf_stateobj {
index 808cbea..a842d60 100644 (file)
@@ -400,10 +400,11 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
                         }
                 }
                 assert(output_var);
-                unsigned semantic_name = output_var->data.location;
 
-                if (semantic_name != TGSI_SEMANTIC_COLOR)
+                if (output_var->data.location != FRAG_RESULT_COLOR &&
+                    output_var->data.location != FRAG_RESULT_DATA0) {
                         continue;
+                }
 
                 nir_function_impl *impl =
                         nir_cf_node_get_function(&block->cf_node);
index b632370..a98d70d 100644 (file)
@@ -22,7 +22,6 @@
  */
 
 #include "vc4_qir.h"
-#include "tgsi/tgsi_info.h"
 #include "glsl/nir/nir_builder.h"
 
 /**
@@ -47,8 +46,7 @@ replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr,
         /* Replace the old intrinsic with a reference to our reconstructed
          * vec4.
          */
-        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec),
-                                 ralloc_parent(b->impl));
+        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec));
         nir_instr_remove(&intr->instr);
 }
 
@@ -72,8 +70,6 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
                 }
         }
         assert(input_var);
-        int semantic_name = input_var->data.location;
-        int semantic_index = input_var->data.index;
 
         /* All TGSI-to-NIR inputs are vec4. */
         assert(intr->num_components == 4);
@@ -93,8 +89,7 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
 
         switch (c->stage) {
         case QSTAGE_FRAG:
-                switch (semantic_name) {
-                case TGSI_SEMANTIC_FACE:
+                if (input_var->data.location == VARYING_SLOT_FACE) {
                         dests[0] = nir_fsub(b,
                                             nir_imm_float(b, 1.0),
                                             nir_fmul(b,
@@ -103,10 +98,10 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
                         dests[1] = nir_imm_float(b, 0.0);
                         dests[2] = nir_imm_float(b, 0.0);
                         dests[3] = nir_imm_float(b, 1.0);
-                        break;
-                case TGSI_SEMANTIC_GENERIC:
+                } else if (input_var->data.location >= VARYING_SLOT_VAR0) {
                         if (c->fs_key->point_sprite_mask &
-                            (1 << semantic_index)) {
+                            (1 << (input_var->data.location -
+                                   VARYING_SLOT_VAR0))) {
                                 if (!c->fs_key->is_points) {
                                         dests[0] = nir_imm_float(b, 0.0);
                                         dests[1] = nir_imm_float(b, 0.0);
@@ -119,7 +114,6 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
                                 dests[2] = nir_imm_float(b, 0.0);
                                 dests[3] = nir_imm_float(b, 1.0);
                         }
-                        break;
                 }
                 break;
         case QSTAGE_COORD:
@@ -142,17 +136,18 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
                 }
         }
         assert(output_var);
-        unsigned semantic_name = output_var->data.location;
 
         if (c->stage == QSTAGE_COORD &&
-            (semantic_name != TGSI_SEMANTIC_POSITION &&
-             semantic_name != TGSI_SEMANTIC_PSIZE)) {
+            output_var->data.location != VARYING_SLOT_POS &&
+            output_var->data.location != VARYING_SLOT_PSIZ) {
                 nir_instr_remove(&intr->instr);
                 return;
         }
 
         /* Color output is lowered by vc4_nir_lower_blend(). */
-        if (c->stage == QSTAGE_FRAG && semantic_name == TGSI_SEMANTIC_COLOR) {
+        if (c->stage == QSTAGE_FRAG &&
+            (output_var->data.location == FRAG_RESULT_COLOR ||
+             output_var->data.location == FRAG_RESULT_DATA0)) {
                 intr->const_index[0] *= 4;
                 return;
         }
index ffd4242..9e79a2d 100644 (file)
@@ -69,7 +69,7 @@ has_nonremovable_reads(struct vc4_compile *c, struct qinst *inst)
                 }
 
                 if (inst->src[i].file == QFILE_VARY &&
-                    c->input_semantics[inst->src[i].index].semantic == 0xff) {
+                    c->input_slots[inst->src[i].index].slot == 0xff) {
                         return true;
                 }
         }
index e002983..01ea754 100644 (file)
@@ -30,7 +30,6 @@
 #include "util/ralloc.h"
 #include "util/hash_table.h"
 #include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_lowering.h"
 #include "tgsi/tgsi_parse.h"
 #include "glsl/nir/nir.h"
@@ -738,41 +737,36 @@ emit_fragcoord_input(struct vc4_compile *c, int attr)
 }
 
 static struct qreg
-emit_fragment_varying(struct vc4_compile *c, uint8_t semantic,
-                      uint8_t index, uint8_t swizzle)
+emit_fragment_varying(struct vc4_compile *c, gl_varying_slot slot,
+                      uint8_t swizzle)
 {
-        uint32_t i = c->num_input_semantics++;
+        uint32_t i = c->num_input_slots++;
         struct qreg vary = {
                 QFILE_VARY,
                 i
         };
 
-        if (c->num_input_semantics >= c->input_semantics_array_size) {
-                c->input_semantics_array_size =
-                        MAX2(4, c->input_semantics_array_size * 2);
+        if (c->num_input_slots >= c->input_slots_array_size) {
+                c->input_slots_array_size =
+                        MAX2(4, c->input_slots_array_size * 2);
 
-                c->input_semantics = reralloc(c, c->input_semantics,
-                                              struct vc4_varying_semantic,
-                                              c->input_semantics_array_size);
+                c->input_slots = reralloc(c, c->input_slots,
+                                          struct vc4_varying_slot,
+                                          c->input_slots_array_size);
         }
 
-        c->input_semantics[i].semantic = semantic;
-        c->input_semantics[i].index = index;
-        c->input_semantics[i].swizzle = swizzle;
+        c->input_slots[i].slot = slot;
+        c->input_slots[i].swizzle = swizzle;
 
         return qir_VARY_ADD_C(c, qir_FMUL(c, vary, qir_FRAG_W(c)));
 }
 
 static void
-emit_fragment_input(struct vc4_compile *c, int attr,
-                    unsigned semantic_name, unsigned semantic_index)
+emit_fragment_input(struct vc4_compile *c, int attr, gl_varying_slot slot)
 {
         for (int i = 0; i < 4; i++) {
                 c->inputs[attr * 4 + i] =
-                        emit_fragment_varying(c,
-                                              semantic_name,
-                                              semantic_index,
-                                              i);
+                        emit_fragment_varying(c, slot, i);
                 c->num_inputs++;
         }
 }
@@ -780,24 +774,22 @@ emit_fragment_input(struct vc4_compile *c, int attr,
 static void
 add_output(struct vc4_compile *c,
            uint32_t decl_offset,
-           uint8_t semantic_name,
-           uint8_t semantic_index,
-           uint8_t semantic_swizzle)
+           uint8_t slot,
+           uint8_t swizzle)
 {
         uint32_t old_array_size = c->outputs_array_size;
         resize_qreg_array(c, &c->outputs, &c->outputs_array_size,
                           decl_offset + 1);
 
         if (old_array_size != c->outputs_array_size) {
-                c->output_semantics = reralloc(c,
-                                               c->output_semantics,
-                                               struct vc4_varying_semantic,
-                                               c->outputs_array_size);
+                c->output_slots = reralloc(c,
+                                           c->output_slots,
+                                           struct vc4_varying_slot,
+                                           c->outputs_array_size);
         }
 
-        c->output_semantics[decl_offset].semantic = semantic_name;
-        c->output_semantics[decl_offset].index = semantic_index;
-        c->output_semantics[decl_offset].swizzle = semantic_swizzle;
+        c->output_slots[decl_offset].slot = slot;
+        c->output_slots[decl_offset].swizzle = swizzle;
 }
 
 static void
@@ -1129,10 +1121,10 @@ clip_distance_discard(struct vc4_compile *c)
                 if (!(c->key->ucp_enables & (1 << i)))
                         continue;
 
-                struct qreg dist = emit_fragment_varying(c,
-                                                         TGSI_SEMANTIC_CLIPDIST,
-                                                         i,
-                                                         TGSI_SWIZZLE_X);
+                struct qreg dist =
+                        emit_fragment_varying(c,
+                                              VARYING_SLOT_CLIP_DIST0 + (i / 4),
+                                              i % 4);
 
                 qir_SF(c, dist);
 
@@ -1285,9 +1277,8 @@ emit_ucp_clipdistance(struct vc4_compile *c)
                  */
                 uint32_t output_index = c->num_outputs++;
                 add_output(c, output_index,
-                           TGSI_SEMANTIC_CLIPDIST,
-                           plane,
-                           TGSI_SWIZZLE_X);
+                           VARYING_SLOT_CLIP_DIST0 + plane / 4,
+                           plane % 4);
 
 
                 struct qreg dist = qir_uniform_f(c, 0.0);
@@ -1305,7 +1296,7 @@ emit_ucp_clipdistance(struct vc4_compile *c)
 
 static void
 emit_vert_end(struct vc4_compile *c,
-              struct vc4_varying_semantic *fs_inputs,
+              struct vc4_varying_slot *fs_inputs,
               uint32_t num_fs_inputs)
 {
         struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]);
@@ -1320,15 +1311,14 @@ emit_vert_end(struct vc4_compile *c,
                 emit_point_size_write(c);
 
         for (int i = 0; i < num_fs_inputs; i++) {
-                struct vc4_varying_semantic *input = &fs_inputs[i];
+                struct vc4_varying_slot *input = &fs_inputs[i];
                 int j;
 
                 for (j = 0; j < c->num_outputs; j++) {
-                        struct vc4_varying_semantic *output =
-                                &c->output_semantics[j];
+                        struct vc4_varying_slot *output =
+                                &c->output_slots[j];
 
-                        if (input->semantic == output->semantic &&
-                            input->index == output->index &&
+                        if (input->slot == output->slot &&
                             input->swizzle == output->swizzle) {
                                 qir_VPM_WRITE(c, c->outputs[j]);
                                 break;
@@ -1412,9 +1402,6 @@ ntq_setup_inputs(struct vc4_compile *c)
         for (unsigned i = 0; i < num_entries; i++) {
                 nir_variable *var = vars[i];
                 unsigned array_len = MAX2(glsl_get_length(var->type), 1);
-                /* XXX: map loc slots to semantics */
-                unsigned semantic_name = var->data.location;
-                unsigned semantic_index = var->data.index;
                 unsigned loc = var->data.driver_location;
 
                 assert(array_len == 1);
@@ -1423,19 +1410,18 @@ ntq_setup_inputs(struct vc4_compile *c)
                                   (loc + 1) * 4);
 
                 if (c->stage == QSTAGE_FRAG) {
-                        if (semantic_name == TGSI_SEMANTIC_POSITION) {
+                        if (var->data.location == VARYING_SLOT_POS) {
                                 emit_fragcoord_input(c, loc);
-                        } else if (semantic_name == TGSI_SEMANTIC_FACE) {
+                        } else if (var->data.location == VARYING_SLOT_FACE) {
                                 c->inputs[loc * 4 + 0] = qir_FRAG_REV_FLAG(c);
-                        } else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
+                        } else if (var->data.location >= VARYING_SLOT_VAR0 &&
                                    (c->fs_key->point_sprite_mask &
-                                    (1 << semantic_index))) {
+                                    (1 << (var->data.location -
+                                           VARYING_SLOT_VAR0)))) {
                                 c->inputs[loc * 4 + 0] = c->point_x;
                                 c->inputs[loc * 4 + 1] = c->point_y;
                         } else {
-                                emit_fragment_input(c, loc,
-                                                    semantic_name,
-                                                    semantic_index);
+                                emit_fragment_input(c, loc, var->data.location);
                         }
                 } else {
                         emit_vertex_input(c, loc);
@@ -1448,43 +1434,37 @@ ntq_setup_outputs(struct vc4_compile *c)
 {
         foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
                 unsigned array_len = MAX2(glsl_get_length(var->type), 1);
-                /* XXX: map loc slots to semantics */
-                unsigned semantic_name = var->data.location;
-                unsigned semantic_index = var->data.index;
                 unsigned loc = var->data.driver_location * 4;
 
                 assert(array_len == 1);
                 (void)array_len;
 
-                /* NIR hack to pass through
-                 * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS */
-                if (semantic_name == TGSI_SEMANTIC_COLOR &&
-                    semantic_index == -1)
-                        semantic_index = 0;
-
-                for (int i = 0; i < 4; i++) {
-                        add_output(c,
-                                   loc + i,
-                                   semantic_name,
-                                   semantic_index,
-                                   i);
-                }
+                for (int i = 0; i < 4; i++)
+                        add_output(c, loc + i, var->data.location, i);
 
-                switch (semantic_name) {
-                case TGSI_SEMANTIC_POSITION:
-                        c->output_position_index = loc;
-                        break;
-                case TGSI_SEMANTIC_CLIPVERTEX:
-                        c->output_clipvertex_index = loc;
-                        break;
-                case TGSI_SEMANTIC_COLOR:
-                        c->output_color_index = loc;
-                        break;
-                case TGSI_SEMANTIC_PSIZE:
-                        c->output_point_size_index = loc;
-                        break;
+                if (c->stage == QSTAGE_FRAG) {
+                        switch (var->data.location) {
+                        case FRAG_RESULT_COLOR:
+                        case FRAG_RESULT_DATA0:
+                                c->output_color_index = loc;
+                                break;
+                        case FRAG_RESULT_DEPTH:
+                                c->output_position_index = loc;
+                                break;
+                        }
+                } else {
+                        switch (var->data.location) {
+                        case VARYING_SLOT_POS:
+                                c->output_position_index = loc;
+                                break;
+                        case VARYING_SLOT_CLIP_VERTEX:
+                                c->output_clipvertex_index = loc;
+                                break;
+                        case VARYING_SLOT_PSIZ:
+                                c->output_point_size_index = loc;
+                                break;
+                        }
                 }
-
         }
 }
 
@@ -1743,10 +1723,10 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
         case QSTAGE_FRAG:
                 c->fs_key = (struct vc4_fs_key *)key;
                 if (c->fs_key->is_points) {
-                        c->point_x = emit_fragment_varying(c, ~0, ~0, 0);
-                        c->point_y = emit_fragment_varying(c, ~0, ~0, 0);
+                        c->point_x = emit_fragment_varying(c, ~0, 0);
+                        c->point_y = emit_fragment_varying(c, ~0, 0);
                 } else if (c->fs_key->is_lines) {
-                        c->line_x = emit_fragment_varying(c, ~0, ~0, 0);
+                        c->line_x = emit_fragment_varying(c, ~0, 0);
                 }
                 break;
         case QSTAGE_VERT:
@@ -1824,7 +1804,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
                 break;
         case QSTAGE_VERT:
                 emit_vert_end(c,
-                              vc4->prog.fs->input_semantics,
+                              vc4->prog.fs->input_slots,
                               vc4->prog.fs->num_inputs);
                 break;
         case QSTAGE_COORD:
@@ -1925,7 +1905,7 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
 
         shader->program_id = vc4->next_compiled_program_id++;
         if (stage == QSTAGE_FRAG) {
-                bool input_live[c->num_input_semantics];
+                bool input_live[c->num_input_slots];
 
                 memset(input_live, 0, sizeof(input_live));
                 list_for_each_entry(struct qinst, inst, &c->instructions, link) {
@@ -1935,26 +1915,28 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
                         }
                 }
 
-                shader->input_semantics = ralloc_array(shader,
-                                                       struct vc4_varying_semantic,
-                                                       c->num_input_semantics);
+                shader->input_slots = ralloc_array(shader,
+                                                   struct vc4_varying_slot,
+                                                   c->num_input_slots);
 
-                for (int i = 0; i < c->num_input_semantics; i++) {
-                        struct vc4_varying_semantic *sem = &c->input_semantics[i];
+                for (int i = 0; i < c->num_input_slots; i++) {
+                        struct vc4_varying_slot *slot = &c->input_slots[i];
 
                         if (!input_live[i])
                                 continue;
 
                         /* Skip non-VS-output inputs. */
-                        if (sem->semantic == (uint8_t)~0)
+                        if (slot->slot == (uint8_t)~0)
                                 continue;
 
-                        if (sem->semantic == TGSI_SEMANTIC_COLOR ||
-                            sem->semantic == TGSI_SEMANTIC_BCOLOR) {
+                        if (slot->slot == VARYING_SLOT_COL0 ||
+                            slot->slot == VARYING_SLOT_COL1 ||
+                            slot->slot == VARYING_SLOT_BFC0 ||
+                            slot->slot == VARYING_SLOT_BFC1) {
                                 shader->color_inputs |= (1 << shader->num_inputs);
                         }
 
-                        shader->input_semantics[shader->num_inputs] = *sem;
+                        shader->input_slots[shader->num_inputs] = *slot;
                         shader->num_inputs++;
                 }
         } else {
index 9d93071..e385fbb 100644 (file)
@@ -147,7 +147,7 @@ qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst)
          */
         for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
                 if (inst->src[i].file == QFILE_VARY &&
-                    c->input_semantics[inst->src[i].index].semantic == 0xff) {
+                    c->input_slots[inst->src[i].index].slot == 0xff) {
                         return true;
                 }
 
@@ -314,6 +314,7 @@ qir_get_temp(struct vc4_compile *c)
 
         reg.file = QFILE_TEMP;
         reg.index = c->num_temps++;
+        reg.pack = 0;
 
         if (c->num_temps > c->defs_array_size) {
                 uint32_t old_size = c->defs_array_size;
index a2b21fa..ddde96d 100644 (file)
@@ -254,9 +254,8 @@ enum quniform_contents {
         QUNIFORM_ALPHA_REF,
 };
 
-struct vc4_varying_semantic {
-        uint8_t semantic;
-        uint8_t index;
+struct vc4_varying_slot {
+        uint8_t slot;
         uint8_t swizzle;
 };
 
@@ -372,21 +371,21 @@ struct vc4_compile {
         uint8_t vattr_sizes[8];
 
         /**
-         * Array of the TGSI semantics of all FS QFILE_VARY reads.
+         * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
          *
          * This includes those that aren't part of the VPM varyings, like
          * point/line coordinates.
          */
-        struct vc4_varying_semantic *input_semantics;
-        uint32_t num_input_semantics;
-        uint32_t input_semantics_array_size;
+        struct vc4_varying_slot *input_slots;
+        uint32_t num_input_slots;
+        uint32_t input_slots_array_size;
 
         /**
-         * An entry per outputs[] in the VS indicating what the semantic of
-         * the output is.  Used to emit from the VS in the order that the FS
-         * needs.
+         * An entry per outputs[] in the VS indicating what the VARYING_SLOT_*
+         * of the output is.  Used to emit from the VS in the order that the
+         * FS needs.
          */
-        struct vc4_varying_semantic *output_semantics;
+        struct vc4_varying_slot *output_slots;
 
         struct pipe_shader_state *shader_state;
         struct vc4_key *key;
index f67e3f8..6aa6b24 100644 (file)
@@ -454,8 +454,7 @@ qpu_merge_inst(uint64_t a, uint64_t b)
                                 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
 
         /* Misc fields that have to match exactly. */
-        ok = ok && merge_fields(&merge, a, b, QPU_SF | QPU_PM,
-                                ~0);
+        ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);
 
         if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
                           QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
@@ -493,64 +492,94 @@ qpu_merge_inst(uint64_t a, uint64_t b)
                         return 0;
         }
 
-        /* packing: Make sure that non-NOP packs agree, then deal with
-         * special-case failing of adding a non-NOP pack to something with a
-         * NOP pack.
-         */
-        if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
-                return 0;
-        bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
-                           QPU_GET_FIELD(merge, QPU_PACK));
-        bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
-                           QPU_GET_FIELD(merge, QPU_PACK));
-        if (!(merge & QPU_PM)) {
-                /* Make sure we're not going to be putting a new
-                 * a-file packing on either half.
+        if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {
+                /* If one instruction has the PM bit set and the other does
+                 * not, the one without PM must not do any packing/unpacking,
+                 * and we have to make sure that non-NOP packing/unpacking
+                 * from the PM instruction isn't added to it.
                  */
-                if (new_a_pack && writes_a_file(a))
-                        return 0;
+                uint64_t temp;
 
-                if (new_b_pack && writes_a_file(b))
-                        return 0;
-        } else {
-                /* Make sure we're not going to be putting new MUL packing on
-                 * either half.
-                 */
-                if (new_a_pack && QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
-                        return 0;
+                /* Let a be the one with PM bit */
+                if (!(a & QPU_PM)) {
+                        temp = a;
+                        a = b;
+                        b = temp;
+                }
 
-                if (new_b_pack && QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
+                if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
                         return 0;
-        }
 
-        /* unpacking: Make sure that non-NOP unpacks agree, then deal with
-         * special-case failing of adding a non-NOP unpack to something with a
-         * NOP unpack.
-         */
-        if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
-                return 0;
-        bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
-                             QPU_GET_FIELD(merge, QPU_UNPACK));
-        bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
-                             QPU_GET_FIELD(merge, QPU_UNPACK));
-        if (!(merge & QPU_PM)) {
-                /* Make sure we're not going to be putting a new
-                 * a-file packing on either half.
-                 */
-                if (new_a_unpack && QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
+                if ((a & QPU_PACK_MASK) != 0 &&
+                    QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
                         return 0;
 
-                if (new_b_unpack && QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
+                if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))
                         return 0;
         } else {
-                /* Make sure we're not going to be putting new r4 unpack on
-                 * either half.
+                /* packing: Make sure that non-NOP packs agree, then deal with
+                 * special-case failing of adding a non-NOP pack to something
+                 * with a NOP pack.
                  */
-                if (new_a_unpack && reads_r4(a))
+                if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
                         return 0;
+                bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
+                                QPU_GET_FIELD(merge, QPU_PACK));
+                bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
+                                QPU_GET_FIELD(merge, QPU_PACK));
+                if (!(merge & QPU_PM)) {
+                        /* Make sure we're not going to be putting a new
+                         * a-file packing on either half.
+                         */
+                        if (new_a_pack && writes_a_file(a))
+                                return 0;
+
+                        if (new_b_pack && writes_a_file(b))
+                                return 0;
+                } else {
+                        /* Make sure we're not going to be putting new MUL
+                         * packing on either half.
+                         */
+                        if (new_a_pack &&
+                            QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
+                                return 0;
+
+                        if (new_b_pack &&
+                            QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
+                                return 0;
+                }
 
-                if (new_b_unpack && reads_r4(b))
+                /* unpacking: Make sure that non-NOP unpacks agree, then deal
+                 * with special-case failing of adding a non-NOP unpack to
+                 * something with a NOP unpack.
+                 */
+                if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
                         return 0;
+                bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
+                                QPU_GET_FIELD(merge, QPU_UNPACK));
+                bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
+                                QPU_GET_FIELD(merge, QPU_UNPACK));
+                if (!(merge & QPU_PM)) {
+                        /* Make sure we're not going to be putting a new
+                         * a-file packing on either half.
+                         */
+                        if (new_a_unpack &&
+                            QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
+                                return 0;
+
+                        if (new_b_unpack &&
+                            QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
+                                return 0;
+                } else {
+                        /* Make sure we're not going to be putting new r4
+                         * unpack on either half.
+                         */
+                        if (new_a_unpack && reads_r4(a))
+                                return 0;
+
+                        if (new_b_unpack && reads_r4(b))
+                                return 0;
+                }
         }
 
         if (ok)
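
The restructure above splits qpu_merge_inst() on whether the two PM bits
agree. The new PM-mismatch branch boils down to the following predicate,
sketched here with the QPU_* field helpers this file already uses (pm_inst
is the instruction carrying the PM bit):

static bool
pm_mismatch_mergeable(uint64_t pm_inst, uint64_t other)
{
        /* The PM-less instruction may not pack or unpack at all. */
        if ((other & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
                return false;

        /* A PM pack uses the MUL unit, so the other half must not MUL. */
        if ((pm_inst & QPU_PACK_MASK) != 0 &&
            QPU_GET_FIELD(other, QPU_OP_MUL) != QPU_M_NOP)
                return false;

        /* A PM unpack uses r4, so the other half must not read r4. */
        if ((pm_inst & QPU_UNPACK_MASK) != 0 && reads_r4(other))
                return false;

        return true;
}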
index 2dee1d4..c4b52e1 100644 (file)
@@ -180,6 +180,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
        case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
        case PIPE_CAP_DEPTH_BOUNDS_TEST:
+       case PIPE_CAP_TGSI_TXQS:
                 return 0;
 
                 /* Stream output. */
index 9d8f5bd..6f9fe76 100644 (file)
@@ -592,6 +592,13 @@ struct pipe_context {
                                float *out_value);
 
    /**
+    * Query a timestamp in nanoseconds.  This is completely equivalent to
+    * pipe_screen::get_timestamp() but takes a context handle for drivers
+    * that require a context.
+    */
+   uint64_t (*get_timestamp)(struct pipe_context *);
+
+   /**
     * Flush the resource cache, so that the resource can be used
     * by an external client. Possible usage:
     * - flushing a resource before presenting it on the screen
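
A hedged sketch of the intended use (driver and helper names hypothetical):
a driver whose timestamp counter is only reachable through a context can
implement the screen-level query by forwarding to this new hook:

static uint64_t
example_screen_get_timestamp(struct pipe_screen *pscreen)
{
   /* Hypothetical helper returning a driver-private context. */
   struct pipe_context *pipe = example_screen_private_context(pscreen);

   return pipe->get_timestamp(pipe);
}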
index 88e37e9..47fa82a 100644 (file)
@@ -630,6 +630,7 @@ enum pipe_cap
    PIPE_CAP_TEXTURE_FLOAT_LINEAR,
    PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR,
    PIPE_CAP_DEPTH_BOUNDS_TEST,
+   PIPE_CAP_TGSI_TXQS,
 };
 
 #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
index a7b7b72..a22fb93 100644 (file)
@@ -173,6 +173,10 @@ struct pipe_screen {
     * Create a texture from a winsys_handle. The handle is often created in
     * another process by first creating a pipe texture and then calling
     * resource_get_handle.
+    *
+    * NOTE: in the case of DRM_API_HANDLE_TYPE_FD handles, the caller
+    * retains ownership of the FD.  (This is consistent with
+    * EGL_EXT_image_dma_buf_import)
     */
    struct pipe_resource * (*resource_from_handle)(struct pipe_screen *,
                                                  const struct pipe_resource *templat,
@@ -190,6 +194,10 @@ struct pipe_screen {
     * Get a winsys_handle from a texture. Some platforms/winsys requires
     * that the texture is created with a special usage flag like
     * DISPLAYTARGET or PRIMARY.
+    *
+    * NOTE: in the case of DRM_API_HANDLE_TYPE_FD handles, the caller
+    * takes ownership of the FD.  (This is consistent with
+    * EGL_MESA_image_dma_buf_export)
     */
    boolean (*resource_get_handle)(struct pipe_screen *,
                                  struct pipe_resource *tex,
index 6e07b2c..b36e0a3 100644 (file)
@@ -402,6 +402,7 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_ENDLOOP             101
 #define TGSI_OPCODE_ENDSUB              102
 #define TGSI_OPCODE_TXQ_LZ              103 /* TXQ for mipmap level 0 */
+#define TGSI_OPCODE_TXQS                104
                                 /* gap */
 #define TGSI_OPCODE_NOP                 107
 
index 1efb95b..9b3cd8b 100644 (file)
 using namespace clover;
 
 namespace {
-   const cl_mem_flags dev_access_flags =
-      CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
-   const cl_mem_flags host_ptr_flags =
-      CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;
-   const cl_mem_flags host_access_flags =
-      CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
-   const cl_mem_flags all_mem_flags =
-      dev_access_flags | host_ptr_flags | host_access_flags;
-
-   void
-   validate_flags(cl_mem_flags flags, cl_mem_flags valid) {
-      if ((flags & ~valid) ||
-          util_bitcount(flags & dev_access_flags) > 1 ||
-          util_bitcount(flags & host_access_flags) > 1)
+   cl_mem_flags
+   validate_flags(cl_mem d_parent, cl_mem_flags d_flags) {
+      const cl_mem_flags dev_access_flags =
+         CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
+      const cl_mem_flags host_ptr_flags =
+         CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;
+      const cl_mem_flags host_access_flags =
+         CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
+      const cl_mem_flags valid_flags =
+         dev_access_flags | host_access_flags | (d_parent ? 0 : host_ptr_flags);
+
+      if ((d_flags & ~valid_flags) ||
+          util_bitcount(d_flags & dev_access_flags) > 1 ||
+          util_bitcount(d_flags & host_access_flags) > 1)
          throw error(CL_INVALID_VALUE);
 
-      if ((flags & CL_MEM_USE_HOST_PTR) &&
-          (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)))
+      if ((d_flags & CL_MEM_USE_HOST_PTR) &&
+          (d_flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)))
          throw error(CL_INVALID_VALUE);
+
+      if (d_parent) {
+         const auto &parent = obj(d_parent);
+         const cl_mem_flags flags = (d_flags |
+                                     (d_flags & dev_access_flags ? 0 :
+                                      parent.flags() & dev_access_flags) |
+                                     (d_flags & host_access_flags ? 0 :
+                                      parent.flags() & host_access_flags) |
+                                     (parent.flags() & host_ptr_flags));
+
+         if (~flags & parent.flags() &
+             ((dev_access_flags & ~CL_MEM_READ_WRITE) | host_access_flags))
+            throw error(CL_INVALID_VALUE);
+
+         return flags;
+
+      } else {
+         return d_flags | (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
+      }
    }
 }
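
The parent-inheritance rule that validate_flags() now applies for sub-buffers
can be stated on its own. A sketch in plain C against the standard CL headers
(clover itself keeps this logic inside validate_flags()):

#include <CL/cl.h>

static cl_mem_flags
inherit_mem_flags(cl_mem_flags d_flags, cl_mem_flags parent_flags)
{
   const cl_mem_flags dev_access_flags =
      CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
   const cl_mem_flags host_access_flags =
      CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
   const cl_mem_flags host_ptr_flags =
      CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;

   /* A flag group the caller left empty is inherited from the parent;
    * host-pointer flags always come from the parent.
    */
   return d_flags |
          (d_flags & dev_access_flags  ? 0 : parent_flags & dev_access_flags) |
          (d_flags & host_access_flags ? 0 : parent_flags & host_access_flags) |
          (parent_flags & host_ptr_flags);
}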
 
 CLOVER_API cl_mem
 clCreateBuffer(cl_context d_ctx, cl_mem_flags d_flags, size_t size,
                void *host_ptr, cl_int *r_errcode) try {
-   const cl_mem_flags flags = d_flags |
-      (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
+   const cl_mem_flags flags = validate_flags(NULL, d_flags);
    auto &ctx = obj(d_ctx);
 
-   validate_flags(d_flags, all_mem_flags);
-
    if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
                                        CL_MEM_COPY_HOST_PTR)))
       throw error(CL_INVALID_HOST_PTR);
@@ -82,16 +98,7 @@ clCreateSubBuffer(cl_mem d_mem, cl_mem_flags d_flags,
                   cl_buffer_create_type op,
                   const void *op_info, cl_int *r_errcode) try {
    auto &parent = obj<root_buffer>(d_mem);
-   const cl_mem_flags flags = d_flags |
-      (d_flags & dev_access_flags ? 0 : parent.flags() & dev_access_flags) |
-      (d_flags & host_access_flags ? 0 : parent.flags() & host_access_flags) |
-      (parent.flags() & host_ptr_flags);
-
-   validate_flags(d_flags, dev_access_flags | host_access_flags);
-
-   if (~flags & parent.flags() &
-       ((dev_access_flags & ~CL_MEM_READ_WRITE) | host_access_flags))
-      throw error(CL_INVALID_VALUE);
+   const cl_mem_flags flags = validate_flags(d_mem, d_flags);
 
    if (op == CL_BUFFER_CREATE_TYPE_REGION) {
       auto reg = reinterpret_cast<const cl_buffer_region *>(op_info);
@@ -117,35 +124,90 @@ clCreateSubBuffer(cl_mem d_mem, cl_mem_flags d_flags,
 }
 
 CLOVER_API cl_mem
-clCreateImage2D(cl_context d_ctx, cl_mem_flags d_flags,
-                const cl_image_format *format,
-                size_t width, size_t height, size_t row_pitch,
-                void *host_ptr, cl_int *r_errcode) try {
-   const cl_mem_flags flags = d_flags |
-      (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
+clCreateImage(cl_context d_ctx, cl_mem_flags d_flags,
+              const cl_image_format *format,
+              const cl_image_desc *desc,
+              void *host_ptr, cl_int *r_errcode) try {
    auto &ctx = obj(d_ctx);
 
-   validate_flags(d_flags, all_mem_flags);
-
    if (!any_of(std::mem_fn(&device::image_support), ctx.devices()))
       throw error(CL_INVALID_OPERATION);
 
    if (!format)
       throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
 
-   if (width < 1 || height < 1)
-      throw error(CL_INVALID_IMAGE_SIZE);
+   if (!desc)
+      throw error(CL_INVALID_IMAGE_DESCRIPTOR);
 
-   if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
-                                       CL_MEM_COPY_HOST_PTR)))
+   if (desc->image_array_size == 0 &&
+       (desc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
+        desc->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY))
+      throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+   if (!host_ptr &&
+       (desc->image_row_pitch || desc->image_slice_pitch))
+      throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+   if (desc->num_mip_levels || desc->num_samples)
+      throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+   if (bool(desc->buffer) != (desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER))
+      throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+   if (bool(host_ptr) != bool(d_flags & (CL_MEM_USE_HOST_PTR |
+                                         CL_MEM_COPY_HOST_PTR)))
       throw error(CL_INVALID_HOST_PTR);
 
-   if (!supported_formats(ctx, CL_MEM_OBJECT_IMAGE2D).count(*format))
+   const cl_mem_flags flags = validate_flags(desc->buffer, d_flags);
+
+   if (!supported_formats(ctx, desc->image_type).count(*format))
       throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
 
    ret_error(r_errcode, CL_SUCCESS);
-   return new image2d(ctx, flags, format, width, height,
-                      row_pitch, host_ptr);
+
+   switch (desc->image_type) {
+   case CL_MEM_OBJECT_IMAGE2D:
+      if (!desc->image_width || !desc->image_height)
+         throw error(CL_INVALID_IMAGE_SIZE);
+
+      if (all_of([=](const device &dev) {
+               const size_t max = 1 << dev.max_image_levels_2d();
+               return (desc->image_width > max ||
+                       desc->image_height > max);
+            }, ctx.devices()))
+         throw error(CL_INVALID_IMAGE_SIZE);
+
+      return new image2d(ctx, flags, format,
+                         desc->image_width, desc->image_height,
+                         desc->image_row_pitch, host_ptr);
+
+   case CL_MEM_OBJECT_IMAGE3D:
+      if (!desc->image_width || !desc->image_height || !desc->image_depth)
+         throw error(CL_INVALID_IMAGE_SIZE);
+
+      if (all_of([=](const device &dev) {
+               const size_t max = 1 << dev.max_image_levels_3d();
+               return (desc->image_width > max ||
+                       desc->image_height > max ||
+                       desc->image_depth > max);
+            }, ctx.devices()))
+         throw error(CL_INVALID_IMAGE_SIZE);
+
+      return new image3d(ctx, flags, format,
+                         desc->image_width, desc->image_height,
+                         desc->image_depth, desc->image_row_pitch,
+                         desc->image_slice_pitch, host_ptr);
+
+   case CL_MEM_OBJECT_IMAGE1D:
+   case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+   case CL_MEM_OBJECT_IMAGE1D_BUFFER:
+   case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+      // XXX - Not implemented.
+      throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
+
+   default:
+      throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+   }
 
 } catch (error &e) {
    ret_error(r_errcode, e);
@@ -153,40 +215,26 @@ clCreateImage2D(cl_context d_ctx, cl_mem_flags d_flags,
 }
 
 CLOVER_API cl_mem
+clCreateImage2D(cl_context d_ctx, cl_mem_flags d_flags,
+                const cl_image_format *format,
+                size_t width, size_t height, size_t row_pitch,
+                void *host_ptr, cl_int *r_errcode) {
+   const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0,
+                                row_pitch, 0, 0, 0, NULL };
+
+   return clCreateImage(d_ctx, d_flags, format, &desc, host_ptr, r_errcode);
+}
+
+CLOVER_API cl_mem
 clCreateImage3D(cl_context d_ctx, cl_mem_flags d_flags,
                 const cl_image_format *format,
                 size_t width, size_t height, size_t depth,
                 size_t row_pitch, size_t slice_pitch,
-                void *host_ptr, cl_int *r_errcode) try {
-   const cl_mem_flags flags = d_flags |
-      (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
-   auto &ctx = obj(d_ctx);
-
-   validate_flags(d_flags, all_mem_flags);
-
-   if (!any_of(std::mem_fn(&device::image_support), ctx.devices()))
-      throw error(CL_INVALID_OPERATION);
+                void *host_ptr, cl_int *r_errcode) {
+   const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE3D, width, height, depth, 0,
+                                row_pitch, slice_pitch, 0, 0, NULL };
 
-   if (!format)
-      throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
-
-   if (width < 1 || height < 1 || depth < 2)
-      throw error(CL_INVALID_IMAGE_SIZE);
-
-   if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
-                                       CL_MEM_COPY_HOST_PTR)))
-      throw error(CL_INVALID_HOST_PTR);
-
-   if (!supported_formats(ctx, CL_MEM_OBJECT_IMAGE3D).count(*format))
-      throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
-
-   ret_error(r_errcode, CL_SUCCESS);
-   return new image3d(ctx, flags, format, width, height, depth,
-                      row_pitch, slice_pitch, host_ptr);
-
-} catch (error &e) {
-   ret_error(r_errcode, e);
-   return NULL;
+   return clCreateImage(d_ctx, d_flags, format, &desc, host_ptr, r_errcode);
 }
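
For reference, a caller-side sketch of the consolidated CL 1.2 entry point;
`context` is assumed to be a valid cl_context, and the format and sizes are
arbitrary example values (error handling omitted):

cl_image_format fmt = { CL_RGBA, CL_UNORM_INT8 };
cl_image_desc desc;
memset(&desc, 0, sizeof(desc));           /* zero pitches, no buffer object */
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = 256;
desc.image_height = 256;

cl_int err;
cl_mem img = clCreateImage(context, CL_MEM_READ_WRITE, &fmt, &desc,
                           NULL /* host_ptr */, &err);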
 
 CLOVER_API cl_int
@@ -196,7 +244,7 @@ clGetSupportedImageFormats(cl_context d_ctx, cl_mem_flags flags,
    auto &ctx = obj(d_ctx);
    auto formats = supported_formats(ctx, type);
 
-   validate_flags(flags, all_mem_flags);
+   validate_flags(NULL, flags);
 
    if (r_buf && !r_count)
       throw error(CL_INVALID_VALUE);
@@ -352,16 +400,6 @@ clSetMemObjectDestructorCallback(cl_mem d_mem,
    return e.get();
 }
 
-CLOVER_API cl_mem
-clCreateImage(cl_context d_ctx, cl_mem_flags flags,
-              const cl_image_format *format,
-              const cl_image_desc *image_desc,
-              void *host_ptr, cl_int *r_errcode) {
-   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
-   ret_error(r_errcode, CL_INVALID_OPERATION);
-   return NULL;
-}
-
 CLOVER_API cl_int
 clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer,
                     const void *pattern, size_t pattern_size,
index 7c23a27..d74b50d 100644 (file)
@@ -465,7 +465,7 @@ namespace {
             const bool is_write_only = access_qual == "write_only";
             const bool is_read_only = access_qual == "read_only";
 
-            typename module::argument::type marg_type;
+            enum module::argument::type marg_type;
             if (is_image2d && is_read_only) {
                marg_type = module::argument::image2d_rd;
             } else if (is_image2d && is_write_only) {
index 82a0ce0..d2389c8 100644 (file)
@@ -9,6 +9,7 @@ env.Append(CPPPATH = [
     '#/src',
     '#/src/mapi',
     '#/src/mesa',
+    '#/include/HaikuGL',
 ])
 
 sources = [
index 7cfd1e1..bcb27ea 100644 (file)
@@ -78,10 +78,12 @@ static const struct xa_composite_blend xa_blends[] = {
       0, 0, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ONE},
 };
 
-
 /*
- * The alpha value stored in a luminance texture is read by the
- * hardware as color.
+ * The alpha value stored in an L8 texture is read by the
+ * hardware as color, and R8 is read as red. The source alpha value
+ * at the end of the fragment shader is stored in all color channels,
+ * so the correct approach is to blend using DST_COLOR instead of
+ * DST_ALPHA and then output any color channel (L8) or the red channel (R8).
  */
 static unsigned
 xa_convert_blend_for_luminance(unsigned factor)
@@ -97,7 +99,6 @@ xa_convert_blend_for_luminance(unsigned factor)
     return factor;
 }
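
A hedged sketch of the DST_ALPHA -> DST_COLOR rewrite the comment above
describes; the real mapping lives in xa_convert_blend_for_luminance(), whose
body is outside this hunk, so the function below is illustrative only
(standard gallium blend-factor enums assumed):

static unsigned
convert_blend_for_single_channel(unsigned factor)
{
    switch (factor) {
    case PIPE_BLENDFACTOR_DST_ALPHA:
        return PIPE_BLENDFACTOR_DST_COLOR;   /* alpha lives in the color channels */
    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
        return PIPE_BLENDFACTOR_INV_DST_COLOR;
    default:
        return factor;
    }
}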
 
-
 static boolean
 blend_for_op(struct xa_composite_blend *blend,
             enum xa_composite_op op,
@@ -131,9 +132,10 @@ blend_for_op(struct xa_composite_blend *blend,
     if (!dst_pic->srf)
        return supported;
 
-    if (dst_pic->srf->tex->format == PIPE_FORMAT_L8_UNORM) {
-       blend->rgb_src = xa_convert_blend_for_luminance(blend->rgb_src);
-       blend->rgb_dst = xa_convert_blend_for_luminance(blend->rgb_dst);
+    if ((dst_pic->srf->tex->format == PIPE_FORMAT_L8_UNORM ||
+         dst_pic->srf->tex->format == PIPE_FORMAT_R8_UNORM)) {
+        blend->rgb_src = xa_convert_blend_for_luminance(blend->rgb_src);
+        blend->rgb_dst = xa_convert_blend_for_luminance(blend->rgb_dst);
     }
 
     /*
@@ -298,7 +300,8 @@ picture_format_fixups(struct xa_picture *src_pic,
        ret |= mask ? FS_MASK_SET_ALPHA : FS_SRC_SET_ALPHA;
 
     if (src_hw_format == src_pic_format) {
-       if (src->tex->format == PIPE_FORMAT_L8_UNORM)
+       if (src->tex->format == PIPE_FORMAT_L8_UNORM ||
+           src->tex->format == PIPE_FORMAT_R8_UNORM)
            return ((mask) ? FS_MASK_LUMINANCE : FS_SRC_LUMINANCE);
 
        return ret;
@@ -372,7 +375,8 @@ bind_shaders(struct xa_context *ctx, const struct xa_composite *comp)
        fs_traits |= picture_format_fixups(mask_pic, 1);
     }
 
-    if (ctx->srf->format == PIPE_FORMAT_L8_UNORM)
+    if (ctx->srf->format == PIPE_FORMAT_L8_UNORM ||
+        ctx->srf->format == PIPE_FORMAT_R8_UNORM)
        fs_traits |= FS_DST_LUMINANCE;
 
     shader = xa_shaders_get(ctx->shaders, vs_traits, fs_traits);
index fda07e5..bc55f87 100644 (file)
@@ -465,9 +465,11 @@ renderer_copy_prepare(struct xa_context *r,
     }
 
     /* shaders */
-    if (src_texture->format == PIPE_FORMAT_L8_UNORM)
+    if (src_texture->format == PIPE_FORMAT_L8_UNORM ||
+        src_texture->format == PIPE_FORMAT_R8_UNORM)
        fs_traits |= FS_SRC_LUMINANCE;
-    if (dst_surface->format == PIPE_FORMAT_L8_UNORM)
+    if (dst_surface->format == PIPE_FORMAT_L8_UNORM ||
+        dst_surface->format == PIPE_FORMAT_R8_UNORM)
        fs_traits |= FS_DST_LUMINANCE;
     if (xa_format_a(dst_xa_format) != 0 &&
        xa_format_a(src_xa_format) == 0)
index 21ca57c..4fdbdc9 100644 (file)
@@ -82,7 +82,7 @@ static const unsigned int stype_bind[XA_LAST_SURFACE_TYPE] = { 0,
 };
 
 static struct xa_format_descriptor
-xa_get_pipe_format(enum xa_formats xa_format)
+xa_get_pipe_format(struct xa_tracker *xa, enum xa_formats xa_format)
 {
     struct xa_format_descriptor fdesc;
 
@@ -102,7 +102,13 @@ xa_get_pipe_format(enum xa_formats xa_format)
        fdesc.format = PIPE_FORMAT_B5G5R5A1_UNORM;
        break;
     case xa_format_a8:
-       fdesc.format = PIPE_FORMAT_L8_UNORM;
+        if (xa->screen->is_format_supported(xa->screen, PIPE_FORMAT_R8_UNORM,
+                                            PIPE_TEXTURE_2D, 0,
+                                            stype_bind[xa_type_a] |
+                                            PIPE_BIND_RENDER_TARGET))
+            fdesc.format = PIPE_FORMAT_R8_UNORM;
+        else
+            fdesc.format = PIPE_FORMAT_L8_UNORM;
        break;
     case xa_format_z24:
        fdesc.format = PIPE_FORMAT_Z24X8_UNORM;
@@ -126,7 +132,12 @@ xa_get_pipe_format(enum xa_formats xa_format)
        fdesc.format = PIPE_FORMAT_S8_UINT_Z24_UNORM;
        break;
     case xa_format_yuv8:
-       fdesc.format = PIPE_FORMAT_L8_UNORM;
+        if (xa->screen->is_format_supported(xa->screen, PIPE_FORMAT_R8_UNORM,
+                                            PIPE_TEXTURE_2D, 0,
+                                            stype_bind[xa_type_yuv_component]))
+            fdesc.format = PIPE_FORMAT_R8_UNORM;
+        else
+            fdesc.format = PIPE_FORMAT_L8_UNORM;
        break;
     default:
        fdesc.xa_format = xa_format_unknown;
@@ -184,7 +195,8 @@ xa_tracker_create(int drm_fd)
        for (i = 0; i < num_preferred[stype]; ++i) {
            xa_format = preferred[stype][i];
 
-           struct xa_format_descriptor fdesc = xa_get_pipe_format(xa_format);
+           struct xa_format_descriptor fdesc =
+                xa_get_pipe_format(xa, xa_format);
 
            if (xa->screen->is_format_supported(xa->screen, fdesc.format,
                                                PIPE_TEXTURE_2D, 0, bind)) {
@@ -259,7 +271,7 @@ xa_get_format_stype_depth(struct xa_tracker *xa,
     int found = 0;
 
     for (i = xa->format_map[stype][0]; i <= xa->format_map[stype][1]; ++i) {
-       fdesc = xa_get_pipe_format(xa->supported_formats[i]);
+       fdesc = xa_get_pipe_format(xa, xa->supported_formats[i]);
        if (fdesc.xa_format != xa_format_unknown &&
            xa_format_depth(fdesc.xa_format) == depth) {
            found = 1;
@@ -277,7 +289,7 @@ XA_EXPORT int
 xa_format_check_supported(struct xa_tracker *xa,
                          enum xa_formats xa_format, unsigned int flags)
 {
-    struct xa_format_descriptor fdesc = xa_get_pipe_format(xa_format);
+    struct xa_format_descriptor fdesc = xa_get_pipe_format(xa, xa_format);
     unsigned int bind;
 
     if (fdesc.xa_format == xa_format_unknown)
@@ -298,6 +310,20 @@ xa_format_check_supported(struct xa_tracker *xa,
     return XA_ERR_NONE;
 }
 
+static unsigned
+handle_type(enum xa_handle_type type)
+{
+    switch (type) {
+    case xa_handle_type_kms:
+       return DRM_API_HANDLE_TYPE_KMS;
+    case xa_handle_type_fd:
+        return DRM_API_HANDLE_TYPE_FD;
+    case xa_handle_type_shared:
+    default:
+       return DRM_API_HANDLE_TYPE_SHARED;
+    }
+}
+
 static struct xa_surface *
 surface_create(struct xa_tracker *xa,
                  int width,
@@ -314,7 +340,7 @@ surface_create(struct xa_tracker *xa,
     if (xa_format == xa_format_unknown)
        fdesc = xa_get_format_stype_depth(xa, stype, depth);
     else
-       fdesc = xa_get_pipe_format(xa_format);
+       fdesc = xa_get_pipe_format(xa, xa_format);
 
     if (fdesc.xa_format == xa_format_unknown)
        return NULL;
@@ -380,9 +406,24 @@ xa_surface_from_handle(struct xa_tracker *xa,
                  enum xa_formats xa_format, unsigned int flags,
                  uint32_t handle, uint32_t stride)
 {
+    return xa_surface_from_handle2(xa, width, height, depth, stype, xa_format,
+                                   DRM_API_HANDLE_TYPE_SHARED, flags, handle,
+                                   stride);
+}
+
+XA_EXPORT struct xa_surface *
+xa_surface_from_handle2(struct xa_tracker *xa,
+                        int width,
+                        int height,
+                        int depth,
+                        enum xa_surface_type stype,
+                        enum xa_formats xa_format, unsigned int flags,
+                        enum xa_handle_type type,
+                        uint32_t handle, uint32_t stride)
+{
     struct winsys_handle whandle;
     memset(&whandle, 0, sizeof(whandle));
-    whandle.type = DRM_API_HANDLE_TYPE_SHARED;
+    whandle.type = handle_type(type);
     whandle.handle = handle;
     whandle.stride = stride;
     return surface_create(xa, width, height, depth, stype, xa_format, flags, &whandle);
@@ -411,7 +452,7 @@ xa_surface_redefine(struct xa_surface *srf,
     if (xa_format == xa_format_unknown)
        fdesc = xa_get_format_stype_depth(xa, stype, depth);
     else
-       fdesc = xa_get_pipe_format(xa_format);
+       fdesc = xa_get_pipe_format(xa, xa_format);
 
     if (width == template->width0 && height == template->height0 &&
        template->format == fdesc.format &&
@@ -511,15 +552,7 @@ xa_surface_handle(struct xa_surface *srf,
     boolean res;
 
     memset(&whandle, 0, sizeof(whandle));
-    switch (type) {
-    case xa_handle_type_kms:
-       whandle.type = DRM_API_HANDLE_TYPE_KMS;
-       break;
-    case xa_handle_type_shared:
-    default:
-       whandle.type = DRM_API_HANDLE_TYPE_SHARED;
-       break;
-    }
+    whandle.type = handle_type(type);
     res = screen->resource_get_handle(screen, srf->tex, &whandle);
     if (!res)
        return -XA_ERR_INVAL;
index 5c6435e..44b3eb5 100644 (file)
@@ -37,7 +37,7 @@
 #include <stdint.h>
 
 #define XA_TRACKER_VERSION_MAJOR 2
-#define XA_TRACKER_VERSION_MINOR 2
+#define XA_TRACKER_VERSION_MINOR 3
 #define XA_TRACKER_VERSION_PATCH 0
 
 #define XA_FLAG_SHARED         (1 << 0)
@@ -149,6 +149,7 @@ struct xa_box {
 enum xa_handle_type {
     xa_handle_type_shared,
     xa_handle_type_kms,
+    xa_handle_type_fd,
 };
 
 extern void xa_tracker_version(int *major, int *minor, int *patch);
@@ -177,6 +178,17 @@ extern struct xa_surface * xa_surface_from_handle(struct xa_tracker *xa,
                                            enum xa_formats pform,
                                            unsigned int flags,
                                            uint32_t handle, uint32_t stride);
+extern struct xa_surface *
+xa_surface_from_handle2(struct xa_tracker *xa,
+                        int width,
+                        int height,
+                        int depth,
+                        enum xa_surface_type stype,
+                        enum xa_formats xa_format,
+                        unsigned int flags,
+                        enum xa_handle_type type,
+                        uint32_t handle,
+                        uint32_t stride);
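
A usage sketch for the new entry point, importing a dma-buf file descriptor;
the surface type, depth, flags, and stride values here are assumptions for
illustration:

struct xa_surface *srf =
    xa_surface_from_handle2(xa, width, height, 24,
                            xa_type_argb, xa_format_unknown,
                            XA_FLAG_SHARED, xa_handle_type_fd,
                            (uint32_t) dmabuf_fd, stride);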
 
 enum xa_formats xa_surface_format(const struct xa_surface *srf);
 
index 8e26fb9..1fdf18b 100644 (file)
@@ -5,6 +5,7 @@
                nouveau_drm_screen_create;
                radeon_drm_winsys_create;
                amdgpu_winsys_create;
+               fd_drm_screen_create;
        local:
                *;
 };
index 9c7f422..50ccc99 100644 (file)
@@ -23,6 +23,7 @@
                xa_surface_dma;
                xa_surface_format;
                xa_surface_from_handle;
+               xa_surface_from_handle2;
                xa_surface_handle;
                xa_surface_map;
                xa_surface_redefine;
index 50c42e3..fe55dc3 100644 (file)
@@ -684,6 +684,9 @@ static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
    enum amdgpu_bo_handle_type type;
    int r;
 
+   if ((void*)bo != (void*)buffer)
+      pb_cache_manager_remove_buffer(buffer);
+
    switch (whandle->type) {
    case DRM_API_HANDLE_TYPE_SHARED:
       type = amdgpu_bo_handle_type_gem_flink_name;
index 0f42298..84fc40b 100644 (file)
@@ -200,17 +200,19 @@ amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx)
 
 static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
 {
-   /* The maximum size is 4MB - 1B, which is unaligned.
-    * Use aligned size 4MB - 16B. */
-   const unsigned max_ib_size = (1024 * 1024 - 16) * 4;
-   const unsigned min_ib_size = 24 * 1024 * 4;
+   /* Small IBs are better than big IBs, because the GPU goes idle quicker
+    * and there is less waiting for buffers and fences. Proof:
+    *   http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
+    */
+   const unsigned buffer_size = 128 * 1024 * 4;
+   const unsigned ib_size = 20 * 1024 * 4;
 
    cs->base.cdw = 0;
    cs->base.buf = NULL;
 
    /* Allocate a new buffer for IBs if the current buffer is all used. */
    if (!cs->big_ib_buffer ||
-       cs->used_ib_space + min_ib_size > cs->big_ib_buffer->size) {
+       cs->used_ib_space + ib_size > cs->big_ib_buffer->size) {
       struct radeon_winsys *ws = &cs->ctx->ws->base;
       struct radeon_winsys_cs_handle *winsys_bo;
 
@@ -219,7 +221,7 @@ static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
       cs->ib_mapped = NULL;
       cs->used_ib_space = 0;
 
-      cs->big_ib_buffer = ws->buffer_create(ws, max_ib_size,
+      cs->big_ib_buffer = ws->buffer_create(ws, buffer_size,
                                             4096, true,
                                             RADEON_DOMAIN_GTT,
                                             RADEON_FLAG_CPU_ACCESS);
@@ -239,7 +241,7 @@ static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
 
    cs->ib.ib_mc_address = cs->big_ib_winsys_buffer->va + cs->used_ib_space;
    cs->base.buf = (uint32_t*)(cs->ib_mapped + cs->used_ib_space);
-   cs->base.max_dw = (cs->big_ib_buffer->size - cs->used_ib_space) / 4;
+   cs->base.max_dw = ib_size / 4;
    return true;
 }
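
The constants above work out as follows (a quick arithmetic check, not code
from the commit):

/* buffer_size = 128 * 1024 * 4 = 524288 bytes (512 KiB IB pool)  */
/* ib_size     =  20 * 1024 * 4 =  81920 bytes (80 KiB per IB)    */
/* max_dw      = ib_size / 4    =  20480 dwords available per IB  */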
 
@@ -599,25 +601,13 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
    switch (cs->base.ring_type) {
    case RING_DMA:
       /* pad DMA ring to 8 DWs */
-      if (ws->info.chip_class <= SI) {
-         while (rcs->cdw & 7)
-            OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
-      } else {
-         while (rcs->cdw & 7)
-            OUT_CS(&cs->base, 0x00000000); /* NOP packet */
-      }
+      while (rcs->cdw & 7)
+         OUT_CS(&cs->base, 0x00000000); /* NOP packet */
       break;
    case RING_GFX:
-      /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements
-             * r6xx, requires at least 4 dw alignment to avoid a hw bug.
-             */
-      if (ws->info.chip_class <= SI) {
-         while (rcs->cdw & 7)
-            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
-      } else {
-         while (rcs->cdw & 7)
-            OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
-      }
+      /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
+      while (rcs->cdw & 7)
+         OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
       break;
    case RING_UVD:
       while (rcs->cdw & 15)
index 875dcd0..c877249 100644 (file)
@@ -110,7 +110,7 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
    struct amdgpu_heap_info vram, gtt;
    struct drm_amdgpu_info_hw_ip dma = {}, uvd = {}, vce = {};
    uint32_t vce_version = 0, vce_feature = 0;
-   int r;
+   int r, i, j;
 
    /* Query hardware and driver information. */
    r = amdgpu_query_gpu_info(ws->dev, &ws->amdinfo);
@@ -248,7 +248,6 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
    ws->info.vram_size = vram.heap_size;
    /* convert the shader clock from KHz to MHz */
    ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
-   ws->info.max_compute_units = 1; /* TODO */
    ws->info.max_se = ws->amdinfo.num_shader_engines;
    ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
    ws->info.has_uvd = uvd.available_rings != 0;
@@ -263,6 +262,18 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
    ws->info.r600_virtual_address = TRUE;
    ws->info.r600_has_dma = dma.available_rings != 0;
 
+   /* Guess what the maximum compute unit number is by looking at the mask
+    * of enabled CUs.
+    */
+   for (i = 0; i < ws->info.max_se; i++)
+      for (j = 0; j < ws->info.max_sh_per_se; j++) {
+         unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]);
+
+         if (ws->info.max_compute_units < max)
+            ws->info.max_compute_units = max;
+      }
+   ws->info.max_compute_units *= ws->info.max_se * ws->info.max_sh_per_se;
+
    memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
           sizeof(ws->amdinfo.gb_tile_mode));
    ws->info.si_tile_mode_array_valid = TRUE;
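
util_last_bit() from util/u_math.h returns one past the index of the highest
set bit, so the guess above behaves as follows for an assumed mask value:

/* cu_bitmap[i][j] == 0x3ff -> util_last_bit() == 10 CUs in that SH;  */
/* with max_se == 4 and max_sh_per_se == 1 the loop then reports      */
/* 10 * 4 * 1 = 40 compute units.                                     */
assert(util_last_bit(0x3ff) == 10);
assert(util_last_bit(0) == 0);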
index 9eb9744..e4785f8 100644 (file)
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include <sys/stat.h>
+
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 #include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
+#include "util/u_hash_table.h"
+#include "os/os_thread.h"
 
 #include "freedreno_drm_public.h"
 
 #include "freedreno/freedreno_screen.h"
 
+static struct util_hash_table *fd_tab = NULL;
+
+pipe_static_mutex(fd_screen_mutex);
+
+static void
+fd_drm_screen_destroy(struct pipe_screen *pscreen)
+{
+       struct fd_screen *screen = fd_screen(pscreen);
+       boolean destroy;
+
+       pipe_mutex_lock(fd_screen_mutex);
+       destroy = --screen->refcnt == 0;
+       if (destroy) {
+               int fd = fd_device_fd(screen->dev);
+               util_hash_table_remove(fd_tab, intptr_to_pointer(fd));
+       }
+       pipe_mutex_unlock(fd_screen_mutex);
+
+       if (destroy) {
+               pscreen->destroy = screen->winsys_priv;
+               pscreen->destroy(pscreen);
+       }
+}
+
+static unsigned hash_fd(void *key)
+{
+       int fd = pointer_to_intptr(key);
+       struct stat stat;
+       fstat(fd, &stat);
+
+       return stat.st_dev ^ stat.st_ino ^ stat.st_rdev;
+}
+
+static int compare_fd(void *key1, void *key2)
+{
+       int fd1 = pointer_to_intptr(key1);
+       int fd2 = pointer_to_intptr(key2);
+       struct stat stat1, stat2;
+       fstat(fd1, &stat1);
+       fstat(fd2, &stat2);
+
+       return stat1.st_dev != stat2.st_dev ||
+                       stat1.st_ino != stat2.st_ino ||
+                       stat1.st_rdev != stat2.st_rdev;
+}
+
 struct pipe_screen *
 fd_drm_screen_create(int fd)
 {
-       struct fd_device *dev = fd_device_new_dup(fd);
-       if (!dev)
-               return NULL;
-       return fd_screen_create(dev);
+       struct pipe_screen *pscreen = NULL;
+
+       pipe_mutex_lock(fd_screen_mutex);
+       if (!fd_tab) {
+               fd_tab = util_hash_table_create(hash_fd, compare_fd);
+               if (!fd_tab)
+                       goto unlock;
+       }
+
+       pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
+       if (pscreen) {
+               fd_screen(pscreen)->refcnt++;
+       } else {
+               struct fd_device *dev = fd_device_new_dup(fd);
+               if (!dev)
+                       goto unlock;
+
+               pscreen = fd_screen_create(dev);
+               if (pscreen) {
+                       int fd = fd_device_fd(dev);
+
+                       util_hash_table_set(fd_tab, intptr_to_pointer(fd), pscreen);
+
+                       /* Bit of a hack: to avoid a circular linkage
+                        * dependency, i.e. the pipe driver having to call
+                        * into the winsys, we override the pipe driver's
+                        * screen->destroy():
+                        */
+                       fd_screen(pscreen)->winsys_priv = pscreen->destroy;
+                       pscreen->destroy = fd_drm_screen_destroy;
+               }
+       }
+
+unlock:
+       pipe_mutex_unlock(fd_screen_mutex);
+       return pscreen;
 }
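
Because hash_fd() and compare_fd() key on st_dev/st_ino/st_rdev rather than
the raw descriptor, duplicated fds for the same DRM device resolve to one
refcounted screen. A usage sketch with assumed fds:

int fd2 = dup(fd);                           /* same device node, new fd */
struct pipe_screen *a = fd_drm_screen_create(fd);
struct pipe_screen *b = fd_drm_screen_create(fd2);
assert(a == b);                              /* shared via fd_tab, refcnt == 2 */
a->destroy(a);                               /* drops the refcount to 1 */
b->destroy(b);                               /* frees the screen and its fd_tab entry */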
index 600ced9..2878c8f 100644 (file)
@@ -1150,6 +1150,9 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
 
     memset(&flink, 0, sizeof(flink));
 
+    if ((void*)bo != (void*)buffer)
+       pb_cache_manager_remove_buffer(buffer);
+
     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
         if (!bo->flink_name) {
             flink.handle = bo->handle;
index 341af55..2c4f990 100644 (file)
@@ -466,14 +466,10 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
         }
         break;
     case RING_GFX:
-        /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements
+        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
          * r6xx, requires at least 4 dw alignment to avoid a hw bug.
-         * hawaii with old firmware needs type2 nop packet.
-         * accel_working2 with value 3 indicates the new firmware.
          */
-        if (cs->ws->info.chip_class <= SI ||
-            (cs->ws->info.family == CHIP_HAWAII &&
-             cs->ws->accel_working2 < 3)) {
+        if (cs->ws->info.gfx_ib_pad_with_type2) {
             while (rcs->cdw & 7)
                 OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
         } else {
index 384d728..5d440eb 100644 (file)
@@ -469,6 +469,13 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
         ws->info.cik_macrotile_mode_array_valid = TRUE;
     }
 
+    /* Hawaii with old firmware needs type2 nop packet.
+     * accel_working2 with value 3 indicates the new firmware.
+     */
+    ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= SI ||
+                                    (ws->info.family == CHIP_HAWAII &&
+                                     ws->accel_working2 < 3);
+
     return TRUE;
 }
 
index ab2b932..f82b009 100644 (file)
@@ -17,4 +17,6 @@ C_SOURCES := \
        vmw_surface.c \
        vmw_surface.h \
        vmw_shader.c \
-       vmw_shader.h
+       vmw_shader.h \
+       vmw_query.c \
+       vmw_query.h
index 5ef95f3..c1b9eb9 100644 (file)
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2007-2010 VMware, Inc.
+ * Copyright 2007-2015 VMware, Inc.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -339,6 +339,7 @@ fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
          /* TODO: remove subsequent buffers with the same fence? */
 
          assert(!destroyed);
+         (void) destroyed;
 
          fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE;
 
@@ -660,6 +661,7 @@ fenced_buffer_fence(struct pb_buffer *buf,
          boolean destroyed;
          destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf);
          assert(!destroyed);
+         (void) destroyed;
       }
       if (fence) {
          ops->fence_reference(ops, &fenced_buf->fence, fence);
index e98c89d..fa2dcef 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2010 VMware, Inc.  All rights reserved.
+ * Copyright 2010-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
index 7eab3d0..c082dcc 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
index b9cbb25..6e1151e 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
index 31bedde..1675af4 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -41,6 +41,7 @@
 #include "vmw_surface.h"
 #include "vmw_fence.h"
 #include "vmw_shader.h"
+#include "vmw_query.h"
 
 #define VMW_COMMAND_SIZE (64*1024)
 #define VMW_SURFACE_RELOCS (1024)
@@ -391,24 +392,27 @@ vmw_swc_mob_relocation(struct svga_winsys_context *swc,
 {
    struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
    struct vmw_buffer_relocation *reloc;
+   struct pb_buffer *pb_buffer = vmw_pb_buffer(buffer);
 
-   assert(vswc->region.staged < vswc->region.reserved);
+   if (id) {
+      assert(vswc->region.staged < vswc->region.reserved);
 
-   reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged];
-   reloc->mob.id = id;
-   reloc->mob.offset_into_mob = offset_into_mob;
+      reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged];
+      reloc->mob.id = id;
+      reloc->mob.offset_into_mob = offset_into_mob;
 
-   /*
-    * pb_validate holds a refcount to the buffer, so no need to
-    * refcount it again in the relocation.
-    */
-   reloc->buffer = vmw_pb_buffer(buffer);
-   reloc->offset = offset;
-   reloc->is_mob = TRUE;
-   ++vswc->region.staged;
+      /*
+       * pb_validate holds a refcount to the buffer, so no need to
+       * refcount it again in the relocation.
+       */
+      reloc->buffer = pb_buffer;
+      reloc->offset = offset;
+      reloc->is_mob = TRUE;
+      ++vswc->region.staged;
+   }
 
-   if (vmw_swc_add_validate_buffer(vswc, reloc->buffer, flags)) {
-      vswc->seen_mobs += reloc->buffer->size;
+   if (vmw_swc_add_validate_buffer(vswc, pb_buffer, flags)) {
+      vswc->seen_mobs += pb_buffer->size;
       /* divide by 5, tested for best performance */
       if (vswc->seen_mobs >= vswc->vws->ioctl.max_mob_memory / VMW_MAX_MOB_MEM_FACTOR)
          vswc->preemptive_flush = TRUE;
@@ -481,7 +485,8 @@ vmw_swc_surface_only_relocation(struct svga_winsys_context *swc,
       p_atomic_inc(&vsurf->validated);
    }
 
-   *where = vsurf->sid;
+   if (where)
+      *where = vsurf->sid;
 }
 
 static void
@@ -495,7 +500,7 @@ vmw_swc_surface_relocation(struct svga_winsys_context *swc,
 
    assert(swc->have_gb_objects || mobid == NULL);
 
-   if(!surface) {
+   if (!surface) {
       *where = SVGA3D_INVALID_ID;
       if (mobid)
          *mobid = SVGA3D_INVALID_ID;
@@ -525,51 +530,67 @@ vmw_swc_shader_relocation(struct svga_winsys_context *swc,
                          uint32 *shid,
                          uint32 *mobid,
                          uint32 *offset,
-                         struct svga_winsys_gb_shader *shader)
+                         struct svga_winsys_gb_shader *shader,
+                          unsigned flags)
 {
    struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct vmw_winsys_screen *vws = vswc->vws;
    struct vmw_svga_winsys_shader *vshader;
    struct vmw_ctx_validate_item *ishader;
+
    if(!shader) {
       *shid = SVGA3D_INVALID_ID;
       return;
    }
 
-   assert(vswc->shader.staged < vswc->shader.reserved);
    vshader = vmw_svga_winsys_shader(shader);
-   ishader = util_hash_table_get(vswc->hash, vshader);
 
-   if (ishader == NULL) {
-      ishader = &vswc->shader.items[vswc->shader.used + vswc->shader.staged];
-      vmw_svga_winsys_shader_reference(&ishader->vshader, vshader);
-      ishader->referenced = FALSE;
-      /*
-       * Note that a failure here may just fall back to unhashed behavior
-       * and potentially cause unnecessary flushing, so ignore the
-       * return code.
-       */
-      (void) util_hash_table_set(vswc->hash, vshader, ishader);
-      ++vswc->shader.staged;
-   }
+   if (!vws->base.have_vgpu10) {
+      assert(vswc->shader.staged < vswc->shader.reserved);
+      ishader = util_hash_table_get(vswc->hash, vshader);
+
+      if (ishader == NULL) {
+         ishader = &vswc->shader.items[vswc->shader.used + vswc->shader.staged];
+         vmw_svga_winsys_shader_reference(&ishader->vshader, vshader);
+         ishader->referenced = FALSE;
+         /*
+          * Note that a failure here may just fall back to unhashed behavior
+          * and potentially cause unnecessary flushing, so ignore the
+          * return code.
+          */
+         (void) util_hash_table_set(vswc->hash, vshader, ishader);
+         ++vswc->shader.staged;
+      }
 
-   if (!ishader->referenced) {
-      ishader->referenced = TRUE;
-      p_atomic_inc(&vshader->validated);
+      if (!ishader->referenced) {
+         ishader->referenced = TRUE;
+         p_atomic_inc(&vshader->validated);
+      }
    }
 
-   *shid = vshader->shid;
+   if (shid)
+      *shid = vshader->shid;
 
-   if (mobid != NULL && vshader->buf)
+   if (vshader->buf)
       vmw_swc_mob_relocation(swc, mobid, offset, vshader->buf,
                             0, SVGA_RELOC_READ);
 }
 
 static void
+vmw_swc_query_relocation(struct svga_winsys_context *swc,
+                         SVGAMobId *id,
+                         struct svga_winsys_gb_query *query)
+{
+   /* Queries are backed by one big MOB */
+   vmw_swc_mob_relocation(swc, id, NULL, query->buf, 0,
+                          SVGA_RELOC_READ | SVGA_RELOC_WRITE);
+}
+
+static void
 vmw_swc_commit(struct svga_winsys_context *swc)
 {
    struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
 
-   assert(vswc->command.reserved);
    assert(vswc->command.used + vswc->command.reserved <= vswc->command.size);
    vswc->command.used += vswc->command.reserved;
    vswc->command.reserved = 0;
@@ -633,6 +654,96 @@ static int vmw_ptr_compare(void *key1, void *key2)
    return (key1 == key2) ? 0 : 1;
 }
 
+
+/**
+ * vmw_svga_winsys_vgpu10_shader_create - The winsys shader_create callback
+ *
+ * @swc: The winsys context.
+ * @shaderId: Previously allocated shader id.
+ * @shaderType: The shader type.
+ * @bytecode: The shader bytecode.
+ * @bytecodeLen: The length of the bytecode.
+ *
+ * Creates an svga_winsys_gb_shader structure, allocates a buffer for the
+ * shader code and copies the bytecode into it. Shader resource creation
+ * is not done.
+ */
+static struct svga_winsys_gb_shader *
+vmw_svga_winsys_vgpu10_shader_create(struct svga_winsys_context *swc,
+                                     uint32 shaderId,
+                                     SVGA3dShaderType shaderType,
+                                     const uint32 *bytecode,
+                                     uint32 bytecodeLen)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct vmw_svga_winsys_shader *shader;
+   struct svga_winsys_gb_shader *gb_shader =
+      vmw_svga_winsys_shader_create(&vswc->vws->base, shaderType, bytecode,
+                                    bytecodeLen);
+   if (!gb_shader)
+      return NULL;
+
+   shader = vmw_svga_winsys_shader(gb_shader);
+   shader->shid = shaderId;
+
+   return gb_shader;
+}
+
+/**
+ * vmw_svga_winsys_vgpu10_shader_destroy - The winsys shader_destroy callback.
+ *
+ * @swc: The winsys context.
+ * @shader: A shader structure previously allocated by shader_create.
+ *
+ * Frees the shader structure and the buffer holding the shader code.
+ */
+static void
+vmw_svga_winsys_vgpu10_shader_destroy(struct svga_winsys_context *swc,
+                                      struct svga_winsys_gb_shader *shader)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+
+   vmw_svga_winsys_shader_destroy(&vswc->vws->base, shader);
+}
+
+/**
+ * vmw_svga_winsys_resource_rebind - The winsys resource_rebind callback
+ *
+ * @swc: The winsys context.
+ * @surface: The surface to be referenced.
+ * @shader: The shader to be referenced.
+ * @flags: Relocation flags.
+ *
+ * This callback is needed because shader backing buffers are sub-allocated, and
+ * hence the kernel fencing is not sufficient. The buffers need to be put on
+ * the context's validation list and fenced after command submission to avoid
+ * reuse of busy shader buffers. In addition, surfaces need to be put on the
+ * validation list in order for the driver to regard them as referenced
+ * by the command stream.
+ */
+static enum pipe_error
+vmw_svga_winsys_resource_rebind(struct svga_winsys_context *swc,
+                                struct svga_winsys_surface *surface,
+                                struct svga_winsys_gb_shader *shader,
+                                unsigned flags)
+{
+   /**
+    * Need to reserve one validation item for either the surface or
+    * the shader.
+    */
+   if (!vmw_swc_reserve(swc, 0, 1))
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   if (surface)
+      vmw_swc_surface_relocation(swc, NULL, NULL, surface, flags);
+   else if (shader)
+      vmw_swc_shader_relocation(swc, NULL, NULL, NULL, shader, flags);
+
+   vmw_swc_commit(swc);
+
+   return PIPE_OK;
+}
+
 struct svga_winsys_context *
 vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
 {
@@ -648,6 +759,8 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
    vswc->base.surface_relocation = vmw_swc_surface_relocation;
    vswc->base.region_relocation = vmw_swc_region_relocation;
    vswc->base.mob_relocation = vmw_swc_mob_relocation;
+   vswc->base.query_relocation = vmw_swc_query_relocation;
+   vswc->base.query_bind = vmw_swc_query_bind;
    vswc->base.context_relocation = vmw_swc_context_relocation;
    vswc->base.shader_relocation = vmw_swc_shader_relocation;
    vswc->base.commit = vmw_swc_commit;
@@ -655,7 +768,19 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
    vswc->base.surface_map = vmw_svga_winsys_surface_map;
    vswc->base.surface_unmap = vmw_svga_winsys_surface_unmap;
 
-   vswc->base.cid = vmw_ioctl_context_create(vws);
+   vswc->base.shader_create = vmw_svga_winsys_vgpu10_shader_create;
+   vswc->base.shader_destroy = vmw_svga_winsys_vgpu10_shader_destroy;
+
+   vswc->base.resource_rebind = vmw_svga_winsys_resource_rebind;
+
+   if (sws->have_vgpu10)
+      vswc->base.cid = vmw_ioctl_extended_context_create(vws, sws->have_vgpu10);
+   else
+      vswc->base.cid = vmw_ioctl_context_create(vws);
+
+   if (vswc->base.cid == -1)
+      goto out_no_context;
+
    vswc->base.have_gb_objects = sws->have_gb_objects;
 
    vswc->vws = vws;
@@ -682,6 +807,8 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
 out_no_hash:
    pb_validate_destroy(vswc->validate);
 out_no_validate:
+   vmw_ioctl_context_destroy(vws, vswc->base.cid);
+out_no_context:
    FREE(vswc);
    return NULL;
 }
index 2c2fb41..b71af6f 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
index 17822ce..bcf473a 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009-2011 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
index 56f1a0a..f6381fe 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
diff --git a/src/gallium/winsys/svga/drm/vmw_query.c b/src/gallium/winsys/svga/drm/vmw_query.c
new file mode 100644 (file)
index 0000000..7baf2c1
--- /dev/null
@@ -0,0 +1,144 @@
+/**********************************************************
+ * Copyright 2015 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipebuffer/pb_bufmgr.h"
+#include "util/u_memory.h"
+
+#include "vmw_screen.h"
+#include "vmw_buffer.h"
+#include "vmw_query.h"
+
+
+
+struct svga_winsys_gb_query *
+vmw_svga_winsys_query_create(struct svga_winsys_screen *sws,
+                             uint32 queryResultLen)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   struct pb_manager *provider = vws->pools.gmr;
+   struct pb_desc desc = {0};
+   struct pb_buffer *pb_buf;
+   struct svga_winsys_gb_query *query;
+
+   query = CALLOC_STRUCT(svga_winsys_gb_query);
+   if (!query)
+      return NULL;
+
+   /* Allocate memory to hold queries for this context */
+   desc.alignment = 4096;
+   pb_buf = provider->create_buffer(provider, queryResultLen, &desc);
+   query->buf = vmw_svga_winsys_buffer_wrap(pb_buf);
+
+   if (!query->buf) {
+      debug_printf("Failed to allocate memory for queries\n");
+      FREE(query);
+      query = NULL;
+   }
+
+   return query;
+}
+
+
+
+void
+vmw_svga_winsys_query_destroy(struct svga_winsys_screen *sws,
+                              struct svga_winsys_gb_query *query)
+{
+   vmw_svga_winsys_buffer_destroy(sws, query->buf);
+   FREE(query);
+}
+
+
+
+int
+vmw_svga_winsys_query_init(struct svga_winsys_screen *sws,
+                           struct svga_winsys_gb_query *query,
+                           unsigned offset,
+                           SVGA3dQueryState queryState)
+{
+   SVGA3dQueryState *state;
+
+   state = (SVGA3dQueryState *) vmw_svga_winsys_buffer_map(sws,
+                                       query->buf,
+                                       PIPE_TRANSFER_WRITE);
+   if (!state) {
+      debug_printf("Failed to map query result memory for initialization\n");
+      return -1;
+   }
+
+   /* Initialize the query state for the specified query slot */
+   state = (SVGA3dQueryState *)((char *)state + offset);
+   *state = queryState;
+
+   vmw_svga_winsys_buffer_unmap(sws, query->buf);
+
+   return 0;
+}
+
+
+
+void
+vmw_svga_winsys_query_get_result(struct svga_winsys_screen *sws,
+                                 struct svga_winsys_gb_query *query,
+                                 unsigned offset,
+                                 SVGA3dQueryState *queryState,
+                                 void *result, uint32 resultLen)
+{
+   SVGA3dQueryState *state;
+
+   state = (SVGA3dQueryState *) vmw_svga_winsys_buffer_map(sws,
+                                       query->buf,
+                                       PIPE_TRANSFER_READ);
+   if (!state) {
+      debug_printf("Failed to lock query result memory\n");
+
+      if (queryState)
+         *queryState = SVGA3D_QUERYSTATE_FAILED;
+
+      return;
+   }
+
+   state = (SVGA3dQueryState *)((char *)state + offset);
+
+   if (queryState)
+      *queryState = *state;
+
+   if (result) {
+      memcpy(result, state + 1, resultLen);
+   }
+
+   vmw_svga_winsys_buffer_unmap(sws, query->buf);
+}
+
+
+enum pipe_error
+vmw_swc_query_bind(struct svga_winsys_context *swc, 
+                   struct svga_winsys_gb_query *query,
+                   unsigned flags)
+{
+   /* no-op on Linux */
+   return PIPE_OK;
+}
+
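
A driver-side usage sketch (not part of this commit), assuming one 4-byte
query slot followed by an 8-byte result, which matches the `state + 1` read
in vmw_svga_winsys_query_get_result():

   struct svga_winsys_gb_query *q =
      vmw_svga_winsys_query_create(sws, sizeof(SVGA3dQueryState) + 8);

   vmw_svga_winsys_query_init(sws, q, 0, SVGA3D_QUERYSTATE_PENDING);
   /* ... submit the query command, flush, wait for the fence ... */

   SVGA3dQueryState state;
   uint64 result;
   vmw_svga_winsys_query_get_result(sws, q, 0, &state, &result, sizeof(result));
   vmw_svga_winsys_query_destroy(sws, q);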
diff --git a/src/gallium/winsys/svga/drm/vmw_query.h b/src/gallium/winsys/svga/drm/vmw_query.h
new file mode 100644 (file)
index 0000000..a8b58e6
--- /dev/null
@@ -0,0 +1,67 @@
+/**********************************************************
+ * Copyright 2015 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef VMW_DRM_QUERY_H
+#define VMW_DRM_QUERY_H
+
+#include "svga3d_reg.h"
+
+
+
+/** Guest-backed query */
+struct svga_winsys_gb_query
+{
+   struct svga_winsys_buffer *buf;
+};
+
+
+struct svga_winsys_gb_query *
+vmw_svga_winsys_query_create(struct svga_winsys_screen *sws,
+                             uint32 queryResultLen);
+
+void
+vmw_svga_winsys_query_destroy(struct svga_winsys_screen *sws,
+                              struct svga_winsys_gb_query *query);
+
+int
+vmw_svga_winsys_query_init(struct svga_winsys_screen *sws,
+                           struct svga_winsys_gb_query *query,
+                           unsigned offset,
+                           SVGA3dQueryState queryState);
+
+void
+vmw_svga_winsys_query_get_result(struct svga_winsys_screen *sws,
+                       struct svga_winsys_gb_query *query,
+                       unsigned offset,
+                       SVGA3dQueryState *queryState,
+                       void *result, uint32 resultLen);
+
+enum pipe_error
+vmw_swc_query_bind(struct svga_winsys_context *swc, 
+                   struct svga_winsys_gb_query *query,
+                   unsigned flags);
+
+#endif /* VMW_DRM_QUERY_H */
+
index 0c343cc..7fcb6d2 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -57,7 +57,7 @@ static unsigned vmw_dev_hash(void *key)
  */
 
 struct vmw_winsys_screen *
-vmw_winsys_create( int fd, boolean use_old_scanout_flag )
+vmw_winsys_create( int fd )
 {
    struct vmw_winsys_screen *vws;
    struct stat stat_buf;
@@ -84,8 +84,8 @@ vmw_winsys_create( int fd, boolean use_old_scanout_flag )
    vws->device = stat_buf.st_rdev;
    vws->open_count = 1;
    vws->ioctl.drm_fd = dup(fd);
-   vws->use_old_scanout_flag = use_old_scanout_flag;
    vws->base.have_gb_dma = TRUE;
+   vws->base.need_to_rebind_resources = FALSE;
 
    if (!vmw_ioctl_init(vws))
       goto out_no_ioctl;
index ce98db9..79d0949 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -65,8 +65,6 @@ struct vmw_winsys_screen
 {
    struct svga_winsys_screen base;
 
-   boolean use_old_scanout_flag;
-
    struct {
       int drm_fd;
       uint32_t hwversion;
@@ -76,6 +74,8 @@ struct vmw_winsys_screen
       uint64_t max_surface_memory;
       uint64_t max_texture_size;
       boolean have_drm_2_6;
+      boolean have_drm_2_9;
+      uint32_t drm_execbuf_version;
    } ioctl;
 
    struct {
@@ -115,6 +115,10 @@ vmw_region_size(struct vmw_region *region);
 uint32
 vmw_ioctl_context_create(struct vmw_winsys_screen *vws);
 
+uint32
+vmw_ioctl_extended_context_create(struct vmw_winsys_screen *vws,
+                                  boolean vgpu10);
+
 void
 vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws,
                           uint32 cid);
@@ -126,7 +130,8 @@ vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
                          unsigned usage,
                          SVGA3dSize size,
                          uint32 numFaces,
-                         uint32 numMipLevels);
+                         uint32 numMipLevels,
+                         unsigned sampleCount);
 uint32
 vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws,
                            SVGA3dSurfaceFlags flags,
@@ -135,6 +140,7 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws,
                            SVGA3dSize size,
                            uint32 numFaces,
                            uint32 numMipLevels,
+                            unsigned sampleCount,
                             uint32 buffer_handle,
                            struct vmw_region **p_region);
 
@@ -213,7 +219,7 @@ boolean vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws);
 void vmw_ioctl_cleanup(struct vmw_winsys_screen *vws);
 void vmw_pools_cleanup(struct vmw_winsys_screen *vws);
 
-struct vmw_winsys_screen *vmw_winsys_create(int fd, boolean use_old_scanout_flag);
+struct vmw_winsys_screen *vmw_winsys_create(int fd);
 void vmw_winsys_destroy(struct vmw_winsys_screen *sws);
 void vmw_winsys_screen_set_throttling(struct pipe_screen *screen,
                                      uint32_t throttle_us);
@@ -227,4 +233,13 @@ vmw_fences_signal(struct pb_fence_ops *fence_ops,
                   uint32_t emitted,
                   boolean has_emitted);
 
+struct svga_winsys_gb_shader *
+vmw_svga_winsys_shader_create(struct svga_winsys_screen *sws,
+                             SVGA3dShaderType type,
+                             const uint32 *bytecode,
+                             uint32 bytecodeLen);
+void
+vmw_svga_winsys_shader_destroy(struct svga_winsys_screen *sws,
+                              struct svga_winsys_gb_shader *shader);
+
 #endif /* VMW_SCREEN_H_ */
index e70e0fe..01bb0e2 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -111,7 +111,7 @@ svga_drm_winsys_screen_create(int fd)
                               &drm_compat, "vmwgfx drm driver"))
       return NULL;
 
-   vws = vmw_winsys_create( fd, FALSE );
+   vws = vmw_winsys_create(fd);
    if (!vws)
       goto out_no_vws;
 
index e2f0da5..c86d95a 100644 (file)
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -63,13 +63,6 @@ struct vmw_region
    uint32_t size;
 };
 
-/* XXX: This isn't a real hardware flag, but just a hack for kernel to
- * know about primary surfaces. In newer versions of the kernel
- * interface the driver uses a special field.
- */
-#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
-
-
 uint32_t
 vmw_region_size(struct vmw_region *region)
 {
@@ -91,10 +84,30 @@ vmw_ioctl_context_create(struct vmw_winsys_screen *vws)
       return -1;
 
    vmw_printf("Context id is %d\n", c_arg.cid);
-
    return c_arg.cid;
 }
 
+uint32
+vmw_ioctl_extended_context_create(struct vmw_winsys_screen *vws,
+                                  boolean vgpu10)
+{
+   union drm_vmw_extended_context_arg c_arg;
+   int ret;
+
+   VMW_FUNC;
+   memset(&c_arg, 0, sizeof(c_arg));
+   c_arg.req = (vgpu10 ? drm_vmw_context_vgpu10 : drm_vmw_context_legacy);
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd,
+                             DRM_VMW_CREATE_EXTENDED_CONTEXT,
+                             &c_arg, sizeof(c_arg));
+
+   if (ret)
+      return -1;
+
+   vmw_printf("Context id is %d\n", c_arg.rep.cid);
+   return c_arg.rep.cid;
+}
+
 void
 vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws, uint32 cid)
 {
@@ -116,7 +129,8 @@ vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
                          SVGA3dSurfaceFormat format,
                          unsigned usage,
                          SVGA3dSize size,
-                         uint32_t numFaces, uint32_t numMipLevels)
+                         uint32_t numFaces, uint32_t numMipLevels,
+                         unsigned sampleCount)
 {
    union drm_vmw_surface_create_arg s_arg;
    struct drm_vmw_surface_create_req *req = &s_arg.req;
@@ -131,17 +145,8 @@ vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
    vmw_printf("%s flags %d format %d\n", __FUNCTION__, flags, format);
 
    memset(&s_arg, 0, sizeof(s_arg));
-   if (vws->use_old_scanout_flag &&
-       (flags & SVGA3D_SURFACE_HINT_SCANOUT)) {
-      req->flags = (uint32_t) flags;
-      req->scanout = false;
-   } else if (flags & SVGA3D_SURFACE_HINT_SCANOUT) {
-      req->flags = (uint32_t) (flags & ~SVGA3D_SURFACE_HINT_SCANOUT);
-      req->scanout = true;
-   } else {
-      req->flags = (uint32_t) flags;
-      req->scanout = false;
-   }
+   req->flags = (uint32_t) flags;
+   req->scanout = !!(usage & SVGA_SURFACE_USAGE_SCANOUT);
    req->format = (uint32_t) format;
    req->shareable = !!(usage & SVGA_SURFACE_USAGE_SHARED);
 
@@ -188,6 +193,7 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws,
                            SVGA3dSize size,
                            uint32_t numFaces,
                            uint32_t numMipLevels,
+                            unsigned sampleCount,
                             uint32_t buffer_handle,
                            struct vmw_region **p_region)
 {
@@ -206,25 +212,29 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws,
    }
 
    memset(&s_arg, 0, sizeof(s_arg));
-   if (flags & SVGA3D_SURFACE_HINT_SCANOUT) {
-      req->svga3d_flags = (uint32_t) (flags & ~SVGA3D_SURFACE_HINT_SCANOUT);
-      req->drm_surface_flags = drm_vmw_surface_flag_scanout;
-   } else {
-      req->svga3d_flags = (uint32_t) flags;
-   }
+   req->svga3d_flags = (uint32_t) flags;
+   if (usage & SVGA_SURFACE_USAGE_SCANOUT)
+      req->drm_surface_flags |= drm_vmw_surface_flag_scanout;
    req->format = (uint32_t) format;
    if (usage & SVGA_SURFACE_USAGE_SHARED)
       req->drm_surface_flags |= drm_vmw_surface_flag_shareable;
    req->drm_surface_flags |= drm_vmw_surface_flag_create_buffer; 
-
-   assert(numFaces * numMipLevels < DRM_VMW_MAX_SURFACE_FACES*
-         DRM_VMW_MAX_MIP_LEVELS);
    req->base_size.width = size.width;
    req->base_size.height = size.height;
    req->base_size.depth = size.depth;
    req->mip_levels = numMipLevels;
    req->multisample_count = 0;
    req->autogen_filter = SVGA3D_TEX_FILTER_NONE;
+
+   if (vws->base.have_vgpu10) {
+      req->array_size = numFaces;
+      req->multisample_count = sampleCount;
+   } else {
+      assert(numFaces * numMipLevels < DRM_VMW_MAX_SURFACE_FACES *
+             DRM_VMW_MAX_MIP_LEVELS);
+      req->array_size = 0;
+   }
+
    if (buffer_handle)
       req->buffer_handle = buffer_handle;
    else
@@ -403,6 +413,7 @@ vmw_ioctl_command(struct vmw_winsys_screen *vws, int32_t cid,
    struct drm_vmw_execbuf_arg arg;
    struct drm_vmw_fence_rep rep;
    int ret;
+   int argsize;
 
 #ifdef DEBUG
    {
@@ -433,13 +444,21 @@ vmw_ioctl_command(struct vmw_winsys_screen *vws, int32_t cid,
    arg.commands = (unsigned long)commands;
    arg.command_size = size;
    arg.throttle_us = throttle_us;
-   arg.version = DRM_VMW_EXECBUF_VERSION;
-
+   arg.version = vws->ioctl.drm_execbuf_version;
+   arg.context_handle = (vws->base.have_vgpu10 ? cid : SVGA3D_INVALID_ID);
+
+   /* In DRM_VMW_EXECBUF_VERSION 1, the drm_vmw_execbuf_arg structure ends with
+    * the flags field. The structure size passed to drmCommandWrite must match
+    * what the kernel expects for the given drm_execbuf_version; otherwise the
+    * ioctl is rejected.
+    */
+   argsize = vws->ioctl.drm_execbuf_version > 1 ? sizeof(arg) :
+                offsetof(struct drm_vmw_execbuf_arg, context_handle);
    do {
-       ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_EXECBUF, &arg, sizeof(arg));
+       ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_EXECBUF, &arg, argsize);
    } while(ret == -ERESTART);
    if (ret) {
       vmw_error("%s error %s.\n", __FUNCTION__, strerror(-ret));
+      abort();
    }
 
    if (rep.error) {
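The version-dependent argument size deserves a closer look: an older vmwgfx
kernel validates the size of the ioctl argument, so a version-1 submission
must end at the last field that kernel knows about. A minimal sketch of the
pattern, with the struct abridged to the fields visible in this diff (some
field types are inferred) and an invented helper name:

    #include <stddef.h>   /* offsetof */
    #include <stdint.h>

    struct drm_vmw_execbuf_arg {
       uint64_t commands;
       uint32_t command_size;
       uint32_t throttle_us;
       uint64_t fence_rep;
       uint32_t version;
       uint32_t flags;
       uint32_t context_handle;   /* first field added in version 2 */
       uint32_t pad64;
    };

    static size_t
    execbuf_arg_size(int drm_execbuf_version)
    {
       /* A version-1 kernel knows the struct only up to and including
        * 'flags'; newer kernels accept the full structure.
        */
       return drm_execbuf_version > 1 ?
              sizeof(struct drm_vmw_execbuf_arg) :
              offsetof(struct drm_vmw_execbuf_arg, context_handle);
    }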
@@ -832,6 +851,7 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
    int ret;
    uint32_t *cap_buffer;
    drmVersionPtr version;
+   boolean drm_gb_capable;
    boolean have_drm_2_5;
 
    VMW_FUNC;
@@ -844,6 +864,12 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
       (version->version_major == 2 && version->version_minor > 4);
    vws->ioctl.have_drm_2_6 = version->version_major > 2 ||
       (version->version_major == 2 && version->version_minor > 5);
+   vws->ioctl.have_drm_2_9 = version->version_major > 2 ||
+      (version->version_major == 2 && version->version_minor > 8);
+
+   vws->ioctl.drm_execbuf_version = vws->ioctl.have_drm_2_9 ? 2 : 1;
+
+   drm_gb_capable = have_drm_2_5;
 
    memset(&gp_arg, 0, sizeof(gp_arg));
    gp_arg.param = DRM_VMW_PARAM_3D;
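The open-coded major/minor comparisons above are easy to misread ("minor > 4"
means "at least 2.5"). A helper spelling out "running DRM version is at least
maj.min" makes the intent explicit; this is an illustrative sketch, not code
from this change:

    #include <stdbool.h>

    /* True if the running vmwgfx DRM module is at least version maj.min. */
    static bool
    vmw_drm_at_least(int version_major, int version_minor, int maj, int min)
    {
       return version_major > maj ||
              (version_major == maj && version_minor >= min);
    }

    /* Equivalent to the checks in vmw_ioctl_init():
     *   have_drm_2_5 == vmw_drm_at_least(major, minor, 2, 5)
     *   have_drm_2_9 == vmw_drm_at_least(major, minor, 2, 9)
     */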
@@ -875,9 +901,10 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
       vws->base.have_gb_objects =
          !!(gp_arg.value & (uint64_t) SVGA_CAP_GBOBJECTS);
    
-   if (vws->base.have_gb_objects && !have_drm_2_5)
+   if (vws->base.have_gb_objects && !drm_gb_capable)
       goto out_no_3d;
 
+   vws->base.have_vgpu10 = FALSE;
    if (vws->base.have_gb_objects) {
       memset(&gp_arg, 0, sizeof(gp_arg));
       gp_arg.param = DRM_VMW_PARAM_3D_CAPS_SIZE;
@@ -918,6 +945,27 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
 
       /* Never early flush surfaces, mobs do accounting. */
       vws->ioctl.max_surface_memory = -1;
+
+      if (vws->ioctl.have_drm_2_9) {
+
+         memset(&gp_arg, 0, sizeof(gp_arg));
+         gp_arg.param = DRM_VMW_PARAM_VGPU10;
+         ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+                                   &gp_arg, sizeof(gp_arg));
+         if (ret == 0 && gp_arg.value != 0) {
+            const char *vgpu10_val;
+
+            debug_printf("Have VGPU10 interface and hardware.\n");
+            vws->base.have_vgpu10 = TRUE;
+            vgpu10_val = getenv("SVGA_VGPU10");
+            if (vgpu10_val && strcmp(vgpu10_val, "0") == 0) {
+               debug_printf("Disabling VGPU10 interface.\n");
+               vws->base.have_vgpu10 = FALSE;
+            } else {
+               debug_printf("Enabling VGPU10 interface.\n");
+            }
+         }
+      }
    } else {
       vws->ioctl.num_cap_3d = SVGA3D_DEVCAP_MAX;
 
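The SVGA_VGPU10 handling above is a probe-then-override pattern: the kernel
must report the capability, but the user can still veto it from the
environment. Reduced to its essentials as a sketch (the helper is
illustrative, not driver code):

    #include <stdbool.h>
    #include <stdlib.h>
    #include <string.h>

    /* Enable a probed feature unless the named environment variable is "0". */
    static bool
    feature_enabled(bool hw_has_it, const char *env_name)
    {
       const char *val;

       if (!hw_has_it)
          return false;
       val = getenv(env_name);
       return !(val && strcmp(val, "0") == 0);
    }

    /* e.g. have_vgpu10 = feature_enabled(kernel_reports_vgpu10, "SVGA_VGPU10"); */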
@@ -938,6 +986,9 @@ vmw_ioctl_init(struct vmw_winsys_screen *vws)
       size = SVGA_FIFO_3D_CAPS_SIZE * sizeof(uint32_t);
    }
 
+   debug_printf("VGPU10 interface is %s.\n",
+                vws->base.have_vgpu10 ? "on" : "off");
+
    cap_buffer = calloc(1, size);
    if (!cap_buffer) {
       debug_printf("Failed alloc fifo 3D caps buffer.\n");
index 1815bfa..48c95e5 100644
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
index 32f16cd..a18dd82 100644
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -48,6 +48,7 @@
 #include "vmw_buffer.h"
 #include "vmw_fence.h"
 #include "vmw_shader.h"
+#include "vmw_query.h"
 #include "svga3d_surfacedefs.h"
 
 /**
@@ -137,8 +138,9 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
                                SVGA3dSurfaceFormat format,
                                unsigned usage,
                                SVGA3dSize size,
-                               uint32 numFaces,
-                               uint32 numMipLevels)
+                               uint32 numLayers,
+                               uint32 numMipLevels,
+                               unsigned sampleCount)
 {
    struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
    struct vmw_svga_winsys_surface *surface;
@@ -146,7 +148,6 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
    struct pb_manager *provider;
    uint32_t buffer_size;
 
-
    memset(&desc, 0, sizeof(desc));
    surface = CALLOC_STRUCT(vmw_svga_winsys_surface);
    if(!surface)
@@ -163,7 +164,11 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
     * Used for the backing buffer GB surfaces, and to approximate
     * when to flush on non-GB hosts.
     */
-   buffer_size = svga3dsurface_get_serialized_size(format, size, numMipLevels, (numFaces == 6));
+   buffer_size = svga3dsurface_get_serialized_size(format, size, numMipLevels,
+                                                   numLayers);
+   if (flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT)
+      buffer_size += sizeof(SVGA3dDXSOState);
+
    if (buffer_size > vws->ioctl.max_texture_size) {
       goto no_sid;
    }
@@ -189,8 +194,9 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
       }
 
       surface->sid = vmw_ioctl_gb_surface_create(vws, flags, format, usage,
-                                                 size, numFaces,
-                                                 numMipLevels, ptr.gmrId,
+                                                 size, numLayers,
+                                                 numMipLevels, sampleCount,
+                                                 ptr.gmrId,
                                                  surface->buf ? NULL :
                                                 &desc.region);
 
@@ -205,9 +211,9 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
          vmw_svga_winsys_buffer_destroy(sws, surface->buf);
          surface->buf = NULL;
          surface->sid = vmw_ioctl_gb_surface_create(vws, flags, format, usage,
-                                                    size, numFaces,
-                                                    numMipLevels, 0,
-                                                    &desc.region);
+                                                    size, numLayers,
+                                                    numMipLevels, sampleCount,
+                                                    0, &desc.region);
          if (surface->sid == SVGA3D_INVALID_ID)
             goto no_sid;
       }
@@ -233,7 +239,8 @@ vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
       }
    } else {
       surface->sid = vmw_ioctl_surface_create(vws, flags, format, usage,
-                                              size, numFaces, numMipLevels);
+                                              size, numLayers, numMipLevels,
+                                              sampleCount);
       if(surface->sid == SVGA3D_INVALID_ID)
          goto no_sid;
 
@@ -257,7 +264,7 @@ static boolean
 vmw_svga_winsys_surface_can_create(struct svga_winsys_screen *sws,
                                SVGA3dSurfaceFormat format,
                                SVGA3dSize size,
-                               uint32 numFaces,
+                               uint32 numLayers,
                                uint32 numMipLevels)
 {
    struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
@@ -265,7 +272,7 @@ vmw_svga_winsys_surface_can_create(struct svga_winsys_screen *sws,
 
    buffer_size = svga3dsurface_get_serialized_size(format, size, 
                                                    numMipLevels, 
-                                                   (numFaces == 6));
+                                                   numLayers);
    if (buffer_size > vws->ioctl.max_texture_size) {
        return FALSE;
    }
@@ -323,14 +330,16 @@ vmw_svga_winsys_get_cap(struct svga_winsys_screen *sws,
 {   
    struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
 
-   if (index > vws->ioctl.num_cap_3d || !vws->ioctl.cap_3d[index].has_cap)      
+   if (index > vws->ioctl.num_cap_3d ||
+       index >= SVGA3D_DEVCAP_MAX ||
+       !vws->ioctl.cap_3d[index].has_cap)
       return FALSE;
 
    *result = vws->ioctl.cap_3d[index].result;
    return TRUE;
 }
 
-static struct svga_winsys_gb_shader *
+struct svga_winsys_gb_shader *
 vmw_svga_winsys_shader_create(struct svga_winsys_screen *sws,
                              SVGA3dShaderType type,
                              const uint32 *bytecode,
@@ -360,9 +369,11 @@ vmw_svga_winsys_shader_create(struct svga_winsys_screen *sws,
    memcpy(code, bytecode, bytecodeLen);
    vmw_svga_winsys_buffer_unmap(sws, shader->buf);
 
-   shader->shid = vmw_ioctl_shader_create(vws, type, bytecodeLen);
-   if(shader->shid == SVGA3D_INVALID_ID)
-      goto out_no_shid;
+   if (!sws->have_vgpu10) {
+      shader->shid = vmw_ioctl_shader_create(vws, type, bytecodeLen);
+      if (shader->shid == SVGA3D_INVALID_ID)
+         goto out_no_shid;
+   }
 
    return svga_winsys_shader(shader);
 
@@ -374,7 +385,7 @@ out_no_shader:
    return NULL;
 }
 
-static void
+void
 vmw_svga_winsys_shader_destroy(struct svga_winsys_screen *sws,
                               struct svga_winsys_gb_shader *shader)
 {
@@ -405,6 +416,11 @@ vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws)
    vws->base.shader_destroy = vmw_svga_winsys_shader_destroy;
    vws->base.fence_finish = vmw_svga_winsys_fence_finish;
 
+   vws->base.query_create = vmw_svga_winsys_query_create;
+   vws->base.query_init = vmw_svga_winsys_query_init;
+   vws->base.query_destroy = vmw_svga_winsys_query_destroy;
+   vws->base.query_get_result = vmw_svga_winsys_query_get_result;
+
    return TRUE;
 }
 
index e82486a..56ffdd1 100644
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009-2012 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -50,7 +50,8 @@ vmw_svga_winsys_shader_reference(struct vmw_svga_winsys_shader **pdst,
    if (pipe_reference(dst_ref, src_ref)) {
       struct svga_winsys_screen *sws = &dst->screen->base;
 
-      vmw_ioctl_shader_destroy(dst->screen, dst->shid);
+      if (!sws->have_vgpu10)
+         vmw_ioctl_shader_destroy(dst->screen, dst->shid);
 #ifdef DEBUG
       /* to detect dangling pointers */
       assert(p_atomic_read(&dst->validated) == 0);
index 28f9971..c9a3638 100644
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009-2012 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
index cf648b4..6c0ad3b 100644
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
index 1291f38..f8b582d 100644
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
index 73ad205..807ec90 100644
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA
+ * Copyright © 2009-2015 VMware, Inc., Palo Alto, CA., USA
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -64,6 +64,7 @@
 #define DRM_VMW_GB_SURFACE_CREATE    23
 #define DRM_VMW_GB_SURFACE_REF       24
 #define DRM_VMW_SYNCCPU              25
+#define DRM_VMW_CREATE_EXTENDED_CONTEXT 26
 
 /*************************************************************************/
 /**
@@ -88,6 +89,8 @@
 #define DRM_VMW_PARAM_3D_CAPS_SIZE     8
 #define DRM_VMW_PARAM_MAX_MOB_MEMORY   9
 #define DRM_VMW_PARAM_MAX_MOB_SIZE     10
+#define DRM_VMW_PARAM_SCREEN_TARGET    11
+#define DRM_VMW_PARAM_VGPU10           12
 
 /**
  * enum drm_vmw_handle_type - handle type for ref ioctls
@@ -296,7 +299,7 @@ union drm_vmw_surface_reference_arg {
  * Argument to the DRM_VMW_EXECBUF Ioctl.
  */
 
-#define DRM_VMW_EXECBUF_VERSION 1
+#define DRM_VMW_EXECBUF_VERSION 2
 
 struct drm_vmw_execbuf_arg {
        uint64_t commands;
@@ -305,6 +308,8 @@ struct drm_vmw_execbuf_arg {
        uint64_t fence_rep;
        uint32_t version;
        uint32_t flags;
+       uint32_t context_handle;
+       uint32_t pad64;
 };
 
 /**
@@ -826,7 +831,6 @@ struct drm_vmw_update_layout_arg {
 enum drm_vmw_shader_type {
        drm_vmw_shader_type_vs = 0,
        drm_vmw_shader_type_ps,
-       drm_vmw_shader_type_gs
 };
 
 
@@ -908,6 +912,8 @@ enum drm_vmw_surface_flags {
  * @buffer_handle     Buffer handle of backup buffer. SVGA3D_INVALID_ID
  *                    if none.
  * @base_size         Size of the base mip level for all faces.
+ * @array_size        Must be zero for non-vgpu10 hardware; if non-zero,
+ *                    svga3d_flags must have the appropriate bind flags set.
  *
  * Input argument to the  DRM_VMW_GB_SURFACE_CREATE Ioctl.
  * Part of output argument for the DRM_VMW_GB_SURFACE_REF Ioctl.
@@ -920,7 +926,7 @@ struct drm_vmw_gb_surface_create_req {
        uint32_t multisample_count;
        uint32_t autogen_filter;
        uint32_t buffer_handle;
-       uint32_t pad64;
+       uint32_t array_size;
        struct drm_vmw_size base_size;
 };
 
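Reusing the pad64 slot for array_size keeps the structure size and every
other field offset unchanged, so old and new kernels still agree on the
layout; only VGPU10-aware user space stores a non-zero value there. A sketch
of a VGPU10 caller filling the request for a cubemap (the numbers and the
bind_flags/fmt parameters are illustrative):

    #include <stdint.h>
    #include <string.h>

    /* Request one cubemap: 6 layers, 64x64, 7 mip levels. */
    static void
    fill_cubemap_req(struct drm_vmw_gb_surface_create_req *req,
                     uint32_t bind_flags, uint32_t fmt)
    {
       memset(req, 0, sizeof(*req));
       req->svga3d_flags = bind_flags;  /* must carry proper bind flags
                                         * whenever array_size != 0 */
       req->format = fmt;
       req->mip_levels = 7;
       req->multisample_count = 0;
       req->array_size = 6;             /* must stay 0 on non-vgpu10 hardware */
       req->base_size.width = 64;
       req->base_size.height = 64;
       req->base_size.depth = 1;
    }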
@@ -1060,4 +1066,28 @@ struct drm_vmw_synccpu_arg {
        uint32_t pad64;
 };
 
+/*************************************************************************/
+/**
+ * DRM_VMW_CREATE_EXTENDED_CONTEXT - Create a host context.
+ *
+ * Allocates a device-unique context id and queues a create-context command
+ * for the host. Does not wait for host completion.
+ */
+enum drm_vmw_extended_context {
+       drm_vmw_context_legacy,
+       drm_vmw_context_vgpu10
+};
+
+/**
+ * union drm_vmw_extended_context_arg
+ *
+ * @req: Context type.
+ * @rep: Context identifier.
+ *
+ * Argument to the DRM_VMW_CREATE_EXTENDED_CONTEXT Ioctl.
+ */
+union drm_vmw_extended_context_arg {
+       enum drm_vmw_extended_context req;
+       struct drm_vmw_context_arg rep;
+};
 #endif
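The union encodes the ioctl's in/out contract: user space writes the request
enum into the same storage the kernel overwrites with the reply. A minimal
caller, assuming libdrm's drmCommandWriteRead() wrapper and this header
(error handling trimmed):

    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>   /* drmCommandWriteRead() */

    static int32_t
    create_extended_context(int drm_fd, int vgpu10)
    {
       union drm_vmw_extended_context_arg arg;

       memset(&arg, 0, sizeof(arg));
       arg.req = vgpu10 ? drm_vmw_context_vgpu10 : drm_vmw_context_legacy;

       /* The kernel reads 'req', then overwrites the union with 'rep'. */
       if (drmCommandWriteRead(drm_fd, DRM_VMW_CREATE_EXTENDED_CONTEXT,
                               &arg, sizeof(arg)) != 0)
          return -1;

       return arg.rep.cid;
    }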
index ccc3cc6..57cdeac 100644
@@ -706,14 +706,30 @@ gbm_dri_bo_import(struct gbm_device *gbm,
    {
       struct gbm_import_fd_data *fd_data = buffer;
       int stride = fd_data->stride, offset = 0;
+      int dri_format;
+
+      switch (fd_data->format) {
+      case GBM_BO_FORMAT_XRGB8888:
+         dri_format = GBM_FORMAT_XRGB8888;
+         break;
+      case GBM_BO_FORMAT_ARGB8888:
+         dri_format = GBM_FORMAT_ARGB8888;
+         break;
+      default:
+         dri_format = fd_data->format;
+      }
 
       image = dri->image->createImageFromFds(dri->screen,
                                              fd_data->width,
                                              fd_data->height,
-                                             fd_data->format,
+                                             dri_format,
                                              &fd_data->fd, 1,
                                              &stride, &offset,
                                              NULL);
+      if (image == NULL) {
+         errno = EINVAL;
+         return NULL;
+      }
       gbm_format = fd_data->format;
       break;
    }
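The switch above is needed because gbm_bo_format is a small enum
(GBM_BO_FORMAT_XRGB8888 is 0) while the DRI image path expects GBM_FORMAT_*
fourcc codes, so the enum values would otherwise be misinterpreted. The same
mapping as a standalone sketch:

    #include <gbm.h>
    #include <stdint.h>

    /* Map the legacy gbm_bo_format enum onto GBM_FORMAT_* fourcc codes;
     * anything else is assumed to already be a fourcc and passes through.
     */
    static uint32_t
    gbm_bo_format_to_fourcc(uint32_t format)
    {
       switch (format) {
       case GBM_BO_FORMAT_XRGB8888:
          return GBM_FORMAT_XRGB8888;
       case GBM_BO_FORMAT_ARGB8888:
          return GBM_FORMAT_ARGB8888;
       default:
          return format;
       }
    }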
index 0835871..6898fb0 100644
@@ -29,18 +29,7 @@ endif
 
 intermediates := $(call local-generated-sources-dir)
 
-sources := \
-       glsl_lexer.cpp \
-       glsl_parser.cpp \
-       glcpp/glcpp-lex.c \
-       glcpp/glcpp-parse.c \
-       nir/nir_builder_opcodes.h \
-       nir/nir_constant_expressions.c \
-       nir/nir_opcodes.c \
-       nir/nir_opcodes.h \
-       nir/nir_opt_algebraic.c
-
-LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+LOCAL_SRC_FILES := $(LOCAL_SRC_FILES)
 
 LOCAL_C_INCLUDES += \
        $(intermediates)/glcpp \
@@ -51,8 +40,10 @@ LOCAL_C_INCLUDES += \
 LOCAL_EXPORT_C_INCLUDE_DIRS += \
        $(intermediates)/nir
 
-sources := $(addprefix $(intermediates)/, $(sources))
-LOCAL_GENERATED_SOURCES += $(sources)
+LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
+       $(LIBGLCPP_GENERATED_FILES) \
+       $(NIR_GENERATED_FILES) \
+       $(LIBGLSL_GENERATED_CXX_FILES))
 
 define local-l-or-ll-to-c-or-cpp
        @mkdir -p $(dir $@)
@@ -102,8 +93,7 @@ $(intermediates)/nir/nir_builder_opcodes.h: $(nir_builder_opcodes_deps)
 nir_constant_expressions_gen := $(LOCAL_PATH)/nir/nir_constant_expressions.py
 nir_constant_expressions_deps := \
        $(LOCAL_PATH)/nir/nir_opcodes.py \
-       $(LOCAL_PATH)/nir/nir_constant_expressions.py \
-       $(LOCAL_PATH)/nir/nir_constant_expressions.h
+       $(LOCAL_PATH)/nir/nir_constant_expressions.py
 
 $(intermediates)/nir/nir_constant_expressions.c: $(nir_constant_expressions_deps)
        @mkdir -p $(dir $@)
index 2ab4050..0836831 100644
@@ -50,12 +50,14 @@ EXTRA_DIST = tests glcpp/tests README TODO glcpp/README     \
        nir/nir_opcodes_c.py                            \
        nir/nir_opcodes_h.py                            \
        nir/nir_opt_algebraic.py                        \
+       nir/tests                                       \
        SConscript
 
 include Makefile.sources
 
 TESTS = glcpp/tests/glcpp-test                         \
        glcpp/tests/glcpp-test-cr-lf                    \
+        nir/tests/control_flow_tests                   \
        tests/blob-test                                 \
        tests/general-ir-test                           \
        tests/optimization-test                         \
@@ -70,6 +72,7 @@ noinst_LTLIBRARIES = libnir.la libglsl.la libglcpp.la
 check_PROGRAMS =                                       \
        glcpp/glcpp                                     \
        glsl_test                                       \
+       nir/tests/control_flow_tests                    \
        tests/blob-test                                 \
        tests/general-ir-test                           \
        tests/sampler-types-test                        \
@@ -140,13 +143,16 @@ libglsl_la_SOURCES =                                      \
        glsl_parser.cpp                                 \
        glsl_parser.h                                   \
        $(LIBGLSL_FILES)                                \
-       $(NIR_FILES)
+       $(NIR_FILES)                                    \
+       $(NIR_GENERATED_FILES)
+
 
 libnir_la_SOURCES =                                    \
        glsl_types.cpp                                  \
        builtin_types.cpp                               \
        glsl_symbol_table.cpp                           \
-       $(NIR_FILES)
+       $(NIR_FILES)                                    \
+       $(NIR_GENERATED_FILES)
 
 glsl_compiler_SOURCES = \
        $(GLSL_COMPILER_CXX_FILES)
@@ -207,19 +213,23 @@ am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
 am__v_YACC_0 = @echo "  YACC    " $@;
 am__v_YACC_1 =
 
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
+
 glsl_parser.cpp glsl_parser.h: glsl_parser.yy
-       $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $<
+       $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy
 
 glsl_lexer.cpp: glsl_lexer.ll
-       $(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+       $(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll
 
 glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
-       $(AM_V_at)$(MKDIR_P) glcpp
-       $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $<
+       $(MKDIR_GEN)
+       $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y
 
 glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
-       $(AM_V_at)$(MKDIR_P) glcpp
-       $(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+       $(MKDIR_GEN)
+       $(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l
 
 # Only the parsers (specifically the header files generated at the same time)
 # need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
@@ -232,11 +242,7 @@ BUILT_SOURCES =                                            \
        glsl_lexer.cpp                                  \
        glcpp/glcpp-parse.c                             \
        glcpp/glcpp-lex.c                               \
-       nir/nir_builder_opcodes.h                               \
-       nir/nir_constant_expressions.c                  \
-       nir/nir_opcodes.c                               \
-       nir/nir_opcodes.h                               \
-       nir/nir_opt_algebraic.c
+       $(NIR_GENERATED_FILES)
 CLEANFILES =                                           \
        glcpp/glcpp-parse.h                             \
        glsl_parser.h                                   \
@@ -249,22 +255,35 @@ dist-hook:
        $(RM) glcpp/tests/*.out
        $(RM) glcpp/tests/subtest*/*.out
 
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
 nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
 
-nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@
+nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@
 
 nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@
 
 nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@
 
 nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@
+
+nir_tests_control_flow_tests_SOURCES =                 \
+       nir/tests/control_flow_tests.cpp
+nir_tests_control_flow_tests_CFLAGS =                  \
+       $(PTHREAD_CFLAGS)
+nir_tests_control_flow_tests_LDADD =                   \
+       $(top_builddir)/src/gtest/libgtest.la           \
+       $(top_builddir)/src/glsl/libnir.la              \
+       $(top_builddir)/src/libglsl_util.la             \
+       $(top_builddir)/src/util/libmesautil.la         \
+       $(PTHREAD_LIBS)
index acd13ef..0cd3d28 100644
@@ -30,31 +30,37 @@ NIR_FILES = \
        nir/nir_control_flow_private.h \
        nir/nir_dominance.c \
        nir/nir_from_ssa.c \
+       nir/nir_gs_count_vertices.c \
        nir/nir_intrinsics.c \
        nir/nir_intrinsics.h \
        nir/nir_live_variables.c \
        nir/nir_lower_alu_to_scalar.c \
        nir/nir_lower_atomics.c \
+       nir/nir_lower_clip.c \
        nir/nir_lower_global_vars_to_local.c \
+       nir/nir_lower_gs_intrinsics.c \
        nir/nir_lower_load_const_to_scalar.c \
        nir/nir_lower_locals_to_regs.c \
        nir/nir_lower_idiv.c \
        nir/nir_lower_io.c \
        nir/nir_lower_outputs_to_temporaries.c \
        nir/nir_lower_phis_to_scalar.c \
-       nir/nir_lower_samplers.cpp \
+       nir/nir_lower_samplers.c \
        nir/nir_lower_system_values.c \
-       nir/nir_lower_tex_projector.c \
+       nir/nir_lower_tex.c \
        nir/nir_lower_to_source_mods.c \
+       nir/nir_lower_two_sided_color.c \
        nir/nir_lower_vars_to_ssa.c \
        nir/nir_lower_var_copies.c \
        nir/nir_lower_vec_to_movs.c \
        nir/nir_metadata.c \
+       nir/nir_move_vec_src_uses_to_dest.c \
        nir/nir_normalize_cubemap_coords.c \
        nir/nir_opt_constant_folding.c \
        nir/nir_opt_copy_propagate.c \
        nir/nir_opt_cse.c \
        nir/nir_opt_dce.c \
+       nir/nir_opt_dead_cf.c \
        nir/nir_opt_gcm.c \
        nir/nir_opt_global_to_local.c \
        nir/nir_opt_peephole_ffma.c \
@@ -76,8 +82,7 @@ NIR_FILES = \
        nir/nir_worklist.h \
        nir/nir_types.cpp \
        nir/spirv_to_nir.c \
-       nir/spirv_glsl450_to_nir.c \
-       $(NIR_GENERATED_FILES)
+       nir/spirv_glsl450_to_nir.c
 
 # libglsl
 
index d8c6cea..4c31436 100644
@@ -491,6 +491,7 @@ struct ast_type_qualifier {
         /** \name Layout qualifiers for GL_ARB_uniform_buffer_object */
         /** \{ */
          unsigned std140:1;
+         unsigned std430:1;
          unsigned shared:1;
          unsigned packed:1;
          unsigned column_major:1;
@@ -639,6 +640,9 @@ struct ast_type_qualifier {
     */
    glsl_base_type image_base_type;
 
+   /** Flag indicating whether this represents a default value for a qualifier */
+   bool is_default_qualifier;
+
    /**
     * Return true if and only if an interpolation qualifier is present.
     */
@@ -1169,4 +1173,9 @@ extern void
 check_builtin_array_max_size(const char *name, unsigned size,
                              YYLTYPE loc, struct _mesa_glsl_parse_state *state);
 
+extern void _mesa_ast_process_interface_block(YYLTYPE *locp,
+                                              _mesa_glsl_parse_state *state,
+                                              ast_interface_block *const block,
+                                              const struct ast_type_qualifier q);
+
 #endif /* AST_H */
index ae399f0..dfb3107 100644
@@ -226,7 +226,8 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
              * by the linker.
              */
          }
-         else {
+         else if (array->variable_referenced()->data.mode !=
+                  ir_var_shader_storage) {
             _mesa_glsl_error(&loc, state, "unsized array index must be constant");
          }
       } else if (array->type->fields.array->is_interface()
index 803edf5..26d4c62 100644
@@ -142,6 +142,31 @@ verify_image_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state,
    return true;
 }
 
+static bool
+verify_first_atomic_ssbo_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state,
+                                   ir_variable *var)
+{
+   if (!var || !var->is_in_shader_storage_block()) {
+      _mesa_glsl_error(loc, state, "First argument to atomic function "
+                       "must be a buffer variable");
+      return false;
+   }
+   return true;
+}
+
+static bool
+is_atomic_ssbo_function(const char *func_name)
+{
+   return !strcmp(func_name, "atomicAdd") ||
+          !strcmp(func_name, "atomicMin") ||
+          !strcmp(func_name, "atomicMax") ||
+          !strcmp(func_name, "atomicAnd") ||
+          !strcmp(func_name, "atomicOr") ||
+          !strcmp(func_name, "atomicXor") ||
+          !strcmp(func_name, "atomicExchange") ||
+          !strcmp(func_name, "atomicCompSwap");
+}
+
 /**
  * Verify that 'out' and 'inout' actual parameters are lvalues.  Also, verify
  * that 'const_in' formal parameters (an extension in our IR) correspond to
@@ -256,6 +281,23 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
       actual_ir_node  = actual_ir_node->next;
       actual_ast_node = actual_ast_node->next;
    }
+
+   /* The first parameter of atomic functions must be a buffer variable */
+   const char *func_name = sig->function_name();
+   bool is_atomic_ssbo = is_atomic_ssbo_function(func_name);
+   if (is_atomic_ssbo) {
+      const ir_rvalue *const actual = (ir_rvalue *) actual_ir_parameters.head;
+
+      const ast_expression *const actual_ast =
+         exec_node_data(ast_expression, actual_ast_parameters.head, link);
+      YYLTYPE loc = actual_ast->get_location();
+
+      if (!verify_first_atomic_ssbo_parameter(&loc, state,
+                                              actual->variable_referenced())) {
+         return false;
+      }
+   }
+
    return true;
 }
 
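All eight GLSL atomic built-ins share the rule enforced above, so the check
is keyed purely off the function name. The same matcher rewritten
table-driven, as a standalone C sketch:

    #include <stdbool.h>
    #include <string.h>

    static bool
    is_atomic_ssbo_function(const char *func_name)
    {
       static const char *const names[] = {
          "atomicAdd", "atomicMin", "atomicMax", "atomicAnd",
          "atomicOr", "atomicXor", "atomicExchange", "atomicCompSwap",
       };
       for (unsigned i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
          if (strcmp(func_name, names[i]) == 0)
             return true;
       }
       return false;
    }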
@@ -1593,11 +1635,16 @@ ast_function_expression::handle_method(exec_list *instructions,
 
       if (op->type->is_array()) {
          if (op->type->is_unsized_array()) {
-            _mesa_glsl_error(&loc, state, "length called on unsized array");
-            goto fail;
+            if (!state->has_shader_storage_buffer_objects()) {
+               _mesa_glsl_error(&loc, state, "length called on unsized array"
+                                             " is only available with "
+                                             "ARB_shader_storage_buffer_object");
+            }
+            /* Compute the length of the unsized array at run time */
+            result = new(ctx) ir_expression(ir_unop_ssbo_unsized_array_length, op);
+         } else {
+            result = new(ctx) ir_constant(op->type->array_size());
          }
-
-         result = new(ctx) ir_constant(op->type->array_size());
       } else if (op->type->is_vector()) {
          if (state->ARB_shading_language_420pack_enable) {
             /* .length() returns int. */
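For the unsized case, ir_unop_ssbo_unsized_array_length defers the answer to
run time, when the size of the bound buffer is known. Under the
ARB_shader_storage_buffer_object rules the value is
(buffer_size - offset) / stride, clamped at zero; a sketch of that
arithmetic (names are illustrative, not Mesa API):

    #include <stdint.h>

    /* Run-time length of an unsized array that ends an SSBO: the bytes
     * remaining past the array's offset, divided by its stride.
     */
    static int32_t
    ssbo_unsized_array_length(uint32_t buffer_size,
                              uint32_t array_offset,
                              uint32_t array_stride)
    {
       if (buffer_size <= array_offset)
          return 0;
       return (int32_t)((buffer_size - array_offset) / array_stride);
    }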
index 81b44bd..351aafc 100644
@@ -67,6 +67,48 @@ static void
 remove_per_vertex_blocks(exec_list *instructions,
                          _mesa_glsl_parse_state *state, ir_variable_mode mode);
 
+/**
+ * Visitor class that finds the first read of a write-only variable, if any.
+ */
+class read_from_write_only_variable_visitor : public ir_hierarchical_visitor
+{
+public:
+   read_from_write_only_variable_visitor() : found(NULL)
+   {
+   }
+
+   virtual ir_visitor_status visit(ir_dereference_variable *ir)
+   {
+      if (this->in_assignee)
+         return visit_continue;
+
+      ir_variable *var = ir->variable_referenced();
+      /* We can have image_write_only set on both images and buffer variables,
+       * but in the former there is a distinction between reads from the
+       * variable itself (write_only) and from the memory it points to
+       * (image_write_only). Buffer variables make no such distinction, which
+       * is why this check is limited to buffer variables alone.
+       */
+      if (!var || var->data.mode != ir_var_shader_storage)
+         return visit_continue;
+
+      if (var->data.image_write_only) {
+         found = var;
+         return visit_stop;
+      }
+
+      return visit_continue;
+   }
+
+   ir_variable *get_variable() {
+      return found;
+   }
+
+private:
+   ir_variable *found;
+};
 
 void
 _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
@@ -162,6 +204,20 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
     */
    remove_per_vertex_blocks(instructions, state, ir_var_shader_in);
    remove_per_vertex_blocks(instructions, state, ir_var_shader_out);
+
+   /* Check that we don't have reads from write-only variables */
+   read_from_write_only_variable_visitor v;
+   v.run(instructions);
+   ir_variable *error_var = v.get_variable();
+   if (error_var) {
+      /* It would be nice to have proper location information, but for that
+       * we would need to check this as we process each kind of AST node
+       */
+      YYLTYPE loc;
+      memset(&loc, 0, sizeof(loc));
+      _mesa_glsl_error(&loc, state, "Read from write-only variable `%s'",
+                       error_var->name);
+   }
 }
 
 
@@ -820,7 +876,16 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
                           "assignment to %s",
                           non_lvalue_description);
          error_emitted = true;
-      } else if (lhs_var != NULL && lhs_var->data.read_only) {
+      } else if (lhs_var != NULL && (lhs_var->data.read_only ||
+                 (lhs_var->data.mode == ir_var_shader_storage &&
+                  lhs_var->data.image_read_only))) {
+         /* We can have image_read_only set on both images and buffer variables,
+          * but in the former there is a distinction between assignments to
+          * the variable itself (read_only) and to the memory it points to
+          * (image_read_only). Buffer variables make no such distinction, which
+          * is why this check is limited to buffer variables alone.
+          */
          _mesa_glsl_error(&lhs_loc, state,
                           "assignment to read-only variable '%s'",
                           lhs_var->name);
@@ -2115,7 +2180,7 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
    }
 
    const struct gl_context *const ctx = state->ctx;
-   unsigned elements = type->is_array() ? type->length : 1;
+   unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1;
    unsigned max_index = qual->binding + elements - 1;
    const glsl_type *base_type = type->without_array();
 
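Switching from type->length to arrays_of_arrays_size() matters once arrays
of arrays exist: a declaration such as sampler2D s[3][2] occupies six
consecutive binding points, so max_index has to be computed from the product
of all dimensions. A toy model of that count (glsl_type's real
implementation differs):

    /* Total leaf elements of a possibly nested array type: for
     * sampler2D s[3][2] this is 3 * 2 = 6, so bindings b .. b+5 are used.
     */
    struct toy_array_type {
       unsigned length;                      /* 0 when not an array */
       const struct toy_array_type *element; /* NULL at the leaf */
    };

    static unsigned
    arrays_of_arrays_size(const struct toy_array_type *t)
    {
       unsigned n = 1;
       for (; t && t->length; t = t->element)
          n *= t->length;
       return n;
    }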
@@ -2921,11 +2986,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
        var->data.depth_layout = ir_depth_layout_none;
 
    if (qual->flags.q.std140 ||
+       qual->flags.q.std430 ||
        qual->flags.q.packed ||
        qual->flags.q.shared) {
       _mesa_glsl_error(loc, state,
-                       "uniform block layout qualifiers std140, packed, and "
-                       "shared can only be applied to uniform blocks, not "
+                       "uniform and shader storage block layout qualifiers "
+                       "std140, std430, packed, and shared can only be "
+                       "applied to uniform or shader storage blocks, not "
                        "members");
    }
 
@@ -4579,7 +4646,7 @@ ast_function::hir(exec_list *instructions,
    if (state->es_shader && state->language_version >= 300) {
       /* Local shader has no exact candidates; check the built-ins. */
       _mesa_glsl_initialize_builtin_functions();
-      if (_mesa_glsl_find_builtin_function_by_name(state, name)) {
+      if (_mesa_glsl_find_builtin_function_by_name(name)) {
          YYLTYPE loc = this->get_location();
          _mesa_glsl_error(& loc, state,
                           "A shader cannot redefine or overload built-in "
@@ -5605,10 +5672,19 @@ ast_process_structure_or_interface_block(exec_list *instructions,
                                          bool is_interface,
                                          enum glsl_matrix_layout matrix_layout,
                                          bool allow_reserved_names,
-                                         ir_variable_mode var_mode)
+                                         ir_variable_mode var_mode,
+                                         ast_type_qualifier *layout)
 {
    unsigned decl_count = 0;
 
+   /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+    * that we don't have incompatible qualifiers
+    */
+   if (layout && layout->flags.q.read_only && layout->flags.q.write_only) {
+      _mesa_glsl_error(&loc, state,
+                       "Interface block sets both readonly and writeonly");
+   }
+
    /* Make an initial pass over the list of fields to determine how
     * many there are.  Each element in this list is an ast_declarator_list.
     * This means that we actually need to count the number of elements in the
@@ -5657,17 +5733,16 @@ ast_process_structure_or_interface_block(exec_list *instructions,
           * is_interface case, will have resulted in compilation having
           * already halted due to a syntax error.
           */
-         const struct glsl_type *field_type =
-            decl_type != NULL ? decl_type : glsl_type::error_type;
+         assert(decl_type);
 
-         if (is_interface && field_type->contains_opaque()) {
+         if (is_interface && decl_type->contains_opaque()) {
             YYLTYPE loc = decl_list->get_location();
             _mesa_glsl_error(&loc, state,
                              "uniform/buffer in non-default interface block contains "
                              "opaque variable");
          }
 
-         if (field_type->contains_atomic()) {
+         if (decl_type->contains_atomic()) {
             /* From section 4.1.7.3 of the GLSL 4.40 spec:
              *
              *    "Members of structures cannot be declared as atomic counter
@@ -5678,7 +5753,7 @@ ast_process_structure_or_interface_block(exec_list *instructions,
                              "shader storage block or uniform block");
          }
 
-         if (field_type->contains_image()) {
+         if (decl_type->contains_image()) {
             /* FINISHME: Same problem as with atomic counters.
              * FINISHME: Request clarification from Khronos and add
              * FINISHME: spec quotation here.
@@ -5692,12 +5767,14 @@ ast_process_structure_or_interface_block(exec_list *instructions,
          const struct ast_type_qualifier *const qual =
             & decl_list->type->qualifier;
          if (qual->flags.q.std140 ||
+             qual->flags.q.std430 ||
              qual->flags.q.packed ||
              qual->flags.q.shared) {
             _mesa_glsl_error(&loc, state,
                              "uniform/shader storage block layout qualifiers "
-                             "std140, packed, and shared can only be applied "
-                             "to uniform/shader storage blocks, not members");
+                             "std140, std430, packed, and shared can only be "
+                             "applied to uniform/shader storage blocks, not "
+                             "members");
          }
 
          if (qual->flags.q.constant) {
@@ -5707,8 +5784,8 @@ ast_process_structure_or_interface_block(exec_list *instructions,
                              "to struct or interface block members");
          }
 
-         field_type = process_array_type(&loc, decl_type,
-                                         decl->array_specifier, state);
+         const struct glsl_type *field_type =
+            process_array_type(&loc, decl_type, decl->array_specifier, state);
          fields[i].type = field_type;
          fields[i].name = decl->identifier;
          fields[i].location = -1;
@@ -5768,6 +5845,44 @@ ast_process_structure_or_interface_block(exec_list *instructions,
                    || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR);
          }
 
+         /* Image qualifiers are allowed on buffer variables, which can only
+          * be defined inside shader storage buffer objects
+          */
+         if (layout && var_mode == ir_var_shader_storage) {
+            if (qual->flags.q.read_only && qual->flags.q.write_only) {
+               _mesa_glsl_error(&loc, state,
+                                "buffer variable `%s' can't be both "
+                                "readonly and writeonly.", fields[i].name);
+            }
+
+            /* For the readonly and writeonly qualifiers, a member declaration,
+             * if present, overrides the block's layout qualifier.
+             */
+            bool read_only = layout->flags.q.read_only;
+            bool write_only = layout->flags.q.write_only;
+
+            if (qual->flags.q.read_only) {
+               read_only = true;
+               write_only = false;
+            } else if (qual->flags.q.write_only) {
+               read_only = false;
+               write_only = true;
+            }
+
+            fields[i].image_read_only = read_only;
+            fields[i].image_write_only = write_only;
+
+            /* For the other qualifiers, the flag is set if either the layout
+             * qualifier or the member qualifier is set.
+             */
+            fields[i].image_coherent = qual->flags.q.coherent ||
+                                        layout->flags.q.coherent;
+            fields[i].image_volatile = qual->flags.q._volatile ||
+                                        layout->flags.q._volatile;
+            fields[i].image_restrict = qual->flags.q.restrict_flag ||
+                                        layout->flags.q.restrict_flag;
+         }
+
          i++;
       }
    }
@@ -5822,7 +5937,8 @@ ast_struct_specifier::hir(exec_list *instructions,
                                                false,
                                                GLSL_MATRIX_LAYOUT_INHERITED,
                                                false /* allow_reserved_names */,
-                                               ir_var_auto);
+                                               ir_var_auto,
+                                               NULL);
 
    validate_identifier(this->name, loc, state);
 
@@ -5881,6 +5997,19 @@ private:
    bool found;
 };
 
+static bool
+is_unsized_array_last_element(ir_variable *v)
+{
+   const glsl_type *interface_type = v->get_interface_type();
+   int length = interface_type->length;
+
+   assert(v->type->is_unsized_array());
+
+   /* Check if it is the last element of the interface */
+   if (strcmp(interface_type->fields.structure[length-1].name, v->name) == 0)
+      return true;
+   return false;
+}
 
 ir_rvalue *
 ast_interface_block::hir(exec_list *instructions,
@@ -5896,6 +6025,13 @@ ast_interface_block::hir(exec_list *instructions,
                        this->block_name);
    }
 
+   if (!this->layout.flags.q.buffer &&
+       this->layout.flags.q.std430) {
+      _mesa_glsl_error(&loc, state,
+                       "std430 storage block layout qualifier is supported "
+                       "only for shader storage blocks");
+   }
+
    /* The ast_interface_block has a list of ast_declarator_lists.  We
     * need to turn those into ir_variables with an association
     * with this uniform block.
@@ -5905,6 +6041,8 @@ ast_interface_block::hir(exec_list *instructions,
       packing = GLSL_INTERFACE_PACKING_SHARED;
    } else if (this->layout.flags.q.packed) {
       packing = GLSL_INTERFACE_PACKING_PACKED;
+   } else if (this->layout.flags.q.std430) {
+      packing = GLSL_INTERFACE_PACKING_STD430;
    } else {
       /* The default layout is std140.
        */
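std430 earns its keep on arrays of small types: std140 rounds each array
element's base alignment up to that of a vec4 (16 bytes), while std430 keeps
the element's natural alignment, which is why tightly packed shader storage
blocks want it. One number makes the point; a sketch of the rule, not Mesa
code:

    #include <stdio.h>

    /* Array stride of 'float arr[N]' under the two layouts. */
    int main(void)
    {
       const unsigned float_size = 4;

       unsigned std140_stride = (float_size + 15u) & ~15u; /* round up to vec4 */
       unsigned std430_stride = float_size;                /* natural alignment */

       printf("float[]: std140 stride = %u, std430 stride = %u\n",
              std140_stride, std430_stride); /* prints 16 and 4 */
       return 0;
    }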
@@ -5955,7 +6093,8 @@ ast_interface_block::hir(exec_list *instructions,
                                                true,
                                                matrix_layout,
                                                redeclaring_per_vertex,
-                                               var_mode);
+                                               var_mode,
+                                               &this->layout);
 
    state->struct_specifier_depth--;
 
@@ -6253,6 +6392,33 @@ ast_interface_block::hir(exec_list *instructions,
       else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out)
          handle_tess_ctrl_shader_output_decl(state, loc, var);
 
+      for (unsigned i = 0; i < num_variables; i++) {
+         if (fields[i].type->is_unsized_array()) {
+            if (var_mode == ir_var_shader_storage) {
+               if (i != (num_variables - 1)) {
+                  _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                   "only last member of a shader storage block "
+                                   "can be defined as unsized array",
+                                   fields[i].name);
+               }
+            } else {
+               /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+                *
+                * "If an array is declared as the last member of a shader storage
+                * block and the size is not specified at compile-time, it is
+                * sized at run-time. In all other cases, arrays are sized only
+                * at compile-time."
+                */
+               if (state->es_shader) {
+                  _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                   "only last member of a shader storage block "
+                                   "can be defined as unsized array",
+                                   fields[i].name);
+               }
+            }
+         }
+      }
+
       if (ir_variable *earlier =
           state->symbols->get_variable(this->instance_name)) {
          if (!redeclaring_per_vertex) {
@@ -6312,6 +6478,14 @@ ast_interface_block::hir(exec_list *instructions,
 
          var->data.stream = this->layout.stream;
 
+         if (var->data.mode == ir_var_shader_storage) {
+            var->data.image_read_only = fields[i].image_read_only;
+            var->data.image_write_only = fields[i].image_write_only;
+            var->data.image_coherent = fields[i].image_coherent;
+            var->data.image_volatile = fields[i].image_volatile;
+            var->data.image_restrict = fields[i].image_restrict;
+         }
+
          /* Examine var name here since var may get deleted in the next call */
          bool var_is_gl_id = is_gl_identifier(var->name);
 
@@ -6344,6 +6518,32 @@ ast_interface_block::hir(exec_list *instructions,
          var->data.explicit_binding = this->layout.flags.q.explicit_binding;
          var->data.binding = this->layout.binding;
 
+         if (var->type->is_unsized_array()) {
+            if (var->is_in_shader_storage_block()) {
+               if (!is_unsized_array_last_element(var)) {
+                  _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                   "only last member of a shader storage block "
+                                   "can be defined as unsized array",
+                                   var->name);
+               }
+               var->data.from_ssbo_unsized_array = true;
+            } else {
+               /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+                *
+                * "If an array is declared as the last member of a shader storage
+                * block and the size is not specified at compile-time, it is
+                * sized at run-time. In all other cases, arrays are sized only
+                * at compile-time."
+                */
+               if (state->es_shader) {
+                  _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                   "only last member of a shader storage block "
+                                   "can be defined as unsized array",
+                                   var->name);
+               }
+            }
+         }
+
          state->symbols->add_variable(var);
          instructions->push_tail(var);
       }
index a4671e2..08a4504 100644
@@ -65,6 +65,7 @@ ast_type_qualifier::has_layout() const
           || this->flags.q.depth_less
           || this->flags.q.depth_unchanged
           || this->flags.q.std140
+          || this->flags.q.std430
           || this->flags.q.shared
           || this->flags.q.column_major
           || this->flags.q.row_major
@@ -123,6 +124,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
    ubo_layout_mask.flags.q.std140 = 1;
    ubo_layout_mask.flags.q.packed = 1;
    ubo_layout_mask.flags.q.shared = 1;
+   ubo_layout_mask.flags.q.std430 = 1;
 
    ast_type_qualifier ubo_binding_mask;
    ubo_binding_mask.flags.i = 0;
index 5e05199..f0f6be2 100644
@@ -401,6 +401,12 @@ shader_atomic_counters(const _mesa_glsl_parse_state *state)
 }
 
 static bool
+shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
+{
+   return state->ARB_shader_storage_buffer_object_enable;
+}
+
+static bool
 shader_trinary_minmax(const _mesa_glsl_parse_state *state)
 {
    return state->AMD_shader_trinary_minmax_enable;
@@ -428,6 +434,13 @@ shader_image_size(const _mesa_glsl_parse_state *state)
 }
 
 static bool
+shader_samples(const _mesa_glsl_parse_state *state)
+{
+   return state->is_version(450, 0) ||
+          state->ARB_shader_texture_image_samples_enable;
+}
+
+static bool
 gs_streams(const _mesa_glsl_parse_state *state)
 {
    return gpu_shader5(state) && gs_only(state);
@@ -522,7 +535,6 @@ private:
    void add_function(const char *name, ...);
 
    typedef ir_function_signature *(builtin_builder::*image_prototype_ctr)(const glsl_type *image_type,
-                                                                          const char *intrinsic_name,
                                                                           unsigned num_arguments,
                                                                           unsigned flags);
 
@@ -533,7 +545,8 @@ private:
       IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3),
       IMAGE_FUNCTION_READ_ONLY = (1 << 4),
       IMAGE_FUNCTION_WRITE_ONLY = (1 << 5),
-      IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6)
+      IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6),
+      IMAGE_FUNCTION_MS_ONLY = (1 << 7),
    };
 
    /**
@@ -667,6 +680,7 @@ private:
    B1(all);
    B1(not);
    BA2(textureSize);
+   B1(textureSamples);
 
 /** Flags to _texture() */
 #define TEX_PROJECT 1
@@ -729,22 +743,34 @@ private:
    B1(interpolateAtOffset)
    B1(interpolateAtSample)
 
-   ir_function_signature *_atomic_intrinsic(builtin_available_predicate avail);
-   ir_function_signature *_atomic_op(const char *intrinsic,
-                                     builtin_available_predicate avail);
+   ir_function_signature *_atomic_counter_intrinsic(builtin_available_predicate avail);
+   ir_function_signature *_atomic_counter_op(const char *intrinsic,
+                                             builtin_available_predicate avail);
+
+   ir_function_signature *_atomic_ssbo_intrinsic2(builtin_available_predicate avail,
+                                                  const glsl_type *type);
+   ir_function_signature *_atomic_ssbo_op2(const char *intrinsic,
+                                           builtin_available_predicate avail,
+                                           const glsl_type *type);
+   ir_function_signature *_atomic_ssbo_intrinsic3(builtin_available_predicate avail,
+                                                  const glsl_type *type);
+   ir_function_signature *_atomic_ssbo_op3(const char *intrinsic,
+                                           builtin_available_predicate avail,
+                                           const glsl_type *type);
 
    B1(min3)
    B1(max3)
    B1(mid3)
 
    ir_function_signature *_image_prototype(const glsl_type *image_type,
-                                           const char *intrinsic_name,
                                            unsigned num_arguments,
                                            unsigned flags);
    ir_function_signature *_image_size_prototype(const glsl_type *image_type,
-                                                const char *intrinsic_name,
                                                 unsigned num_arguments,
                                                 unsigned flags);
+   ir_function_signature *_image_samples_prototype(const glsl_type *image_type,
+                                                   unsigned num_arguments,
+                                                   unsigned flags);
    ir_function_signature *_image(image_prototype_ctr prototype,
                                  const glsl_type *image_type,
                                  const char *intrinsic_name,
@@ -863,13 +889,62 @@ void
 builtin_builder::create_intrinsics()
 {
    add_function("__intrinsic_atomic_read",
-                _atomic_intrinsic(shader_atomic_counters),
+                _atomic_counter_intrinsic(shader_atomic_counters),
                 NULL);
    add_function("__intrinsic_atomic_increment",
-                _atomic_intrinsic(shader_atomic_counters),
+                _atomic_counter_intrinsic(shader_atomic_counters),
                 NULL);
    add_function("__intrinsic_atomic_predecrement",
-                _atomic_intrinsic(shader_atomic_counters),
+                _atomic_counter_intrinsic(shader_atomic_counters),
+                NULL);
+
+   add_function("__intrinsic_ssbo_atomic_add",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_min",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_max",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_and",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_or",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_xor",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_exchange",
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+                                        glsl_type::int_type),
+                NULL);
+   add_function("__intrinsic_ssbo_atomic_comp_swap",
+                _atomic_ssbo_intrinsic3(shader_storage_buffer_object,
+                                        glsl_type::uint_type),
+                _atomic_ssbo_intrinsic3(shader_storage_buffer_object,
+                                        glsl_type::int_type),
                 NULL);
 
    add_image_functions(false);
@@ -1410,6 +1485,16 @@ builtin_builder::create_builtins()
                 _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::usampler2DMSArray_type),
                 NULL);
 
+   add_function("textureSamples",
+                _textureSamples(glsl_type::sampler2DMS_type),
+                _textureSamples(glsl_type::isampler2DMS_type),
+                _textureSamples(glsl_type::usampler2DMS_type),
+
+                _textureSamples(glsl_type::sampler2DMSArray_type),
+                _textureSamples(glsl_type::isampler2DMSArray_type),
+                _textureSamples(glsl_type::usampler2DMSArray_type),
+                NULL);
+
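
For reference, a minimal GLSL sketch of what this registration exposes
(hypothetical fragment shader; the uniform name is made up):

   #version 150
   #extension GL_ARB_shader_texture_image_samples : require
   uniform sampler2DMS tex;
   out vec4 color;
   void main()
   {
      int n = textureSamples(tex);   // lowered to the new ir_texture_samples opcode
      color = vec4(float(n));
   }
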
    add_function("texture",
                 _texture(ir_tex, v130, glsl_type::vec4_type,  glsl_type::sampler1D_type,  glsl_type::float_type),
                 _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type),
@@ -2522,16 +2607,81 @@ builtin_builder::create_builtins()
                 NULL);
 
    add_function("atomicCounter",
-                _atomic_op("__intrinsic_atomic_read",
-                           shader_atomic_counters),
+                _atomic_counter_op("__intrinsic_atomic_read",
+                                   shader_atomic_counters),
                 NULL);
    add_function("atomicCounterIncrement",
-                _atomic_op("__intrinsic_atomic_increment",
-                           shader_atomic_counters),
+                _atomic_counter_op("__intrinsic_atomic_increment",
+                                   shader_atomic_counters),
                 NULL);
    add_function("atomicCounterDecrement",
-                _atomic_op("__intrinsic_atomic_predecrement",
-                           shader_atomic_counters),
+                _atomic_counter_op("__intrinsic_atomic_predecrement",
+                                   shader_atomic_counters),
+                NULL);
+
+   add_function("atomicAdd",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_add",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_add",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicMin",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_min",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_min",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicMax",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_max",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_max",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicAnd",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_and",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_and",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicOr",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_or",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_or",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicXor",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_xor",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_xor",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicExchange",
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_exchange",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op2("__intrinsic_ssbo_atomic_exchange",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
+                NULL);
+   add_function("atomicCompSwap",
+                _atomic_ssbo_op3("__intrinsic_ssbo_atomic_comp_swap",
+                                 shader_storage_buffer_object,
+                                 glsl_type::uint_type),
+                _atomic_ssbo_op3("__intrinsic_ssbo_atomic_comp_swap",
+                                 shader_storage_buffer_object,
+                                 glsl_type::int_type),
                 NULL);
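
For context, a hypothetical GLSL 4.30 compute shader exercising the
wrappers registered above (block and member names are made up):

   #version 430
   layout(local_size_x = 64) in;
   layout(std430, binding = 0) buffer Counts {
      uint next;
      int  smallest;
   };

   void main()
   {
      uint id = atomicAdd(next, 1u);    // -> __intrinsic_ssbo_atomic_add (uint)
      atomicMin(smallest, int(id));     // -> __intrinsic_ssbo_atomic_min (int)
      atomicCompSwap(next, 0u, 1u);     // -> __intrinsic_ssbo_atomic_comp_swap
   }
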
 
    add_function("min3",
@@ -2670,8 +2820,10 @@ builtin_builder::add_image_function(const char *name,
    ir_function *f = new(mem_ctx) ir_function(name);
 
    for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) {
-      if (types[i]->sampler_type != GLSL_TYPE_FLOAT ||
-          (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE))
+      if ((types[i]->sampler_type != GLSL_TYPE_FLOAT ||
+           (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) &&
+          (types[i]->sampler_dimensionality == GLSL_SAMPLER_DIM_MS ||
+           !(flags & IMAGE_FUNCTION_MS_ONLY)))
          f->add_signature(_image(prototype, types[i], intrinsic_name,
                                  num_arguments, flags));
    }
@@ -2739,6 +2891,12 @@ builtin_builder::add_image_functions(bool glsl)
                       "__intrinsic_image_size",
                       &builtin_builder::_image_size_prototype, 1,
                       flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE);
+
+   add_image_function(glsl ? "imageSamples" : "__intrinsic_image_samples",
+                      "__intrinsic_image_samples",
+                      &builtin_builder::_image_samples_prototype, 1,
+                      flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE |
+                      IMAGE_FUNCTION_MS_ONLY);
 }
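
A sketch of the GLSL this enables (hypothetical shader; note that
IMAGE_FUNCTION_MS_ONLY restricts the overloads to multisample image types):

   #version 420
   #extension GL_ARB_shader_texture_image_samples : require
   layout(rgba8) readonly uniform image2DMS img;
   out vec4 color;
   void main()
   {
      /* imageSamples() has no signature for non-MS types such as image2D. */
      color = vec4(float(imageSamples(img)));
   }
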
 
 ir_variable *
@@ -4173,6 +4331,19 @@ builtin_builder::_textureSize(builtin_available_predicate avail,
 }
 
 ir_function_signature *
+builtin_builder::_textureSamples(const glsl_type *sampler_type)
+{
+   ir_variable *s = in_var(sampler_type, "sampler");
+   MAKE_SIG(glsl_type::int_type, shader_samples, 1, s);
+
+   ir_texture *tex = new(mem_ctx) ir_texture(ir_texture_samples);
+   tex->set_sampler(new(mem_ctx) ir_dereference_variable(s), glsl_type::int_type);
+   body.emit(ret(tex));
+
+   return sig;
+}
+
+ir_function_signature *
 builtin_builder::_texture(ir_texture_opcode opcode,
                           builtin_available_predicate avail,
                           const glsl_type *return_type,
@@ -4801,7 +4972,7 @@ builtin_builder::_interpolateAtSample(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_atomic_intrinsic(builtin_available_predicate avail)
+builtin_builder::_atomic_counter_intrinsic(builtin_available_predicate avail)
 {
    ir_variable *counter = in_var(glsl_type::atomic_uint_type, "counter");
    MAKE_INTRINSIC(glsl_type::uint_type, avail, 1, counter);
@@ -4809,8 +4980,29 @@ builtin_builder::_atomic_intrinsic(builtin_available_predicate avail)
 }
 
 ir_function_signature *
-builtin_builder::_atomic_op(const char *intrinsic,
-                            builtin_available_predicate avail)
+builtin_builder::_atomic_ssbo_intrinsic2(builtin_available_predicate avail,
+                                         const glsl_type *type)
+{
+   ir_variable *atomic = in_var(type, "atomic");
+   ir_variable *data = in_var(type, "data");
+   MAKE_INTRINSIC(type, avail, 2, atomic, data);
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_atomic_ssbo_intrinsic3(builtin_available_predicate avail,
+                                         const glsl_type *type)
+{
+   ir_variable *atomic = in_var(type, "atomic");
+   ir_variable *data1 = in_var(type, "data1");
+   ir_variable *data2 = in_var(type, "data2");
+   MAKE_INTRINSIC(type, avail, 3, atomic, data1, data2);
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_atomic_counter_op(const char *intrinsic,
+                                    builtin_available_predicate avail)
 {
    ir_variable *counter = in_var(glsl_type::atomic_uint_type, "atomic_counter");
    MAKE_SIG(glsl_type::uint_type, avail, 1, counter);
@@ -4823,6 +5015,39 @@ builtin_builder::_atomic_op(const char *intrinsic,
 }
 
 ir_function_signature *
+builtin_builder::_atomic_ssbo_op2(const char *intrinsic,
+                                  builtin_available_predicate avail,
+                                  const glsl_type *type)
+{
+   ir_variable *atomic = in_var(type, "atomic_var");
+   ir_variable *data = in_var(type, "atomic_data");
+   MAKE_SIG(type, avail, 2, atomic, data);
+
+   ir_variable *retval = body.make_temp(type, "atomic_retval");
+   body.emit(call(shader->symbols->get_function(intrinsic), retval,
+                  sig->parameters));
+   body.emit(ret(retval));
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_atomic_ssbo_op3(const char *intrinsic,
+                                  builtin_available_predicate avail,
+                                  const glsl_type *type)
+{
+   ir_variable *atomic = in_var(type, "atomic_var");
+   ir_variable *data1 = in_var(type, "atomic_data1");
+   ir_variable *data2 = in_var(type, "atomic_data2");
+   MAKE_SIG(type, avail, 3, atomic, data1, data2);
+
+   ir_variable *retval = body.make_temp(type, "atomic_retval");
+   body.emit(call(shader->symbols->get_function(intrinsic), retval,
+                  sig->parameters));
+   body.emit(ret(retval));
+   return sig;
+}
+
+ir_function_signature *
 builtin_builder::_min3(const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
@@ -4866,7 +5091,6 @@ builtin_builder::_mid3(const glsl_type *type)
 
 ir_function_signature *
 builtin_builder::_image_prototype(const glsl_type *image_type,
-                                  const char *intrinsic_name,
                                   unsigned num_arguments,
                                   unsigned flags)
 {
@@ -4916,9 +5140,8 @@ builtin_builder::_image_prototype(const glsl_type *image_type,
 
 ir_function_signature *
 builtin_builder::_image_size_prototype(const glsl_type *image_type,
-                                       const char *intrinsic_name,
-                                       unsigned num_arguments,
-                                       unsigned flags)
+                                       unsigned /* num_arguments */,
+                                       unsigned /* flags */)
 {
    const glsl_type *ret_type;
    unsigned num_components = image_type->coordinate_components();
@@ -4956,13 +5179,38 @@ builtin_builder::_image_size_prototype(const glsl_type *image_type,
 }
 
 ir_function_signature *
+builtin_builder::_image_samples_prototype(const glsl_type *image_type,
+                                          unsigned num_arguments,
+                                          unsigned flags)
+{
+   ir_variable *image = in_var(image_type, "image");
+   ir_function_signature *sig =
+      new_sig(glsl_type::int_type, shader_samples, 1, image);
+
+   /* Set the maximal set of qualifiers allowed for this image
+    * built-in.  Function calls with arguments having fewer
+    * qualifiers than present in the prototype are allowed by the
+    * spec, but not with more; i.e. this makes the compiler
+    * accept everything that needs to be accepted, and reject cases
+    * like loads from write-only or stores to read-only images.
+    */
+   image->data.image_read_only = true;
+   image->data.image_write_only = true;
+   image->data.image_coherent = true;
+   image->data.image_volatile = true;
+   image->data.image_restrict = true;
+
+   return sig;
+}
+
+ir_function_signature *
 builtin_builder::_image(image_prototype_ctr prototype,
                         const glsl_type *image_type,
                         const char *intrinsic_name,
                         unsigned num_arguments,
                         unsigned flags)
 {
-   ir_function_signature *sig = (this->*prototype)(image_type, intrinsic_name,
+   ir_function_signature *sig = (this->*prototype)(image_type,
                                                    num_arguments, flags);
 
    if (flags & IMAGE_FUNCTION_EMIT_STUB) {
@@ -5043,8 +5291,7 @@ _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
 }
 
 ir_function *
-_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state,
-                                         const char *name)
+_mesa_glsl_find_builtin_function_by_name(const char *name)
 {
    ir_function *f;
    mtx_lock(&builtins_lock);
@@ -5059,4 +5306,32 @@ _mesa_glsl_get_builtin_function_shader()
    return builtins.shader;
 }
 
+
+/**
+ * Get the function signature for main from a shader
+ */
+ir_function_signature *
+_mesa_get_main_function_signature(gl_shader *sh)
+{
+   ir_function *const f = sh->symbols->get_function("main");
+   if (f != NULL) {
+      exec_list void_parameters;
+
+      /* Look for the 'void main()' signature and ensure that it's defined.
+       * This keeps the linker from accidentally picking a shader that just
+       * contains a prototype for main.
+       *
+       * We don't have to check for multiple definitions of main (in multiple
+       * shaders) because that would have already been caught above.
+       */
+      ir_function_signature *sig =
+         f->matching_signature(NULL, &void_parameters, false);
+      if ((sig != NULL) && sig->is_defined) {
+         return sig;
+      }
+   }
+
+   return NULL;
+}
+
 /** @} */
index 0d0d71d..0aedbb3 100644
@@ -127,7 +127,7 @@ static const struct glsl_struct_field gl_FogParameters_fields[] = {
 #define T(TYPE, MIN_GL, MIN_ES) \
    { glsl_type::TYPE##_type, MIN_GL, MIN_ES },
 
-const static struct builtin_type_versions {
+static const struct builtin_type_versions {
    const glsl_type *const type;
    int min_gl;
    int min_es;
index dd7804f..a6ad105 100644
@@ -22,6 +22,8 @@
  */
 
 #include "ir.h"
+#include "ir_builder.h"
+#include "linker.h"
 #include "glsl_parser_extras.h"
 #include "glsl_symbol_table.h"
 #include "main/core.h"
@@ -29,6 +31,8 @@
 #include "program/prog_statevars.h"
 #include "program/prog_instruction.h"
 
+using namespace ir_builder;
+
 static const struct gl_builtin_uniform_element gl_NumSamples_elements[] = {
    {NULL, {STATE_NUM_SAMPLES, 0, 0}, SWIZZLE_XXXX}
 };
@@ -383,8 +387,7 @@ private:
    ir_variable *add_uniform(const glsl_type *type, const char *name);
    ir_variable *add_const(const char *name, int value);
    ir_variable *add_const_ivec3(const char *name, int x, int y, int z);
-   void add_varying(int slot, const glsl_type *type, const char *name,
-                    const char *name_as_gs_input);
+   void add_varying(int slot, const glsl_type *type, const char *name);
 
    exec_list * const instructions;
    struct _mesa_glsl_parse_state * const state;
@@ -399,10 +402,12 @@ private:
 
    const glsl_type * const bool_t;
    const glsl_type * const int_t;
+   const glsl_type * const uint_t;
    const glsl_type * const float_t;
    const glsl_type * const vec2_t;
    const glsl_type * const vec3_t;
    const glsl_type * const vec4_t;
+   const glsl_type * const uvec3_t;
    const glsl_type * const mat3_t;
    const glsl_type * const mat4_t;
 
@@ -416,8 +421,10 @@ builtin_variable_generator::builtin_variable_generator(
    : instructions(instructions), state(state), symtab(state->symbols),
      compatibility(!state->is_version(140, 100)),
      bool_t(glsl_type::bool_type), int_t(glsl_type::int_type),
+     uint_t(glsl_type::uint_type),
      float_t(glsl_type::float_type), vec2_t(glsl_type::vec2_type),
      vec3_t(glsl_type::vec3_type), vec4_t(glsl_type::vec4_type),
+     uvec3_t(glsl_type::uvec3_type),
      mat3_t(glsl_type::mat3_type), mat4_t(glsl_type::mat4_type)
 {
 }
@@ -673,14 +680,10 @@ builtin_variable_generator::generate_constants()
       if (!state->es_shader) {
          add_const("gl_MaxGeometryAtomicCounters",
                    state->Const.MaxGeometryAtomicCounters);
-
-        if (state->is_version(400, 0) ||
-             state->ARB_tessellation_shader_enable) {
-                add_const("gl_MaxTessControlAtomicCounters",
-                           state->Const.MaxTessControlAtomicCounters);
-                add_const("gl_MaxTessEvaluationAtomicCounters",
-                           state->Const.MaxTessEvaluationAtomicCounters);
-        }
+         add_const("gl_MaxTessControlAtomicCounters",
+                   state->Const.MaxTessControlAtomicCounters);
+         add_const("gl_MaxTessEvaluationAtomicCounters",
+                   state->Const.MaxTessEvaluationAtomicCounters);
       }
    }
 
@@ -1052,20 +1055,23 @@ builtin_variable_generator::generate_fs_special_vars()
 void
 builtin_variable_generator::generate_cs_special_vars()
 {
-   /* TODO: finish this. */
+   add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_ID, uvec3_t,
+                    "gl_LocalInvocationID");
+   add_system_value(SYSTEM_VALUE_WORK_GROUP_ID, uvec3_t, "gl_WorkGroupID");
+   add_system_value(SYSTEM_VALUE_NUM_WORK_GROUPS, uvec3_t, "gl_NumWorkGroups");
+   add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
+   add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
 }
 
 
 /**
  * Add a single "varying" variable.  The variable's type and direction (input
  * or output) are adjusted as appropriate for the type of shader being
- * compiled.  For geometry shaders using {ARB,EXT}_geometry_shader4,
- * name_as_gs_input is used for the input (to avoid ambiguity).
+ * compiled.
  */
 void
 builtin_variable_generator::add_varying(int slot, const glsl_type *type,
-                                        const char *name,
-                                        const char *name_as_gs_input)
+                                        const char *name)
 {
    switch (state->stage) {
    case MESA_SHADER_TESS_CTRL:
@@ -1093,32 +1099,29 @@ builtin_variable_generator::add_varying(int slot, const glsl_type *type,
 void
 builtin_variable_generator::generate_varyings()
 {
-#define ADD_VARYING(loc, type, name) \
-   add_varying(loc, type, name, name "In")
-
    /* gl_Position and gl_PointSize are not visible from fragment shaders. */
    if (state->stage != MESA_SHADER_FRAGMENT) {
-      ADD_VARYING(VARYING_SLOT_POS, vec4_t, "gl_Position");
-      ADD_VARYING(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
+      add_varying(VARYING_SLOT_POS, vec4_t, "gl_Position");
+      add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
    }
 
    if (state->is_version(130, 0)) {
-       ADD_VARYING(VARYING_SLOT_CLIP_DIST0, array(float_t, 0),
+       add_varying(VARYING_SLOT_CLIP_DIST0, array(float_t, 0),
                    "gl_ClipDistance");
    }
 
    if (compatibility) {
-      ADD_VARYING(VARYING_SLOT_TEX0, array(vec4_t, 0), "gl_TexCoord");
-      ADD_VARYING(VARYING_SLOT_FOGC, float_t, "gl_FogFragCoord");
+      add_varying(VARYING_SLOT_TEX0, array(vec4_t, 0), "gl_TexCoord");
+      add_varying(VARYING_SLOT_FOGC, float_t, "gl_FogFragCoord");
       if (state->stage == MESA_SHADER_FRAGMENT) {
-         ADD_VARYING(VARYING_SLOT_COL0, vec4_t, "gl_Color");
-         ADD_VARYING(VARYING_SLOT_COL1, vec4_t, "gl_SecondaryColor");
+         add_varying(VARYING_SLOT_COL0, vec4_t, "gl_Color");
+         add_varying(VARYING_SLOT_COL1, vec4_t, "gl_SecondaryColor");
       } else {
-         ADD_VARYING(VARYING_SLOT_CLIP_VERTEX, vec4_t, "gl_ClipVertex");
-         ADD_VARYING(VARYING_SLOT_COL0, vec4_t, "gl_FrontColor");
-         ADD_VARYING(VARYING_SLOT_BFC0, vec4_t, "gl_BackColor");
-         ADD_VARYING(VARYING_SLOT_COL1, vec4_t, "gl_FrontSecondaryColor");
-         ADD_VARYING(VARYING_SLOT_BFC1, vec4_t, "gl_BackSecondaryColor");
+         add_varying(VARYING_SLOT_CLIP_VERTEX, vec4_t, "gl_ClipVertex");
+         add_varying(VARYING_SLOT_COL0, vec4_t, "gl_FrontColor");
+         add_varying(VARYING_SLOT_BFC0, vec4_t, "gl_BackColor");
+         add_varying(VARYING_SLOT_COL1, vec4_t, "gl_FrontSecondaryColor");
+         add_varying(VARYING_SLOT_BFC1, vec4_t, "gl_BackSecondaryColor");
       }
    }
 
@@ -1208,3 +1211,84 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
       break;
    }
 }
+
+
+/**
+ * Initialize compute shader variables with values that are derived from other
+ * compute shader variable.
+ */
+static void
+initialize_cs_derived_variables(gl_shader *shader,
+                                ir_function_signature *const main_sig)
+{
+   assert(shader->Stage == MESA_SHADER_COMPUTE);
+
+   ir_variable *gl_GlobalInvocationID =
+      shader->symbols->get_variable("gl_GlobalInvocationID");
+   assert(gl_GlobalInvocationID);
+   ir_variable *gl_WorkGroupID =
+      shader->symbols->get_variable("gl_WorkGroupID");
+   assert(gl_WorkGroupID);
+   ir_variable *gl_WorkGroupSize =
+      shader->symbols->get_variable("gl_WorkGroupSize");
+   if (gl_WorkGroupSize == NULL) {
+      void *const mem_ctx = ralloc_parent(shader->ir);
+      gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type,
+                                                  "gl_WorkGroupSize",
+                                                  ir_var_auto);
+      gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly;
+      gl_WorkGroupSize->data.read_only = true;
+      shader->ir->push_head(gl_WorkGroupSize);
+   }
+   ir_variable *gl_LocalInvocationID =
+      shader->symbols->get_variable("gl_LocalInvocationID");
+   assert(gl_LocalInvocationID);
+
+   /* gl_GlobalInvocationID =
+    *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
+    */
+   ir_instruction *inst =
+      assign(gl_GlobalInvocationID,
+             add(mul(gl_WorkGroupID, gl_WorkGroupSize),
+                 gl_LocalInvocationID));
+   main_sig->body.push_head(inst);
+
+   /* gl_LocalInvocationIndex =
+    *    gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
+    *    gl_LocalInvocationID.y * gl_WorkGroupSize.x +
+    *    gl_LocalInvocationID.x;
+    */
+   ir_expression *index_z =
+      mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)),
+          swizzle_y(gl_WorkGroupSize));
+   ir_expression *index_y =
+      mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize));
+   ir_expression *index_y_plus_z = add(index_y, index_z);
+   operand index_x(swizzle_x(gl_LocalInvocationID));
+   ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
+   ir_variable *gl_LocalInvocationIndex =
+      shader->symbols->get_variable("gl_LocalInvocationIndex");
+   assert(gl_LocalInvocationIndex);
+   inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
+   main_sig->body.push_head(inst);
+}
+
+
+/**
+ * Initialize builtin variables with values based on other builtin variables.
+ * These are initialized in the main function.
+ */
+void
+_mesa_glsl_initialize_derived_variables(gl_shader *shader)
+{
+   /* We only need to set CS variables currently. */
+   if (shader->Stage != MESA_SHADER_COMPUTE)
+      return;
+
+   ir_function_signature *const main_sig =
+      _mesa_get_main_function_signature(shader);
+   if (main_sig == NULL)
+      return;
+
+   initialize_cs_derived_variables(shader, main_sig);
+}
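
Expressed as GLSL rather than IR, the instructions pushed into the head of
main() amount to the following (a paraphrase of the comments above, not
literal source):

   gl_GlobalInvocationID = gl_WorkGroupID * gl_WorkGroupSize
                         + gl_LocalInvocationID;
   gl_LocalInvocationIndex =
      gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
      gl_LocalInvocationID.y * gl_WorkGroupSize.x +
      gl_LocalInvocationID.x;
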
index 2d631f0..1d7a3af 100644
@@ -2483,6 +2483,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
               if (extensions->ARB_shader_image_size)
                  add_builtin_define(parser, "GL_ARB_shader_image_size", 1);
 
+              if (extensions->ARB_shader_texture_image_samples)
+                 add_builtin_define(parser, "GL_ARB_shader_texture_image_samples", 1);
+
               if (extensions->ARB_derivative_control)
                  add_builtin_define(parser, "GL_ARB_derivative_control", 1);
 
index 90e84ed..2142817 100644
@@ -406,11 +406,11 @@ image2DShadow           KEYWORD(130, 300, 0, 0, IMAGE2DSHADOW);
 image1DArrayShadow      KEYWORD(130, 300, 0, 0, IMAGE1DARRAYSHADOW);
 image2DArrayShadow      KEYWORD(130, 300, 0, 0, IMAGE2DARRAYSHADOW);
 
-coherent       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, COHERENT);
-volatile       KEYWORD_WITH_ALT(110, 100, 420, 310, yyextra->ARB_shader_image_load_store_enable, VOLATILE);
-restrict       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, RESTRICT);
-readonly       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, READONLY);
-writeonly      KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, WRITEONLY);
+coherent       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, COHERENT);
+volatile       KEYWORD_WITH_ALT(110, 100, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, VOLATILE);
+restrict       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, RESTRICT);
+readonly       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, READONLY);
+writeonly      KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, WRITEONLY);
 
 atomic_uint     KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT);
 
index 59e4527..f0abeb0 100644
@@ -169,6 +169,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
 %token <identifier> IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER
 %type <identifier> any_identifier
 %type <interface_block> instance_name_opt
+%type <interface_block> buffer_instance_name_opt
 %token <real> FLOATCONSTANT
 %token <dreal> DOUBLECONSTANT
 %token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT
@@ -218,6 +219,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
 %type <type_qualifier> subroutine_qualifier
 %type <subroutine_list> subroutine_type_list
 %type <type_qualifier> interface_qualifier
+%type <type_qualifier> buffer_interface_qualifier
 %type <type_specifier> type_specifier
 %type <type_specifier> type_specifier_nonarray
 %type <array_specifier> array_specifier
@@ -1197,6 +1199,8 @@ layout_qualifier_id:
             $$.flags.q.std140 = 1;
          } else if (match_layout_qualifier($1, "shared", state) == 0) {
             $$.flags.q.shared = 1;
+         } else if (match_layout_qualifier($1, "std430", state) == 0) {
+            $$.flags.q.std430 = 1;
          } else if (match_layout_qualifier($1, "column_major", state) == 0) {
             $$.flags.q.column_major = 1;
          /* "row_major" is a reserved word in GLSL 1.30+. Its token is parsed
@@ -2595,13 +2599,22 @@ interface_block:
    {
       $$ = $1;
    }
-   | layout_qualifier basic_interface_block
+   | layout_qualifier interface_block
    {
-      ast_interface_block *block = $2;
+      ast_interface_block *block = (ast_interface_block *) $2;
+
+      if (!state->has_420pack() && block->layout.has_layout() &&
+          !block->layout.is_default_qualifier) {
+         _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
+         YYERROR;
+      }
+
       if (!block->layout.merge_qualifier(& @1, state, $1)) {
          YYERROR;
       }
 
+      block->layout.is_default_qualifier = false;
+
       foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
          ast_type_qualifier& qualifier = member->type->qualifier;
          if (qualifier.flags.q.stream && qualifier.stream != block->layout.stream) {
@@ -2615,6 +2628,20 @@ interface_block:
       }
       $$ = block;
    }
+   | memory_qualifier interface_block
+   {
+      ast_interface_block *block = (ast_interface_block *)$2;
+
+      if (!block->layout.flags.q.buffer) {
+            _mesa_glsl_error(& @1, state,
+                             "memory qualifiers can only be used in the "
+                             "declaration of shader storage blocks");
+      }
+      if (!block->layout.merge_qualifier(& @1, state, $1)) {
+         YYERROR;
+      }
+      $$ = block;
+   }
    ;
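
A hedged example of what the new production accepts and rejects
(hypothetical blocks):

   coherent buffer Ring { uint head; uint slots[]; };   // accepted
   /* "coherent uniform Config { vec4 params; };" would hit the error path
    * above: memory qualifiers are only valid on shader storage blocks. */
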
 
 basic_interface_block:
@@ -2625,132 +2652,18 @@ basic_interface_block:
       block->block_name = $2;
       block->declarations.push_degenerate_list_at_head(& $4->link);
 
-      if ($1.flags.q.buffer) {
-         if (!state->has_shader_storage_buffer_objects()) {
-            _mesa_glsl_error(& @1, state,
-                             "#version 430 / GL_ARB_shader_storage_buffer_object "
-                             "required for defining shader storage blocks");
-         } else if (state->ARB_shader_storage_buffer_object_warn) {
-            _mesa_glsl_warning(& @1, state,
-                               "#version 430 / GL_ARB_shader_storage_buffer_object "
-                               "required for defining shader storage blocks");
-         }
-      } else if ($1.flags.q.uniform) {
-         if (!state->has_uniform_buffer_objects()) {
-            _mesa_glsl_error(& @1, state,
-                             "#version 140 / GL_ARB_uniform_buffer_object "
-                             "required for defining uniform blocks");
-         } else if (state->ARB_uniform_buffer_object_warn) {
-            _mesa_glsl_warning(& @1, state,
-                               "#version 140 / GL_ARB_uniform_buffer_object "
-                               "required for defining uniform blocks");
-         }
-      } else {
-         if (state->es_shader || state->language_version < 150) {
-            _mesa_glsl_error(& @1, state,
-                             "#version 150 required for using "
-                             "interface blocks");
-         }
-      }
-
-      /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"):
-       * "It is illegal to have an input block in a vertex shader
-       *  or an output block in a fragment shader"
-       */
-      if ((state->stage == MESA_SHADER_VERTEX) && $1.flags.q.in) {
-         _mesa_glsl_error(& @1, state,
-                          "`in' interface block is not allowed for "
-                          "a vertex shader");
-      } else if ((state->stage == MESA_SHADER_FRAGMENT) && $1.flags.q.out) {
-         _mesa_glsl_error(& @1, state,
-                          "`out' interface block is not allowed for "
-                          "a fragment shader");
-      }
-
-      /* Since block arrays require names, and both features are added in
-       * the same language versions, we don't have to explicitly
-       * version-check both things.
-       */
-      if (block->instance_name != NULL) {
-         state->check_version(150, 300, & @1, "interface blocks with "
-                               "an instance name are not allowed");
-      }
-
-      uint64_t interface_type_mask;
-      struct ast_type_qualifier temp_type_qualifier;
-
-      /* Get a bitmask containing only the in/out/uniform/buffer
-       * flags, allowing us to ignore other irrelevant flags like
-       * interpolation qualifiers.
-       */
-      temp_type_qualifier.flags.i = 0;
-      temp_type_qualifier.flags.q.uniform = true;
-      temp_type_qualifier.flags.q.buffer = true;
-      temp_type_qualifier.flags.q.in = true;
-      temp_type_qualifier.flags.q.out = true;
-      interface_type_mask = temp_type_qualifier.flags.i;
-
-      /* Get the block's interface qualifier.  The interface_qualifier
-       * production rule guarantees that only one bit will be set (and
-       * it will be in/out/uniform).
-       */
-      uint64_t block_interface_qualifier = $1.flags.i;
-
-      block->layout.flags.i |= block_interface_qualifier;
+      _mesa_ast_process_interface_block(& @1, state, block, $1);
 
-      if (state->stage == MESA_SHADER_GEOMETRY &&
-          state->has_explicit_attrib_stream()) {
-         /* Assign global layout's stream value. */
-         block->layout.flags.q.stream = 1;
-         block->layout.flags.q.explicit_stream = 0;
-         block->layout.stream = state->out_qualifier->stream;
-      }
+      $$ = block;
+   }
+   | buffer_interface_qualifier NEW_IDENTIFIER '{' member_list '}' buffer_instance_name_opt ';'
+   {
+      ast_interface_block *const block = $6;
 
-      foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
-         ast_type_qualifier& qualifier = member->type->qualifier;
-         if ((qualifier.flags.i & interface_type_mask) == 0) {
-            /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
-             * "If no optional qualifier is used in a member declaration, the
-             *  qualifier of the variable is just in, out, or uniform as declared
-             *  by interface-qualifier."
-             */
-            qualifier.flags.i |= block_interface_qualifier;
-         } else if ((qualifier.flags.i & interface_type_mask) !=
-                    block_interface_qualifier) {
-            /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
-             * "If optional qualifiers are used, they can include interpolation
-             *  and storage qualifiers and they must declare an input, output,
-             *  or uniform variable consistent with the interface qualifier of
-             *  the block."
-             */
-            _mesa_glsl_error(& @1, state,
-                             "uniform/in/out qualifier on "
-                             "interface block member does not match "
-                             "the interface block");
-         }
+      block->block_name = $2;
+      block->declarations.push_degenerate_list_at_head(& $4->link);
 
-         /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
-          *
-          * "GLSL ES 3.0 does not support interface blocks for shader inputs or
-          * outputs."
-          *
-          * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":.
-          *
-          * "Only variables output from a shader can be candidates for
-          * invariance."
-          *
-          * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
-          *
-          * "If optional qualifiers are used, they can include interpolation
-          * qualifiers, auxiliary storage qualifiers, and storage qualifiers
-          * and they must declare an input, output, or uniform member
-          * consistent with the interface qualifier of the block"
-          */
-         if (qualifier.flags.q.invariant)
-            _mesa_glsl_error(&@1, state,
-                             "invariant qualifiers cannot be used "
-                             "with interface blocks members");
-      }
+      _mesa_ast_process_interface_block(& @1, state, block, $1);
 
       $$ = block;
    }
@@ -2772,7 +2685,10 @@ interface_qualifier:
       memset(& $$, 0, sizeof($$));
       $$.flags.q.uniform = 1;
    }
-   | BUFFER
+   ;
+
+buffer_interface_qualifier:
+   BUFFER
    {
       memset(& $$, 0, sizeof($$));
       $$.flags.q.buffer = 1;
@@ -2799,6 +2715,26 @@ instance_name_opt:
    }
    ;
 
+buffer_instance_name_opt:
+   /* empty */
+   {
+      $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier,
+                                          NULL, NULL);
+   }
+   | NEW_IDENTIFIER
+   {
+      $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier,
+                                          $1, NULL);
+      $$->set_location(@1);
+   }
+   | NEW_IDENTIFIER array_specifier
+   {
+      $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier,
+                                          $1, $2);
+      $$->set_location_range(@1, @2);
+   }
+   ;
+
 member_list:
    member_declaration
    {
@@ -2845,6 +2781,14 @@ layout_defaults:
       $$ = NULL;
    }
 
+   | layout_qualifier BUFFER ';'
+   {
+      if (!state->default_shader_storage_qualifier->merge_qualifier(& @1, state, $1)) {
+         YYERROR;
+      }
+      $$ = NULL;
+   }
+
    | layout_qualifier IN_TOK ';'
    {
       $$ = NULL;
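
A default-qualifier statement this new alternative parses (sketch):

   layout(std430) buffer;   // applies to shader storage blocks declared later
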
index 5c8f98b..c02d28c 100644
@@ -244,6 +244,12 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
    this->default_uniform_qualifier = new(this) ast_type_qualifier();
    this->default_uniform_qualifier->flags.q.shared = 1;
    this->default_uniform_qualifier->flags.q.column_major = 1;
+   this->default_uniform_qualifier->is_default_qualifier = true;
+
+   this->default_shader_storage_qualifier = new(this) ast_type_qualifier();
+   this->default_shader_storage_qualifier->flags.q.shared = 1;
+   this->default_shader_storage_qualifier->flags.q.column_major = 1;
+   this->default_shader_storage_qualifier->is_default_qualifier = true;
 
    this->fs_uses_gl_fragcoord = false;
    this->fs_redeclares_gl_fragcoord = false;
@@ -604,8 +610,9 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    EXT(ARB_shader_image_size,            true,  false,     ARB_shader_image_size),
    EXT(ARB_shader_precision,             true,  false,     ARB_shader_precision),
    EXT(ARB_shader_stencil_export,        true,  false,     ARB_shader_stencil_export),
-   EXT(ARB_shader_storage_buffer_object, true,  false,     ARB_shader_storage_buffer_object),
+   EXT(ARB_shader_storage_buffer_object, true,  true,      ARB_shader_storage_buffer_object),
    EXT(ARB_shader_subroutine,            true,  false,     ARB_shader_subroutine),
+   EXT(ARB_shader_texture_image_samples, true,  false,     ARB_shader_texture_image_samples),
    EXT(ARB_shader_texture_lod,           true,  false,     ARB_shader_texture_lod),
    EXT(ARB_shading_language_420pack,     true,  false,     ARB_shading_language_420pack),
    EXT(ARB_shading_language_packing,     true,  false,     ARB_shading_language_packing),
@@ -859,6 +866,139 @@ _mesa_ast_set_aggregate_type(const glsl_type *type,
    }
 }
 
+void
+_mesa_ast_process_interface_block(YYLTYPE *locp,
+                                  _mesa_glsl_parse_state *state,
+                                  ast_interface_block *const block,
+                                  const struct ast_type_qualifier q)
+{
+   if (q.flags.q.buffer) {
+      if (!state->has_shader_storage_buffer_objects()) {
+         _mesa_glsl_error(locp, state,
+                          "#version 430 / GL_ARB_shader_storage_buffer_object "
+                          "required for defining shader storage blocks");
+      } else if (state->ARB_shader_storage_buffer_object_warn) {
+         _mesa_glsl_warning(locp, state,
+                            "#version 430 / GL_ARB_shader_storage_buffer_object "
+                            "required for defining shader storage blocks");
+      }
+   } else if (q.flags.q.uniform) {
+      if (!state->has_uniform_buffer_objects()) {
+         _mesa_glsl_error(locp, state,
+                          "#version 140 / GL_ARB_uniform_buffer_object "
+                          "required for defining uniform blocks");
+      } else if (state->ARB_uniform_buffer_object_warn) {
+         _mesa_glsl_warning(locp, state,
+                            "#version 140 / GL_ARB_uniform_buffer_object "
+                            "required for defining uniform blocks");
+      }
+   } else {
+      if (state->es_shader || state->language_version < 150) {
+         _mesa_glsl_error(locp, state,
+                          "#version 150 required for using "
+                          "interface blocks");
+      }
+   }
+
+   /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"):
+    * "It is illegal to have an input block in a vertex shader
+    *  or an output block in a fragment shader"
+    */
+   if ((state->stage == MESA_SHADER_VERTEX) && q.flags.q.in) {
+      _mesa_glsl_error(locp, state,
+                       "`in' interface block is not allowed for "
+                       "a vertex shader");
+   } else if ((state->stage == MESA_SHADER_FRAGMENT) && q.flags.q.out) {
+      _mesa_glsl_error(locp, state,
+                       "`out' interface block is not allowed for "
+                       "a fragment shader");
+   }
+
+   /* Since block arrays require names, and both features are added in
+    * the same language versions, we don't have to explicitly
+    * version-check both things.
+    */
+   if (block->instance_name != NULL) {
+      state->check_version(150, 300, locp, "interface blocks with "
+                           "an instance name are not allowed");
+   }
+
+   uint64_t interface_type_mask;
+   struct ast_type_qualifier temp_type_qualifier;
+
+   /* Get a bitmask containing only the in/out/uniform/buffer
+    * flags, allowing us to ignore other irrelevant flags like
+    * interpolation qualifiers.
+    */
+   temp_type_qualifier.flags.i = 0;
+   temp_type_qualifier.flags.q.uniform = true;
+   temp_type_qualifier.flags.q.in = true;
+   temp_type_qualifier.flags.q.out = true;
+   temp_type_qualifier.flags.q.buffer = true;
+   interface_type_mask = temp_type_qualifier.flags.i;
+
+   /* Get the block's interface qualifier.  The interface_qualifier and
+    * buffer_interface_qualifier production rules guarantee that only one
+    * bit will be set (and it will be in/out/uniform or buffer).
+    */
+   uint64_t block_interface_qualifier = q.flags.i;
+
+   block->layout.flags.i |= block_interface_qualifier;
+
+   if (state->stage == MESA_SHADER_GEOMETRY &&
+       state->has_explicit_attrib_stream()) {
+      /* Assign global layout's stream value. */
+      block->layout.flags.q.stream = 1;
+      block->layout.flags.q.explicit_stream = 0;
+      block->layout.stream = state->out_qualifier->stream;
+   }
+
+   foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
+      ast_type_qualifier& qualifier = member->type->qualifier;
+      if ((qualifier.flags.i & interface_type_mask) == 0) {
+         /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+          * "If no optional qualifier is used in a member declaration, the
+          *  qualifier of the variable is just in, out, or uniform as declared
+          *  by interface-qualifier."
+          */
+         qualifier.flags.i |= block_interface_qualifier;
+      } else if ((qualifier.flags.i & interface_type_mask) !=
+                 block_interface_qualifier) {
+         /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+          * "If optional qualifiers are used, they can include interpolation
+          *  and storage qualifiers and they must declare an input, output,
+          *  or uniform variable consistent with the interface qualifier of
+          *  the block."
+          */
+         _mesa_glsl_error(locp, state,
+                          "uniform/in/out qualifier on "
+                          "interface block member does not match "
+                          "the interface block");
+      }
+
+      /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
+       *
+       * "GLSL ES 3.0 does not support interface blocks for shader inputs or
+       * outputs."
+       *
+       * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":
+       *
+       * "Only variables output from a shader can be candidates for
+       * invariance."
+       *
+       * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
+       *
+       * "If optional qualifiers are used, they can include interpolation
+       * qualifiers, auxiliary storage qualifiers, and storage qualifiers
+       * and they must declare an input, output, or uniform member
+       * consistent with the interface qualifier of the block"
+       */
+      if (qualifier.flags.q.invariant)
+         _mesa_glsl_error(locp, state,
+                          "invariant qualifiers cannot be used "
+                          "with interface blocks members");
+   }
+}
 
 void
 _mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q)
@@ -1695,6 +1835,8 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
       }
    }
 
+   _mesa_glsl_initialize_derived_variables(shader);
+
    delete state->symbols;
    ralloc_free(state);
 }
index 295cd10..7fee43e 100644
@@ -217,7 +217,7 @@ struct _mesa_glsl_parse_state {
 
    bool has_shader_storage_buffer_objects() const
    {
-      return ARB_shader_storage_buffer_object_enable || is_version(430, 0);
+      return ARB_shader_storage_buffer_object_enable || is_version(430, 310);
    }
 
    bool has_separate_shader_objects() const
@@ -275,6 +275,13 @@ struct _mesa_glsl_parse_state {
    struct ast_type_qualifier *default_uniform_qualifier;
 
    /**
+    * Default shader storage layout qualifiers tracked during parsing.
+    * Currently affects shader storage blocks and shader storage buffer
+    * variables in those blocks.
+    */
+   struct ast_type_qualifier *default_shader_storage_qualifier;
+
+   /**
     * Variables to track different cases if a fragment shader redeclares
     * built-in variable gl_FragCoord.
     *
@@ -510,6 +517,8 @@ struct _mesa_glsl_parse_state {
    bool ARB_shader_storage_buffer_object_warn;
    bool ARB_shader_subroutine_enable;
    bool ARB_shader_subroutine_warn;
+   bool ARB_shader_texture_image_samples_enable;
+   bool ARB_shader_texture_image_samples_warn;
    bool ARB_shader_texture_lod_enable;
    bool ARB_shader_texture_lod_warn;
    bool ARB_shading_language_420pack_enable;
index c737fb6..15cd45e 100644
@@ -46,8 +46,8 @@ glsl_type::init_ralloc_type_ctx(void)
 }
 
 glsl_type::glsl_type(GLenum gl_type,
-                    glsl_base_type base_type, unsigned vector_elements,
-                    unsigned matrix_columns, const char *name) :
+                     glsl_base_type base_type, unsigned vector_elements,
+                     unsigned matrix_columns, const char *name) :
    gl_type(gl_type),
    base_type(base_type),
    sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
@@ -70,8 +70,8 @@ glsl_type::glsl_type(GLenum gl_type,
 }
 
 glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
-                    enum glsl_sampler_dim dim, bool shadow, bool array,
-                    unsigned type, const char *name) :
+                     enum glsl_sampler_dim dim, bool shadow, bool array,
+                     unsigned type, const char *name) :
    gl_type(gl_type),
    base_type(base_type),
    sampler_dimensionality(dim), sampler_shadow(shadow),
@@ -97,7 +97,7 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
 }
 
 glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
-                    const char *name) :
+                     const char *name) :
    gl_type(0),
    base_type(GLSL_TYPE_STRUCT),
    sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
@@ -113,25 +113,30 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
    assert(name != NULL);
    this->name = ralloc_strdup(this->mem_ctx, name);
    this->fields.structure = ralloc_array(this->mem_ctx,
-                                        glsl_struct_field, length);
+                                         glsl_struct_field, length);
 
    for (i = 0; i < length; i++) {
       this->fields.structure[i].type = fields[i].type;
       this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
-                                                    fields[i].name);
+                                                     fields[i].name);
       this->fields.structure[i].location = fields[i].location;
       this->fields.structure[i].interpolation = fields[i].interpolation;
       this->fields.structure[i].centroid = fields[i].centroid;
       this->fields.structure[i].sample = fields[i].sample;
       this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
       this->fields.structure[i].patch = fields[i].patch;
+      this->fields.structure[i].image_read_only = fields[i].image_read_only;
+      this->fields.structure[i].image_write_only = fields[i].image_write_only;
+      this->fields.structure[i].image_coherent = fields[i].image_coherent;
+      this->fields.structure[i].image_volatile = fields[i].image_volatile;
+      this->fields.structure[i].image_restrict = fields[i].image_restrict;
    }
 
    mtx_unlock(&glsl_type::mutex);
 }
 
 glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
-                    enum glsl_interface_packing packing, const char *name) :
+                     enum glsl_interface_packing packing, const char *name) :
    gl_type(0),
    base_type(GLSL_TYPE_INTERFACE),
    sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
@@ -147,11 +152,11 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
    assert(name != NULL);
    this->name = ralloc_strdup(this->mem_ctx, name);
    this->fields.structure = ralloc_array(this->mem_ctx,
-                                        glsl_struct_field, length);
+                                         glsl_struct_field, length);
    for (i = 0; i < length; i++) {
       this->fields.structure[i].type = fields[i].type;
       this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
-                                                    fields[i].name);
+                                                     fields[i].name);
       this->fields.structure[i].location = fields[i].location;
       this->fields.structure[i].interpolation = fields[i].interpolation;
       this->fields.structure[i].centroid = fields[i].centroid;
@@ -220,8 +225,8 @@ glsl_type::contains_sampler() const
       return this->fields.array->contains_sampler();
    } else if (this->is_record()) {
       for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_sampler())
-           return true;
+         if (this->fields.structure[i].type->contains_sampler())
+            return true;
       }
       return false;
    } else {
@@ -237,8 +242,8 @@ glsl_type::contains_integer() const
       return this->fields.array->contains_integer();
    } else if (this->is_record()) {
       for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_integer())
-           return true;
+         if (this->fields.structure[i].type->contains_integer())
+            return true;
       }
       return false;
    } else {
@@ -253,8 +258,8 @@ glsl_type::contains_double() const
       return this->fields.array->contains_double();
    } else if (this->is_record()) {
       for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_double())
-           return true;
+         if (this->fields.structure[i].type->contains_double())
+            return true;
       }
       return false;
    } else {
@@ -289,8 +294,8 @@ glsl_type::contains_subroutine() const
       return this->fields.array->contains_subroutine();
    } else if (this->is_record()) {
       for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_subroutine())
-           return true;
+         if (this->fields.structure[i].type->contains_subroutine())
+            return true;
       }
       return false;
    } else {
@@ -335,8 +340,8 @@ glsl_type::contains_image() const
       return this->fields.array->contains_image();
    } else if (this->is_record()) {
       for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_image())
-           return true;
+         if (this->fields.structure[i].type->contains_image())
+            return true;
       }
       return false;
    } else {
@@ -536,21 +541,21 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns)
    if (columns == 1) {
       switch (base_type) {
       case GLSL_TYPE_UINT:
-        return uvec(rows);
+         return uvec(rows);
       case GLSL_TYPE_INT:
-        return ivec(rows);
+         return ivec(rows);
       case GLSL_TYPE_FLOAT:
-        return vec(rows);
+         return vec(rows);
       case GLSL_TYPE_DOUBLE:
-        return dvec(rows);
+         return dvec(rows);
       case GLSL_TYPE_BOOL:
-        return bvec(rows);
+         return bvec(rows);
       default:
-        return error_type;
+         return error_type;
       }
    } else {
       if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1))
-        return error_type;
+         return error_type;
 
       /* GLSL matrix types are named mat{COLUMNS}x{ROWS}.  Only the following
        * combinations are valid:
@@ -772,10 +777,10 @@ glsl_type::record_compare(const glsl_type *b) const
 
    for (unsigned i = 0; i < this->length; i++) {
       if (this->fields.structure[i].type != b->fields.structure[i].type)
-        return false;
+         return false;
       if (strcmp(this->fields.structure[i].name,
-                b->fields.structure[i].name) != 0)
-        return false;
+                 b->fields.structure[i].name) != 0)
+         return false;
       if (this->fields.structure[i].matrix_layout
          != b->fields.structure[i].matrix_layout)
         return false;
@@ -794,6 +799,21 @@ glsl_type::record_compare(const glsl_type *b) const
       if (this->fields.structure[i].patch
           != b->fields.structure[i].patch)
          return false;
+      if (this->fields.structure[i].image_read_only
+          != b->fields.structure[i].image_read_only)
+         return false;
+      if (this->fields.structure[i].image_write_only
+          != b->fields.structure[i].image_write_only)
+         return false;
+      if (this->fields.structure[i].image_coherent
+          != b->fields.structure[i].image_coherent)
+         return false;
+      if (this->fields.structure[i].image_volatile
+          != b->fields.structure[i].image_volatile)
+         return false;
+      if (this->fields.structure[i].image_restrict
+          != b->fields.structure[i].image_restrict)
+         return false;
    }
 
    return true;
@@ -836,8 +856,8 @@ glsl_type::record_key_hash(const void *a)
 
 const glsl_type *
 glsl_type::get_record_instance(const glsl_struct_field *fields,
-                              unsigned num_fields,
-                              const char *name)
+                               unsigned num_fields,
+                               const char *name)
 {
    const glsl_type key(fields, num_fields, name);
 
@@ -870,9 +890,9 @@ glsl_type::get_record_instance(const glsl_struct_field *fields,
 
 const glsl_type *
 glsl_type::get_interface_instance(const glsl_struct_field *fields,
-                                 unsigned num_fields,
-                                 enum glsl_interface_packing packing,
-                                 const char *block_name)
+                                  unsigned num_fields,
+                                  enum glsl_interface_packing packing,
+                                  const char *block_name)
 {
    const glsl_type key(fields, num_fields, packing, block_name);
 
@@ -1078,7 +1098,7 @@ glsl_type::field_type(const char *name) const
 
    for (unsigned i = 0; i < this->length; i++) {
       if (strcmp(name, this->fields.structure[i].name) == 0)
-        return this->fields.structure[i].type;
+         return this->fields.structure[i].type;
    }
 
    return error_type;
@@ -1094,7 +1114,7 @@ glsl_type::field_index(const char *name) const
 
    for (unsigned i = 0; i < this->length; i++) {
       if (strcmp(name, this->fields.structure[i].name) == 0)
-        return i;
+         return i;
    }
 
    return -1;
@@ -1119,7 +1139,7 @@ glsl_type::component_slots() const
       unsigned size = 0;
 
       for (unsigned i = 0; i < this->length; i++)
-        size += this->fields.structure[i].type->component_slots();
+         size += this->fields.structure[i].type->component_slots();
 
       return size;
    }
@@ -1144,6 +1164,32 @@ glsl_type::component_slots() const
 }
 
 unsigned
+glsl_type::record_location_offset(unsigned length) const
+{
+   unsigned offset = 0;
+   const glsl_type *t = this->without_array();
+   if (t->is_record()) {
+      assert(length <= t->length);
+
+      for (unsigned i = 0; i < length; i++) {
+         const glsl_type *st = t->fields.structure[i].type;
+         const glsl_type *wa = st->without_array();
+         if (wa->is_record()) {
+            unsigned r_offset = wa->record_location_offset(wa->length);
+            offset += st->is_array() ? st->length * r_offset : r_offset;
+         } else {
+            /* We don't worry about arrays here because unless the array
+             * contains a structure or another array it only takes up a single
+             * uniform slot.
+             */
+            offset += 1;
+         }
+      }
+   }
+   return offset;
+}
+
+unsigned
 glsl_type::uniform_locations() const
 {
    unsigned size = 0;
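
The counting rule in record_location_offset() is easiest to see on a concrete type. A minimal GLSL sketch (type and member names are illustrative; the slot counts follow from the loop above, where any non-struct member costs a single slot and struct members recurse):

    #version 150
    struct Inner { float c; };
    struct Outer {
       float a;      // contributes 1 slot
       vec4  b[2];   // contributes 1 slot (array of non-structs)
       Inner t[3];   // contributes 3 * 1 slots (struct array recurses)
    };
    uniform Outer u;
    // record_location_offset(3) on Outer == 1 + 1 + 3 == 5
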
@@ -1231,12 +1277,12 @@ glsl_type::std140_base_alignment(bool row_major) const
    if (this->is_scalar() || this->is_vector()) {
       switch (this->vector_elements) {
       case 1:
-        return N;
+         return N;
       case 2:
-        return 2 * N;
+         return 2 * N;
       case 3:
       case 4:
-        return 4 * N;
+         return 4 * N;
       }
    }
 
@@ -1261,13 +1307,13 @@ glsl_type::std140_base_alignment(bool row_major) const
     */
    if (this->is_array()) {
       if (this->fields.array->is_scalar() ||
-         this->fields.array->is_vector() ||
-         this->fields.array->is_matrix()) {
-        return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
+          this->fields.array->is_vector() ||
+          this->fields.array->is_matrix()) {
+         return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
       } else {
-        assert(this->fields.array->is_record() ||
+         assert(this->fields.array->is_record() ||
                 this->fields.array->is_array());
-        return this->fields.array->std140_base_alignment(row_major);
+         return this->fields.array->std140_base_alignment(row_major);
       }
    }
 
@@ -1286,11 +1332,11 @@ glsl_type::std140_base_alignment(bool row_major) const
       int r = this->vector_elements;
 
       if (row_major) {
-        vec_type = get_instance(base_type, c, 1);
-        array_type = glsl_type::get_array_instance(vec_type, r);
+         vec_type = get_instance(base_type, c, 1);
+         array_type = glsl_type::get_array_instance(vec_type, r);
       } else {
-        vec_type = get_instance(base_type, r, 1);
-        array_type = glsl_type::get_array_instance(vec_type, c);
+         vec_type = get_instance(base_type, r, 1);
+         array_type = glsl_type::get_array_instance(vec_type, c);
       }
 
       return array_type->std140_base_alignment(false);
@@ -1320,9 +1366,9 @@ glsl_type::std140_base_alignment(bool row_major) const
             field_row_major = false;
          }
 
-        const struct glsl_type *field_type = this->fields.structure[i].type;
-        base_alignment = MAX2(base_alignment,
-                              field_type->std140_base_alignment(field_row_major));
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         base_alignment = MAX2(base_alignment,
+                               field_type->std140_base_alignment(field_row_major));
       }
       return base_alignment;
    }
@@ -1374,25 +1420,25 @@ glsl_type::std140_size(bool row_major) const
       unsigned int array_len;
 
       if (this->is_array()) {
-        element_type = this->fields.array;
-        array_len = this->length;
+         element_type = this->fields.array;
+         array_len = this->length;
       } else {
-        element_type = this;
-        array_len = 1;
+         element_type = this;
+         array_len = 1;
       }
 
       if (row_major) {
          vec_type = get_instance(element_type->base_type,
                                  element_type->matrix_columns, 1);
 
-        array_len *= element_type->vector_elements;
+         array_len *= element_type->vector_elements;
       } else {
-        vec_type = get_instance(element_type->base_type,
-                                element_type->vector_elements, 1);
-        array_len *= element_type->matrix_columns;
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->vector_elements, 1);
+         array_len *= element_type->matrix_columns;
       }
       const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
-                                                                 array_len);
+                                                                  array_len);
 
       return array_type->std140_size(false);
    }
@@ -1409,11 +1455,11 @@ glsl_type::std140_size(bool row_major) const
     */
    if (this->is_array()) {
       if (this->fields.array->is_record()) {
-        return this->length * this->fields.array->std140_size(row_major);
+         return this->length * this->fields.array->std140_size(row_major);
       } else {
-        unsigned element_base_align =
-           this->fields.array->std140_base_alignment(row_major);
-        return this->length * MAX2(element_base_align, 16);
+         unsigned element_base_align =
+            this->fields.array->std140_base_alignment(row_major);
+         return this->length * MAX2(element_base_align, 16);
       }
    }
 
@@ -1429,7 +1475,7 @@ glsl_type::std140_size(bool row_major) const
     *     rounded up to the next multiple of the base alignment of the
     *     structure.
     */
-   if (this->is_record()) {
+   if (this->is_record() || this->is_interface()) {
       unsigned size = 0;
       unsigned max_align = 0;
 
@@ -1443,10 +1489,15 @@ glsl_type::std140_size(bool row_major) const
             field_row_major = false;
          }
 
-        const struct glsl_type *field_type = this->fields.structure[i].type;
-        unsigned align = field_type->std140_base_alignment(field_row_major);
-        size = glsl_align(size, align);
-        size += field_type->std140_size(field_row_major);
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         unsigned align = field_type->std140_base_alignment(field_row_major);
+
+         /* Ignore unsized arrays when calculating size */
+         if (field_type->is_unsized_array())
+            continue;
+
+         size = glsl_align(size, align);
+         size += field_type->std140_size(field_row_major);
 
          max_align = MAX2(align, max_align);
 
@@ -1461,6 +1512,213 @@ glsl_type::std140_size(bool row_major) const
    return -1;
 }
 
+unsigned
+glsl_type::std430_base_alignment(bool row_major) const
+{
+
+   unsigned N = is_double() ? 8 : 4;
+
+   /* (1) If the member is a scalar consuming <N> basic machine units, the
+    *     base alignment is <N>.
+    *
+    * (2) If the member is a two- or four-component vector with components
+    *     consuming <N> basic machine units, the base alignment is 2<N> or
+    *     4<N>, respectively.
+    *
+    * (3) If the member is a three-component vector with components consuming
+    *     <N> basic machine units, the base alignment is 4<N>.
+    */
+   if (this->is_scalar() || this->is_vector()) {
+      switch (this->vector_elements) {
+      case 1:
+         return N;
+      case 2:
+         return 2 * N;
+      case 3:
+      case 4:
+         return 4 * N;
+      }
+   }
+
+   /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+    *
+    * "When using the std430 storage layout, shader storage blocks will be
+    * laid out in buffer storage identically to uniform and shader storage
+    * blocks using the std140 layout, except that the base alignment and
+    * stride of arrays of scalars and vectors in rule 4 and of structures
+    * in rule 9 are not rounded up a multiple of the base alignment of a vec4."
+    */
+
+   /* (4) If the member is an array of scalars or vectors, the base
+    *     alignment and array stride are set to match the base alignment
+    *     of a single array element, according to rules (1), (2), and (3).
+    *     (Under std430 the stride is not rounded up to the base alignment
+    *     of a vec4.)
+    */
+   if (this->is_array())
+      return this->fields.array->std430_base_alignment(row_major);
+
+   /* (5) If the member is a column-major matrix with <C> columns and
+    *     <R> rows, the matrix is stored identically to an array of
+    *     <C> column vectors with <R> components each, according to
+    *     rule (4).
+    *
+    * (7) If the member is a row-major matrix with <C> columns and <R>
+    *     rows, the matrix is stored identically to an array of <R>
+    *     row vectors with <C> components each, according to rule (4).
+    */
+   if (this->is_matrix()) {
+      const struct glsl_type *vec_type, *array_type;
+      int c = this->matrix_columns;
+      int r = this->vector_elements;
+
+      if (row_major) {
+         vec_type = get_instance(base_type, c, 1);
+         array_type = glsl_type::get_array_instance(vec_type, r);
+      } else {
+         vec_type = get_instance(base_type, r, 1);
+         array_type = glsl_type::get_array_instance(vec_type, c);
+      }
+
+      return array_type->std430_base_alignment(false);
+   }
+
+   /* (9) If the member is a structure, the base alignment of the
+    *     structure is <N>, where <N> is the largest base alignment
+    *     value of any of its members (under std430, not rounded up to
+    *     the base alignment of a vec4). The individual members of this
+    *     sub-structure are then assigned offsets by applying this set
+    *     of rules recursively, where the base offset of the first
+    *     member of the sub-structure is equal to the aligned offset
+    *     of the structure. The structure may have padding at the end;
+    *     the base offset of the member following the sub-structure is
+    *     rounded up to the next multiple of the base alignment of the
+    *     structure.
+    */
+   if (this->is_record()) {
+      unsigned base_alignment = 0;
+      for (unsigned i = 0; i < this->length; i++) {
+         bool field_row_major = row_major;
+         const enum glsl_matrix_layout matrix_layout =
+            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+            field_row_major = true;
+         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+            field_row_major = false;
+         }
+
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         base_alignment = MAX2(base_alignment,
+                               field_type->std430_base_alignment(field_row_major));
+      }
+      return base_alignment;
+   }
+   assert(!"not reached");
+   return -1;
+}
+
+unsigned
+glsl_type::std430_array_stride(bool row_major) const
+{
+   unsigned N = is_double() ? 8 : 4;
+
+   /* Notice that the array stride of a vec3 is not 3 * N but 4 * N.
+    * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout"
+    *
+    * (3) If the member is a three-component vector with components consuming
+    *     <N> basic machine units, the base alignment is 4<N>.
+    */
+   if (this->is_vector() && this->vector_elements == 3)
+      return 4 * N;
+
+   /* By default use std430_size(row_major) */
+   return this->std430_size(row_major);
+}
+
+unsigned
+glsl_type::std430_size(bool row_major) const
+{
+   unsigned N = is_double() ? 8 : 4;
+
+   /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+    *
+    * "When using the std430 storage layout, shader storage blocks will be
+    * laid out in buffer storage identically to uniform and shader storage
+    * blocks using the std140 layout, except that the base alignment and
+    * stride of arrays of scalars and vectors in rule 4 and of structures
+    * in rule 9 are not rounded up a multiple of the base alignment of a vec4."
+    */
+   if (this->is_scalar() || this->is_vector())
+      return this->vector_elements * N;
+
+   if (this->without_array()->is_matrix()) {
+      const struct glsl_type *element_type;
+      const struct glsl_type *vec_type;
+      unsigned int array_len;
+
+      if (this->is_array()) {
+         element_type = this->fields.array;
+         array_len = this->length;
+      } else {
+         element_type = this;
+         array_len = 1;
+      }
+
+      if (row_major) {
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->matrix_columns, 1);
+
+         array_len *= element_type->vector_elements;
+      } else {
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->vector_elements, 1);
+         array_len *= element_type->matrix_columns;
+      }
+      const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
+                                                                  array_len);
+
+      return array_type->std430_size(false);
+   }
+
+   if (this->is_array()) {
+      if (this->fields.array->is_record())
+         return this->length * this->fields.array->std430_size(row_major);
+      else
+         return this->length * this->fields.array->std430_base_alignment(row_major);
+   }
+
+   if (this->is_record() || this->is_interface()) {
+      unsigned size = 0;
+      unsigned max_align = 0;
+
+      for (unsigned i = 0; i < this->length; i++) {
+         bool field_row_major = row_major;
+         const enum glsl_matrix_layout matrix_layout =
+            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+            field_row_major = true;
+         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+            field_row_major = false;
+         }
+
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         unsigned align = field_type->std430_base_alignment(field_row_major);
+         size = glsl_align(size, align);
+         size += field_type->std430_size(field_row_major);
+
+         max_align = MAX2(align, max_align);
+      }
+      size = glsl_align(size, max_align);
+      return size;
+   }
+
+   assert(!"not reached");
+   return -1;
+}
 
 unsigned
 glsl_type::count_attribute_slots() const
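
The practical effect of the new std430 rules is easiest to see side by side. Below is a hypothetical block (names illustrative); the offsets follow from the std430 functions above, with rule (4) no longer rounding array strides up to a vec4:

    #version 430
    layout(std430) buffer Example {
       float a;     // offset 0,  alignment 4
       float b[3];  // offset 4,  array stride 4  -> 12 bytes
       vec3  c;     // offset 16, alignment 4 * N == 16 (rule 3)
    };              // std430_size() == 32 (28 rounded to max alignment)
    // Under std140, b's stride rounds up to 16: b occupies [16, 64),
    // c lands at offset 64, and std140_size() comes to 80.
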
index 0a8a0b9..b83e1ca 100644 (file)
@@ -78,7 +78,8 @@ enum glsl_sampler_dim {
 enum glsl_interface_packing {
    GLSL_INTERFACE_PACKING_STD140,
    GLSL_INTERFACE_PACKING_SHARED,
-   GLSL_INTERFACE_PACKING_PACKED
+   GLSL_INTERFACE_PACKING_PACKED,
+   GLSL_INTERFACE_PACKING_STD430
 };
 
 enum glsl_matrix_layout {
@@ -300,6 +301,14 @@ struct glsl_type {
    unsigned component_slots() const;
 
    /**
+    * Calculate the offset, in uniform-location slots, between the base
+    * location of the struct in uniform storage and a given struct member.
+    * For the initial call, \p length is the index of the member to find
+    * the offset for.
+    */
+   unsigned record_location_offset(unsigned length) const;
+
+   /**
     * Calculate the number of unique values from glGetUniformLocation for the
     * elements of the type.
     *
@@ -334,6 +343,25 @@ struct glsl_type {
    unsigned std140_size(bool row_major) const;
 
    /**
+    * Alignment in bytes of the start of this type in a std430 shader
+    * storage block.
+    */
+   unsigned std430_base_alignment(bool row_major) const;
+
+   /**
+    * Calculate array stride in bytes of this type in a std430 shader storage
+    * block.
+    */
+   unsigned std430_array_stride(bool row_major) const;
+
+   /**
+    * Size in bytes of this type in a std430 shader storage block.
+    *
+    * Note that this is not GL_BUFFER_SIZE
+    */
+   unsigned std430_size(bool row_major) const;
+
+   /**
     * \brief Can this type be implicitly converted to another?
     *
     * \return True if the types are identical or if this type can be converted
@@ -558,6 +586,25 @@ struct glsl_type {
    }
 
    /**
+    * Return the total number of elements in an array including the elements
+    * in arrays of arrays.
+    */
+   unsigned arrays_of_arrays_size() const
+   {
+      if (!is_array())
+         return 0;
+
+      unsigned size = length;
+      const glsl_type *base_type = fields.array;
+
+      while (base_type->is_array()) {
+         size = size * base_type->length;
+         base_type = base_type->fields.array;
+      }
+      return size;
+   }
+
+   /**
     * Return the amount of atomic counter storage required for a type.
     */
    unsigned atomic_size() const
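
For arrays of arrays, arrays_of_arrays_size() simply multiplies the lengths down the element chain; a quick sketch (the declaration is illustrative):

    #version 430
    uniform vec4 v[3][4][2];
    // outermost length 3, then 4, then 2:
    // arrays_of_arrays_size() == 3 * 4 * 2 == 24
    // (a non-array type returns 0)
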
@@ -801,6 +848,16 @@ struct glsl_struct_field {
     */
    int stream;
 
+   /**
+    * Image qualifiers, applicable to buffer variables defined in shader
+    * storage buffer objects (SSBOs)
+    */
+   unsigned image_read_only:1;
+   unsigned image_write_only:1;
+   unsigned image_coherent:1;
+   unsigned image_volatile:1;
+   unsigned image_restrict:1;
+
 #ifdef __cplusplus
    glsl_struct_field(const struct glsl_type *_type, const char *_name)
       : type(_type), name(_name), location(-1), interpolation(0), centroid(0),
index 724861b..2c45b9e 100644 (file)
@@ -342,6 +342,11 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
                                           op0->type->vector_elements, 1);
       break;
 
+   case ir_unop_get_buffer_size:
+   case ir_unop_ssbo_unsized_array_length:
+      this->type = glsl_type::int_type;
+      break;
+
    default:
       assert(!"not reached: missing automatic type setup for ir_expression");
       this->type = op0->type;
@@ -571,6 +576,8 @@ static const char *const operator_strs[] = {
    "noise",
    "subroutine_to_int",
    "interpolate_at_centroid",
+   "get_buffer_size",
+   "ssbo_unsized_array_length",
    "+",
    "-",
    "*",
@@ -1398,7 +1405,7 @@ ir_dereference::is_lvalue() const
 }
 
 
-static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels" };
+static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples" };
 
 const char *ir_texture::opcode_string()
 {
@@ -1427,7 +1434,8 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type)
    this->sampler = sampler;
    this->type = type;
 
-   if (this->op == ir_txs || this->op == ir_query_levels) {
+   if (this->op == ir_txs || this->op == ir_query_levels ||
+       this->op == ir_texture_samples) {
       assert(type->base_type == GLSL_TYPE_INT);
    } else if (this->op == ir_lod) {
       assert(type->vector_elements == 2);
@@ -1657,6 +1665,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
    this->data.image_coherent = false;
    this->data.image_volatile = false;
    this->data.image_restrict = false;
+   this->data.from_ssbo_unsized_array = false;
 
    if (type != NULL) {
       if (type->base_type == GLSL_TYPE_SAMPLER)
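
For context, the new ir_texture_samples opcode backs the textureSamples() query from ARB_shader_texture_image_samples; a minimal fragment-shader sketch, assuming that extension is what the opcode is wired up to here. Like ir_txs and ir_query_levels, it takes no coordinate:

    #version 150
    #extension GL_ARB_shader_texture_image_samples : require
    uniform sampler2DMS tex;
    out vec4 color;
    void main() {
       color = vec4(float(textureSamples(tex)));  // -> ir_texture_samples
    }
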
index ede8caa..43a2bf0 100644 (file)
@@ -453,6 +453,15 @@ public:
    }
 
    /**
+    * Determine whether or not a variable is part of a shader storage block.
+    */
+   inline bool is_in_shader_storage_block() const
+   {
+      return this->data.mode == ir_var_shader_storage &&
+             this->interface_type != NULL;
+   }
+
+   /**
     * Determine whether or not a variable is the declaration of an interface
     * block
     *
@@ -778,6 +787,11 @@ public:
       unsigned image_restrict:1;
 
       /**
+       * ARB_shader_storage_buffer_object
+       */
+      unsigned from_ssbo_unsized_array:1; /**< unsized array buffer variable. */
+
+      /**
        * Emit a warning if this variable is accessed.
        */
    private:
@@ -819,6 +833,8 @@ public:
        *   - Fragment shader output: one of the values from \c gl_frag_result.
        *   - Uniforms: Per-stage uniform slot number for default uniform block.
        *   - Uniforms: Index within the uniform block definition for UBO members.
+       *   - Non-UBO Uniforms: explicit location until linking, then reused to
+       *     store uniform slot number.
        *   - Other: This field is not currently used.
        *
        * If the variable is a uniform, shader input, or shader output, and the
@@ -1409,9 +1425,26 @@ enum ir_expression_operation {
    ir_unop_interpolate_at_centroid,
 
    /**
+    * Ask the driver for the total size of a buffer block.
+    *
+    * operand0 is the ir_constant buffer block index in the linked shader.
+    */
+   ir_unop_get_buffer_size,
+
+   /**
+    * Calculate length of an unsized array inside a buffer block.
+    * This opcode is going to be replaced in a lowering pass inside
+    * the linker.
+    *
+    * operand0 is the unsized array's ir_value for the calculation
+    * of its length.
+    */
+   ir_unop_ssbo_unsized_array_length,
+
+   /**
     * A sentinel marking the last of the unary operations.
     */
-   ir_last_unop = ir_unop_interpolate_at_centroid,
+   ir_last_unop = ir_unop_ssbo_unsized_array_length,
 
    ir_binop_add,
    ir_binop_sub,
@@ -1914,7 +1947,8 @@ enum ir_texture_opcode {
    ir_txs,             /**< Texture size */
    ir_lod,             /**< Texture lod query */
    ir_tg4,             /**< Texture gather */
-   ir_query_levels      /**< Texture levels query */
+   ir_query_levels,     /**< Texture levels query */
+   ir_texture_samples,  /**< Texture samples query */
 };
 
 
@@ -2513,6 +2547,9 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
                                struct _mesa_glsl_parse_state *state);
 
 extern void
+_mesa_glsl_initialize_derived_variables(gl_shader *shader);
+
+extern void
 _mesa_glsl_initialize_functions(_mesa_glsl_parse_state *state);
 
 extern void
@@ -2523,12 +2560,14 @@ _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
                                  const char *name, exec_list *actual_parameters);
 
 extern ir_function *
-_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state,
-                                         const char *name);
+_mesa_glsl_find_builtin_function_by_name(const char *name);
 
 extern gl_shader *
 _mesa_glsl_get_builtin_function_shader(void);
 
+extern ir_function_signature *
+_mesa_get_main_function_signature(gl_shader *sh);
+
 extern void
 _mesa_glsl_release_functions(void);
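
A sketch of what the two new opcodes cooperate to compute; the shader is illustrative, and the arithmetic shown is the kind of lowering the opcode comment anticipates rather than the exact IR the pass emits:

    #version 430
    layout(std430) buffer B {
       int   head;
       float data[];   // unsized: length known only at run time
    };
    void main() {
       int n = data.length();
       // expected lowering, roughly:
       //   n = (get_buffer_size(block) - offset_of(data)) / stride_of(data)
       // so n depends on the size of the buffer actually bound.
    }
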
 
index 4edf70d..d6b06ee 100644 (file)
@@ -222,6 +222,7 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
    case ir_tex:
    case ir_lod:
    case ir_query_levels:
+   case ir_texture_samples:
       break;
    case ir_txb:
       new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
index cc1964e..5f0785e 100644 (file)
@@ -151,6 +151,7 @@ ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const
    case ir_tex:
    case ir_lod:
    case ir_query_levels:
+   case ir_texture_samples:
       break;
    case ir_txb:
       if (!lod_info.bias->equals(other->lod_info.bias, ignore))
index d3662cf..6495cc4 100644 (file)
@@ -194,6 +194,7 @@ ir_texture::accept(ir_hierarchical_visitor *v)
    case ir_tex:
    case ir_lod:
    case ir_query_levels:
+   case ir_texture_samples:
       break;
    case ir_txb:
       s = this->lod_info.bias->accept(v);
index 8dbd938..b919690 100644 (file)
@@ -274,7 +274,8 @@ void ir_print_visitor::visit(ir_texture *ir)
    ir->sampler->accept(this);
    fprintf(f, " ");
 
-   if (ir->op != ir_txs && ir->op != ir_query_levels) {
+   if (ir->op != ir_txs && ir->op != ir_query_levels &&
+       ir->op != ir_texture_samples) {
       ir->coordinate->accept(this);
 
       fprintf(f, " ");
@@ -290,7 +291,7 @@ void ir_print_visitor::visit(ir_texture *ir)
 
    if (ir->op != ir_txf && ir->op != ir_txf_ms &&
        ir->op != ir_txs && ir->op != ir_tg4 &&
-       ir->op != ir_query_levels) {
+       ir->op != ir_query_levels && ir->op != ir_texture_samples) {
       if (ir->projector)
         ir->projector->accept(this);
       else
@@ -310,6 +311,7 @@ void ir_print_visitor::visit(ir_texture *ir)
    case ir_tex:
    case ir_lod:
    case ir_query_levels:
+   case ir_texture_samples:
       break;
    case ir_txb:
       ir->lod_info.bias->accept(this);
@@ -586,7 +588,7 @@ ir_print_visitor::visit(ir_end_primitive *ir)
 }
 
 void
-ir_print_visitor::visit(ir_barrier *ir)
+ir_print_visitor::visit(ir_barrier *)
 {
    fprintf(f, "(barrier)\n");
 }
index 469837f..07720e2 100644 (file)
@@ -26,7 +26,7 @@
 #include "glsl_types.h"
 #include "s_expression.h"
 
-const static bool debug = false;
+static const bool debug = false;
 
 namespace {
 
@@ -960,6 +960,8 @@ ir_reader::read_texture(s_expression *expr)
       { "tg4", s_type, s_sampler, s_coord, s_offset, s_component };
    s_pattern query_levels_pattern[] =
       { "query_levels", s_type, s_sampler };
+   s_pattern texture_samples_pattern[] =
+      { "samples", s_type, s_sampler };
    s_pattern other_pattern[] =
       { tag, s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow, s_lod };
 
@@ -977,6 +979,8 @@ ir_reader::read_texture(s_expression *expr)
       op = ir_tg4;
    } else if (MATCH(expr, query_levels_pattern)) {
       op = ir_query_levels;
+   } else if (MATCH(expr, texture_samples_pattern)) {
+      op = ir_texture_samples;
    } else if (MATCH(expr, other_pattern)) {
       op = ir_texture::get_opcode(tag->value());
       if (op == (ir_texture_opcode) -1)
@@ -1029,7 +1033,7 @@ ir_reader::read_texture(s_expression *expr)
 
    if (op != ir_txf && op != ir_txf_ms &&
        op != ir_txs && op != ir_lod && op != ir_tg4 &&
-       op != ir_query_levels) {
+       op != ir_query_levels && op != ir_texture_samples) {
       s_int *proj_as_int = SX_AS_INT(s_proj);
       if (proj_as_int && proj_as_int->value() == 1) {
         tex->projector = NULL;
index 2eee3da..a6966f5 100644 (file)
@@ -58,6 +58,7 @@ ir_rvalue_base_visitor::rvalue_visit(ir_texture *ir)
    case ir_tex:
    case ir_lod:
    case ir_query_levels:
+   case ir_texture_samples:
       break;
    case ir_txb:
       handle_rvalue(&ir->lod_info.bias);
index 0b6f720..858a7da 100644 (file)
@@ -194,6 +194,11 @@ struct gl_uniform_storage {
     * This is a built-in uniform that should not be modified through any gl API.
     */
    bool builtin;
+
+   /**
+    * This is a shader storage buffer variable, not a uniform.
+    */
+   bool is_shader_storage;
 };
 
 #ifdef __cplusplus
index 3f0dea7..935571a 100644 (file)
@@ -409,6 +409,17 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->operands[0]->type->is_float());
       break;
 
+   case ir_unop_get_buffer_size:
+      assert(ir->type == glsl_type::int_type);
+      assert(ir->operands[0]->type == glsl_type::uint_type);
+      break;
+
+   case ir_unop_ssbo_unsized_array_length:
+      assert(ir->type == glsl_type::int_type);
+      assert(ir->operands[0]->type->is_array());
+      assert(ir->operands[0]->type->is_unsized_array());
+      break;
+
    case ir_unop_d2f:
       assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
       assert(ir->type->base_type == GLSL_TYPE_FLOAT);
index 5102947..bcf17fe 100644 (file)
@@ -77,9 +77,6 @@ link_uniform_block_active_visitor::visit(ir_variable *var)
    if (!var->is_in_buffer_block())
       return visit_continue;
 
-   const glsl_type *const block_type = var->is_interface_instance()
-      ? var->type : var->get_interface_type();
-
    /* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec says:
     *
     *     "All members of a named uniform block declared with a shared or
@@ -88,7 +85,8 @@ link_uniform_block_active_visitor::visit(ir_variable *var)
     *     also considered active, even if no member of the block is
     *     referenced."
     */
-   if (block_type->interface_packing == GLSL_INTERFACE_PACKING_PACKED)
+   if (var->get_interface_type()->interface_packing ==
+       GLSL_INTERFACE_PACKING_PACKED)
       return visit_continue;
 
    /* Process the block.  Bail if there was an error.
@@ -106,6 +104,22 @@ link_uniform_block_active_visitor::visit(ir_variable *var)
    assert(b->num_array_elements == 0);
    assert(b->array_elements == NULL);
    assert(b->type != NULL);
+   assert(!b->type->is_array() || b->has_instance_name);
+
+   /* For uniform block arrays declared with a shared or std140 layout
+    * qualifier, mark all its instances as used.
+    */
+   if (b->type->is_array() && b->type->length > 0) {
+      b->num_array_elements = b->type->length;
+      b->array_elements = reralloc(this->mem_ctx,
+                                   b->array_elements,
+                                   unsigned,
+                                   b->num_array_elements);
+
+      for (unsigned i = 0; i < b->num_array_elements; i++) {
+         b->array_elements[i] = i;
+      }
+   }
 
    return visit_continue;
 }
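
An example of the rule being implemented (shader illustrative): referencing a single element of a shared or std140 uniform block array makes every instance active, so the code above records all indices up front; only packed blocks fall through to the per-index tracking in visit_enter(ir_dereference_array) below.

    #version 150
    layout(shared) uniform B { vec4 v; } b[4];
    out vec4 color;
    void main() {
       color = b[2].v;   // b[0]..b[3] all become active:
    }                    // array_elements == {0, 1, 2, 3}
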
@@ -147,6 +161,14 @@ link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir)
    assert((b->num_array_elements == 0) == (b->array_elements == NULL));
    assert(b->type != NULL);
 
+   /* If the block array was declared with a shared or
+    * std140 layout qualifier, all its instances have already been marked
+    * as used in link_uniform_block_active_visitor::visit(ir_variable *).
+    */
+   if (var->get_interface_type()->interface_packing !=
+       GLSL_INTERFACE_PACKING_PACKED)
+      return visit_continue_with_parent;
+
    ir_constant *c = ir->array_index->as_constant();
 
    if (c) {
index 4df39e2..7ceffee 100644 (file)
@@ -68,14 +68,18 @@ private:
    }
 
    virtual void enter_record(const glsl_type *type, const char *,
-                             bool row_major) {
+                             bool row_major, const unsigned packing) {
       assert(type->is_record());
-      this->offset = glsl_align(
+      if (packing == GLSL_INTERFACE_PACKING_STD430)
+         this->offset = glsl_align(
+            this->offset, type->std430_base_alignment(row_major));
+      else
+         this->offset = glsl_align(
             this->offset, type->std140_base_alignment(row_major));
    }
 
    virtual void leave_record(const glsl_type *type, const char *,
-                             bool row_major) {
+                             bool row_major, const unsigned packing) {
       assert(type->is_record());
 
       /* If this is the last field of a structure, apply rule #9.  The
@@ -85,12 +89,17 @@ private:
        *     the member following the sub-structure is rounded up to the next
        *     multiple of the base alignment of the structure."
        */
-      this->offset = glsl_align(
+      if (packing == GLSL_INTERFACE_PACKING_STD430)
+         this->offset = glsl_align(
+            this->offset, type->std430_base_alignment(row_major));
+      else
+         this->offset = glsl_align(
             this->offset, type->std140_base_alignment(row_major));
    }
 
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major, const glsl_type *,
+                            const unsigned packing,
                             bool /* last_field */)
    {
       assert(this->index < this->num_variables);
@@ -119,8 +128,16 @@ private:
          v->IndexName = v->Name;
       }
 
-      const unsigned alignment = type->std140_base_alignment(v->RowMajor);
-      unsigned size = type->std140_size(v->RowMajor);
+      unsigned alignment = 0;
+      unsigned size = 0;
+
+      if (packing == GLSL_INTERFACE_PACKING_STD430) {
+         alignment = type->std430_base_alignment(v->RowMajor);
+         size = type->std430_size(v->RowMajor);
+      } else {
+         alignment = type->std140_base_alignment(v->RowMajor);
+         size = type->std140_size(v->RowMajor);
+      }
 
       this->offset = glsl_align(this->offset, alignment);
       v->Offset = this->offset;
@@ -170,6 +187,7 @@ struct block {
 
 unsigned
 link_uniform_blocks(void *mem_ctx,
+                    struct gl_context *ctx,
                     struct gl_shader_program *prog,
                     struct gl_shader **shader_list,
                     unsigned num_shaders,
@@ -255,7 +273,8 @@ link_uniform_blocks(void *mem_ctx,
                  == unsigned(ubo_packing_shared));
    STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED)
                  == unsigned(ubo_packing_packed));
-
+   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD430)
+                 == unsigned(ubo_packing_std430));
 
    hash_table_foreach (block_hash, entry) {
       const struct link_uniform_block_active *const b =
@@ -290,6 +309,15 @@ link_uniform_blocks(void *mem_ctx,
 
             blocks[i].UniformBufferSize = parcel.buffer_size;
 
+            /* Check that the SSBO's size does not exceed the maximum supported size */
+            if (b->is_shader_storage &&
+                parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
+               linker_error(prog, "shader storage block `%s' has size %d, "
+                            "which is larger than than the maximum allowed (%d)",
+                            block_type->name,
+                            parcel.buffer_size,
+                            ctx->Const.MaxShaderStorageBlockSize);
+            }
             blocks[i].NumUniforms =
                (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
 
@@ -310,6 +338,15 @@ link_uniform_blocks(void *mem_ctx,
 
          blocks[i].UniformBufferSize = parcel.buffer_size;
 
+         /* Check that the SSBO's size does not exceed the maximum supported size */
+         if (b->is_shader_storage &&
+             parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
+            linker_error(prog, "shader storage block `%s' has size %d, "
+                         "which is larger than than the maximum allowed (%d)",
+                         block_type->name,
+                         parcel.buffer_size,
+                         ctx->Const.MaxShaderStorageBlockSize);
+         }
          blocks[i].NumUniforms =
             (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
 
index f238513..b0a4ec3 100644 (file)
@@ -48,7 +48,7 @@ static unsigned
 get_uniform_block_index(const gl_shader_program *shProg,
                         const char *uniformBlockName)
 {
-   for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
+   for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
       if (!strcmp(shProg->UniformBlocks[i].Name, uniformBlockName))
         return i;
    }
index a0cb618..47d49c8 100644 (file)
@@ -28,6 +28,7 @@
 #include "glsl_symbol_table.h"
 #include "program/hash_table.h"
 #include "program.h"
+#include "util/hash_table.h"
 
 /**
  * \file link_uniforms.cpp
@@ -62,18 +63,28 @@ program_resource_visitor::process(const glsl_type *type, const char *name)
    assert(type->without_array()->is_record()
           || type->without_array()->is_interface());
 
+   unsigned record_array_count = 1;
    char *name_copy = ralloc_strdup(NULL, name);
-   recursion(type, &name_copy, strlen(name), false, NULL, false);
+   unsigned packing = type->interface_packing;
+
+   recursion(type, &name_copy, strlen(name), false, NULL, packing, false,
+             record_array_count);
    ralloc_free(name_copy);
 }
 
 void
 program_resource_visitor::process(ir_variable *var)
 {
+   unsigned record_array_count = 1;
    const glsl_type *t = var->type;
+   const glsl_type *t_without_array = var->type->without_array();
    const bool row_major =
       var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
 
+   const unsigned packing = var->get_interface_type() ?
+      var->get_interface_type()->interface_packing :
+      var->type->interface_packing;
+
    /* false is always passed for the row_major parameter to the other
     * processing functions because no information is available to do
     * otherwise.  See the warning in linker.h.
@@ -110,7 +121,8 @@ program_resource_visitor::process(ir_variable *var)
           * lowering is only applied to non-uniform interface blocks, so we
           * can safely pass false for row_major.
           */
-         recursion(var->type, &name, new_length, row_major, NULL, false);
+         recursion(var->type, &name, new_length, row_major, NULL, packing,
+                   false, record_array_count);
       }
       ralloc_free(name);
    } else if (var->data.from_named_ifc_block_nonarray) {
@@ -134,22 +146,21 @@ program_resource_visitor::process(ir_variable *var)
        * is only applied to non-uniform interface blocks, so we can safely
        * pass false for row_major.
        */
-      recursion(var->type, &name, strlen(name), row_major, NULL, false);
+      recursion(var->type, &name, strlen(name), row_major, NULL, packing,
+                false, record_array_count);
       ralloc_free(name);
    } else if (t->without_array()->is_record()) {
       char *name = ralloc_strdup(NULL, var->name);
-      recursion(var->type, &name, strlen(name), row_major, NULL, false);
-      ralloc_free(name);
-   } else if (t->is_interface()) {
-      char *name = ralloc_strdup(NULL, var->type->name);
-      recursion(var->type, &name, strlen(name), row_major, NULL, false);
+      recursion(var->type, &name, strlen(name), row_major, NULL, packing,
+                false, record_array_count);
       ralloc_free(name);
-   } else if (t->is_array() && t->fields.array->is_interface()) {
-      char *name = ralloc_strdup(NULL, var->type->fields.array->name);
-      recursion(var->type, &name, strlen(name), row_major, NULL, false);
+   } else if (t_without_array->is_interface()) {
+      char *name = ralloc_strdup(NULL, t_without_array->name);
+      recursion(var->type, &name, strlen(name), row_major, NULL, packing,
+                false, record_array_count);
       ralloc_free(name);
    } else {
-      this->visit_field(t, var->name, row_major, NULL, false);
+      this->visit_field(t, var->name, row_major, NULL, packing, false);
    }
 }
 
@@ -157,7 +168,9 @@ void
 program_resource_visitor::recursion(const glsl_type *t, char **name,
                                     size_t name_length, bool row_major,
                                     const glsl_type *record_type,
-                                    bool last_field)
+                                    const unsigned packing,
+                                    bool last_field,
+                                    unsigned record_array_count)
 {
    /* Records need to have each field processed individually.
     *
@@ -170,7 +183,7 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
          record_type = t;
 
       if (t->is_record())
-         this->enter_record(t, *name, row_major);
+         this->enter_record(t, *name, row_major, packing);
 
       for (unsigned i = 0; i < t->length; i++) {
         const char *field = t->fields.structure[i].name;
@@ -204,7 +217,8 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
          recursion(t->fields.structure[i].type, name, new_length,
                    field_row_major,
                    record_type,
-                   (i + 1) == t->length);
+                   packing,
+                   (i + 1) == t->length, record_array_count);
 
          /* Only the first leaf-field of the record gets called with the
           * record type pointer.
@@ -214,14 +228,22 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
 
       if (t->is_record()) {
          (*name)[name_length] = '\0';
-         this->leave_record(t, *name, row_major);
+         this->leave_record(t, *name, row_major, packing);
       }
-   } else if (t->is_array() && (t->fields.array->is_record()
-                                || t->fields.array->is_interface())) {
+   } else if (t->without_array()->is_record() ||
+              t->without_array()->is_interface()) {
       if (record_type == NULL && t->fields.array->is_record())
          record_type = t->fields.array;
 
-      for (unsigned i = 0; i < t->length; i++) {
+      unsigned length = t->length;
+      /* Shader storage block unsized arrays: add subscript [0] to variable
+       * names */
+      if (t->is_unsized_array())
+         length = 1;
+
+      record_array_count *= length;
+
+      for (unsigned i = 0; i < length; i++) {
         size_t new_length = name_length;
 
         /* Append the subscript to the current variable name */
@@ -229,7 +251,8 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
 
          recursion(t->fields.array, name, new_length, row_major,
                    record_type,
-                   (i + 1) == t->length);
+                   packing,
+                   (i + 1) == t->length, record_array_count);
 
          /* Only the first leaf-field of the record gets called with the
           * record type pointer.
@@ -237,7 +260,8 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
          record_type = NULL;
       }
    } else {
-      this->visit_field(t, *name, row_major, record_type, last_field);
+      this->set_record_array_count(record_array_count);
+      this->visit_field(t, *name, row_major, record_type, packing, last_field);
    }
 }
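
The forced length of 1 means an unsized SSBO array is visited exactly once and picks up a "[0]" subscript, matching the "[0]" suffix convention of program-interface queries. For example (illustrative):

    #version 430
    buffer B { float data[]; };
    // visited once with length == 1, yielding the resource name
    // "data[0]" (or "B.data[0]" when the block has an instance name)
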
 
@@ -245,6 +269,7 @@ void
 program_resource_visitor::visit_field(const glsl_type *type, const char *name,
                                       bool row_major,
                                       const glsl_type *,
+                                      const unsigned,
                                       bool /* last_field */)
 {
    visit_field(type, name, row_major);
@@ -258,12 +283,19 @@ program_resource_visitor::visit_field(const glsl_struct_field *field)
 }
 
 void
-program_resource_visitor::enter_record(const glsl_type *, const char *, bool)
+program_resource_visitor::enter_record(const glsl_type *, const char *, bool,
+                                       const unsigned)
+{
+}
+
+void
+program_resource_visitor::leave_record(const glsl_type *, const char *, bool,
+                                       const unsigned)
 {
 }
 
 void
-program_resource_visitor::leave_record(const glsl_type *, const char *, bool)
+program_resource_visitor::set_record_array_count(unsigned)
 {
 }
 
@@ -281,11 +313,13 @@ namespace {
  */
 class count_uniform_size : public program_resource_visitor {
 public:
-   count_uniform_size(struct string_to_uint_map *map)
-      : num_active_uniforms(0), num_values(0), num_shader_samplers(0),
-        num_shader_images(0), num_shader_uniform_components(0),
-        num_shader_subroutines(0),
-        is_ubo_var(false), map(map)
+   count_uniform_size(struct string_to_uint_map *map,
+                      struct string_to_uint_map *hidden_map)
+      : num_active_uniforms(0), num_hidden_uniforms(0), num_values(0),
+        num_shader_samplers(0), num_shader_images(0),
+        num_shader_uniform_components(0), num_shader_subroutines(0),
+        is_ubo_var(false), is_shader_storage(false), map(map),
+        hidden_map(hidden_map)
    {
       /* empty */
    }
@@ -300,7 +334,9 @@ public:
 
    void process(ir_variable *var)
    {
+      this->current_var = var;
       this->is_ubo_var = var->is_in_buffer_block();
+      this->is_shader_storage = var->is_in_shader_storage_block();
       if (var->is_interface_instance())
          program_resource_visitor::process(var->get_interface_type(),
                                            var->get_interface_type()->name);
@@ -313,6 +349,8 @@ public:
     */
    unsigned num_active_uniforms;
 
+   unsigned num_hidden_uniforms;
+
    /**
     * Number of data values required to back the storage for the active uniforms
     */
@@ -339,6 +377,9 @@ public:
    unsigned num_shader_subroutines;
 
    bool is_ubo_var;
+   bool is_shader_storage;
+
+   struct string_to_uint_map *map;
 
 private:
    virtual void visit_field(const glsl_type *type, const char *name,
@@ -367,13 +408,14 @@ private:
           * components in the default block.  The spec allows image
           * uniforms to use up no more than one scalar slot.
           */
-         this->num_shader_uniform_components += values;
+         if (!is_shader_storage)
+            this->num_shader_uniform_components += values;
       } else {
         /* Accumulate the total number of uniform slots used by this shader.
          * Note that samplers do not count against this limit because they
          * don't use any storage on current hardware.
          */
-        if (!is_ubo_var)
+        if (!is_ubo_var && !is_shader_storage)
            this->num_shader_uniform_components += values;
       }
 
@@ -383,7 +425,13 @@ private:
       if (this->map->get(id, name))
         return;
 
-      this->map->put(this->num_active_uniforms, name);
+      if (this->current_var->data.how_declared == ir_var_hidden) {
+         this->hidden_map->put(this->num_hidden_uniforms, name);
+         this->num_hidden_uniforms++;
+      } else {
+         this->map->put(this->num_active_uniforms - this->num_hidden_uniforms,
+                        name);
+      }
 
       /* Each leaf uniform occupies one entry in the list of active
        * uniforms.
@@ -392,7 +440,12 @@ private:
       this->num_values += values;
    }
 
-   struct string_to_uint_map *map;
+   struct string_to_uint_map *hidden_map;
+
+   /**
+    * Current variable being processed.
+    */
+   ir_variable *current_var;
 };
 
 } /* anonymous namespace */
@@ -431,6 +484,7 @@ public:
       this->next_sampler = 0;
       this->next_image = 0;
       this->next_subroutine = 0;
+      this->record_array_count = 1;
       memset(this->targets, 0, sizeof(this->targets));
    }
 
@@ -439,13 +493,14 @@ public:
    {
       current_var = var;
       field_counter = 0;
+      this->record_next_sampler = new string_to_uint_map;
 
       ubo_block_index = -1;
       if (var->is_in_buffer_block()) {
          if (var->is_interface_instance() && var->type->is_array()) {
             unsigned l = strlen(var->get_interface_type()->name);
 
-            for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
+            for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
                if (strncmp(var->get_interface_type()->name,
                            prog->UniformBlocks[i].Name,
                            l) == 0
@@ -455,7 +510,7 @@ public:
                }
             }
          } else {
-            for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
+            for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
                if (strcmp(var->get_interface_type()->name,
                           prog->UniformBlocks[i].Name) == 0) {
                   ubo_block_index = i;
@@ -490,8 +545,16 @@ public:
                     var->get_interface_type()->name);
          else
             process(var);
-      } else
+      } else {
+         /* Store any explicit location and reset data location so we can
+          * reuse this variable for storing the uniform slot number.
+          */
+         this->explicit_location = current_var->data.location;
+         current_var->data.location = -1;
+
          process(var);
+      }
+      delete this->record_next_sampler;
    }
 
    int ubo_block_index;
@@ -500,17 +563,65 @@ public:
 
 private:
    void handle_samplers(const glsl_type *base_type,
-                        struct gl_uniform_storage *uniform)
+                        struct gl_uniform_storage *uniform, const char *name)
    {
       if (base_type->is_sampler()) {
-         uniform->sampler[shader_type].index = this->next_sampler;
          uniform->sampler[shader_type].active = true;
 
-         /* Increment the sampler by 1 for non-arrays and by the number of
-          * array elements for arrays.
-          */
-         this->next_sampler +=
-               MAX2(1, uniform->array_elements);
+         /* Handle multiple samplers inside struct arrays */
+         if (this->record_array_count > 1) {
+            unsigned inner_array_size = MAX2(1, uniform->array_elements);
+            char *name_copy = ralloc_strdup(NULL, name);
+
+            /* Remove all array subscripts from the sampler name */
+            char *str_start;
+            const char *str_end;
+            while ((str_start = strchr(name_copy, '[')) &&
+                  (str_end = strchr(name_copy, ']'))) {
+               memmove(str_start, str_end + 1, 1 + strlen(str_end));
+            }
+
+            unsigned index = 0;
+            if (this->record_next_sampler->get(index, name_copy)) {
+               /* In this case, we've already seen this uniform so we just use
+                * the next sampler index recorded the last time we visited.
+                */
+               uniform->sampler[shader_type].index = index;
+               index = inner_array_size + uniform->sampler[shader_type].index;
+               this->record_next_sampler->put(index, name_copy);
+
+               ralloc_free(name_copy);
+               /* Return as everything else has already been initialised in a
+                * previous pass.
+                */
+               return;
+            } else {
+               /* We've never seen this uniform before so we need to allocate
+                * enough indices to store it.
+                *
+                * Nested struct arrays behave like arrays of arrays so we need
+                * to increase the index by the total number of elements of the
+                * sampler in case there is more than one sampler inside the
+                * structs. This allows the offset to be easily calculated for
+                * indirect indexing.
+                */
+               uniform->sampler[shader_type].index = this->next_sampler;
+               this->next_sampler +=
+                  inner_array_size * this->record_array_count;
+
+               /* Store the next index for future passes over the struct array
+                */
+               index = uniform->sampler[shader_type].index + inner_array_size;
+               this->record_next_sampler->put(index, name_copy);
+               ralloc_free(name_copy);
+            }
+         } else {
+            /* Increment the sampler by 1 for non-arrays and by the number of
+             * array elements for arrays.
+             */
+            uniform->sampler[shader_type].index = this->next_sampler;
+            this->next_sampler += MAX2(1, uniform->array_elements);
+         }
 
          const gl_texture_index target = base_type->sampler_index();
          const unsigned shadow = base_type->sampler_shadow;
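
The index bookkeeping above is easier to follow on a worked case (shader illustrative). With record_array_count == 3, the first leaf visit reserves one contiguous range per sampler member, and repeat visits walk through it via record_next_sampler:

    #version 150
    struct S { sampler2D t; sampler2D u[2]; };
    uniform S s[3];           // record_array_count == 3
    // first visit of s[0].t: index 0, next_sampler += 1 * 3 -> t uses 0..2
    // first visit of s[0].u: index 3, next_sampler += 2 * 3 -> u uses 3..8
    // s[1].t / s[2].t then read back 1, 2 from record_next_sampler, and
    // s[1].u / s[2].u read back 5, 7: a fixed stride per struct element,
    // which is what indirect addressing like s[i].u[j] relies on.
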
@@ -563,6 +674,11 @@ private:
       }
    }
 
+   virtual void set_record_array_count(unsigned record_array_count)
+   {
+      this->record_array_count = record_array_count;
+   }
+
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major)
    {
@@ -573,25 +689,34 @@ private:
    }
 
    virtual void enter_record(const glsl_type *type, const char *,
-                             bool row_major) {
+                             bool row_major, const unsigned packing) {
       assert(type->is_record());
       if (this->ubo_block_index == -1)
          return;
-      this->ubo_byte_offset = glsl_align(
+      if (packing == GLSL_INTERFACE_PACKING_STD430)
+         this->ubo_byte_offset = glsl_align(
+            this->ubo_byte_offset, type->std430_base_alignment(row_major));
+      else
+         this->ubo_byte_offset = glsl_align(
             this->ubo_byte_offset, type->std140_base_alignment(row_major));
    }
 
    virtual void leave_record(const glsl_type *type, const char *,
-                             bool row_major) {
+                             bool row_major, const unsigned packing) {
       assert(type->is_record());
       if (this->ubo_block_index == -1)
          return;
-      this->ubo_byte_offset = glsl_align(
+      if (packing == GLSL_INTERFACE_PACKING_STD430)
+         this->ubo_byte_offset = glsl_align(
+            this->ubo_byte_offset, type->std430_base_alignment(row_major));
+      else
+         this->ubo_byte_offset = glsl_align(
             this->ubo_byte_offset, type->std140_base_alignment(row_major));
    }
 
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major, const glsl_type *record_type,
+                            const unsigned packing,
                             bool /* last_field */)
    {
       assert(!type->without_array()->is_record());
@@ -614,10 +739,17 @@ private:
       }
 
       /* This assigns uniform indices to sampler and image uniforms. */
-      handle_samplers(base_type, &this->uniforms[id]);
+      handle_samplers(base_type, &this->uniforms[id], name);
       handle_images(base_type, &this->uniforms[id]);
       handle_subroutines(base_type, &this->uniforms[id]);
 
+      /* For arrays of arrays or struct arrays the base location may have
+       * already been set, so don't set it again.
+       */
+      if (ubo_block_index == -1 && current_var->data.location == -1) {
+         current_var->data.location = id;
+      }
+
       /* If there is already storage associated with this uniform or if the
        * uniform is set as builtin, it means that it was set while processing
        * an earlier shader stage.  For example, we may be processing the
@@ -634,10 +766,10 @@ private:
          if (record_type != NULL) {
             const unsigned entries = MAX2(1, this->uniforms[id].array_elements);
             this->uniforms[id].remap_location =
-               current_var->data.location + field_counter;
+               this->explicit_location + field_counter;
             field_counter += entries;
          } else {
-            this->uniforms[id].remap_location = current_var->data.location;
+            this->uniforms[id].remap_location = this->explicit_location;
          }
       } else {
          /* Initialize to indicate that no location is set */
@@ -658,17 +790,29 @@ private:
       if (!this->uniforms[id].builtin)
          this->uniforms[id].storage = this->values;
 
-      if (this->ubo_block_index != -1) {
-        this->uniforms[id].block_index = this->ubo_block_index;
-
-        const unsigned alignment = type->std140_base_alignment(row_major);
-        this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment);
-        this->uniforms[id].offset = this->ubo_byte_offset;
-        this->ubo_byte_offset += type->std140_size(row_major);
+      this->uniforms[id].is_shader_storage =
+         current_var->is_in_shader_storage_block();
 
-        if (type->is_array()) {
-           this->uniforms[id].array_stride =
-              glsl_align(type->fields.array->std140_size(row_major), 16);
+      if (this->ubo_block_index != -1) {
+         this->uniforms[id].block_index = this->ubo_block_index;
+
+         unsigned alignment = type->std140_base_alignment(row_major);
+         if (packing == GLSL_INTERFACE_PACKING_STD430)
+            alignment = type->std430_base_alignment(row_major);
+         this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment);
+         this->uniforms[id].offset = this->ubo_byte_offset;
+         if (packing == GLSL_INTERFACE_PACKING_STD430)
+            this->ubo_byte_offset += type->std430_size(row_major);
+         else
+            this->ubo_byte_offset += type->std140_size(row_major);
+
+         if (type->is_array()) {
+            if (packing == GLSL_INTERFACE_PACKING_STD430)
+               this->uniforms[id].array_stride =
+                  type->fields.array->std430_array_stride(row_major);
+            else
+               this->uniforms[id].array_stride =
+                  glsl_align(type->fields.array->std140_size(row_major), 16);
         } else {
            this->uniforms[id].array_stride = 0;
         }
@@ -679,7 +823,11 @@ private:
             const unsigned items = row_major ? matrix->matrix_columns : matrix->vector_elements;
 
             assert(items <= 4);
-            this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
+            if (packing == GLSL_INTERFACE_PACKING_STD430)
+               this->uniforms[id].matrix_stride = items < 3 ? items * N :
+                                                          glsl_align(items * N, 16);
+            else
+               this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
            this->uniforms[id].row_major = row_major;
         } else {
            this->uniforms[id].matrix_stride = 0;
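
To make the std140/std430 split above concrete: std140 rounds array (and matrix) strides up to a vec4 boundary, while std430 lets small types keep their natural stride. A minimal standalone sketch of the arithmetic (align_to is an illustrative helper, not Mesa's glsl_align; a power-of-two alignment and 4-byte float components are assumed):

    #include <cassert>

    // Illustrative helper (not Mesa's): round x up to a multiple of a,
    // where a is a power of two.
    static unsigned align_to(unsigned x, unsigned a) { return (x + a - 1) & ~(a - 1); }

    int main() {
       const unsigned N = 4;  // bytes per float component

       // float[]: std140 rounds the element stride up to a vec4 (16 bytes),
       // std430 keeps the scalar's own 4-byte stride.
       assert(align_to(N, 16) == 16);  // std140 array stride
       assert(N == 4);                 // std430 array stride

       // mat2 (vec2 columns): std140 pads each column to 16 bytes; std430
       // lets a column with fewer than 3 components keep its 2N = 8-byte
       // stride, exactly the items < 3 test above.
       const unsigned items = 2;  // components per column
       assert(align_to(items * N, 16) == 16);                           // std140
       assert((items < 3 ? items * N : align_to(items * N, 16)) == 8);  // std430
       return 0;
    }
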
@@ -703,21 +851,34 @@ private:
    unsigned next_image;
    unsigned next_subroutine;
 
-public:
-   union gl_constant_value *values;
-
-   gl_texture_index targets[MAX_SAMPLERS];
+   /**
+    * The field counter ensures that the members of uniform structures
+    * with explicit locations receive sequential locations.
+    */
+   unsigned field_counter;
 
    /**
     * Current variable being processed.
     */
    ir_variable *current_var;
 
-   /**
-    * Field counter is used to take care that uniform structures
-    * with explicit locations get sequential locations.
+   /* Used to store the explicit location from current_var so that we can
+    * reuse the location field for storing the uniform slot id.
     */
-   unsigned field_counter;
+   int explicit_location;
+
+   /* Stores the total number of struct array elements, including nested structs */
+   unsigned record_array_count;
+
+   /* Map for temporarily storing next sampler index when handling samplers in
+    * struct arrays.
+    */
+   struct string_to_uint_map *record_next_sampler;
+
+public:
+   union gl_constant_value *values;
+
+   gl_texture_index targets[MAX_SAMPLERS];
 
    /**
     * Mask of samplers used by the current shader stage.
@@ -809,8 +970,7 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
 
       if (var->type->is_record()) {
          sentinel = '.';
-      } else if (var->type->is_array()
-                 && var->type->fields.array->is_record()) {
+      } else if (var->type->without_array()->is_record()) {
          sentinel = '[';
       }
 
@@ -882,47 +1042,19 @@ link_set_image_access_qualifiers(struct gl_shader_program *prog)
 }
 
 /**
- * Sort the array of uniform storage so that the non-hidden uniforms are first
- *
- * This function sorts the list "in place."  This is important because some of
- * the storage accessible from \c uniforms has \c uniforms as its \c ralloc
- * context.  If \c uniforms is freed, some other storage will also be freed.
+ * Combine the hidden uniform hash map with the uniform hash map so that the
+ * hidden uniforms will be given indices at the end of the uniform storage
+ * array.
  */
-static unsigned
-move_hidden_uniforms_to_end(struct gl_shader_program *prog,
-                            struct gl_uniform_storage *uniforms,
-                            unsigned num_elements)
+static void
+assign_hidden_uniform_slot_id(const char *name, unsigned hidden_id,
+                              void *closure)
 {
-   struct gl_uniform_storage *sorted_uniforms =
-      ralloc_array(prog, struct gl_uniform_storage, num_elements);
-   unsigned hidden_uniforms = 0;
-   unsigned j = 0;
-
-   /* Add the non-hidden uniforms. */
-   for (unsigned i = 0; i < num_elements; i++) {
-      if (!uniforms[i].hidden)
-         sorted_uniforms[j++] = uniforms[i];
-   }
+   count_uniform_size *uniform_size = (count_uniform_size *) closure;
+   unsigned hidden_uniform_start = uniform_size->num_active_uniforms -
+      uniform_size->num_hidden_uniforms;
 
-   /* Add and count the hidden uniforms. */
-   for (unsigned i = 0; i < num_elements; i++) {
-      if (uniforms[i].hidden) {
-         sorted_uniforms[j++] = uniforms[i];
-         hidden_uniforms++;
-      }
-   }
-
-   assert(prog->UniformHash != NULL);
-   prog->UniformHash->clear();
-   for (unsigned i = 0; i < num_elements; i++) {
-      if (sorted_uniforms[i].name != NULL)
-         prog->UniformHash->put(i, sorted_uniforms[i].name);
-   }
-
-   memcpy(uniforms, sorted_uniforms, sizeof(uniforms[0]) * num_elements);
-   ralloc_free(sorted_uniforms);
-
-   return hidden_uniforms;
+   uniform_size->map->put(hidden_uniform_start + hidden_id, name);
 }
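
A quick numeric check of the slot-id math above (the totals are invented for the example; in the real pass they come from count_uniform_size): with 10 active uniforms of which 3 are hidden, the hidden uniforms start at slot 10 - 3 = 7 and occupy slots 7, 8 and 9, after every visible uniform.

    #include <cassert>

    int main() {
       unsigned num_active_uniforms = 10;  // assumed totals for the example
       unsigned num_hidden_uniforms = 3;
       unsigned hidden_uniform_start = num_active_uniforms - num_hidden_uniforms;
       // Hidden uniform ids 0..2 land on slots 7..9.
       for (unsigned hidden_id = 0; hidden_id < num_hidden_uniforms; hidden_id++) {
          unsigned slot = hidden_uniform_start + hidden_id;
          assert(slot >= 7 && slot <= 9);
       }
       return 0;
    }
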
 
 void
@@ -946,7 +1078,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
     * Note: this is *NOT* the index that is returned to the application by
     * glGetUniformLocation.
     */
-   count_uniform_size uniform_size(prog->UniformHash);
+   struct string_to_uint_map *hiddenUniforms = new string_to_uint_map;
+   count_uniform_size uniform_size(prog->UniformHash, hiddenUniforms);
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
       struct gl_shader *sh = prog->_LinkedShaders[i];
 
@@ -987,19 +1120,26 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
       sh->num_combined_uniform_components = sh->num_uniform_components;
 
       for (unsigned i = 0; i < sh->NumUniformBlocks; i++) {
-        sh->num_combined_uniform_components +=
-           sh->UniformBlocks[i].UniformBufferSize / 4;
+         if (!sh->UniformBlocks[i].IsShaderStorage) {
+           sh->num_combined_uniform_components +=
+              sh->UniformBlocks[i].UniformBufferSize / 4;
+         }
       }
    }
 
    const unsigned num_uniforms = uniform_size.num_active_uniforms;
    const unsigned num_data_slots = uniform_size.num_values;
+   const unsigned hidden_uniforms = uniform_size.num_hidden_uniforms;
 
    /* On the outside chance that there were no uniforms, bail out.
     */
    if (num_uniforms == 0)
       return;
 
+   /* Assign each hidden uniform a slot id. */
+   hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size);
+   delete hiddenUniforms;
+
    struct gl_uniform_storage *uniforms =
       rzalloc_array(prog, struct gl_uniform_storage, num_uniforms);
    union gl_constant_value *data =
@@ -1033,9 +1173,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
              sizeof(prog->_LinkedShaders[i]->SamplerTargets));
    }
 
-   const unsigned hidden_uniforms =
-      move_hidden_uniforms_to_end(prog, uniforms, num_uniforms);
-
    /* Reserve all the explicit locations of the active uniforms. */
    for (unsigned i = 0; i < num_uniforms; i++) {
       if (uniforms[i].type->is_subroutine())
index f7a7b8c..7e77a67 100644 (file)
@@ -956,9 +956,16 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
          type = type->fields.array;
       }
 
-      slots = (type->is_array()
-            ? (type->length * type->fields.array->matrix_columns)
-            : type->matrix_columns);
+      if (type->is_array()) {
+         slots = 1;
+         while (type->is_array()) {
+            slots *= type->length;
+            type = type->fields.array;
+         }
+         slots *= type->matrix_columns;
+      } else {
+         slots = type->matrix_columns;
+      }
       this->matches[this->num_matches].num_components = 4 * slots;
    } else {
       this->matches[this->num_matches].num_components
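
The new loop handles arrays of arrays by peeling every array level before multiplying in the matrix column count. A self-contained sketch with a toy type (not Mesa's glsl_type):

    #include <cassert>

    // Toy stand-in for glsl_type, just enough to mirror the loop above.
    struct toy_type {
       unsigned length;          // array length when an array
       const toy_type *element;  // element type when an array, else nullptr
       unsigned matrix_columns;  // 1 for scalars and vectors
       bool is_array() const { return element != nullptr; }
    };

    static unsigned count_slots(const toy_type *type) {
       unsigned slots = 1;
       while (type->is_array()) {
          slots *= type->length;
          type = type->element;
       }
       return slots * type->matrix_columns;
    }

    int main() {
       toy_type mat4  = {0, nullptr, 4};
       toy_type inner = {2, &mat4, 0};   // mat4[2]
       toy_type outer = {3, &inner, 0};  // a 3-element array of mat4[2]
       assert(count_slots(&outer) == 3 * 2 * 4);  // 24 slots, 96 components
       return 0;
    }
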
index 47f7d25..87c7d4b 100644 (file)
@@ -317,38 +317,38 @@ public:
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_leave(ir_function *ir)
+   virtual ir_visitor_status visit_leave(ir_function *)
    {
       in_main = false;
       after_return = false;
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_leave(ir_return *ir)
+   virtual ir_visitor_status visit_leave(ir_return *)
    {
       after_return = true;
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_enter(ir_if *ir)
+   virtual ir_visitor_status visit_enter(ir_if *)
    {
       ++control_flow;
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_leave(ir_if *ir)
+   virtual ir_visitor_status visit_leave(ir_if *)
    {
       --control_flow;
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_enter(ir_loop *ir)
+   virtual ir_visitor_status visit_enter(ir_loop *)
    {
       ++control_flow;
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_leave(ir_loop *ir)
+   virtual ir_visitor_status visit_leave(ir_loop *)
    {
       --control_flow;
       return visit_continue;
@@ -877,30 +877,40 @@ validate_intrastage_arrays(struct gl_shader_program *prog,
     * In addition, set the type of the linked variable to the
     * explicitly sized array.
     */
-   if (var->type->is_array() && existing->type->is_array() &&
-       (var->type->fields.array == existing->type->fields.array) &&
-       ((var->type->length == 0)|| (existing->type->length == 0))) {
-      if (var->type->length != 0) {
-         if (var->type->length <= existing->data.max_array_access) {
-            linker_error(prog, "%s `%s' declared as type "
-                         "`%s' but outermost dimension has an index"
-                         " of `%i'\n",
-                         mode_string(var),
-                         var->name, var->type->name,
-                         existing->data.max_array_access);
-         }
-         existing->type = var->type;
-         return true;
-      } else if (existing->type->length != 0) {
-         if(existing->type->length <= var->data.max_array_access) {
-            linker_error(prog, "%s `%s' declared as type "
-                         "`%s' but outermost dimension has an index"
-                         " of `%i'\n",
-                         mode_string(var),
-                         var->name, existing->type->name,
-                         var->data.max_array_access);
+   if (var->type->is_array() && existing->type->is_array()) {
+      if ((var->type->fields.array == existing->type->fields.array) &&
+          ((var->type->length == 0) || (existing->type->length == 0))) {
+         if (var->type->length != 0) {
+            if (var->type->length <= existing->data.max_array_access) {
+               linker_error(prog, "%s `%s' declared as type "
+                           "`%s' but outermost dimension has an index"
+                           " of `%i'\n",
+                           mode_string(var),
+                           var->name, var->type->name,
+                           existing->data.max_array_access);
+            }
+            existing->type = var->type;
+            return true;
+         } else if (existing->type->length != 0) {
+            if (existing->type->length <= var->data.max_array_access &&
+               !existing->data.from_ssbo_unsized_array) {
+               linker_error(prog, "%s `%s' declared as type "
+                           "`%s' but outermost dimension has an index"
+                           " of `%i'\n",
+                           mode_string(var),
+                           var->name, existing->type->name,
+                           var->data.max_array_access);
+            }
+            return true;
          }
-         return true;
+      } else {
+         /* Arrays of structs may have different glsl_type pointers even
+          * though they are actually the same type; use record_compare() to
+          * check for that.
+          */
+         if (existing->type->fields.array->is_record() &&
+             var->type->fields.array->is_record() &&
+             existing->type->fields.array->record_compare(var->type->fields.array))
+            return true;
       }
    }
    return false;
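
Concretely, the relaxed check above accepts a pair such as `float a[];` in one shader and `float a[4];` in another, while still rejecting mismatched element types. A toy model of the predicate, with element types reduced to plain ids:

    #include <cassert>

    // Toy model (not Mesa's glsl_type): two array declarations are
    // compatible when the element types agree and at least one side is
    // unsized (length 0).
    struct toy_array { int element_id; unsigned length; };

    static bool arrays_compatible(toy_array a, toy_array b) {
       return a.element_id == b.element_id && (a.length == 0 || b.length == 0);
    }

    int main() {
       toy_array unsized = {1, 0};  // e.g. "float a[];" in one shader
       toy_array sized   = {1, 4};  // e.g. "float a[4];" in another
       toy_array other   = {2, 4};  // different element type
       assert(arrays_compatible(unsized, sized));
       assert(!arrays_compatible(sized, other));
       return 0;
    }
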
@@ -959,12 +969,24 @@ cross_validate_globals(struct gl_shader_program *prog,
                       && existing->type->record_compare(var->type)) {
                      existing->type = var->type;
                   } else {
-                     linker_error(prog, "%s `%s' declared as type "
-                                  "`%s' and type `%s'\n",
-                                  mode_string(var),
-                                  var->name, var->type->name,
-                                  existing->type->name);
-                     return;
+                     /* If it is an unsized array in a Shader Storage Block,
+                      * two different shaders can access different elements.
+                      * Because of that, they might have been converted to
+                      * differently sized arrays; in that case check that they
+                      * are compatible, ignoring the array size.
+                      */
+                     if (!(var->data.mode == ir_var_shader_storage &&
+                           var->data.from_ssbo_unsized_array &&
+                           existing->data.mode == ir_var_shader_storage &&
+                           existing->data.from_ssbo_unsized_array &&
+                           var->type->gl_type == existing->type->gl_type)) {
+                        linker_error(prog, "%s `%s' declared as type "
+                                    "`%s' and type `%s'\n",
+                                    mode_string(var),
+                                    var->name, var->type->name,
+                                    existing->type->name);
+                        return;
+                     }
                   }
               }
            }
@@ -1165,7 +1187,7 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
       for (unsigned int j = 0; j < sh->NumUniformBlocks; j++) {
         int index = link_cross_validate_uniform_block(prog,
                                                       &prog->UniformBlocks,
-                                                      &prog->NumUniformBlocks,
+                                                      &prog->NumBufferInterfaceBlocks,
                                                       &sh->UniformBlocks[j]);
 
         if (index == -1) {
@@ -1341,33 +1363,6 @@ move_non_declarations(exec_list *instructions, exec_node *last,
    return last;
 }
 
-/**
- * Get the function signature for main from a shader
- */
-ir_function_signature *
-link_get_main_function_signature(gl_shader *sh)
-{
-   ir_function *const f = sh->symbols->get_function("main");
-   if (f != NULL) {
-      exec_list void_parameters;
-
-      /* Look for the 'void main()' signature and ensure that it's defined.
-       * This keeps the linker from accidentally pick a shader that just
-       * contains a prototype for main.
-       *
-       * We don't have to check for multiple definitions of main (in multiple
-       * shaders) because that would have already been caught above.
-       */
-      ir_function_signature *sig =
-         f->matching_signature(NULL, &void_parameters, false);
-      if ((sig != NULL) && sig->is_defined) {
-        return sig;
-      }
-   }
-
-   return NULL;
-}
-
 
 /**
  * This class is only used in link_intrastage_shaders() below but declaring
@@ -1391,12 +1386,14 @@ public:
 
    virtual ir_visitor_status visit(ir_variable *var)
    {
-      fixup_type(&var->type, var->data.max_array_access);
+      fixup_type(&var->type, var->data.max_array_access,
+                 var->data.from_ssbo_unsized_array);
       if (var->type->is_interface()) {
          if (interface_contains_unsized_arrays(var->type)) {
             const glsl_type *new_type =
                resize_interface_members(var->type,
-                                        var->get_max_ifc_array_access());
+                                        var->get_max_ifc_array_access(),
+                                        var->is_in_shader_storage_block());
             var->type = new_type;
             var->change_interface_type(new_type);
          }
@@ -1405,7 +1402,8 @@ public:
          if (interface_contains_unsized_arrays(var->type->fields.array)) {
             const glsl_type *new_type =
                resize_interface_members(var->type->fields.array,
-                                        var->get_max_ifc_array_access());
+                                        var->get_max_ifc_array_access(),
+                                        var->is_in_shader_storage_block());
             var->change_interface_type(new_type);
             var->type = update_interface_members_array(var->type, new_type);
          }
@@ -1446,9 +1444,10 @@ private:
     * If the type pointed to by \c type represents an unsized array, replace
     * it with a sized array whose size is determined by max_array_access.
     */
-   static void fixup_type(const glsl_type **type, unsigned max_array_access)
+   static void fixup_type(const glsl_type **type, unsigned max_array_access,
+                          bool from_ssbo_unsized_array)
    {
-      if ((*type)->is_unsized_array()) {
+      if (!from_ssbo_unsized_array && (*type)->is_unsized_array()) {
          *type = glsl_type::get_array_instance((*type)->fields.array,
                                                max_array_access + 1);
          assert(*type != NULL);
@@ -1491,14 +1490,23 @@ private:
     */
    static const glsl_type *
    resize_interface_members(const glsl_type *type,
-                            const unsigned *max_ifc_array_access)
+                            const unsigned *max_ifc_array_access,
+                            bool is_ssbo)
    {
       unsigned num_fields = type->length;
       glsl_struct_field *fields = new glsl_struct_field[num_fields];
       memcpy(fields, type->fields.structure,
              num_fields * sizeof(*fields));
       for (unsigned i = 0; i < num_fields; i++) {
-         fixup_type(&fields[i].type, max_ifc_array_access[i]);
+         /* If the last member of an SSBO is an unsized array, don't replace
+          * it with a sized array.
+          */
+         if (is_ssbo && i == (num_fields - 1))
+            fixup_type(&fields[i].type, max_ifc_array_access[i],
+                       true);
+         else
+            fixup_type(&fields[i].type, max_ifc_array_access[i],
+                       false);
       }
       glsl_interface_packing packing =
          (glsl_interface_packing) type->interface_packing;
@@ -1988,7 +1996,7 @@ link_intrastage_shaders(void *mem_ctx,
 
    /* Link up uniform blocks defined within this stage. */
    const unsigned num_uniform_blocks =
-      link_uniform_blocks(mem_ctx, prog, shader_list, num_shaders,
+      link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders,
                           &uniform_blocks);
    if (!prog->LinkStatus)
       return NULL;
@@ -2040,7 +2048,7 @@ link_intrastage_shaders(void *mem_ctx,
     */
    gl_shader *main = NULL;
    for (unsigned i = 0; i < num_shaders; i++) {
-      if (link_get_main_function_signature(shader_list[i]) != NULL) {
+      if (_mesa_get_main_function_signature(shader_list[i]) != NULL) {
         main = shader_list[i];
         break;
       }
@@ -2072,7 +2080,7 @@ link_intrastage_shaders(void *mem_ctx,
     * copy of the original shader that contained the main function).
     */
    ir_function_signature *const main_sig =
-      link_get_main_function_signature(linked);
+      _mesa_get_main_function_signature(linked);
 
    /* Move any instructions other than variable declarations or function
     * declarations into main.
@@ -2339,6 +2347,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
     */
    unsigned used_locations = (max_index >= 32)
       ? ~0 : ~((1 << max_index) - 1);
+   unsigned double_storage_locations = 0;
 
    assert((target_index == MESA_SHADER_VERTEX)
          || (target_index == MESA_SHADER_FRAGMENT));
@@ -2389,7 +2398,6 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
    } to_assign[16];
 
    unsigned num_attr = 0;
-   unsigned total_attribs_size = 0;
 
    foreach_in_list(ir_instruction, node, sh->ir) {
       ir_variable *const var = node->as_variable();
@@ -2452,34 +2460,6 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
 
       const unsigned slots = var->type->count_attribute_slots();
 
-      /* From GL4.5 core spec, section 11.1.1 (Vertex Attributes):
-       *
-       * "A program with more than the value of MAX_VERTEX_ATTRIBS active
-       * attribute variables may fail to link, unless device-dependent
-       * optimizations are able to make the program fit within available
-       * hardware resources. For the purposes of this test, attribute variables
-       * of the type dvec3, dvec4, dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3,
-       * and dmat4 may count as consuming twice as many attributes as equivalent
-       * single-precision types. While these types use the same number of
-       * generic attributes as their single-precision equivalents,
-       * implementations are permitted to consume two single-precision vectors
-       * of internal storage for each three- or four-component double-precision
-       * vector."
-       * Until someone has a good reason in Mesa, enforce that now.
-       */
-      if (target_index == MESA_SHADER_VERTEX) {
-        total_attribs_size += slots;
-        if (var->type->without_array() == glsl_type::dvec3_type ||
-            var->type->without_array() == glsl_type::dvec4_type ||
-            var->type->without_array() == glsl_type::dmat2x3_type ||
-            var->type->without_array() == glsl_type::dmat2x4_type ||
-            var->type->without_array() == glsl_type::dmat3_type ||
-            var->type->without_array() == glsl_type::dmat3x4_type ||
-            var->type->without_array() == glsl_type::dmat4x3_type ||
-            var->type->without_array() == glsl_type::dmat4_type)
-           total_attribs_size += slots;
-      }
-
       /* If the variable is not a built-in and has a location statically
        * assigned in the shader (presumably via a layout qualifier), make sure
        * that it doesn't collide with other assigned locations.  Otherwise,
@@ -2594,6 +2574,38 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
            }
 
            used_locations |= (use_mask << attr);
+
+            /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
+             *
+             * "A program with more than the value of MAX_VERTEX_ATTRIBS
+             *  active attribute variables may fail to link, unless
+             *  device-dependent optimizations are able to make the program
+             *  fit within available hardware resources. For the purposes
+             *  of this test, attribute variables of the type dvec3, dvec4,
+             *  dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
+             *  count as consuming twice as many attributes as equivalent
+             *  single-precision types. While these types use the same number
+             *  of generic attributes as their single-precision equivalents,
+             *  implementations are permitted to consume two single-precision
+             *  vectors of internal storage for each three- or four-component
+             *  double-precision vector."
+             *
+             * Mark this attribute slot as taking up twice as much space
+             * so we can count it properly against limits.  According to
+             * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
+             * is optional behavior, but it seems preferable.
+             */
+            const glsl_type *type = var->type->without_array();
+            if (type == glsl_type::dvec3_type ||
+                type == glsl_type::dvec4_type ||
+                type == glsl_type::dmat2x3_type ||
+                type == glsl_type::dmat2x4_type ||
+                type == glsl_type::dmat3_type ||
+                type == glsl_type::dmat3x4_type ||
+                type == glsl_type::dmat4x3_type ||
+                type == glsl_type::dmat4_type) {
+               double_storage_locations |= (use_mask << attr);
+            }
         }
 
         continue;
@@ -2605,6 +2617,9 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
    }
 
    if (target_index == MESA_SHADER_VERTEX) {
+      unsigned total_attribs_size =
+         _mesa_bitcount(used_locations & ((1 << max_index) - 1)) +
+         _mesa_bitcount(double_storage_locations);
       if (total_attribs_size > max_index) {
         linker_error(prog,
                      "attempt to use %d vertex attribute slots only %d available ",
@@ -2784,25 +2799,44 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
 
    unsigned blocks[MESA_SHADER_STAGES] = {0};
    unsigned total_uniform_blocks = 0;
+   unsigned shader_blocks[MESA_SHADER_STAGES] = {0};
+   unsigned total_shader_storage_blocks = 0;
 
-   for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
-      if (prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) {
+   for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
+      /* Don't check SSBOs for Uniform Block Size */
+      if (!prog->UniformBlocks[i].IsShaderStorage &&
+          prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) {
          linker_error(prog, "Uniform block %s too big (%d/%d)\n",
                       prog->UniformBlocks[i].Name,
                       prog->UniformBlocks[i].UniformBufferSize,
                       ctx->Const.MaxUniformBlockSize);
       }
 
+      if (prog->UniformBlocks[i].IsShaderStorage &&
+          prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxShaderStorageBlockSize) {
+         linker_error(prog, "Shader storage block %s too big (%d/%d)\n",
+                      prog->UniformBlocks[i].Name,
+                      prog->UniformBlocks[i].UniformBufferSize,
+                      ctx->Const.MaxShaderStorageBlockSize);
+      }
+
       for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
         if (prog->UniformBlockStageIndex[j][i] != -1) {
-           blocks[j]++;
-           total_uniform_blocks++;
+            struct gl_shader *sh = prog->_LinkedShaders[j];
+            int stage_index = prog->UniformBlockStageIndex[j][i];
+            if (sh && sh->UniformBlocks[stage_index].IsShaderStorage) {
+               shader_blocks[j]++;
+               total_shader_storage_blocks++;
+            } else {
+               blocks[j]++;
+               total_uniform_blocks++;
+            }
         }
       }
 
       if (total_uniform_blocks > ctx->Const.MaxCombinedUniformBlocks) {
         linker_error(prog, "Too many combined uniform blocks (%d/%d)\n",
-                     prog->NumUniformBlocks,
+                     total_uniform_blocks,
                      ctx->Const.MaxCombinedUniformBlocks);
       } else {
         for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
@@ -2817,11 +2851,29 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
            }
         }
       }
+
+      if (total_shader_storage_blocks > ctx->Const.MaxCombinedShaderStorageBlocks) {
+         linker_error(prog, "Too many combined shader storage blocks (%d/%d)\n",
+                      total_shader_storage_blocks,
+                      ctx->Const.MaxCombinedShaderStorageBlocks);
+      } else {
+         for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+            const unsigned max_shader_storage_blocks =
+               ctx->Const.Program[i].MaxShaderStorageBlocks;
+            if (shader_blocks[i] > max_shader_storage_blocks) {
+               linker_error(prog, "Too many %s shader storage blocks (%d/%d)\n",
+                            _mesa_shader_stage_to_string(i),
+                            shader_blocks[i],
+                            max_shader_storage_blocks);
+               break;
+            }
+         }
+      }
    }
 }
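
The loop above now routes each active block to exactly one of two counters, so uniform blocks and shader storage blocks are checked against their own limits. A trivial sketch of that split (the flags and counts are illustrative, not read from a GL context):

    #include <cassert>

    int main() {
       const unsigned num_blocks = 3;
       const bool is_shader_storage[num_blocks] = {false, true, false};
       unsigned total_uniform_blocks = 0, total_shader_storage_blocks = 0;
       for (unsigned i = 0; i < num_blocks; i++) {
          if (is_shader_storage[i])
             total_shader_storage_blocks++;  // counted against SSBO limits
          else
             total_uniform_blocks++;         // counted against UBO limits
       }
       assert(total_uniform_blocks == 2 && total_shader_storage_blocks == 1);
       return 0;
    }
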
 
 static void
-link_calculate_subroutine_compat(struct gl_context *ctx, struct gl_shader_program *prog)
+link_calculate_subroutine_compat(struct gl_shader_program *prog)
 {
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
       struct gl_shader *sh = prog->_LinkedShaders[i];
@@ -2851,7 +2903,7 @@ link_calculate_subroutine_compat(struct gl_context *ctx, struct gl_shader_progra
 }
 
 static void
-check_subroutine_resources(struct gl_context *ctx, struct gl_shader_program *prog)
+check_subroutine_resources(struct gl_shader_program *prog)
 {
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
       struct gl_shader *sh = prog->_LinkedShaders[i];
@@ -2871,6 +2923,7 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
 {
    unsigned total_image_units = 0;
    unsigned fragment_outputs = 0;
+   unsigned total_shader_storage_blocks = 0;
 
    if (!ctx->Extensions.ARB_shader_image_load_store)
       return;
@@ -2886,6 +2939,12 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
 
          total_image_units += sh->NumImages;
 
+         for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
+            int stage_index = prog->UniformBlockStageIndex[i][j];
+            if (stage_index != -1 && sh->UniformBlocks[stage_index].IsShaderStorage)
+               total_shader_storage_blocks++;
+         }
+
          if (i == MESA_SHADER_FRAGMENT) {
             foreach_in_list(ir_instruction, node, sh->ir) {
                ir_variable *var = node->as_variable();
@@ -2899,9 +2958,10 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
    if (total_image_units > ctx->Const.MaxCombinedImageUniforms)
       linker_error(prog, "Too many combined image uniforms\n");
 
-   if (total_image_units + fragment_outputs >
+   if (total_image_units + fragment_outputs + total_shader_storage_blocks >
        ctx->Const.MaxCombinedShaderOutputResources)
-      linker_error(prog, "Too many combined image uniforms and fragment outputs\n");
+      linker_error(prog, "Too many combined image uniforms, shader storage "
+                         " buffers and fragment outputs\n");
 }
 
 
@@ -3107,6 +3167,35 @@ add_program_resource(struct gl_shader_program *prog, GLenum type,
    return true;
 }
 
+/* Checks whether the variable var is a packed varying and whether the given
+ * name is part of that packed varying's list.
+ *
+ * A packed varying has a name like 'packed:a,b,c', where a, b and c are the
+ * separate variables packed into it.
+ */
+static bool
+included_in_packed_varying(ir_variable *var, const char *name)
+{
+   if (strncmp(var->name, "packed:", 7) != 0)
+      return false;
+
+   char *list = strdup(var->name + 7);
+   assert(list);
+
+   bool found = false;
+   char *saveptr;
+   char *token = strtok_r(list, ",", &saveptr);
+   while (token) {
+      if (strcmp(token, name) == 0) {
+         found = true;
+         break;
+      }
+      token = strtok_r(NULL, ",", &saveptr);
+   }
+   free(list);
+   return found;
+}
+
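
The matcher above can be exercised on plain strings. A standalone re-implementation of the same logic (POSIX strdup/strtok_r, as in the pass itself), with a small usage check:

    #include <cassert>
    #include <cstdlib>
    #include <cstring>

    static bool name_in_packed_list(const char *var_name, const char *name) {
       if (strncmp(var_name, "packed:", 7) != 0)
          return false;
       char *list = strdup(var_name + 7);  // mutable copy for strtok_r
       bool found = false;
       char *saveptr;
       for (char *tok = strtok_r(list, ",", &saveptr); tok;
            tok = strtok_r(NULL, ",", &saveptr)) {
          if (strcmp(tok, name) == 0) {
             found = true;
             break;
          }
       }
       free(list);
       return found;
    }

    int main() {
       assert(name_in_packed_list("packed:a,b,c", "b"));
       assert(!name_in_packed_list("packed:a,b,c", "d"));
       assert(!name_in_packed_list("gl_Position", "a"));
       return 0;
    }
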
 /**
  * Function builds a stage reference bitmask from variable name.
  */
@@ -3134,6 +3223,11 @@ build_stageref(struct gl_shader_program *shProg, const char *name,
          if (var) {
             unsigned baselen = strlen(var->name);
 
+            if (included_in_packed_varying(var, name)) {
+               stages |= (1 << i);
+               break;
+            }
+
             /* Type needs to match if specified, otherwise we might
              * pick a variable with same name but different interface.
              */
@@ -3159,9 +3253,9 @@ build_stageref(struct gl_shader_program *shProg, const char *name,
 
 static bool
 add_interface_variables(struct gl_shader_program *shProg,
-                        struct gl_shader *sh, GLenum programInterface)
+                        exec_list *ir, GLenum programInterface)
 {
-   foreach_in_list(ir_instruction, node, sh->ir) {
+   foreach_in_list(ir_instruction, node, ir) {
       ir_variable *var = node->as_variable();
       uint8_t mask = 0;
 
@@ -3196,6 +3290,12 @@ add_interface_variables(struct gl_shader_program *shProg,
          continue;
       };
 
+      /* Skip packed varyings; they are handled separately by
+       * add_packed_varyings.
+       */
+      if (strncmp(var->name, "packed:", 7) == 0)
+         continue;
+
       if (!add_program_resource(shProg, programInterface, var,
                                 build_stageref(shProg, var->name,
                                                var->data.mode) | mask))
@@ -3204,13 +3304,43 @@ add_interface_variables(struct gl_shader_program *shProg,
    return true;
 }
 
+static bool
+add_packed_varyings(struct gl_shader_program *shProg, int stage)
+{
+   struct gl_shader *sh = shProg->_LinkedShaders[stage];
+   GLenum iface;
+
+   if (!sh || !sh->packed_varyings)
+      return true;
+
+   foreach_in_list(ir_instruction, node, sh->packed_varyings) {
+      ir_variable *var = node->as_variable();
+      if (var) {
+         switch (var->data.mode) {
+         case ir_var_shader_in:
+            iface = GL_PROGRAM_INPUT;
+            break;
+         case ir_var_shader_out:
+            iface = GL_PROGRAM_OUTPUT;
+            break;
+         default:
+            unreachable("unexpected type");
+         }
+         if (!add_program_resource(shProg, iface, var,
+                                   build_stageref(shProg, var->name,
+                                                  var->data.mode)))
+            return false;
+      }
+   }
+   return true;
+}
+
 /**
  * Builds up a list of program resources that point to existing
  * resource data.
  */
 void
-build_program_resource_list(struct gl_context *ctx,
-                            struct gl_shader_program *shProg)
+build_program_resource_list(struct gl_shader_program *shProg)
 {
    /* Rebuild resource list. */
    if (shProg->ProgramResourceList) {
@@ -3237,12 +3367,17 @@ build_program_resource_list(struct gl_context *ctx,
    if (input_stage == MESA_SHADER_STAGES && output_stage == 0)
       return;
 
+   if (!add_packed_varyings(shProg, input_stage))
+      return;
+   if (!add_packed_varyings(shProg, output_stage))
+      return;
+
    /* Add inputs and outputs to the resource list. */
-   if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage],
+   if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir,
                                 GL_PROGRAM_INPUT))
       return;
 
-   if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage],
+   if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage]->ir,
                                 GL_PROGRAM_OUTPUT))
       return;
 
@@ -3275,14 +3410,18 @@ build_program_resource_list(struct gl_context *ctx,
          }
       }
 
-      if (!add_program_resource(shProg, GL_UNIFORM,
+      bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage;
+      GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM;
+      if (!add_program_resource(shProg, type,
                                 &shProg->UniformStorage[i], stageref))
          return;
    }
 
-   /* Add program uniform blocks. */
-   for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
-      if (!add_program_resource(shProg, GL_UNIFORM_BLOCK,
+   /* Add program uniform blocks and shader storage blocks. */
+   for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
+      bool is_shader_storage = shProg->UniformBlocks[i].IsShaderStorage;
+      GLenum type = is_shader_storage ? GL_SHADER_STORAGE_BLOCK : GL_UNIFORM_BLOCK;
+      if (!add_program_resource(shProg, type,
           &shProg->UniformBlocks[i], 0))
          return;
    }
@@ -3364,9 +3503,8 @@ validate_sampler_array_indexing(struct gl_context *ctx,
    return true;
 }
 
-void
-link_assign_subroutine_types(struct gl_context *ctx,
-                             struct gl_shader_program *prog)
+static void
+link_assign_subroutine_types(struct gl_shader_program *prog)
 {
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
       gl_shader *sh = prog->_LinkedShaders[i];
@@ -3460,6 +3598,25 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    prog->Version = max_version;
    prog->IsES = is_es_prog;
 
+   /* From OpenGL 4.5 Core specification (7.3 Program Objects):
+    *     "Linking can fail for a variety of reasons as specified in the OpenGL
+    *     Shading Language Specification, as well as any of the following
+    *     reasons:
+    *
+    *     * No shader objects are attached to program.
+    *
+    *     ..."
+    *
+    *     Same rule applies for OpenGL ES >= 3.1.
+    */
+
+   if (prog->NumShaders == 0 &&
+       ((ctx->API == API_OPENGL_CORE && ctx->Version >= 45) ||
+        (ctx->API == API_OPENGLES2 && ctx->Version >= 31))) {
+      linker_error(prog, "No shader objects are attached to program.\n");
+      goto done;
+   }
+
    /* Some shaders have to be linked with some other shaders present.
     */
    if (num_shaders[MESA_SHADER_GEOMETRY] > 0 &&
@@ -3588,7 +3745,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    }
 
    check_explicit_uniform_locations(ctx, prog);
-   link_assign_subroutine_types(ctx, prog);
+   link_assign_subroutine_types(prog);
 
    if (!prog->LinkStatus)
       goto done;
@@ -3848,9 +4005,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    link_assign_atomic_counter_resources(ctx, prog);
    store_fragdepth_layout(prog);
 
-   link_calculate_subroutine_compat(ctx, prog);
+   link_calculate_subroutine_compat(prog);
    check_resources(ctx, prog);
-   check_subroutine_resources(ctx, prog);
+   check_subroutine_resources(prog);
    check_image_resources(ctx, prog);
    link_check_atomic_counter_resources(ctx, prog);
 
@@ -3864,10 +4021,31 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
     * behavior specified in GLSL specification.
     */
    if (!prog->SeparateShader && ctx->API == API_OPENGLES2) {
-      if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
-        linker_error(prog, "program lacks a vertex shader\n");
-      } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
-        linker_error(prog, "program lacks a fragment shader\n");
+      /* With ES < 3.1 a program always needs both vertex and fragment shaders. */
+      if (ctx->Version < 31) {
+         if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
+           linker_error(prog, "program lacks a vertex shader\n");
+         } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
+           linker_error(prog, "program lacks a fragment shader\n");
+         }
+      } else {
+         /* From OpenGL ES 3.1 specification (7.3 Program Objects):
+          *     "Linking can fail for a variety of reasons as specified in the
+          *     OpenGL ES Shading Language Specification, as well as any of the
+          *     following reasons:
+          *
+          *     ...
+          *
+          *     * program contains objects to form either a vertex shader or
+          *       fragment shader, and program is not separable, and does not
+          *       contain objects to form both a vertex shader and fragment
+          *       shader."
+          */
+         if (!!prog->_LinkedShaders[MESA_SHADER_VERTEX] ^
+             !!prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
+            linker_error(prog, "Program needs to contain both vertex and "
+                         "fragment shaders.\n");
+         }
       }
    }
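
The `!! ... ^ !! ...` test above normalizes the two stage pointers to booleans and XORs them, so linking fails exactly when one stage is present without the other. A toy demonstration with dummy pointers instead of gl_shader:

    #include <cassert>

    int main() {
       int dummy;
       void *vertex = &dummy, *fragment = nullptr;
       assert((!!vertex ^ !!fragment) == 1);  // one without the other: error path
       fragment = &dummy;
       assert((!!vertex ^ !!fragment) == 0);  // both present: fine
       vertex = fragment = nullptr;
       assert((!!vertex ^ !!fragment) == 0);  // neither present: also allowed here
       return 0;
    }
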
 
index ce3dc32..c80be1c 100644 (file)
@@ -26,9 +26,6 @@
 #ifndef GLSL_LINKER_H
 #define GLSL_LINKER_H
 
-ir_function_signature *
-link_get_main_function_signature(gl_shader *sh);
-
 extern bool
 link_function_calls(gl_shader_program *prog, gl_shader *main,
                    gl_shader **shader_list, unsigned num_shaders);
@@ -56,6 +53,7 @@ link_uniform_blocks_are_compatible(const gl_uniform_block *a,
 
 extern unsigned
 link_uniform_blocks(void *mem_ctx,
+                    struct gl_context *ctx,
                     struct gl_shader_program *prog,
                     struct gl_shader **shader_list,
                     unsigned num_shaders,
@@ -153,6 +151,7 @@ protected:
     */
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major, const glsl_type *record_type,
+                            const unsigned packing,
                             bool last_field);
 
    /**
@@ -176,10 +175,12 @@ protected:
    virtual void visit_field(const glsl_struct_field *field);
 
    virtual void enter_record(const glsl_type *type, const char *name,
-                             bool row_major);
+                             bool row_major, const unsigned packing);
 
    virtual void leave_record(const glsl_type *type, const char *name,
-                             bool row_major);
+                             bool row_major, const unsigned packing);
+
+   virtual void set_record_array_count(unsigned record_array_count);
 
 private:
    /**
@@ -191,7 +192,8 @@ private:
     */
    void recursion(const glsl_type *t, char **name, size_t name_length,
                   bool row_major, const glsl_type *record_type,
-                  bool last_field);
+                  const unsigned packing,
+                  bool last_field, unsigned record_array_count);
 };
 
 void
index cfe414a..5d66ca9 100644 (file)
@@ -170,7 +170,7 @@ public:
                                  exec_list *out_instructions,
                                  exec_list *out_variables);
 
-   void run(exec_list *instructions);
+   void run(struct gl_shader *shader);
 
 private:
    void bitwise_assign_pack(ir_rvalue *lhs, ir_rvalue *rhs);
@@ -252,9 +252,9 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
 }
 
 void
-lower_packed_varyings_visitor::run(exec_list *instructions)
+lower_packed_varyings_visitor::run(struct gl_shader *shader)
 {
-   foreach_in_list(ir_instruction, node, instructions) {
+   foreach_in_list(ir_instruction, node, shader->ir) {
       ir_variable *var = node->as_variable();
       if (var == NULL)
          continue;
@@ -272,6 +272,14 @@ lower_packed_varyings_visitor::run(exec_list *instructions)
       assert(var->data.interpolation == INTERP_QUALIFIER_FLAT ||
              !var->type->contains_integer());
 
+      /* Clone the variable for the program resource list before it gets
+       * modified and lost.
+       */
+      if (!shader->packed_varyings)
+         shader->packed_varyings = new (shader) exec_list;
+
+      shader->packed_varyings->push_tail(var->clone(shader, NULL));
+
       /* Change the old varying into an ordinary global. */
       assert(var->data.mode != ir_var_temporary);
       var->data.mode = ir_var_auto;
@@ -711,7 +719,7 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
                                          gs_input_vertices,
                                          &new_instructions,
                                          &new_variables);
-   visitor.run(instructions);
+   visitor.run(shader);
    if (mode == ir_var_shader_out) {
       if (shader->Stage == MESA_SHADER_GEOMETRY) {
          /* For geometry shaders, outputs need to be lowered before each call
index 8b08107..e581306 100644 (file)
@@ -57,7 +57,7 @@ using namespace ir_builder;
  * thing referenced is row-major.
  */
 static bool
-is_dereferenced_thing_row_major(const ir_dereference *deref)
+is_dereferenced_thing_row_major(const ir_rvalue *deref)
 {
    bool matrix = false;
    const ir_rvalue *ir = deref;
@@ -143,11 +143,12 @@ public:
    ir_visitor_status visit_enter(ir_assignment *ir);
 
    void setup_for_load_or_store(ir_variable *var,
-                                ir_dereference *deref,
+                                ir_rvalue *deref,
                                 ir_rvalue **offset,
                                 unsigned *const_offset,
                                 bool *row_major,
-                                int *matrix_columns);
+                                int *matrix_columns,
+                                unsigned packing);
    ir_expression *ubo_load(const struct glsl_type *type,
                           ir_rvalue *offset);
    ir_call *ssbo_load(const struct glsl_type *type,
@@ -164,7 +165,24 @@ public:
    void emit_access(bool is_write, ir_dereference *deref,
                     ir_variable *base_offset, unsigned int deref_offset,
                     bool row_major, int matrix_columns,
-                    unsigned write_mask);
+                    unsigned packing, unsigned write_mask);
+
+   ir_visitor_status visit_enter(class ir_expression *);
+   ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
+   void check_ssbo_unsized_array_length_expression(class ir_expression *);
+   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);
+
+   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
+                                                    ir_dereference *,
+                                                    ir_variable *);
+   ir_expression *emit_ssbo_get_buffer_size();
+
+   unsigned calculate_unsized_array_stride(ir_dereference *deref,
+                                           unsigned packing);
+
+   ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
+   ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
+   ir_visitor_status visit_enter(ir_call *ir);
 
    void *mem_ctx;
    struct gl_shader *shader;
@@ -182,7 +200,7 @@ public:
  * \c UniformBlocks array.
  */
 static const char *
-interface_field_name(void *mem_ctx, char *base_name, ir_dereference *d,
+interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
                      ir_rvalue **nonconst_block_index)
 {
    ir_rvalue *previous_index = NULL;
@@ -228,7 +246,12 @@ interface_field_name(void *mem_ctx, char *base_name, ir_dereference *d,
 
          break;
       }
+      case ir_type_swizzle: {
+         ir_swizzle *s = (ir_swizzle *) d;
 
+         d = s->val->as_dereference();
+         break;
+      }
       default:
          assert(!"Should not get here.");
          break;
@@ -241,11 +264,12 @@ interface_field_name(void *mem_ctx, char *base_name, ir_dereference *d,
 
 void
 lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
-                                                     ir_dereference *deref,
+                                                     ir_rvalue *deref,
                                                      ir_rvalue **offset,
                                                      unsigned *const_offset,
                                                      bool *row_major,
-                                                     int *matrix_columns)
+                                                     int *matrix_columns,
+                                                     unsigned packing)
 {
    /* Determine the name of the interface block */
    ir_rvalue *nonconst_block_index;
@@ -331,8 +355,15 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
             const bool array_row_major =
                is_dereferenced_thing_row_major(deref_array);
 
-            array_stride = deref_array->type->std140_size(array_row_major);
-            array_stride = glsl_align(array_stride, 16);
+            /* The array type will give the correct interface packing
+             * information
+             */
+            if (packing == GLSL_INTERFACE_PACKING_STD430) {
+               array_stride = deref_array->type->std430_array_stride(array_row_major);
+            } else {
+               array_stride = deref_array->type->std140_size(array_row_major);
+               array_stride = glsl_align(array_stride, 16);
+            }
          }
 
          ir_rvalue *array_index = deref_array->array_index;
@@ -368,7 +399,12 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
 
             ralloc_free(field_deref);
 
-            unsigned field_align = type->std140_base_alignment(field_row_major);
+            unsigned field_align = 0;
+
+            if (packing == GLSL_INTERFACE_PACKING_STD430)
+               field_align = type->std430_base_alignment(field_row_major);
+            else
+               field_align = type->std140_base_alignment(field_row_major);
 
             intra_struct_offset = glsl_align(intra_struct_offset, field_align);
 
@@ -376,7 +412,10 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
                        deref_record->field) == 0)
                break;
 
-            intra_struct_offset += type->std140_size(field_row_major);
+            if (packing == GLSL_INTERFACE_PACKING_STD430)
+               intra_struct_offset += type->std430_size(field_row_major);
+            else
+               intra_struct_offset += type->std140_size(field_row_major);
 
             /* If the field just examined was itself a structure, apply rule
              * #9:
@@ -397,6 +436,16 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
          break;
       }
 
+      case ir_type_swizzle: {
+         ir_swizzle *deref_swizzle = (ir_swizzle *) deref;
+
+         assert(deref_swizzle->mask.num_components == 1);
+
+         *const_offset += deref_swizzle->mask.x * sizeof(int);
+         deref = deref_swizzle->val->as_dereference();
+         break;
+      }
+
       default:
          assert(!"not reached");
          deref = NULL;
@@ -425,13 +474,15 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
    unsigned const_offset;
    bool row_major;
    int matrix_columns;
+   unsigned packing = var->get_interface_type()->interface_packing;
 
    /* Compute the offset to the start of the dereference as well as other
     * information we need to configure the write
     */
    setup_for_load_or_store(var, deref,
                            &offset, &const_offset,
-                           &row_major, &matrix_columns);
+                           &row_major, &matrix_columns,
+                           packing);
    assert(offset);
 
    /* Now that we've calculated the offset to the start of the
@@ -451,7 +502,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
 
    deref = new(mem_ctx) ir_dereference_variable(load_var);
    emit_access(false, deref, load_offset, const_offset,
-               row_major, matrix_columns, 0);
+               row_major, matrix_columns, packing, 0);
    *rvalue = deref;
 
    progress = true;
@@ -569,6 +620,7 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
                                          unsigned int deref_offset,
                                          bool row_major,
                                          int matrix_columns,
+                                         unsigned packing,
                                          unsigned write_mask)
 {
    if (deref->type->is_record()) {
@@ -587,7 +639,7 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
 
          emit_access(is_write, field_deref, base_offset,
                      deref_offset + field_offset,
-                     row_major, 1,
+                     row_major, 1, packing,
                      writemask_for_size(field_deref->type->vector_elements));
 
          field_offset += field->type->std140_size(row_major);
@@ -596,7 +648,8 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
    }
 
    if (deref->type->is_array()) {
-      unsigned array_stride =
+      unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ?
+         deref->type->fields.array->std430_array_stride(row_major) :
          glsl_align(deref->type->fields.array->std140_size(row_major), 16);
 
       for (unsigned i = 0; i < deref->type->length; i++) {
@@ -606,7 +659,7 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
                                               element);
          emit_access(is_write, element_deref, base_offset,
                      deref_offset + i * array_stride,
-                     row_major, 1,
+                     row_major, 1, packing,
                      writemask_for_size(element_deref->type->vector_elements));
       }
       return;
@@ -625,18 +678,33 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
             int size_mul = deref->type->is_double() ? 8 : 4;
             emit_access(is_write, col_deref, base_offset,
                         deref_offset + i * size_mul,
-                        row_major, deref->type->matrix_columns,
+                        row_major, deref->type->matrix_columns, packing,
                         writemask_for_size(col_deref->type->vector_elements));
          } else {
-            /* std140 always rounds the stride of arrays (and matrices) to a
-             * vec4, so matrices are always 16 between columns/rows. With
-             * doubles, they will be 32 apart when there are more than 2 rows.
-             */
-            int size_mul = (deref->type->is_double() &&
-                            deref->type->vector_elements > 2) ? 32 : 16;
+            int size_mul;
+
+            /* std430 doesn't round up vec2 size to a vec4 size */
+            if (packing == GLSL_INTERFACE_PACKING_STD430 &&
+                deref->type->vector_elements == 2 &&
+                !deref->type->is_double()) {
+               size_mul = 8;
+            } else {
+               /* std140 always rounds the stride of arrays (and matrices) to a
+                * vec4, so matrices are always 16 between columns/rows. With
+                * doubles, they will be 32 apart when there are more than 2 rows.
+                *
+                * For both std140 and std430, if the member is a
+                * three-component vector with components consuming N basic
+                * machine units, the base alignment is 4N. For vec4, base
+                * alignment is 4N.
+                */
+               size_mul = (deref->type->is_double() &&
+                           deref->type->vector_elements > 2) ? 32 : 16;
+            }
+
             emit_access(is_write, col_deref, base_offset,
                         deref_offset + i * size_mul,
-                        row_major, deref->type->matrix_columns,
+                        row_major, deref->type->matrix_columns, packing,
                         writemask_for_size(col_deref->type->vector_elements));
          }
       }
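
Worked values for the size_mul selection above, mirrored in a small helper (the names are illustrative, not Mesa's):

    #include <cassert>

    enum packing_mode { STD140, STD430 };

    // Stride in bytes between matrix columns (or rows, when row-major).
    static int column_stride(packing_mode packing, int vector_elements,
                             bool is_double) {
       if (packing == STD430 && vector_elements == 2 && !is_double)
          return 8;  // std430 does not round a vec2 column up to a vec4
       return (is_double && vector_elements > 2) ? 32 : 16;
    }

    int main() {
       assert(column_stride(STD430, 2, false) == 8);   // mat2 under std430
       assert(column_stride(STD140, 2, false) == 16);  // mat2 under std140
       assert(column_stride(STD430, 3, false) == 16);  // vec3 column pads to 4N
       assert(column_stride(STD140, 4, true)  == 32);  // dmat4: dvec4 columns
       return 0;
    }
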
@@ -715,13 +783,15 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
    unsigned const_offset;
    bool row_major;
    int matrix_columns;
+   unsigned packing = var->get_interface_type()->interface_packing;
 
    /* Compute the offset to the start of the dereference as well as other
     * information we need to configure the write
     */
    setup_for_load_or_store(var, deref,
                            &offset, &const_offset,
-                           &row_major, &matrix_columns);
+                           &row_major, &matrix_columns,
+                           packing);
    assert(offset);
 
    /* Now emit writes from the temporary to memory */
@@ -735,7 +805,190 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
 
    deref = new(mem_ctx) ir_dereference_variable(write_var);
    emit_access(true, deref, write_offset, const_offset,
-               row_major, matrix_columns, write_mask);
+               row_major, matrix_columns, packing, write_mask);
+}
+
+ir_visitor_status
+lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
+{
+   check_ssbo_unsized_array_length_expression(ir);
+   return rvalue_visit(ir);
+}
+
+ir_expression *
+lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
+{
+   if (expr->operation !=
+       ir_expression_operation(ir_unop_ssbo_unsized_array_length))
+      return NULL;
+
+   ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
+   if (!rvalue ||
+       !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
+      return NULL;
+
+   ir_dereference *deref = expr->operands[0]->as_dereference();
+   if (!deref)
+      return NULL;
+
+   ir_variable *var = expr->operands[0]->variable_referenced();
+   if (!var || !var->is_in_shader_storage_block())
+      return NULL;
+   return process_ssbo_unsized_array_length(&rvalue, deref, var);
+}
+
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
+{
+   if (ir->operation ==
+       ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
+      /* Don't replace this unop if it is found alone. It is going to be
+       * removed by the optimization passes or replaced if it is part of
+       * an ir_assignment or another ir_expression.
+       */
+      return;
+   }
+
+   for (unsigned i = 0; i < ir->get_num_operands(); i++) {
+      if (ir->operands[i]->ir_type != ir_type_expression)
+         continue;
+      ir_expression *expr = (ir_expression *) ir->operands[i];
+      ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
+      if (!temp)
+         continue;
+
+      delete expr;
+      ir->operands[i] = temp;
+   }
+}
+
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
+{
+   if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
+      return;
+
+   ir_expression *expr = (ir_expression *) ir->rhs;
+   ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
+   if (!temp)
+      return;
+
+   delete expr;
+   ir->rhs = temp;
+   return;
+}
+
+ir_expression *
+lower_ubo_reference_visitor::emit_ssbo_get_buffer_size()
+{
+   ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
+   return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
+                                     glsl_type::int_type,
+                                     block_ref);
+}
+
+unsigned
+lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
+                                                            unsigned packing)
+{
+   unsigned array_stride = 0;
+
+   switch (deref->ir_type) {
+   case ir_type_dereference_variable:
+   {
+      ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
+      const struct glsl_type *unsized_array_type = NULL;
+      /* An unsized array can be sized by other lowering passes, so use
+       * the element type of the array (fields.array) as the type whose
+       * stride is computed here.
+       */
+      unsized_array_type = deref_var->var->type->fields.array;
+
+      /* Whether or not the field is row-major (because it might be a
+       * bvec2 or something) does not affect the array itself. We need
+       * to know whether an array element in its entirety is row-major.
+       */
+      const bool array_row_major =
+         is_dereferenced_thing_row_major(deref_var);
+
+      if (packing == GLSL_INTERFACE_PACKING_STD430) {
+         array_stride = unsized_array_type->std430_array_stride(array_row_major);
+      } else {
+         array_stride = unsized_array_type->std140_size(array_row_major);
+         array_stride = glsl_align(array_stride, 16);
+      }
+      break;
+   }
+   case ir_type_dereference_record:
+   {
+      ir_dereference_record *deref_record = (ir_dereference_record *) deref;
+      ir_dereference *interface_deref =
+         deref_record->record->as_dereference();
+      assert(interface_deref != NULL);
+      const struct glsl_type *interface_type = interface_deref->type;
+      unsigned record_length = interface_type->length;
+      /* Unsized array is always the last element of the interface */
+      const struct glsl_type *unsized_array_type =
+         interface_type->fields.structure[record_length - 1].type->fields.array;
+
+      const bool array_row_major =
+         is_dereferenced_thing_row_major(deref_record);
+
+      if (packing == GLSL_INTERFACE_PACKING_STD430) {
+         array_stride = unsized_array_type->std430_array_stride(array_row_major);
+      } else {
+         array_stride = unsized_array_type->std140_size(array_row_major);
+         array_stride = glsl_align(array_stride, 16);
+      }
+      break;
+   }
+   default:
+      unreachable("Unsupported dereference type");
+   }
+   return array_stride;
+}
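+
+/* Illustrative: for "layout(std430) buffer B { float a[]; };" the array
+ * stride of a[] is 4 bytes, whereas std140 pads every element to a vec4
+ * slot, so std140_size(float) = 4 is aligned up to a stride of 16.
+ */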
+
+ir_expression *
+lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
+                                                               ir_dereference *deref,
+                                                               ir_variable *var)
+{
+   mem_ctx = ralloc_parent(*rvalue);
+
+   ir_rvalue *base_offset = NULL;
+   unsigned const_offset;
+   bool row_major;
+   int matrix_columns;
+   unsigned packing = var->get_interface_type()->interface_packing;
+   int unsized_array_stride = calculate_unsized_array_stride(deref, packing);
+
+   /* Compute the offset to the start of the dereference as well as other
+    * information we need to calculate the length.
+    */
+   setup_for_load_or_store(var, deref,
+                           &base_offset, &const_offset,
+                           &row_major, &matrix_columns,
+                           packing);
+   /* array.length() =
+    *  max((buffer_object_size - offset_of_array) / stride_of_array, 0)
+    */
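+   /* Illustrative: with "layout(std430) buffer B { vec2 pad; float a[]; };"
+    * the array starts at byte offset 8 with stride 4, so the code below
+    * builds max((get_buffer_size(B) - 8) / 4, 0).
+    */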
+   ir_expression *buffer_size = emit_ssbo_get_buffer_size();
+
+   ir_expression *offset_of_array = new(mem_ctx)
+      ir_expression(ir_binop_add, base_offset,
+                    new(mem_ctx) ir_constant(const_offset));
+   ir_expression *offset_of_array_int = new(mem_ctx)
+      ir_expression(ir_unop_u2i, offset_of_array);
+
+   ir_expression *sub = new(mem_ctx)
+      ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
+   ir_expression *div = new(mem_ctx)
+      ir_expression(ir_binop_div, sub,
+                    new(mem_ctx) ir_constant(unsized_array_stride));
+   ir_expression *max = new(mem_ctx)
+      ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));
+
+   return max;
 }
 
 void
@@ -777,10 +1030,149 @@ lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
 ir_visitor_status
 lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
 {
+   check_ssbo_unsized_array_length_assignment(ir);
    check_for_ssbo_store(ir);
    return rvalue_visit(ir);
 }
 
+/* Lowers the intrinsic call to a new internal intrinsic that swaps the
+ * access to the buffer variable in the first parameter by an offset
+ * and block index. This involves creating the new internal intrinsic
+ * (i.e. the new function signature).
+ */
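+/* Sketch of the rewrite (illustrative call): a lowered
+ *
+ *    __intrinsic_ssbo_atomic_add(b.counter, 1u)
+ *
+ * becomes
+ *
+ *    __intrinsic_ssbo_atomic_add_internal(block_index, offset, 1u)
+ *
+ * where block_index and offset come from setup_for_load_or_store().
+ */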
+ir_call *
+lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
+{
+   /* SSBO atomics usually have 2 parameters, the buffer variable and an
+    * integer argument. The exception is CompSwap, that has an additional
+    * integer parameter.
+    */
+   int param_count = ir->actual_parameters.length();
+   assert(param_count == 2 || param_count == 3);
+
+   /* First argument must be a scalar integer buffer variable */
+   exec_node *param = ir->actual_parameters.get_head();
+   ir_instruction *inst = (ir_instruction *) param;
+   assert(inst->ir_type == ir_type_dereference_variable ||
+          inst->ir_type == ir_type_dereference_array ||
+          inst->ir_type == ir_type_dereference_record ||
+          inst->ir_type == ir_type_swizzle);
+
+   ir_rvalue *deref = (ir_rvalue *) inst;
+   assert(deref->type->is_scalar() && deref->type->is_integer());
+
+   ir_variable *var = deref->variable_referenced();
+   assert(var);
+
+   /* Compute the offset to the start of the dereference and the
+    * block index.
+    */
+   mem_ctx = ralloc_parent(shader->ir);
+
+   ir_rvalue *offset = NULL;
+   unsigned const_offset;
+   bool row_major;
+   int matrix_columns;
+   unsigned packing = var->get_interface_type()->interface_packing;
+
+   setup_for_load_or_store(var, deref,
+                           &offset, &const_offset,
+                           &row_major, &matrix_columns,
+                           packing);
+   assert(offset);
+   assert(!row_major);
+   assert(matrix_columns == 1);
+
+   ir_rvalue *deref_offset =
+      add(offset, new(mem_ctx) ir_constant(const_offset));
+   ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
+
+   /* Create the new internal function signature that will take a block
+    * index and offset instead of a buffer variable
+    */
+   exec_list sig_params;
+   ir_variable *sig_param = new(mem_ctx)
+      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
+   sig_params.push_tail(sig_param);
+
+   sig_param = new(mem_ctx)
+      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
+   sig_params.push_tail(sig_param);
+
+   const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
+      glsl_type::int_type : glsl_type::uint_type;
+   sig_param = new(mem_ctx)
+         ir_variable(type, "data1", ir_var_function_in);
+   sig_params.push_tail(sig_param);
+
+   if (param_count == 3) {
+      sig_param = new(mem_ctx)
+            ir_variable(type, "data2", ir_var_function_in);
+      sig_params.push_tail(sig_param);
+   }
+
+   ir_function_signature *sig =
+      new(mem_ctx) ir_function_signature(deref->type,
+                                         shader_storage_buffer_object);
+   assert(sig);
+   sig->replace_parameters(&sig_params);
+   sig->is_intrinsic = true;
+
+   char func_name[64];
+   snprintf(func_name, sizeof(func_name), "%s_internal", ir->callee_name());
+   ir_function *f = new(mem_ctx) ir_function(func_name);
+   f->add_signature(sig);
+
+   /* Now, create the call to the internal intrinsic */
+   exec_list call_params;
+   call_params.push_tail(block_index);
+   call_params.push_tail(deref_offset);
+   param = ir->actual_parameters.get_head()->get_next();
+   ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
+   call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
+   if (param_count == 3) {
+      param = param->get_next();
+      param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
+      call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
+   }
+   ir_dereference_variable *return_deref =
+      ir->return_deref->clone(mem_ctx, NULL);
+   return new(mem_ctx) ir_call(sig, return_deref, &call_params);
+}
+
+ir_call *
+lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
+{
+   const char *callee = ir->callee_name();
+   if (!strcmp("__intrinsic_ssbo_atomic_add", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_min", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_max", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_and", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_or", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_xor", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_exchange", callee) ||
+       !strcmp("__intrinsic_ssbo_atomic_comp_swap", callee)) {
+      return lower_ssbo_atomic_intrinsic(ir);
+   }
+
+   return ir;
+}
+
+
+ir_visitor_status
+lower_ubo_reference_visitor::visit_enter(ir_call *ir)
+{
+   ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
+   if (new_ir != ir) {
+      progress = true;
+      base_ir->replace_with(new_ir);
+      return visit_continue_with_parent;
+   }
+
+   return rvalue_visit(ir);
+}
+
+
 } /* unnamed namespace */
 
 void
index fc90bc8..3da7a2f 100644
@@ -130,7 +130,7 @@ lower_vertex_id(gl_shader *shader)
       return false;
 
    ir_function_signature *const main_sig =
-      link_get_main_function_signature(shader);
+      _mesa_get_main_function_signature(shader);
    if (main_sig == NULL) {
       assert(main_sig != NULL);
       return false;
index e3597e5..5ee6ff1 100644
@@ -140,6 +140,11 @@ glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options)
    v2.run(sh->ir);
    visit_exec_list(sh->ir, &v1);
 
+   nir_lower_outputs_to_temporaries(shader);
+
+   shader->gs.vertices_out = sh->Geom.VerticesOut;
+   shader->gs.invocations = sh->Geom.Invocations;
+
    return shader;
 }
 
@@ -646,11 +651,34 @@ nir_visitor::visit(ir_call *ir)
          op = nir_intrinsic_memory_barrier;
       } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) {
          op = nir_intrinsic_image_size;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) {
+         op = nir_intrinsic_image_samples;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) {
+         op = nir_intrinsic_store_ssbo;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
+         op = nir_intrinsic_load_ssbo;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_add_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_add;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_and_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_and;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_or_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_or;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_xor_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_xor;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_min_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_min;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_max_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_max;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_exchange_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_exchange;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) {
+         op = nir_intrinsic_ssbo_atomic_comp_swap;
       } else {
          unreachable("not reached");
       }
 
       nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
+      nir_dest *dest = &instr->dest;
 
       switch (op) {
       case nir_intrinsic_atomic_counter_read_var:
@@ -660,6 +688,7 @@ nir_visitor::visit(ir_call *ir)
             (ir_dereference *) ir->actual_parameters.get_head();
          instr->variables[0] = evaluate_deref(&instr->instr, param);
          nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
          break;
       }
       case nir_intrinsic_image_load:
@@ -672,6 +701,7 @@ nir_visitor::visit(ir_call *ir)
       case nir_intrinsic_image_atomic_xor:
       case nir_intrinsic_image_atomic_exchange:
       case nir_intrinsic_image_atomic_comp_swap:
+      case nir_intrinsic_image_samples:
       case nir_intrinsic_image_size: {
          nir_ssa_undef_instr *instr_undef =
             nir_ssa_undef_instr_create(shader, 1);
@@ -695,8 +725,11 @@ nir_visitor::visit(ir_call *ir)
                               info->dest_components, NULL);
          }
 
-         if (op == nir_intrinsic_image_size)
+         if (op == nir_intrinsic_image_size ||
+             op == nir_intrinsic_image_samples) {
+            nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
             break;
+         }
 
          /* Set the address argument, extending the coordinate vector to four
           * components.
@@ -738,16 +771,157 @@ nir_visitor::visit(ir_call *ir)
             instr->src[3] = evaluate_rvalue((ir_dereference *)param);
             param = param->get_next();
          }
+         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
          break;
       }
       case nir_intrinsic_memory_barrier:
+         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
          break;
+      case nir_intrinsic_store_ssbo: {
+         exec_node *param = ir->actual_parameters.get_head();
+         ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+         param = param->get_next();
+         ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+         param = param->get_next();
+         ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+
+         param = param->get_next();
+         ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+         assert(write_mask);
+
+         /* Check if we need the indirect version */
+         ir_constant *const_offset = offset->as_constant();
+         if (!const_offset) {
+            op = nir_intrinsic_store_ssbo_indirect;
+            ralloc_free(instr);
+            instr = nir_intrinsic_instr_create(shader, op);
+            instr->src[2] = evaluate_rvalue(offset);
+            instr->const_index[0] = 0;
+            dest = &instr->dest;
+         } else {
+            instr->const_index[0] = const_offset->value.u[0];
+         }
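+         /* Illustrative: "s.arr[i] = x" has a non-constant offset and takes
+          * the indirect path above, while "s.f = x" keeps the direct form
+          * with its byte offset in const_index[0].
+          */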
+
+         instr->const_index[1] = write_mask->value.u[0];
+
+         instr->src[0] = evaluate_rvalue(val);
+         instr->num_components = val->type->vector_elements;
+
+         instr->src[1] = evaluate_rvalue(block);
+         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+         break;
+      }
+      case nir_intrinsic_load_ssbo: {
+         exec_node *param = ir->actual_parameters.get_head();
+         ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+         param = param->get_next();
+         ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+         /* Check if we need the indirect version */
+         ir_constant *const_offset = offset->as_constant();
+         if (!const_offset) {
+            op = nir_intrinsic_load_ssbo_indirect;
+            ralloc_free(instr);
+            instr = nir_intrinsic_instr_create(shader, op);
+            instr->src[1] = evaluate_rvalue(offset);
+            instr->const_index[0] = 0;
+            dest = &instr->dest;
+         } else {
+            instr->const_index[0] = const_offset->value.u[0];
+         }
+
+         instr->src[0] = evaluate_rvalue(block);
+
+         const glsl_type *type = ir->return_deref->var->type;
+         instr->num_components = type->vector_elements;
+
+         /* Set up the destination register */
+         nir_ssa_dest_init(&instr->instr, &instr->dest,
+                           type->vector_elements, NULL);
+
+         /* Insert the created nir instruction now since in the case of boolean
+          * result we will need to emit another instruction after it
+          */
+         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+
+         /*
+          * In SSBOs/UBOs, a true boolean value is any non-zero value, but we
+          * consider a true boolean to be ~0. Fix this up with a != 0
+          * comparison.
+          */
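+         /* Illustrative NIR (SSA names invented): loading a bool emits
+          * roughly
+          *    ssa_1 = intrinsic load_ssbo ...
+          *    ssa_2 = load_const (0x00000000)
+          *    ssa_3 = ine ssa_1, ssa_2   <- canonical ~0/0 boolean
+          */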
+         if (type->base_type == GLSL_TYPE_BOOL) {
+            nir_load_const_instr *const_zero =
+               nir_load_const_instr_create(shader, 1);
+            const_zero->value.u[0] = 0;
+            nir_instr_insert_after_cf_list(this->cf_node_list,
+                                           &const_zero->instr);
+
+            nir_alu_instr *load_ssbo_compare =
+               nir_alu_instr_create(shader, nir_op_ine);
+            load_ssbo_compare->src[0].src.is_ssa = true;
+            load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa;
+            load_ssbo_compare->src[1].src.is_ssa = true;
+            load_ssbo_compare->src[1].src.ssa = &const_zero->def;
+            for (unsigned i = 0; i < type->vector_elements; i++)
+               load_ssbo_compare->src[1].swizzle[i] = 0;
+            nir_ssa_dest_init(&load_ssbo_compare->instr,
+                              &load_ssbo_compare->dest.dest,
+                              type->vector_elements, NULL);
+            load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
+            nir_instr_insert_after_cf_list(this->cf_node_list,
+                                           &load_ssbo_compare->instr);
+            dest = &load_ssbo_compare->dest.dest;
+         }
+         break;
+      }
+      case nir_intrinsic_ssbo_atomic_add:
+      case nir_intrinsic_ssbo_atomic_min:
+      case nir_intrinsic_ssbo_atomic_max:
+      case nir_intrinsic_ssbo_atomic_and:
+      case nir_intrinsic_ssbo_atomic_or:
+      case nir_intrinsic_ssbo_atomic_xor:
+      case nir_intrinsic_ssbo_atomic_exchange:
+      case nir_intrinsic_ssbo_atomic_comp_swap: {
+         int param_count = ir->actual_parameters.length();
+         assert(param_count == 3 || param_count == 4);
+
+         /* Block index */
+         exec_node *param = ir->actual_parameters.get_head();
+         ir_instruction *inst = (ir_instruction *) param;
+         instr->src[0] = evaluate_rvalue(inst->as_rvalue());
+
+         /* Offset */
+         param = param->get_next();
+         inst = (ir_instruction *) param;
+         instr->src[1] = evaluate_rvalue(inst->as_rvalue());
+
+         /* data1 parameter (this is always present) */
+         param = param->get_next();
+         inst = (ir_instruction *) param;
+         instr->src[2] = evaluate_rvalue(inst->as_rvalue());
+
+         /* data2 parameter (only with atomic_comp_swap) */
+         if (param_count == 4) {
+            assert(op == nir_intrinsic_ssbo_atomic_comp_swap);
+            param = param->get_next();
+            inst = (ir_instruction *) param;
+            instr->src[3] = evaluate_rvalue(inst->as_rvalue());
+         }
+
+         /* Atomic result */
+         assert(ir->return_deref);
+         nir_ssa_dest_init(&instr->instr, &instr->dest,
+                           ir->return_deref->type->vector_elements, NULL);
+         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+         break;
+      }
       default:
          unreachable("not reached");
       }
 
-      nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
-
       if (ir->return_deref) {
          nir_intrinsic_instr *store_instr =
             nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
@@ -755,7 +929,7 @@ nir_visitor::visit(ir_call *ir)
 
          store_instr->variables[0] =
             evaluate_deref(&store_instr->instr, ir->return_deref);
-         store_instr->src[0] = nir_src_for_ssa(&instr->dest.ssa);
+         store_instr->src[0] = nir_src_for_ssa(&dest->ssa);
 
          nir_instr_insert_after_cf_list(this->cf_node_list,
                                         &store_instr->instr);
@@ -922,7 +1096,8 @@ nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
 {
    nir_dest *dest = get_instr_dest(instr);
 
-   nir_ssa_dest_init(instr, dest, num_components, NULL);
+   if (dest)
+      nir_ssa_dest_init(instr, dest, num_components, NULL);
 
    nir_instr_insert_after_cf_list(this->cf_node_list, instr);
    this->result = instr;
@@ -1006,7 +1181,6 @@ nir_visitor::visit(ir_expression *ir)
       nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
       load->num_components = ir->type->vector_elements;
       load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */
-      load->const_index[1] = 1; /* number of vec4's */
       load->src[0] = evaluate_rvalue(ir->operands[0]);
       if (!const_index)
          load->src[1] = evaluate_rvalue(ir->operands[1]);
@@ -1319,6 +1493,16 @@ nir_visitor::visit(ir_expression *ir)
          unreachable("not reached");
       }
       break;
+   case ir_unop_get_buffer_size: {
+      nir_intrinsic_instr *load = nir_intrinsic_instr_create(
+         this->shader,
+         nir_intrinsic_get_buffer_size);
+      load->num_components = ir->type->vector_elements;
+      load->src[0] = evaluate_rvalue(ir->operands[0]);
+      add_instr(&load->instr, ir->type->vector_elements);
+      return;
+   }
+
    case ir_binop_add:
    case ir_binop_sub:
    case ir_binop_mul:
@@ -1722,6 +1906,11 @@ nir_visitor::visit(ir_texture *ir)
       num_srcs = 0;
       break;
 
+   case ir_texture_samples:
+      op = nir_texop_texture_samples;
+      num_srcs = 0;
+      break;
+
    default:
       unreachable("not reached");
    }
index bf00131..57fd959 100644
@@ -54,6 +54,9 @@ nir_shader_create(void *mem_ctx,
 
    shader->stage = stage;
 
+   shader->gs.vertices_out = 0;
+   shader->gs.invocations = 0;
+
    return shader;
 }
 
@@ -150,7 +153,7 @@ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
    }
 }
 
-void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
 {
    /* Copying an SSA definition makes no sense whatsoever. */
    assert(!src->is_ssa);
@@ -160,17 +163,18 @@ void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
    dest->reg.base_offset = src->reg.base_offset;
    dest->reg.reg = src->reg.reg;
    if (src->reg.indirect) {
-      dest->reg.indirect = ralloc(mem_ctx, nir_src);
-      nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
+      dest->reg.indirect = ralloc(instr, nir_src);
+      nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
    } else {
       dest->reg.indirect = NULL;
    }
 }
 
 void
-nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
+nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+                 nir_alu_instr *instr)
 {
-   nir_src_copy(&dest->src, &src->src, mem_ctx);
+   nir_src_copy(&dest->src, &src->src, &instr->instr);
    dest->abs = src->abs;
    dest->negate = src->negate;
    for (unsigned i = 0; i < 4; i++)
@@ -178,9 +182,10 @@ nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
 }
 
 void
-nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, void *mem_ctx)
+nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+                  nir_alu_instr *instr)
 {
-   nir_dest_copy(&dest->dest, &src->dest, mem_ctx);
+   nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
    dest->write_mask = src->write_mask;
    dest->saturate = src->saturate;
 }
@@ -712,9 +717,16 @@ nir_instr_insert(nir_cursor cursor, nir_instr *instr)
 }
 
 static bool
+src_is_valid(const nir_src *src)
+{
+   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
+}
+
+static bool
 remove_use_cb(nir_src *src, void *state)
 {
-   list_del(&src->use_link);
+   if (src_is_valid(src))
+      list_del(&src->use_link);
 
    return true;
 }
@@ -1097,12 +1109,6 @@ nir_srcs_equal(nir_src src1, nir_src src2)
    }
 }
 
-static bool
-src_is_valid(const nir_src *src)
-{
-   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
-}
-
 static void
 src_remove_all_uses(nir_src *src)
 {
@@ -1172,6 +1178,30 @@ nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
 }
 
 void
+nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
+{
+   if (dest->is_ssa) {
+      /* We can only overwrite an SSA destination if it has no uses. */
+      assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
+   } else {
+      list_del(&dest->reg.def_link);
+      if (dest->reg.indirect)
+         src_remove_all_uses(dest->reg.indirect);
+   }
+
+   /* We can't re-write with an SSA def */
+   assert(!new_dest.is_ssa);
+
+   nir_dest_copy(dest, &new_dest, instr);
+
+   dest->reg.parent_instr = instr;
+   list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);
+
+   if (dest->reg.indirect)
+      src_add_all_uses(dest->reg.indirect, instr, NULL);
+}
+
+void
 nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                  unsigned num_components, const char *name)
 {
@@ -1200,21 +1230,21 @@ nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
 }
 
 void
-nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx)
+nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
 {
    assert(!new_src.is_ssa || def != new_src.ssa);
 
    nir_foreach_use_safe(def, use_src) {
       nir_instr *src_parent_instr = use_src->parent_instr;
       list_del(&use_src->use_link);
-      nir_src_copy(use_src, &new_src, mem_ctx);
+      nir_src_copy(use_src, &new_src, src_parent_instr);
       src_add_all_uses(use_src, src_parent_instr, NULL);
    }
 
    nir_foreach_if_use_safe(def, use_src) {
       nir_if *src_parent_if = use_src->parent_if;
       list_del(&use_src->use_link);
-      nir_src_copy(use_src, &new_src, mem_ctx);
+      nir_src_copy(use_src, &new_src, src_parent_if);
       src_add_all_uses(use_src, NULL, src_parent_if);
    }
 }
@@ -1293,6 +1323,13 @@ foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
 }
 
 bool
+nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+                             void *state)
+{
+   return foreach_cf_node(node, cb, false, state);
+}
+
+bool
 nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, void *state)
 {
    foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
@@ -1335,6 +1372,22 @@ nir_block_get_following_if(nir_block *block)
    return nir_cf_node_as_if(next_node);
 }
 
+nir_loop *
+nir_block_get_following_loop(nir_block *block)
+{
+   if (exec_node_is_tail_sentinel(&block->cf_node.node))
+      return NULL;
+
+   if (nir_cf_node_is_last(&block->cf_node))
+      return NULL;
+
+   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
+
+   if (next_node->type != nir_cf_node_loop)
+      return NULL;
+
+   return nir_cf_node_as_loop(next_node);
+}
 static bool
 index_block(nir_block *block, void *state)
 {
@@ -1374,6 +1427,10 @@ index_ssa_block(nir_block *block, void *state)
    return true;
 }
 
+/**
+ * The indices are applied top-to-bottom which has the very nice property
+ * that, if A dominates B, then A->index <= B->index.
+ */
 void
 nir_index_ssa_defs(nir_function_impl *impl)
 {
@@ -1381,3 +1438,105 @@ nir_index_ssa_defs(nir_function_impl *impl)
    nir_foreach_block(impl, index_ssa_block, &index);
    impl->ssa_alloc = index;
 }
+
+static bool
+index_instrs_block(nir_block *block, void *state)
+{
+   unsigned *index = state;
+   nir_foreach_instr(block, instr)
+      instr->index = (*index)++;
+
+   return true;
+}
+
+/**
+ * The indices are applied top-to-bottom which has the very nice property
+ * that, if A dominates B, then A->index <= B->index.
+ */
+unsigned
+nir_index_instrs(nir_function_impl *impl)
+{
+   unsigned index = 0;
+   nir_foreach_block(impl, index_instrs_block, &index);
+   return index;
+}
+
+nir_intrinsic_op
+nir_intrinsic_from_system_value(gl_system_value val)
+{
+   switch (val) {
+   case SYSTEM_VALUE_VERTEX_ID:
+      return nir_intrinsic_load_vertex_id;
+   case SYSTEM_VALUE_INSTANCE_ID:
+      return nir_intrinsic_load_instance_id;
+   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+      return nir_intrinsic_load_vertex_id_zero_base;
+   case SYSTEM_VALUE_BASE_VERTEX:
+      return nir_intrinsic_load_base_vertex;
+   case SYSTEM_VALUE_INVOCATION_ID:
+      return nir_intrinsic_load_invocation_id;
+   case SYSTEM_VALUE_FRONT_FACE:
+      return nir_intrinsic_load_front_face;
+   case SYSTEM_VALUE_SAMPLE_ID:
+      return nir_intrinsic_load_sample_id;
+   case SYSTEM_VALUE_SAMPLE_POS:
+      return nir_intrinsic_load_sample_pos;
+   case SYSTEM_VALUE_SAMPLE_MASK_IN:
+      return nir_intrinsic_load_sample_mask_in;
+   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
+      return nir_intrinsic_load_local_invocation_id;
+   case SYSTEM_VALUE_WORK_GROUP_ID:
+      return nir_intrinsic_load_work_group_id;
+   case SYSTEM_VALUE_NUM_WORK_GROUPS:
+      return nir_intrinsic_load_num_work_groups;
+   /* FINISHME: Add tessellation intrinsics.
+   case SYSTEM_VALUE_TESS_COORD:
+   case SYSTEM_VALUE_VERTICES_IN:
+   case SYSTEM_VALUE_PRIMITIVE_ID:
+   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
+   case SYSTEM_VALUE_TESS_LEVEL_INNER:
+    */
+   default:
+      unreachable("system value does not directly correspond to intrinsic");
+   }
+}
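+
+/* Usage sketch (illustrative):
+ *
+ *    nir_intrinsic_op op =
+ *       nir_intrinsic_from_system_value(SYSTEM_VALUE_FRONT_FACE);
+ *
+ * yields nir_intrinsic_load_front_face.
+ */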
+
+gl_system_value
+nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
+{
+   switch (intrin) {
+   case nir_intrinsic_load_vertex_id:
+      return SYSTEM_VALUE_VERTEX_ID;
+   case nir_intrinsic_load_instance_id:
+      return SYSTEM_VALUE_INSTANCE_ID;
+   case nir_intrinsic_load_vertex_id_zero_base:
+      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
+   case nir_intrinsic_load_base_vertex:
+      return SYSTEM_VALUE_BASE_VERTEX;
+   case nir_intrinsic_load_invocation_id:
+      return SYSTEM_VALUE_INVOCATION_ID;
+   case nir_intrinsic_load_front_face:
+      return SYSTEM_VALUE_FRONT_FACE;
+   case nir_intrinsic_load_sample_id:
+      return SYSTEM_VALUE_SAMPLE_ID;
+   case nir_intrinsic_load_sample_pos:
+      return SYSTEM_VALUE_SAMPLE_POS;
+   case nir_intrinsic_load_sample_mask_in:
+      return SYSTEM_VALUE_SAMPLE_MASK_IN;
+   case nir_intrinsic_load_local_invocation_id:
+      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
+   case nir_intrinsic_load_num_work_groups:
+      return SYSTEM_VALUE_NUM_WORK_GROUPS;
+   case nir_intrinsic_load_work_group_id:
+      return SYSTEM_VALUE_WORK_GROUP_ID;
+   /* FINISHME: Add tessellation intrinsics.
+      return SYSTEM_VALUE_TESS_COORD;
+      return SYSTEM_VALUE_VERTICES_IN;
+      return SYSTEM_VALUE_PRIMITIVE_ID;
+      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
+      return SYSTEM_VALUE_TESS_LEVEL_INNER;
+    */
+   default:
+      unreachable("intrinsic doesn't produce a system value");
+   }
+}
index 7b188fa..19a4a36 100644
@@ -278,6 +278,7 @@ typedef struct {
        *   - Fragment shader output: one of the values from \c gl_frag_result.
        *   - Uniforms: Per-stage uniform slot number for default uniform block.
        *   - Uniforms: Index within the uniform block definition for UBO members.
+       *   - Non-UBO Uniforms: uniform slot number.
        *   - Other: This field is not currently used.
        *
        * If the variable is a uniform, shader input, or shader output, and the
@@ -422,6 +423,9 @@ typedef struct nir_instr {
    nir_instr_type type;
    struct nir_block *block;
 
+   /** generic instruction index. */
+   unsigned index;
+
    /* A temporary for optimization and analysis passes to use for storing
     * flags.  For instance, DCE uses this to store the "dead/live" info.
     */
@@ -593,8 +597,8 @@ nir_dest_for_reg(nir_register *reg)
    return dest;
 }
 
-void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx);
-void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx);
+void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
 
 typedef struct {
    nir_src src;
@@ -643,10 +647,6 @@ typedef struct {
    unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
 } nir_alu_dest;
 
-void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx);
-void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
-                       void *mem_ctx);
-
 typedef enum {
    nir_type_invalid = 0, /* Not a valid type */
    nir_type_float,
@@ -715,6 +715,11 @@ typedef struct nir_alu_instr {
    nir_alu_src src[];
 } nir_alu_instr;
 
+void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+                      nir_alu_instr *instr);
+void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+                       nir_alu_instr *instr);
+
 /* is this source channel used? */
 static inline bool
 nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
@@ -966,7 +971,8 @@ typedef enum {
    nir_texop_txs,                /**< Texture size */
    nir_texop_lod,                /**< Texture lod query */
    nir_texop_tg4,                /**< Texture gather */
-   nir_texop_query_levels       /**< Texture levels query */
+   nir_texop_query_levels,       /**< Texture levels query */
+   nir_texop_texture_samples,    /**< Texture samples query */
 } nir_texop;
 
 typedef struct {
@@ -1041,6 +1047,7 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
    case nir_texop_lod:
       return 2;
 
+   case nir_texop_texture_samples:
    case nir_texop_query_levels:
       return 1;
 
@@ -1457,6 +1464,15 @@ typedef struct nir_shader_compiler_options {
    /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
    bool lower_scmp;
 
+   /* Does the native fdot instruction replicate its result for four
+    * components?  If so, then opt_algebraic_late will turn all fdotN
+    * instructions into fdot_replicatedN instructions.
+    */
+   bool fdot_replicates;
+
+   /** lowers ffract to fsub+ffloor: */
+   bool lower_ffract;
+
    /**
     * Does the driver support real 32-bit integers?  (Otherwise, integers
     * are simulated by floats.)
@@ -1465,13 +1481,13 @@ typedef struct nir_shader_compiler_options {
 } nir_shader_compiler_options;
 
 typedef struct nir_shader {
-   /** hash table of name -> uniform nir_variable */
+   /** list of uniforms (nir_variable) */
    struct exec_list uniforms;
 
-   /** hash table of name -> input nir_variable */
+   /** list of inputs (nir_variable) */
    struct exec_list inputs;
 
-   /** hash table of name -> output nir_variable */
+   /** list of outputs (nir_variable) */
    struct exec_list outputs;
 
    /** Set of driver-specific options for the shader.
@@ -1481,10 +1497,10 @@ typedef struct nir_shader {
     */
    const struct nir_shader_compiler_options *options;
 
-   /** list of global variables in the shader */
+   /** list of global variables in the shader (nir_variable) */
    struct exec_list globals;
 
-   /** list of system value variables in the shader */
+   /** list of system value variables in the shader (nir_variable) */
    struct exec_list system_values;
 
    struct exec_list functions; /** < list of nir_function */
@@ -1503,6 +1519,14 @@ typedef struct nir_shader {
 
    /** The shader stage, such as MESA_SHADER_VERTEX. */
    gl_shader_stage stage;
+
+   struct {
+      /** The maximum number of vertices the geometry shader might write. */
+      unsigned vertices_out;
+
+      /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+      unsigned invocations;
+   } gs;
 } nir_shader;
 
 #define nir_foreach_overload(shader, overload)                        \
@@ -1761,12 +1785,14 @@ bool nir_srcs_equal(nir_src src1, nir_src src2);
 void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
 void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
 void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
+void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
+                            nir_dest new_dest);
 
 void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                        unsigned num_components, const char *name);
 void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                       unsigned num_components, const char *name);
-void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx);
+void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
 
 /* visits basic blocks in source-code order */
 typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
@@ -1774,15 +1800,20 @@ bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb,
                        void *state);
 bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
                                void *state);
+bool nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+                                  void *state);
 
 /* If the following CF node is an if, this function returns that if.
  * Otherwise, it returns NULL.
  */
 nir_if *nir_block_get_following_if(nir_block *block);
 
+nir_loop *nir_block_get_following_loop(nir_block *block);
+
 void nir_index_local_regs(nir_function_impl *impl);
 void nir_index_global_regs(nir_shader *shader);
 void nir_index_ssa_defs(nir_function_impl *impl);
+unsigned nir_index_instrs(nir_function_impl *impl);
 
 void nir_index_blocks(nir_function_impl *impl);
 
@@ -1810,14 +1841,18 @@ void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
 void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
 void nir_dump_cfg(nir_shader *shader, FILE *fp);
 
-void nir_split_var_copies(nir_shader *shader);
+int nir_gs_count_vertices(nir_shader *shader);
+
+bool nir_split_var_copies(nir_shader *shader);
 
 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
 void nir_lower_var_copies(nir_shader *shader);
 
-void nir_lower_global_vars_to_local(nir_shader *shader);
+bool nir_lower_global_vars_to_local(nir_shader *shader);
 
-void nir_lower_locals_to_regs(nir_shader *shader);
+bool nir_lower_locals_to_regs(nir_shader *shader);
 
 void nir_lower_outputs_to_temporaries(nir_shader *shader);
 
@@ -1829,9 +1864,10 @@ void nir_lower_io(nir_shader *shader,
                   int (*type_size)(const struct glsl_type *));
 void nir_lower_vars_to_ssa(nir_shader *shader);
 
-void nir_remove_dead_variables(nir_shader *shader);
+bool nir_remove_dead_variables(nir_shader *shader);
 
-void nir_lower_vec_to_movs(nir_shader *shader);
+void nir_move_vec_src_uses_to_dest(nir_shader *shader);
+bool nir_lower_vec_to_movs(nir_shader *shader);
 void nir_lower_alu_to_scalar(nir_shader *shader);
 void nir_lower_load_const_to_scalar(nir_shader *shader);
 
@@ -1841,14 +1877,57 @@ void nir_lower_samplers(nir_shader *shader,
                         const struct gl_shader_program *shader_program);
 void nir_lower_samplers_for_vk(nir_shader *shader);
 
-void nir_lower_system_values(nir_shader *shader);
-void nir_lower_tex_projector(nir_shader *shader);
+bool nir_lower_system_values(nir_shader *shader);
+
+typedef struct nir_lower_tex_options {
+   /**
+    * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
+    * sampler types a texture projector is lowered.
+    */
+   unsigned lower_txp;
+
+   /**
+    * If true, lower rect textures to 2D, using txs to fetch the
+    * texture dimensions and dividing the texture coords by the
+    * texture dims to normalize.
+    */
+   bool lower_rect;
+
+   /**
+    * To emulate certain texture wrap modes, this can be used
+    * to saturate the specified tex coord to [0.0, 1.0].  The
+    * bits are according to sampler #, ie. if, for example:
+    *
+    *   (conf->saturate_s & (1 << n))
+    *
+    * is true, then the s coord for sampler n is saturated.
+    *
+    * Note that clamping must happen *after* projector lowering
+    * so any projected texture sample instruction with a clamped
+    * coordinate gets automatically lowered, regardless of the
+    * 'lower_txp' setting.
+    */
+   unsigned saturate_s;
+   unsigned saturate_t;
+   unsigned saturate_r;
+} nir_lower_tex_options;
+
+void nir_lower_tex(nir_shader *shader,
+                   const nir_lower_tex_options *options);
+
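+/* Usage sketch (illustrative): normalize RECT coordinates and clamp the
+ * s/t coords of sampler 0:
+ *
+ *    nir_lower_tex_options opts = {0};
+ *    opts.lower_rect = true;
+ *    opts.saturate_s = (1 << 0);
+ *    opts.saturate_t = (1 << 0);
+ *    nir_lower_tex(shader, &opts);
+ */
+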
 void nir_lower_idiv(nir_shader *shader);
 
+void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
+void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
+
+void nir_lower_two_sided_color(nir_shader *shader);
+
 void nir_lower_atomics(nir_shader *shader);
 void nir_lower_to_source_mods(nir_shader *shader);
 
-void nir_normalize_cubemap_coords(nir_shader *shader);
+bool nir_lower_gs_intrinsics(nir_shader *shader);
+
+bool nir_normalize_cubemap_coords(nir_shader *shader);
 
 void nir_live_variables_impl(nir_function_impl *impl);
 bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
@@ -1876,6 +1955,8 @@ bool nir_opt_cse(nir_shader *shader);
 bool nir_opt_dce_impl(nir_function_impl *impl);
 bool nir_opt_dce(nir_shader *shader);
 
+bool nir_opt_dead_cf(nir_shader *shader);
+
 void nir_opt_gcm(nir_shader *shader);
 
 bool nir_opt_peephole_select(nir_shader *shader);
@@ -1887,6 +1968,9 @@ bool nir_opt_undef(nir_shader *shader);
 
 void nir_sweep(nir_shader *shader);
 
+nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
+gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
index 295a209..bf45d03 100644
@@ -76,21 +76,36 @@ nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value
 static inline nir_ssa_def *
 nir_imm_float(nir_builder *build, float x)
 {
-   nir_const_value v = { { .f = {x, 0, 0, 0} } };
+   nir_const_value v;
+
+   memset(&v, 0, sizeof(v));
+   v.f[0] = x;
+
    return nir_build_imm(build, 1, v);
 }
 
 static inline nir_ssa_def *
 nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
 {
-   nir_const_value v = { { .f = {x, y, z, w} } };
+   nir_const_value v;
+
+   memset(&v, 0, sizeof(v));
+   v.f[0] = x;
+   v.f[1] = y;
+   v.f[2] = z;
+   v.f[3] = w;
+
    return nir_build_imm(build, 4, v);
 }
 
 static inline nir_ssa_def *
 nir_imm_int(nir_builder *build, int x)
 {
-   nir_const_value v = { { .i = {x, 0, 0, 0} } };
+   nir_const_value v;
+
+   memset(&v, 0, sizeof(v));
+   v.i[0] = x;
+
    return nir_build_imm(build, 1, v);
 }
 
@@ -173,6 +188,24 @@ nir_##op(nir_builder *build, nir_ssa_def *src0,                           \
 
 #include "nir_builder_opcodes.h"
 
+static inline nir_ssa_def *
+nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
+{
+   switch (num_components) {
+   case 4:
+      return nir_vec4(build, comp[0], comp[1], comp[2], comp[3]);
+   case 3:
+      return nir_vec3(build, comp[0], comp[1], comp[2]);
+   case 2:
+      return nir_vec2(build, comp[0], comp[1]);
+   case 1:
+      return comp[0];
+   default:
+      unreachable("bad component count");
+      return NULL;
+   }
+}
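+
+/* Example (illustrative): rebuild a vec3 from per-channel defs x, y, z:
+ *
+ *    nir_ssa_def *comps[3] = { x, y, z };
+ *    nir_ssa_def *v = nir_vec(b, comps, 3);
+ */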
+
 /**
  * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
  */
@@ -233,6 +266,13 @@ nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
    return NULL;
 }
 
+static inline nir_ssa_def *
+nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
+{
+   unsigned swizzle[4] = {c, c, c, c};
+   return nir_swizzle(b, def, swizzle, 1, false);
+}
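+
+/* e.g. nir_channel(b, v, 2) returns a one-component def holding v.z; only
+ * the first swizzle index matters for the single-component result.
+ */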
+
 /**
  * Turns a nir_src into a nir_ssa_def * so it can be passed to
  * nir_build_alu()-based builder calls.
@@ -251,4 +291,31 @@ nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
    return nir_imov_alu(build, alu, num_components);
 }
 
+static inline nir_ssa_def *
+nir_load_var(nir_builder *build, nir_variable *var)
+{
+   const unsigned num_components = glsl_get_vector_elements(var->type);
+
+   nir_intrinsic_instr *load =
+      nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var);
+   load->num_components = num_components;
+   load->variables[0] = nir_deref_var_create(load, var);
+   nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+   nir_builder_instr_insert(build, &load->instr);
+   return &load->dest.ssa;
+}
+
+static inline void
+nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value)
+{
+   const unsigned num_components = glsl_get_vector_elements(var->type);
+
+   nir_intrinsic_instr *store =
+      nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
+   store->num_components = num_components;
+   store->variables[0] = nir_deref_var_create(store, var);
+   store->src[0] = nir_src_for_ssa(value);
+   nir_builder_instr_insert(build, &store->instr);
+}
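+
+/* Round-trip sketch (illustrative): copy one variable into another:
+ *
+ *    nir_ssa_def *val = nir_load_var(b, src_var);
+ *    nir_store_var(b, dst_var, val);
+ */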
+
 #endif /* NIR_BUILDER_H */
index 5c03375..7f51c4f 100644
@@ -60,6 +60,16 @@ block_add_pred(nir_block *block, nir_block *pred)
    _mesa_set_add(block->predecessors, pred);
 }
 
+static inline void
+block_remove_pred(nir_block *block, nir_block *pred)
+{
+   struct set_entry *entry = _mesa_set_search(block->predecessors, pred);
+
+   assert(entry);
+
+   _mesa_set_remove(block->predecessors, entry);
+}
+
 static void
 link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2)
 {
@@ -83,20 +93,16 @@ unlink_blocks(nir_block *pred, nir_block *succ)
       pred->successors[1] = NULL;
    }
 
-   struct set_entry *entry = _mesa_set_search(succ->predecessors, pred);
-
-   assert(entry);
-
-   _mesa_set_remove(succ->predecessors, entry);
+   block_remove_pred(succ, pred);
 }
 
 static void
 unlink_block_successors(nir_block *block)
 {
-   if (block->successors[0] != NULL)
-      unlink_blocks(block, block->successors[0]);
    if (block->successors[1] != NULL)
       unlink_blocks(block, block->successors[1]);
+   if (block->successors[0] != NULL)
+      unlink_blocks(block, block->successors[0]);
 }
 
 static void
@@ -194,6 +200,23 @@ link_block_to_non_block(nir_block *block, nir_cf_node *node)
 }
 
 /**
+ * Replace a block's successor with a different one.
+ */
+static void
+replace_successor(nir_block *block, nir_block *old_succ, nir_block *new_succ)
+{
+   if (block->successors[0] == old_succ) {
+      block->successors[0] = new_succ;
+   } else {
+      assert(block->successors[1] == old_succ);
+      block->successors[1] = new_succ;
+   }
+
+   block_remove_pred(old_succ, block);
+   block_add_pred(new_succ, block);
+}
+
+/**
  * Takes a basic block and inserts a new empty basic block before it, making its
  * predecessors point to the new block. This essentially splits the block into
  * an empty header and a body so that another non-block CF node can be inserted
@@ -211,9 +234,7 @@ split_block_beginning(nir_block *block)
    struct set_entry *entry;
    set_foreach(block->predecessors, entry) {
       nir_block *pred = (nir_block *) entry->key;
-
-      unlink_blocks(pred, block);
-      link_blocks(pred, new_block, NULL);
+      replace_successor(pred, block, new_block);
    }
 
    /* Any phi nodes must stay part of the new block, or else their
@@ -527,40 +548,52 @@ remove_phi_src(nir_block *block, nir_block *pred)
  * infinite loops. Note that the jump to be eliminated may be free-floating.
  */
 
-static
-void unlink_jump(nir_block *block, nir_jump_type type)
+static void
+unlink_jump(nir_block *block, nir_jump_type type, bool add_normal_successors)
 {
+   nir_block *next = block->successors[0];
+
    if (block->successors[0])
       remove_phi_src(block->successors[0], block);
    if (block->successors[1])
       remove_phi_src(block->successors[1], block);
 
-   if (type == nir_jump_break) {
-      nir_block *next = block->successors[0];
+   unlink_block_successors(block);
+   if (add_normal_successors)
+      block_add_normal_succs(block);
 
-      if (next->predecessors->entries == 1) {
-         nir_loop *loop =
-            nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node));
+   /* If we've just removed a break, and the block we were jumping to (after
+    * the loop) now has zero predecessors, we've created a new infinite loop.
+    *
+    * NIR doesn't allow blocks (other than the start block) to have zero
+    * predecessors.  In particular, dominance assumes all blocks are reachable.
+    * So, we insert a "fake link" by making successors[1] point after the loop.
+    *
+    * Note that we have to do this after unlinking/recreating the block's
+    * successors.  If we removed a "break" at the end of the loop, then
+    * block == last_block, so block->successors[0] would already be "next",
+    * and adding a fake link would create two identical successors.  Doing
+    * this afterward works, as we'll have changed block->successors[0] to
+    * be the top of the loop.
+    */
+   if (type == nir_jump_break && next->predecessors->entries == 0) {
+      nir_loop *loop =
+         nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node));
 
-         /* insert fake link */
-         nir_cf_node *last = nir_loop_last_cf_node(loop);
-         assert(last->type == nir_cf_node_block);
-         nir_block *last_block = nir_cf_node_as_block(last);
+      /* insert fake link */
+      nir_cf_node *last = nir_loop_last_cf_node(loop);
+      assert(last->type == nir_cf_node_block);
+      nir_block *last_block = nir_cf_node_as_block(last);
 
-         last_block->successors[1] = next;
-         block_add_pred(next, last_block);
-      }
+      last_block->successors[1] = next;
+      block_add_pred(next, last_block);
    }
-
-   unlink_block_successors(block);
 }
 
 void
 nir_handle_remove_jump(nir_block *block, nir_jump_type type)
 {
-   unlink_jump(block, type);
-
-   block_add_normal_succs(block);
+   unlink_jump(block, type, true);
 
    nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
    nir_metadata_preserve(impl, nir_metadata_none);
@@ -654,7 +687,7 @@ replace_ssa_def_uses(nir_ssa_def *def, void *void_impl)
    nir_ssa_undef_instr *undef =
       nir_ssa_undef_instr_create(mem_ctx, def->num_components);
    nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
-   nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def), mem_ctx);
+   nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def));
    return true;
 }
 
@@ -668,7 +701,7 @@ cleanup_cf_node(nir_cf_node *node, nir_function_impl *impl)
       nir_foreach_instr_safe(block, instr) {
          if (instr->type == nir_instr_type_jump) {
             nir_jump_type jump_type = nir_instr_as_jump(instr)->type;
-            unlink_jump(block, jump_type);
+            unlink_jump(block, jump_type, false);
          } else {
             nir_foreach_ssa_def(instr, replace_ssa_def_uses, impl);
             nir_instr_remove(instr);
@@ -723,6 +756,9 @@ nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end)
    extracted->impl = nir_cf_node_get_function(&block_begin->cf_node);
    exec_list_make_empty(&extracted->list);
 
+   /* Dominance and other block-related information is toast. */
+   nir_metadata_preserve(extracted->impl, nir_metadata_none);
+
    nir_cf_node *cf_node = &block_begin->cf_node;
    nir_cf_node *cf_node_end = &block_end->cf_node;
    while (true) {
index 94002f1..eaf883d 100644
@@ -359,8 +359,7 @@ isolate_phi_nodes_block(nir_block *block, void *void_state)
       exec_list_push_tail(&block_pcopy->entries, &entry->node);
 
       nir_ssa_def_rewrite_uses(&phi->dest.ssa,
-                               nir_src_for_ssa(&entry->dest.ssa),
-                               state->mem_ctx);
+                               nir_src_for_ssa(&entry->dest.ssa));
 
       nir_instr_rewrite_src(&block_pcopy->instr, &entry->src,
                             nir_src_for_ssa(&phi->dest.ssa));
@@ -493,7 +492,7 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
       reg->num_array_elems = 0;
    }
 
-   nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg), state->mem_ctx);
+   nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg));
    assert(list_empty(&def->uses) && list_empty(&def->if_uses));
 
    if (def->parent_instr->type == nir_instr_type_ssa_undef) {
@@ -513,9 +512,7 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
     */
    nir_dest *dest = exec_node_data(nir_dest, def, ssa);
 
-   *dest = nir_dest_for_reg(reg);
-   dest->reg.parent_instr = state->instr;
-   list_addtail(&dest->reg.def_link, &reg->defs);
+   nir_instr_rewrite_dest(state->instr, dest, nir_dest_for_reg(reg));
 
    return true;
 }
@@ -556,7 +553,7 @@ emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src,
       assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components);
 
    nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
-   nir_src_copy(&mov->src[0].src, &src, mem_ctx);
+   nir_src_copy(&mov->src[0].src, &src, mov);
    mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg);
    mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1;
 
diff --git a/src/glsl/nir/nir_gs_count_vertices.c b/src/glsl/nir/nir_gs_count_vertices.c
new file mode 100644
index 0000000..e0bdf17
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+static nir_intrinsic_instr *
+as_intrinsic(nir_instr *instr, nir_intrinsic_op op)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return NULL;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   if (intrin->intrinsic != op)
+      return NULL;
+
+   return intrin;
+}
+
+static nir_intrinsic_instr *
+as_set_vertex_count(nir_instr *instr)
+{
+   return as_intrinsic(instr, nir_intrinsic_set_vertex_count);
+}
+
+/**
+ * If a geometry shader emits a constant number of vertices, return the
+ * number of vertices.  Otherwise, return -1 (unknown).
+ *
+ * This only works if you've used nir_lower_gs_intrinsics() to do vertex
+ * counting at the NIR level.
+ */
+int
+nir_gs_count_vertices(nir_shader *shader)
+{
+   int count = -1;
+
+   nir_foreach_overload(shader, overload) {
+      if (!overload->impl)
+         continue;
+
+      /* set_vertex_count intrinsics only appear in predecessors of the
+       * end block.  So we don't need to walk all of them.
+       */
+      struct set_entry *entry;
+      set_foreach(overload->impl->end_block->predecessors, entry) {
+         nir_block *block = (nir_block *) entry->key;
+
+         nir_foreach_instr_reverse(block, instr) {
+            nir_intrinsic_instr *intrin = as_set_vertex_count(instr);
+            if (!intrin)
+               continue;
+
+            nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
+            /* We've found a non-constant value.  Bail. */
+            if (!val)
+               return -1;
+
+            if (count == -1)
+               count = val->i[0];
+
+            /* We've found contradictory set_vertex_count intrinsics.
+             * This can happen if there are early-returns in main() and
+             * different paths emit different numbers of vertices.
+             */
+            if (count != val->i[0])
+               return -1;
+         }
+      }
+   }
+
+   return count;
+}
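
The comment above spells out the contract: the pass only sees constant counts once nir_lower_gs_intrinsics() (added later in this series) has materialized set_vertex_count intrinsics. A minimal driver-side sketch of the intended call order follows; the wrapper function and its name are illustrative, not part of the patch:

#include "nir.h"

/* Illustrative helper: lower the basic GS intrinsics so vertex counting
 * happens in NIR, then try to recover a compile-time vertex count.
 * A result of -1 means the count is not statically known and the driver
 * must keep its runtime bookkeeping.
 */
static int
query_static_vertex_count(nir_shader *shader)
{
   nir_lower_gs_intrinsics(shader);
   return nir_gs_count_vertices(shader);
}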
index 1f24f9f..e02779e 100644 (file)
@@ -62,6 +62,13 @@ INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
 /*
+ * Ask the driver for the size of a given buffer. It takes the buffer index
+ * as source.
+ */
+INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+/*
  * a barrier is an intrinsic with no inputs/outputs but which can't be moved
  * around/optimized in general
  */
@@ -79,9 +86,30 @@ BARRIER(memory_barrier)
 /** A conditional discard, with a single boolean source. */
 INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
 
+/**
+ * Basic Geometry Shader intrinsics.
+ *
+ * emit_vertex implements GLSL's EmitStreamVertex() built-in.  It takes a single
+ * index, which is the stream ID to write to.
+ *
+ * end_primitive implements GLSL's EndPrimitive() built-in.
+ */
 INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, 0)
 INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
 
+/**
+ * Geometry Shader intrinsics with a vertex count.
+ *
+ * Alternatively, drivers may implement these intrinsics, and use
+ * nir_lower_gs_intrinsics() to convert from the basic intrinsics.
+ *
+ * These maintain a count of the number of vertices emitted, as an additional
+ * unsigned integer source.
+ */
+INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
+
 /*
  * Atomic counters
  *
@@ -125,20 +153,52 @@ INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
 INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
 INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
-#define SYSTEM_VALUE(name, components) \
-   INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \
+/*
+ * SSBO atomic intrinsics
+ *
+ * All of the SSBO atomic memory operations read a value from memory,
+ * compute a new value using one of the operations below, write the new
+ * value to memory, and return the original value read.
+ *
+ * All operations take 3 sources, except CompSwap, which takes 4.  These
+ * sources represent:
+ *
+ * 0: The SSBO buffer index.
+ * 1: The offset into the SSBO buffer of the variable that the atomic
+ *    operation will operate on.
+ * 2: The data parameter to the atomic function (i.e. the value to add
+ *    in ssbo_atomic_add, etc.).
+ * 3: For CompSwap only: the second data parameter.
+ */
+INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_min, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_max, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
+
+#define SYSTEM_VALUE(name, components, num_indices) \
+   INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
    NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
-SYSTEM_VALUE(front_face, 1)
-SYSTEM_VALUE(vertex_id, 1)
-SYSTEM_VALUE(vertex_id_zero_base, 1)
-SYSTEM_VALUE(base_vertex, 1)
-SYSTEM_VALUE(instance_id, 1)
-SYSTEM_VALUE(sample_id, 1)
-SYSTEM_VALUE(sample_pos, 2)
-SYSTEM_VALUE(sample_mask_in, 1)
-SYSTEM_VALUE(invocation_id, 1)
+SYSTEM_VALUE(front_face, 1, 0)
+SYSTEM_VALUE(vertex_id, 1, 0)
+SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
+SYSTEM_VALUE(base_vertex, 1, 0)
+SYSTEM_VALUE(instance_id, 1, 0)
+SYSTEM_VALUE(sample_id, 1, 0)
+SYSTEM_VALUE(sample_pos, 2, 0)
+SYSTEM_VALUE(sample_mask_in, 1, 0)
+SYSTEM_VALUE(invocation_id, 1, 0)
+SYSTEM_VALUE(local_invocation_id, 3, 0)
+SYSTEM_VALUE(work_group_id, 3, 0)
+SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
+SYSTEM_VALUE(num_work_groups, 3, 0)
 
 /*
  * The format of the indices depends on the type of the load.  For uniforms,
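
As a concrete (hypothetical) illustration of the source layout described in the SSBO atomics comment above, this is how an ssbo_atomic_add could be emitted with nir_builder; the helper name is invented, and only builder entry points that appear elsewhere in this series are used:

#include "nir.h"
#include "nir_builder.h"

/* src[0] = SSBO buffer index, src[1] = offset into the buffer,
 * src[2] = value to add; the single-component destination yields the
 * value that was in memory before the operation.
 */
static nir_ssa_def *
build_ssbo_atomic_add(nir_builder *b, nir_ssa_def *buffer,
                      nir_ssa_def *offset, nir_ssa_def *data)
{
   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_ssbo_atomic_add);
   atomic->src[0] = nir_src_for_ssa(buffer);
   atomic->src[1] = nir_src_for_ssa(offset);
   atomic->src[2] = nir_src_for_ssa(data);
   nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);
   nir_builder_instr_insert(b, &atomic->instr);
   return &atomic->dest.ssa;
}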
@@ -168,20 +228,24 @@ SYSTEM_VALUE(invocation_id, 1)
 LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-/* LOAD(ssbo, 1, 0) */
+LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
 
 /*
  * Stores work the same way as loads, except now the first register input is
  * the value or array to store and the optional second input is the indirect
- * offset.
+ * offset. SSBO stores are similar, but they accept an extra source for the
+ * block index and an extra index with the writemask to use.
  */
 
-#define STORE(name, num_indices, flags) \
-   INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
-   INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
-             num_indices, flags) \
+#define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \
+   INTRINSIC(store_##name, 1 + extra_srcs, \
+             ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \
+             false, 0, 0, 1 + extra_indices, flags) \
+   INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \
+             ARR(0, 1, extra_srcs_size, extra_srcs_size), \
+             false, 0, 0, 1 + extra_indices, flags)
 
-STORE(output, 1, 0)
-/* STORE(ssbo, 2, 0) */
+STORE(output, 0, 0, 0, 0)
+STORE(ssbo, 1, 1, 1, 0)
 
-LAST_INTRINSIC(store_output_indirect)
+LAST_INTRINSIC(store_ssbo_indirect)
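
For reference, expanding the new STORE macro by hand for the ssbo case gives the following (derived mechanically from the definition above, shown only to make the source and index counts concrete):

INTRINSIC(store_ssbo, 2, ARR(0, 1, 1, 1), false, 0, 0, 2, 0)
INTRINSIC(store_ssbo_indirect, 3, ARR(0, 1, 1, 1), false, 0, 0, 2, 0)

That is: the value plus a block-index source (plus an indirect-offset source for the _indirect variant), and a base-offset index plus a writemask index.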
index efbe9e7..9313fc0 100644 (file)
@@ -22,6 +22,7 @@
  */
 
 #include "nir.h"
+#include "nir_builder.h"
 
 /** @file nir_lower_alu_to_scalar.c
  *
@@ -38,45 +39,39 @@ nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components)
 
 static void
 lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
-                void *mem_ctx)
+                nir_builder *builder)
 {
    unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
 
    nir_ssa_def *last = NULL;
    for (unsigned i = 0; i < num_components; i++) {
-      nir_alu_instr *chan = nir_alu_instr_create(mem_ctx, chan_op);
+      nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op);
       nir_alu_ssa_dest_init(chan, 1);
-      nir_alu_src_copy(&chan->src[0], &instr->src[0], mem_ctx);
+      nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
       chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
       if (nir_op_infos[chan_op].num_inputs > 1) {
          assert(nir_op_infos[chan_op].num_inputs == 2);
-         nir_alu_src_copy(&chan->src[1], &instr->src[1], mem_ctx);
+         nir_alu_src_copy(&chan->src[1], &instr->src[1], chan);
          chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
       }
 
-      nir_instr_insert_before(&instr->instr, &chan->instr);
+      nir_builder_instr_insert(builder, &chan->instr);
 
       if (i == 0) {
          last = &chan->dest.dest.ssa;
       } else {
-         nir_alu_instr *merge = nir_alu_instr_create(mem_ctx, merge_op);
-         nir_alu_ssa_dest_init(merge, 1);
-         merge->dest.write_mask = 1;
-         merge->src[0].src = nir_src_for_ssa(last);
-         merge->src[1].src = nir_src_for_ssa(&chan->dest.dest.ssa);
-         nir_instr_insert_before(&instr->instr, &merge->instr);
-         last = &merge->dest.dest.ssa;
+         last = nir_build_alu(builder, merge_op,
+                              last, &chan->dest.dest.ssa, NULL, NULL);
       }
    }
 
    assert(instr->dest.write_mask == 1);
-   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last),
-                            mem_ctx);
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last));
    nir_instr_remove(&instr->instr);
 }
 
 static void
-lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
+lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
 {
    unsigned num_src = nir_op_infos[instr->op].num_inputs;
    unsigned i, chan;
@@ -84,12 +79,14 @@ lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
    assert(instr->dest.dest.is_ssa);
    assert(instr->dest.write_mask != 0);
 
+   b->cursor = nir_before_instr(&instr->instr);
+
 #define LOWER_REDUCTION(name, chan, merge) \
    case name##2: \
    case name##3: \
    case name##4: \
-      lower_reduction(instr, chan, merge, mem_ctx); \
-      break;
+      lower_reduction(instr, chan, merge, b); \
+      return;
 
    switch (instr->op) {
    case nir_op_vec4:
@@ -115,6 +112,24 @@ lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
        */
       return;
 
+   case nir_op_fdph: {
+      nir_ssa_def *sum[4];
+      for (unsigned i = 0; i < 3; i++) {
+         sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa,
+                                          instr->src[0].swizzle[i]),
+                              nir_channel(b, instr->src[1].src.ssa,
+                                          instr->src[1].swizzle[i]));
+      }
+      sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]);
+
+      nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]),
+                                     nir_fadd(b, sum[2], sum[3]));
+
+      nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
+      nir_instr_remove(&instr->instr);
+      return;
+   }
+
       LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
       LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
       LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
@@ -135,16 +150,13 @@ lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
       return;
 
    unsigned num_components = instr->dest.dest.ssa.num_components;
-   static const nir_op nir_op_map[] = {nir_op_vec2, nir_op_vec3, nir_op_vec4};
-   nir_alu_instr *vec_instr =
-      nir_alu_instr_create(mem_ctx, nir_op_map[num_components - 2]);
-   nir_alu_ssa_dest_init(vec_instr, num_components);
+   nir_ssa_def *comps[] = { NULL, NULL, NULL, NULL };
 
    for (chan = 0; chan < 4; chan++) {
       if (!(instr->dest.write_mask & (1 << chan)))
          continue;
 
-      nir_alu_instr *lower = nir_alu_instr_create(mem_ctx, instr->op);
+      nir_alu_instr *lower = nir_alu_instr_create(b->shader, instr->op);
       for (i = 0; i < num_src; i++) {
          /* We only handle same-size-as-dest (input_sizes[] == 0) or scalar
           * args (input_sizes[] == 1).
@@ -153,33 +165,31 @@ lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
          unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?
                               0 : chan);
 
-         nir_alu_src_copy(&lower->src[i], &instr->src[i], mem_ctx);
+         nir_alu_src_copy(&lower->src[i], &instr->src[i], lower);
          for (int j = 0; j < 4; j++)
             lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
       }
 
       nir_alu_ssa_dest_init(lower, 1);
       lower->dest.saturate = instr->dest.saturate;
-      vec_instr->src[chan].src = nir_src_for_ssa(&lower->dest.dest.ssa);
+      comps[chan] = &lower->dest.dest.ssa;
 
-      nir_instr_insert_before(&instr->instr, &lower->instr);
+      nir_builder_instr_insert(b, &lower->instr);
    }
 
-   nir_instr_insert_before(&instr->instr, &vec_instr->instr);
+   nir_ssa_def *vec = nir_vec(b, comps, num_components);
 
-   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
-                            nir_src_for_ssa(&vec_instr->dest.dest.ssa),
-                            mem_ctx);
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
 
    nir_instr_remove(&instr->instr);
 }
 
 static bool
-lower_alu_to_scalar_block(nir_block *block, void *data)
+lower_alu_to_scalar_block(nir_block *block, void *builder)
 {
    nir_foreach_instr_safe(block, instr) {
       if (instr->type == nir_instr_type_alu)
-         lower_alu_instr_scalar(nir_instr_as_alu(instr), data);
+         lower_alu_instr_scalar(nir_instr_as_alu(instr), builder);
    }
 
    return true;
@@ -188,7 +198,10 @@ lower_alu_to_scalar_block(nir_block *block, void *data)
 static void
 nir_lower_alu_to_scalar_impl(nir_function_impl *impl)
 {
-   nir_foreach_block(impl, lower_alu_to_scalar_block, ralloc_parent(impl));
+   nir_builder builder;
+   nir_builder_init(&builder, impl);
+
+   nir_foreach_block(impl, lower_alu_to_scalar_block, &builder);
 }
 
 void
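
The new fdph case above is a homogeneous dot product, scalarized. In plain C, assuming src0 supplies three components and src1 four (as the swizzles above select), the arithmetic is:

/* Reference-only sketch of fdph(a, b) = dot(vec4(a.xyz, 1.0), b),
 * using the same pairwise add tree the lowering builds.
 */
static float
fdph_ref(const float a[3], const float b[4])
{
   return (a[0] * b[0] + a[1] * b[1]) + (a[2] * b[2] + b[3]);
}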
index ce3615a..6f9ecc0 100644 (file)
@@ -91,7 +91,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
          nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
          nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
          mul->dest.write_mask = 0x1;
-         nir_src_copy(&mul->src[0].src, &deref_array->indirect, mem_ctx);
+         nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
          mul->src[1].src.is_ssa = true;
          mul->src[1].src.ssa = &atomic_counter_size->def;
          nir_instr_insert_before(&instr->instr, &mul->instr);
@@ -116,8 +116,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
       nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
                         instr->dest.ssa.num_components, NULL);
       nir_ssa_def_rewrite_uses(&instr->dest.ssa,
-                               nir_src_for_ssa(&new_instr->dest.ssa),
-                               mem_ctx);
+                               nir_src_for_ssa(&new_instr->dest.ssa));
    } else {
       nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
    }
diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c
new file mode 100644 (file)
index 0000000..94d12b7
--- /dev/null
@@ -0,0 +1,340 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#define MAX_CLIP_PLANES 8
+
+/* Generates the lowering code for user clip planes, producing CLIPDIST
+ * from UCP[n] and CLIPVERTEX or POSITION.  Additionally provides an
+ * optional pass for fragment shaders that inserts conditional kills
+ * based on the interpolated CLIPDIST.
+ *
+ * NOTE: should be run after nir_lower_outputs_to_temporaries() (or at
+ * least in scenarios where you can count on each output being written
+ * once and only once).
+ */
+
+
+static nir_variable *
+create_clipdist_var(nir_shader *shader, unsigned drvloc,
+                    bool output, gl_varying_slot slot)
+{
+   nir_variable *var = rzalloc(shader, nir_variable);
+
+   var->data.driver_location = drvloc;
+   var->type = glsl_vec4_type();
+   var->data.mode = output ? nir_var_shader_out : nir_var_shader_in;
+   var->name = ralloc_asprintf(var, "clipdist_%d", drvloc);
+   var->data.index = 0;
+   var->data.location = slot;
+
+   if (output) {
+      exec_list_push_tail(&shader->outputs, &var->node);
+   }
+   else {
+      exec_list_push_tail(&shader->inputs, &var->node);
+   }
+   return var;
+}
+
+static void
+store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val)
+{
+   nir_intrinsic_instr *store;
+
+   store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+   store->num_components = 4;
+   store->const_index[0] = out->data.driver_location;
+   store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]);
+   store->src[0].is_ssa = true;
+   nir_builder_instr_insert(b, &store->instr);
+}
+
+static void
+load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val)
+{
+   nir_intrinsic_instr *load;
+
+   load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+   load->num_components = 4;
+   load->const_index[0] = in->data.driver_location;
+   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+   nir_builder_instr_insert(b, &load->instr);
+
+   val[0] = nir_channel(b, &load->dest.ssa, 0);
+   val[1] = nir_channel(b, &load->dest.ssa, 1);
+   val[2] = nir_channel(b, &load->dest.ssa, 2);
+   val[3] = nir_channel(b, &load->dest.ssa, 3);
+}
+
+struct find_output_state
+{
+   unsigned drvloc;
+   nir_ssa_def *def;
+};
+
+static bool
+find_output_in_block(nir_block *block, void *void_state)
+{
+   struct find_output_state *state = void_state;
+   nir_foreach_instr(block, instr) {
+
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         if ((intr->intrinsic == nir_intrinsic_store_output) &&
+             intr->const_index[0] == state->drvloc) {
+            assert(state->def == NULL);
+            assert(intr->src[0].is_ssa);
+            state->def = intr->src[0].ssa;
+
+#if !defined(DEBUG)
+            /* In debug builds, scan the entire shader so the assert
+             * above catches outputs that are written multiple times.
+             * In release builds, assume all is well and bail when we
+             * find the first write:
+             */
+            return false;
+#endif
+         }
+      }
+   }
+
+   return true;
+}
+
+/* TODO: maybe this would be a useful helper?
+ * NOTE: assumes each output is written exactly once (and unconditionally),
+ * so run nir_lower_outputs_to_temporaries() first if needed.
+ */
+static nir_ssa_def *
+find_output(nir_shader *shader, unsigned drvloc)
+{
+   struct find_output_state state = {
+      .drvloc = drvloc,
+   };
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) {
+         nir_foreach_block_reverse(overload->impl,
+                                   find_output_in_block, &state);
+      }
+   }
+
+   return state.def;
+}
+
+/*
+ * VS lowering
+ */
+
+static void
+lower_clip_vs(nir_function_impl *impl, unsigned ucp_enables,
+              nir_ssa_def *cv, nir_variable **out)
+{
+   nir_ssa_def *clipdist[MAX_CLIP_PLANES];
+   nir_builder b;
+
+   nir_builder_init(&b, impl);
+
+   /* NIR should ensure that, even in the case of loops/if-else, there
+    * is only a single predecessor block to end_block, which makes it
+    * the perfect place to insert the clipdist calculations.
+    *
+    * NOTE: in the case of early returns, these would have to be lowered
+    * to jumps to the end_block predecessor in a previous pass.  Not sure
+    * if there is a good way to sanity check this, but for now the
+    * users of this pass don't support subroutines.
+    */
+   assert(impl->end_block->predecessors->entries == 1);
+   b.cursor = nir_after_cf_list(&impl->body);
+
+   for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
+      if (ucp_enables & (1 << plane)) {
+         nir_intrinsic_instr *ucp;
+
+         /* insert intrinsic to fetch ucp[plane]: */
+         ucp = nir_intrinsic_instr_create(b.shader,
+                                          nir_intrinsic_load_user_clip_plane);
+         ucp->num_components = 4;
+         ucp->const_index[0] = plane;
+         nir_ssa_dest_init(&ucp->instr, &ucp->dest, 4, NULL);
+         nir_builder_instr_insert(&b, &ucp->instr);
+
+         /* calculate clipdist[plane] = dot(ucp, cv): */
+         clipdist[plane] = nir_fdot4(&b, &ucp->dest.ssa, cv);
+      }
+      else {
+         /* 0.0 == don't-clip == disabled: */
+         clipdist[plane] = nir_imm_float(&b, 0.0);
+      }
+   }
+
+   if (ucp_enables & 0x0f)
+      store_clipdist_output(&b, out[0], &clipdist[0]);
+   if (ucp_enables & 0xf0)
+      store_clipdist_output(&b, out[1], &clipdist[4]);
+
+   nir_metadata_preserve(impl, nir_metadata_dominance);
+}
+
+/* ucp_enables is a bitmask of the enabled UCPs.  The actual UCP values
+ * are passed to the shader via the user_clip_plane system values.
+ */
+void
+nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables)
+{
+   int clipvertex = -1;
+   int position = -1;
+   int maxloc = -1;
+   nir_ssa_def *cv;
+   nir_variable *out[2];
+
+   if (!ucp_enables)
+      return;
+
+   /* find clipvertex/position outputs: */
+   foreach_list_typed(nir_variable, var, node, &shader->outputs) {
+      int loc = var->data.driver_location;
+
+      /* keep track of the last used driver location; we'll be
+       * appending CLIP_DIST0/CLIP_DIST1 after the last existing
+       * output:
+       */
+      maxloc = MAX2(maxloc, loc);
+
+      switch (var->data.location) {
+      case VARYING_SLOT_POS:
+         position = loc;
+         break;
+      case VARYING_SLOT_CLIP_VERTEX:
+         clipvertex = loc;
+         break;
+      case VARYING_SLOT_CLIP_DIST0:
+      case VARYING_SLOT_CLIP_DIST1:
+         /* if shader is already writing CLIPDIST, then
+          * there should be no user-clip-planes to deal
+          * with.
+          */
+         return;
+      }
+   }
+
+   if (clipvertex != -1)
+      cv = find_output(shader, clipvertex);
+   else if (position != -1)
+      cv = find_output(shader, position);
+   else
+      return;
+
+   /* insert CLIPDIST outputs: */
+   if (ucp_enables & 0x0f)
+      out[0] =
+         create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST0);
+   if (ucp_enables & 0xf0)
+      out[1] =
+         create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST1);
+
+   nir_foreach_overload(shader, overload) {
+      if (!strcmp(overload->function->name, "main"))
+         lower_clip_vs(overload->impl, ucp_enables, cv, out);
+   }
+}
+
+/*
+ * FS lowering
+ */
+
+static void
+lower_clip_fs(nir_function_impl *impl, unsigned ucp_enables,
+              nir_variable **in)
+{
+   nir_ssa_def *clipdist[MAX_CLIP_PLANES];
+   nir_builder b;
+
+   nir_builder_init(&b, impl);
+   b.cursor = nir_before_cf_list(&impl->body);
+
+   if (ucp_enables & 0x0f)
+      load_clipdist_input(&b, in[0], &clipdist[0]);
+   if (ucp_enables & 0xf0)
+      load_clipdist_input(&b, in[1], &clipdist[4]);
+
+   for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
+      if (ucp_enables & (1 << plane)) {
+         nir_intrinsic_instr *discard;
+         nir_ssa_def *cond;
+
+         cond = nir_flt(&b, clipdist[plane], nir_imm_float(&b, 0.0));
+
+         discard = nir_intrinsic_instr_create(b.shader,
+                                              nir_intrinsic_discard_if);
+         discard->src[0] = nir_src_for_ssa(cond);
+         nir_builder_instr_insert(&b, &discard->instr);
+      }
+   }
+}
+
+/* insert conditional kill based on interpolated CLIPDIST
+ */
+void
+nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables)
+{
+   nir_variable *in[2];
+   int maxloc = -1;
+
+   if (!ucp_enables)
+      return;
+
+   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+      int loc = var->data.driver_location;
+
+      /* keep track of the last used driver location; we'll be
+       * appending CLIP_DIST0/CLIP_DIST1 after the last existing
+       * input:
+       */
+      maxloc = MAX2(maxloc, loc);
+   }
+
+   /* The shader won't normally have CLIPDIST inputs, so we
+    * must add our own:
+    */
+   /* insert CLIPDIST inputs: */
+   if (ucp_enables & 0x0f)
+      in[0] =
+         create_clipdist_var(shader, ++maxloc, false,
+                             VARYING_SLOT_CLIP_DIST0);
+   if (ucp_enables & 0xf0)
+      in[1] =
+         create_clipdist_var(shader, ++maxloc, false,
+                             VARYING_SLOT_CLIP_DIST1);
+
+   nir_foreach_overload(shader, overload) {
+      if (!strcmp(overload->function->name, "main"))
+         lower_clip_fs(overload->impl, ucp_enables, in);
+   }
+}
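
A minimal usage sketch (illustrative, not from this patch): the same ucp_enables mask should be handed to both stages so the CLIP_DIST0/1 slots written by the vertex shader match the ones the fragment shader loads and tests:

#include "nir.h"

static void
lower_user_clip(nir_shader *vs, nir_shader *fs, unsigned ucp_enables)
{
   nir_lower_clip_vs(vs, ucp_enables);   /* emits CLIP_DIST0/1 outputs */
   nir_lower_clip_fs(fs, ucp_enables);   /* discards when clipdist < 0 */
}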
index 0cd8740..fab2366 100644 (file)
@@ -73,10 +73,11 @@ mark_global_var_uses_block(nir_block *block, void *void_state)
    return true;
 }
 
-void
+bool
 nir_lower_global_vars_to_local(nir_shader *shader)
 {
    struct global_to_local_state state;
+   bool progress = false;
 
    state.var_func_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                   _mesa_key_pointer_equal);
@@ -99,8 +100,11 @@ nir_lower_global_vars_to_local(nir_shader *shader)
          exec_node_remove(&var->node);
          var->data.mode = nir_var_local;
          exec_list_push_tail(&impl->locals, &var->node);
+         progress = true;
       }
    }
 
    _mesa_hash_table_destroy(state.var_func_table, NULL);
+
+   return progress;
 }
diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c b/src/glsl/nir/nir_lower_gs_intrinsics.c
new file mode 100644 (file)
index 0000000..2ee4e5c
--- /dev/null
@@ -0,0 +1,218 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * \file nir_lower_gs_intrinsics.c
+ *
+ * Geometry Shaders can call EmitVertex()/EmitStreamVertex() to output an
+ * arbitrary number of vertices.  However, the shader must declare the maximum
+ * number of vertices that it will ever output - further attempts to emit
+ * vertices result in undefined behavior according to the GLSL specification.
+ *
+ * Drivers might use this maximum number of vertices to allocate enough space
+ * to hold the geometry shader's output.  Some drivers (such as i965) need to
+ * implement "safety checks" which ensure that the shader hasn't emitted too
+ * many vertices, to avoid overflowing that space and trashing other memory.
+ *
+ * The count of emitted vertices can also be useful in buffer offset
+ * calculations, so drivers know where to write the GS output.
+ *
+ * However, for simple geometry shaders that emit a statically determinable
+ * number of vertices, this extra bookkeeping is unnecessary and inefficient.
+ * By tracking the vertex count in NIR, we allow constant folding/propagation
+ * and dead control flow optimizations to eliminate most of it where possible.
+ *
+ * This pass introduces a new global variable which stores the current vertex
+ * count (initialized to 0), and converts emit_vertex/end_primitive intrinsics
+ * to their *_with_counter variants.  emit_vertex is also wrapped in a safety
+ * check to avoid buffer overflows.  Finally, it adds a set_vertex_count
+ * intrinsic at the end of the program, informing the driver of the final
+ * vertex count.
+ */
+
+struct state {
+   nir_builder *builder;
+   nir_variable *vertex_count_var;
+   bool progress;
+};
+
+/**
+ * Replace emit_vertex intrinsics with:
+ *
+ * if (vertex_count < max_vertices) {
+ *    emit_vertex_with_counter vertex_count ...
+ *    vertex_count += 1
+ * }
+ */
+static void
+rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
+{
+   nir_builder *b = state->builder;
+
+   /* Load the vertex count */
+   b->cursor = nir_before_instr(&intrin->instr);
+   nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+   nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->gs.vertices_out);
+
+   /* Create: if (vertex_count < max_vertices) and insert it.
+    *
+    * The new if statement needs to be hooked up to the control flow graph
+    * before we start inserting instructions into it.
+    */
+   nir_if *if_stmt = nir_if_create(b->shader);
+   if_stmt->condition = nir_src_for_ssa(nir_ilt(b, count, max_vertices));
+   nir_builder_cf_insert(b, &if_stmt->cf_node);
+
+   /* Fill out the new then-block */
+   b->cursor = nir_after_cf_list(&if_stmt->then_list);
+
+   nir_intrinsic_instr *lowered =
+      nir_intrinsic_instr_create(b->shader,
+                                 nir_intrinsic_emit_vertex_with_counter);
+   lowered->const_index[0] = intrin->const_index[0];
+   lowered->src[0] = nir_src_for_ssa(count);
+   nir_builder_instr_insert(b, &lowered->instr);
+
+   /* Increment the vertex count by 1 */
+   nir_store_var(b, state->vertex_count_var,
+                 nir_iadd(b, count, nir_imm_int(b, 1)));
+
+   nir_instr_remove(&intrin->instr);
+
+   state->progress = true;
+}
+
+/**
+ * Replace end_primitive with end_primitive_with_counter.
+ */
+static void
+rewrite_end_primitive(nir_intrinsic_instr *intrin, struct state *state)
+{
+   nir_builder *b = state->builder;
+
+   b->cursor = nir_before_instr(&intrin->instr);
+   nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+   nir_intrinsic_instr *lowered =
+      nir_intrinsic_instr_create(b->shader,
+                                 nir_intrinsic_end_primitive_with_counter);
+   lowered->const_index[0] = intrin->const_index[0];
+   lowered->src[0] = nir_src_for_ssa(count);
+   nir_builder_instr_insert(b, &lowered->instr);
+
+   nir_instr_remove(&intrin->instr);
+
+   state->progress = true;
+}
+
+static bool
+rewrite_intrinsics(nir_block *block, void *closure)
+{
+   struct state *state = closure;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_emit_vertex:
+         rewrite_emit_vertex(intrin, state);
+         break;
+      case nir_intrinsic_end_primitive:
+         rewrite_end_primitive(intrin, state);
+         break;
+      default:
+         /* not interesting; skip this */
+         break;
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Add a set_vertex_count intrinsic at the end of the program
+ * (representing the final vertex count).
+ */
+static void
+append_set_vertex_count(nir_block *end_block, struct state *state)
+{
+   nir_builder *b = state->builder;
+   nir_shader *shader = state->builder->shader;
+
+   /* Insert the new intrinsic in all of the predecessors of the end block,
+    * but before any jump instructions (return).
+    */
+   struct set_entry *entry;
+   set_foreach(end_block->predecessors, entry) {
+      nir_block *pred = (nir_block *) entry->key;
+      b->cursor = nir_after_block_before_jump(pred);
+
+      nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+      nir_intrinsic_instr *set_vertex_count =
+         nir_intrinsic_instr_create(shader, nir_intrinsic_set_vertex_count);
+      set_vertex_count->src[0] = nir_src_for_ssa(count);
+
+      nir_builder_instr_insert(b, &set_vertex_count->instr);
+   }
+}
+
+bool
+nir_lower_gs_intrinsics(nir_shader *shader)
+{
+   struct state state;
+   state.progress = false;
+
+   /* Create the counter variable */
+   nir_variable *var = rzalloc(shader, nir_variable);
+   var->data.mode = nir_var_global;
+   var->type = glsl_uint_type();
+   var->name = "vertex_count";
+   var->constant_initializer = rzalloc(shader, nir_constant); /* initialize to 0 */
+
+   exec_list_push_tail(&shader->globals, &var->node);
+   state.vertex_count_var = var;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) {
+         nir_builder b;
+         nir_builder_init(&b, overload->impl);
+         state.builder = &b;
+
+         nir_foreach_block(overload->impl, rewrite_intrinsics, &state);
+
+         /* This only works because we have a single main() function. */
+         append_set_vertex_count(overload->impl->end_block, &state);
+
+         nir_metadata_preserve(overload->impl, 0);
+      }
+   }
+
+   return state.progress;
+}
index 0e1653d..c961178 100644 (file)
@@ -116,9 +116,7 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
    }
 
    assert(alu->dest.dest.is_ssa);
-   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
-                            nir_src_for_ssa(q),
-                            ralloc_parent(alu));
+   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(q));
 }
 
 static bool
index afb4630..9f79c56 100644 (file)
@@ -186,8 +186,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
             nir_ssa_dest_init(&load->instr, &load->dest,
                               intrin->num_components, NULL);
             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                     nir_src_for_ssa(&load->dest.ssa),
-                                     state->mem_ctx);
+                                     nir_src_for_ssa(&load->dest.ssa));
          } else {
             nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx);
          }
@@ -221,7 +220,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
 
          store->const_index[0] = offset;
 
-         nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
+         nir_src_copy(&store->src[0], &intrin->src[0], store);
 
          if (has_indirect)
             store->src[1] = indirect;
index b83ef05..84d0c14 100644 (file)
@@ -55,24 +55,10 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower)
    }
 
    /* Batch things back together into a vector. */
-   nir_ssa_def *vec;
-   switch (lower->def.num_components) {
-   case 2:
-      vec = nir_vec2(&b, loads[0], loads[1]);
-      break;
-   case 3:
-      vec = nir_vec3(&b, loads[0], loads[1], loads[2]);
-      break;
-   case 4:
-      vec = nir_vec4(&b, loads[0], loads[1], loads[2], loads[3]);
-      break;
-   default:
-      unreachable("Unknown load_const component count.");
-   }
+   nir_ssa_def *vec = nir_vec(&b, loads, lower->def.num_components);
 
    /* Replace the old load with a reference to our reconstructed vector. */
-   nir_ssa_def_rewrite_uses(&lower->def, nir_src_for_ssa(vec),
-                            ralloc_parent(b.impl));
+   nir_ssa_def_rewrite_uses(&lower->def, nir_src_for_ssa(vec));
    nir_instr_remove(&lower->instr);
 }
 
index 28fdec5..17b53ca 100644 (file)
@@ -40,6 +40,8 @@ struct locals_to_regs_state {
     * used to make adding register initialization code deterministic.
     */
    nir_array derefs_array;
+
+   bool progress;
 };
 
 /* The following two functions implement a hash and equality check for
@@ -183,8 +185,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
             nir_alu_instr *add = nir_alu_instr_create(state->shader,
                                                       nir_op_iadd);
             add->src[0].src = *src.reg.indirect;
-            nir_src_copy(&add->src[1].src, &deref_array->indirect,
-                         state->shader);
+            nir_src_copy(&add->src[1].src, &deref_array->indirect, add);
             add->dest.write_mask = 1;
             nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
             nir_instr_insert_before(instr, &add->instr);
@@ -222,14 +223,14 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
             nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
                               intrin->num_components, NULL);
             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                     nir_src_for_ssa(&mov->dest.dest.ssa),
-                                     state->shader);
+                                     nir_src_for_ssa(&mov->dest.dest.ssa));
          } else {
-            nir_dest_copy(&mov->dest.dest, &intrin->dest, state->shader);
+            nir_dest_copy(&mov->dest.dest, &intrin->dest, &mov->instr);
          }
          nir_instr_insert_before(&intrin->instr, &mov->instr);
 
          nir_instr_remove(&intrin->instr);
+         state->progress = true;
          break;
       }
 
@@ -241,7 +242,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
                                              &intrin->instr, state);
 
          nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
-         nir_src_copy(&mov->src[0].src, &intrin->src[0], state->shader);
+         nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
          mov->dest.write_mask = (1 << intrin->num_components) - 1;
          mov->dest.dest.is_ssa = false;
          mov->dest.dest.reg.reg = reg_src.reg.reg;
@@ -251,6 +252,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
          nir_instr_insert_before(&intrin->instr, &mov->instr);
 
          nir_instr_remove(&intrin->instr);
+         state->progress = true;
          break;
       }
 
@@ -338,15 +340,17 @@ insert_constant_initializer(nir_deref_var *deref_head, nir_deref *deref_tail,
    mov->dest.dest.reg.indirect = reg_src.reg.indirect;
 
    nir_instr_insert_after(&load->instr, &mov->instr);
+   state->progress = true;
 }
 
-static void
+static bool
 nir_lower_locals_to_regs_impl(nir_function_impl *impl)
 {
    struct locals_to_regs_state state;
 
    state.shader = impl->overload->function->shader;
    state.impl = impl;
+   state.progress = false;
    state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal);
    nir_array_init(&state.derefs_array, NULL);
 
@@ -374,13 +378,19 @@ nir_lower_locals_to_regs_impl(nir_function_impl *impl)
 
    nir_array_fini(&state.derefs_array);
    _mesa_hash_table_destroy(state.regs_table, NULL);
+
+   return state.progress;
 }
 
-void
+bool
 nir_lower_locals_to_regs(nir_shader *shader)
 {
+   bool progress = false;
+
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         nir_lower_locals_to_regs_impl(overload->impl);
+         progress = nir_lower_locals_to_regs_impl(overload->impl) || progress;
    }
+
+   return progress;
 }
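
With this and the other void-to-bool conversions in the series, callers can react when lowering actually changed the shader. A sketch of the idiom (the wrapper is illustrative; only passes converted in this diff are used):

#include "nir.h"

static void
lower_storage(nir_shader *shader)
{
   bool progress = false;
   progress |= nir_lower_global_vars_to_local(shader);
   progress |= nir_lower_locals_to_regs(shader);
   if (progress) {
      /* e.g. re-run copy propagation / DCE before code generation */
   }
}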
index 1a3e772..4ea5fd4 100644 (file)
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
- *
- * Authors:
- *    Jason Ekstrand (jason@jlekstrand.net)
- *
  */
 
 /*
 
 #include "nir.h"
 
+struct lower_outputs_state {
+   nir_shader *shader;
+   struct exec_list old_outputs;
+};
+
 static void
-emit_output_copies(nir_shader *shader, nir_variable *temp, nir_variable *output)
+emit_output_copies(nir_cursor cursor, struct lower_outputs_state *state)
 {
-   nir_foreach_overload(shader, overload) {
-      if (!overload->impl || strcmp(overload->function->name, "main"))
-         continue;
+   assert(exec_list_length(&state->shader->outputs) ==
+          exec_list_length(&state->old_outputs));
 
-      struct set_entry *block_entry;
-      set_foreach(overload->impl->end_block->predecessors, block_entry) {
-         struct nir_block *block = (void *)block_entry->key;
+   foreach_two_lists(out_node, &state->shader->outputs,
+                     temp_node, &state->old_outputs) {
+      nir_variable *output = exec_node_data(nir_variable, out_node, node);
+      nir_variable *temp = exec_node_data(nir_variable, temp_node, node);
 
-         nir_intrinsic_instr *copy =
-            nir_intrinsic_instr_create(shader, nir_intrinsic_copy_var);
-         copy->variables[0] = nir_deref_var_create(copy, output);
-         copy->variables[1] = nir_deref_var_create(copy, temp);
+      nir_intrinsic_instr *copy =
+         nir_intrinsic_instr_create(state->shader, nir_intrinsic_copy_var);
+      copy->variables[0] = nir_deref_var_create(copy, output);
+      copy->variables[1] = nir_deref_var_create(copy, temp);
 
-         nir_instr_insert(nir_after_block_before_jump(block), &copy->instr);
-      }
+      nir_instr_insert(cursor, &copy->instr);
    }
 }
 
+static bool
+emit_output_copies_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (intrin->intrinsic == nir_intrinsic_emit_vertex)
+         emit_output_copies(nir_before_instr(&intrin->instr), state);
+   }
+
+   return true;
+}
+
 void
 nir_lower_outputs_to_temporaries(nir_shader *shader)
 {
-   struct exec_list old_outputs;
+   struct lower_outputs_state state;
 
-   exec_list_move_nodes_to(&shader->outputs, &old_outputs);
+   state.shader = shader;
+   exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
 
    /* Walk over all of the outputs turn each output into a temporary and
     * make a new variable for the actual output.
     */
-   foreach_list_typed(nir_variable, var, node, &old_outputs) {
+   foreach_list_typed(nir_variable, var, node, &state.old_outputs) {
       nir_variable *output = ralloc(shader, nir_variable);
       memcpy(output, var, sizeof *output);
 
+      /* The original is now the temporary */
       nir_variable *temp = var;
 
-      /* Move the original name over to the new output */
-      if (output->name)
-         ralloc_steal(output, output->name);
+      /* Reparent the name to the new variable */
+      ralloc_steal(output, output->name);
 
       /* Give the output a new name with @out-temp appended */
       temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
@@ -85,9 +100,31 @@ nir_lower_outputs_to_temporaries(nir_shader *shader)
       temp->constant_initializer = NULL;
 
       exec_list_push_tail(&shader->outputs, &output->node);
+   }
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl == NULL)
+         continue;
+
+      if (shader->stage == MESA_SHADER_GEOMETRY) {
+         /* For geometry shaders, we have to emit the output copies right
+          * before each EmitVertex call.
+          */
+         nir_foreach_block(overload->impl, emit_output_copies_block, &state);
+      } else if (strcmp(overload->function->name, "main") == 0) {
+         /* For all other shader types, we need to do the copies right before
+          * the jumps to the end block.
+          */
+         struct set_entry *block_entry;
+         set_foreach(overload->impl->end_block->predecessors, block_entry) {
+            struct nir_block *block = (void *)block_entry->key;
+            emit_output_copies(nir_after_block_before_jump(block), &state);
+         }
+      }
 
-      emit_output_copies(shader, temp, output);
+      nir_metadata_preserve(overload->impl, nir_metadata_block_index |
+                                            nir_metadata_dominance);
    }
 
-   exec_list_append(&shader->globals, &old_outputs);
+   exec_list_append(&shader->globals, &state.old_outputs);
 }
index 739170d..aa124d9 100644 (file)
@@ -94,6 +94,8 @@ is_phi_src_scalarizable(nir_phi_src *src,
       case nir_intrinsic_load_uniform_indirect:
       case nir_intrinsic_load_ubo:
       case nir_intrinsic_load_ubo_indirect:
+      case nir_intrinsic_load_ssbo:
+      case nir_intrinsic_load_ssbo_indirect:
       case nir_intrinsic_load_input:
       case nir_intrinsic_load_input_indirect:
          return true;
@@ -242,8 +244,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
       nir_instr_insert_after(&last_phi->instr, &vec->instr);
 
       nir_ssa_def_rewrite_uses(&phi->dest.ssa,
-                               nir_src_for_ssa(&vec->dest.dest.ssa),
-                               state->mem_ctx);
+                               nir_src_for_ssa(&vec->dest.dest.ssa));
 
       ralloc_steal(state->dead_ctx, phi);
       nir_instr_remove(&phi->instr);
diff --git a/src/glsl/nir/nir_lower_samplers.c b/src/glsl/nir/nir_lower_samplers.c
new file mode 100644 (file)
index 0000000..33cd9c8
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "../program.h"
+#include "program/hash_table.h"
+#include "ir_uniform.h"
+
+#include "main/compiler.h"
+#include "main/mtypes.h"
+#include "program/prog_parameter.h"
+#include "program/program.h"
+
+static void
+add_indirect_to_tex(nir_tex_instr *instr, nir_src indirect)
+{
+   /* First, we have to resize the array of texture sources */
+   nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
+                                         instr->num_srcs + 1);
+
+   for (unsigned i = 0; i < instr->num_srcs; i++) {
+      new_srcs[i].src_type = instr->src[i].src_type;
+      nir_instr_move_src(&instr->instr, &new_srcs[i].src, &instr->src[i].src);
+   }
+
+   ralloc_free(instr->src);
+   instr->src = new_srcs;
+
+   /* Now we can go ahead and move the source over to being a
+    * first-class texture source.
+    */
+   instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+   instr->num_srcs++;
+   nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src,
+                         indirect);
+}
+
+/* Calculate the sampler index based on array indices and also
+ * calculate the base uniform location for struct members.
+ */
+static void
+calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr,
+                     unsigned *array_elements, nir_ssa_def **indirect,
+                     nir_builder *b, unsigned *location)
+{
+   if (tail->child == NULL)
+      return;
+
+   switch (tail->child->deref_type) {
+   case nir_deref_type_array: {
+      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+
+      assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
+
+      calc_sampler_offsets(tail->child, instr, array_elements,
+                           indirect, b, location);
+      instr->sampler_index += deref_array->base_offset * *array_elements;
+
+      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+         nir_ssa_def *mul =
+            nir_imul(b, nir_imm_int(b, *array_elements),
+                     nir_ssa_for_src(b, deref_array->indirect, 1));
+
+         nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
+                               NIR_SRC_INIT);
+
+         if (*indirect) {
+            *indirect = nir_iadd(b, *indirect, mul);
+         } else {
+            *indirect = mul;
+         }
+      }
+
+      *array_elements *= glsl_get_length(tail->type);
+      break;
+   }
+
+   case nir_deref_type_struct: {
+      nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child);
+      *location += glsl_get_record_location_offset(tail->type, deref_struct->index);
+      calc_sampler_offsets(tail->child, instr, array_elements,
+                           indirect, b, location);
+      break;
+   }
+
+   default:
+      unreachable("Invalid deref type");
+      break;
+   }
+}
+
+static void
+lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+              gl_shader_stage stage, nir_builder *builder)
+{
+   if (instr->sampler == NULL)
+      return;
+
+   instr->sampler_index = 0;
+   unsigned location = instr->sampler->var->data.location;
+   unsigned array_elements = 1;
+   nir_ssa_def *indirect = NULL;
+
+   builder->cursor = nir_before_instr(&instr->instr);
+   calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements,
+                        &indirect, builder, &location);
+
+   if (indirect) {
+      /* First, we have to resize the array of texture sources */
+      nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
+                                            instr->num_srcs + 1);
+
+      for (unsigned i = 0; i < instr->num_srcs; i++) {
+         new_srcs[i].src_type = instr->src[i].src_type;
+         nir_instr_move_src(&instr->instr, &new_srcs[i].src,
+                            &instr->src[i].src);
+      }
+
+      ralloc_free(instr->src);
+      instr->src = new_srcs;
+
+      /* Now we can go ahead and move the source over to being a
+       * first-class texture source.
+       */
+      instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+      instr->num_srcs++;
+      nir_instr_rewrite_src(&instr->instr,
+                            &instr->src[instr->num_srcs - 1].src,
+                            nir_src_for_ssa(indirect));
+
+      instr->sampler_array_size = array_elements;
+   }
+
+   if (location > shader_program->NumUniformStorage - 1 ||
+       !shader_program->UniformStorage[location].sampler[stage].active) {
+      assert(!"cannot return a sampler");
+      return;
+   }
+
+   instr->sampler_index +=
+      shader_program->UniformStorage[location].sampler[stage].index;
+
+   instr->sampler = NULL;
+}
+
+typedef struct {
+   nir_builder builder;
+   const struct gl_shader_program *shader_program;
+   gl_shader_stage stage;
+} lower_state;
+
+static bool
+lower_block_cb(nir_block *block, void *_state)
+{
+   lower_state *state = (lower_state *) _state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_tex) {
+         nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
+         lower_sampler(tex_instr, state->shader_program, state->stage,
+                       &state->builder);
+      }
+   }
+
+   return true;
+}
+
+static void
+lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+           gl_shader_stage stage)
+{
+   lower_state state;
+
+   nir_builder_init(&state.builder, impl);
+   state.shader_program = shader_program;
+   state.stage = stage;
+
+   nir_foreach_block(impl, lower_block_cb, &state);
+}
+
+void
+nir_lower_samplers(nir_shader *shader,
+                   const struct gl_shader_program *shader_program)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         lower_impl(overload->impl, shader_program, shader->stage);
+   }
+}
+
+static bool
+lower_samplers_for_vk_block(nir_block *block, void *data)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_tex)
+         continue;
+
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+      assert(tex->sampler);
+
+      tex->sampler_set = tex->sampler->var->data.descriptor_set;
+      tex->sampler_index = tex->sampler->var->data.binding;
+
+      if (tex->sampler->deref.child) {
+         assert(tex->sampler->deref.child->deref_type == nir_deref_type_array);
+         nir_deref_array *arr = nir_deref_as_array(tex->sampler->deref.child);
+
+         /* Only one-level arrays are allowed in Vulkan */
+         assert(arr->deref.child == NULL);
+
+         tex->sampler_index += arr->base_offset;
+         if (arr->deref_array_type == nir_deref_array_type_indirect) {
+            add_indirect_to_tex(tex, arr->indirect);
+            nir_instr_rewrite_src(instr, &arr->indirect, NIR_SRC_INIT);
+
+            tex->sampler_array_size = glsl_get_length(tex->sampler->deref.type);
+         }
+      }
+
+      tex->sampler = NULL;
+   }
+
+   return true;
+}
+
+void
+nir_lower_samplers_for_vk(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) {
+         nir_foreach_block(overload->impl, lower_samplers_for_vk_block, NULL);
+      }
+   }
+}
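
The two entry points added above target different APIs; a hypothetical dispatch helper makes the split explicit (the helper name is invented):

#include "nir.h"
#include "main/mtypes.h"

/* GL needs the gl_shader_program to translate uniform locations into
 * sampler indices; the Vulkan path only reads the descriptor set and
 * binding stored on the variable.
 */
static void
lower_samplers_for_api(nir_shader *shader,
                       const struct gl_shader_program *prog,
                       bool is_vulkan)
{
   if (is_vulkan)
      nir_lower_samplers_for_vk(shader);
   else
      nir_lower_samplers(shader, prog);
}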
index a6eec65..d77bb2f 100644 (file)
 #include "nir.h"
 #include "main/mtypes.h"
 
-static void
+static bool
 convert_instr(nir_intrinsic_instr *instr)
 {
    if (instr->intrinsic != nir_intrinsic_load_var)
-      return;
+      return false;
 
    nir_variable *var = instr->variables[0]->var;
    if (var->data.mode != nir_var_system_value)
-      return;
+      return false;
 
    void *mem_ctx = ralloc_parent(instr);
 
-   nir_intrinsic_op op;
-
-   switch (var->data.location) {
-   case SYSTEM_VALUE_FRONT_FACE:
-      op = nir_intrinsic_load_front_face;
-      break;
-   case SYSTEM_VALUE_VERTEX_ID:
-      op = nir_intrinsic_load_vertex_id;
-      break;
-   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
-      op = nir_intrinsic_load_vertex_id_zero_base;
-      break;
-   case SYSTEM_VALUE_BASE_VERTEX:
-      op = nir_intrinsic_load_base_vertex;
-      break;
-   case SYSTEM_VALUE_INSTANCE_ID:
-      op = nir_intrinsic_load_instance_id;
-      break;
-   case SYSTEM_VALUE_SAMPLE_ID:
-      op = nir_intrinsic_load_sample_id;
-      break;
-   case SYSTEM_VALUE_SAMPLE_POS:
-      op = nir_intrinsic_load_sample_pos;
-      break;
-   case SYSTEM_VALUE_SAMPLE_MASK_IN:
-      op = nir_intrinsic_load_sample_mask_in;
-      break;
-   case SYSTEM_VALUE_INVOCATION_ID:
-      op = nir_intrinsic_load_invocation_id;
-      break;
-   default:
-      unreachable("not reached");
-   }
-
+   nir_intrinsic_op op = nir_intrinsic_from_system_value(var->data.location);
    nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
 
    if (instr->dest.is_ssa) {
       nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
                         instr->dest.ssa.num_components, NULL);
       nir_ssa_def_rewrite_uses(&instr->dest.ssa,
-                               nir_src_for_ssa(&new_instr->dest.ssa),
-                               mem_ctx);
+                               nir_src_for_ssa(&new_instr->dest.ssa));
    } else {
       nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
    }
 
    nir_instr_insert_before(&instr->instr, &new_instr->instr);
    nir_instr_remove(&instr->instr);
+
+   return true;
 }
 
 static bool
 convert_block(nir_block *block, void *state)
 {
-   (void) state;
+   bool *progress = state;
 
    nir_foreach_instr_safe(block, instr) {
       if (instr->type == nir_instr_type_intrinsic)
-         convert_instr(nir_instr_as_intrinsic(instr));
+         *progress = convert_instr(nir_instr_as_intrinsic(instr)) || *progress;
    }
 
    return true;
 }
 
-static void
+static bool
 convert_impl(nir_function_impl *impl)
 {
-   nir_foreach_block(impl, convert_block, NULL);
+   bool progress = false;
+
+   nir_foreach_block(impl, convert_block, &progress);
    nir_metadata_preserve(impl, nir_metadata_block_index |
                                nir_metadata_dominance);
+   return progress;
 }
 
-void
+bool
 nir_lower_system_values(nir_shader *shader)
 {
+   bool progress = false;
+
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         convert_impl(overload->impl);
+         progress = convert_impl(overload->impl) || progress;
    }
 
    exec_list_make_empty(&shader->system_values);
+
+   return progress;
 }
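
As elsewhere in this series, the pass now reports whether it changed anything; a caller sketch (illustrative only):

#include "nir.h"

static void
lower_and_cleanup(nir_shader *shader)
{
   /* Converts load_var of system-value variables into the dedicated
    * load_* intrinsics declared in nir_intrinsics.h.
    */
   if (nir_lower_system_values(shader)) {
      /* progress: dependent optimization passes are worth re-running */
   }
}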
diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
new file mode 100644 (file)
index 0000000..8aaa48a
--- /dev/null
@@ -0,0 +1,276 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * This lowering pass supports (as configured via nir_lower_tex_options)
+ * various texture-related conversions:
+ *   + texture projector lowering: converts the coordinate division for
+ *     texture projection to be done in ALU instructions instead of
+ *     asking the texture operation to do so.
+ *   + lowering RECT: converts the un-normalized RECT texture coordinates
+ *     to normalized coordinates with txs plus ALU instructions
+ *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
+ *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
+ *     Note that this automatically triggers texture projector lowering if
+ *     needed, since clamping must happen after projector lowering.
+ */
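+
+/* As an illustrative sketch (hypothetical values, not from any particular
+ * shader): lowering a projected 2D lookup
+ *
+ *    vec4 c = texture2DProj(s, vec4(u, v, 0.0, q));
+ *
+ * amounts to emitting an frcp and an fmul ahead of the texture instruction,
+ *
+ *    inv_q = frcp(q);
+ *    c = texture2D(s, vec2(u, v) * inv_q);
+ *
+ * after which the projector source is removed from the tex instruction.
+ */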
+
+#include "nir.h"
+#include "nir_builder.h"
+
+typedef struct {
+   nir_builder b;
+   const nir_lower_tex_options *options;
+} lower_tex_state;
+
+static void
+project_src(nir_builder *b, nir_tex_instr *tex)
+{
+   /* Find the projector in the srcs list, if present. */
+   unsigned proj_index;
+   for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
+      if (tex->src[proj_index].src_type == nir_tex_src_projector)
+         break;
+   }
+   if (proj_index == tex->num_srcs)
+      return;
+
+   b->cursor = nir_before_instr(&tex->instr);
+
+   nir_ssa_def *inv_proj =
+      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
+
+   /* Walk through the sources projecting the arguments. */
+   for (unsigned i = 0; i < tex->num_srcs; i++) {
+      switch (tex->src[i].src_type) {
+      case nir_tex_src_coord:
+      case nir_tex_src_comparitor:
+         break;
+      default:
+         continue;
+      }
+      nir_ssa_def *unprojected =
+         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
+
+      /* Array indices don't get projected, so make a new vector with the
+       * coordinate's array index untouched.
+       */
+      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
+         switch (tex->coord_components) {
+         case 4:
+            projected = nir_vec4(b,
+                                 nir_channel(b, projected, 0),
+                                 nir_channel(b, projected, 1),
+                                 nir_channel(b, projected, 2),
+                                 nir_channel(b, unprojected, 3));
+            break;
+         case 3:
+            projected = nir_vec3(b,
+                                 nir_channel(b, projected, 0),
+                                 nir_channel(b, projected, 1),
+                                 nir_channel(b, unprojected, 2));
+            break;
+         case 2:
+            projected = nir_vec2(b,
+                                 nir_channel(b, projected, 0),
+                                 nir_channel(b, unprojected, 1));
+            break;
+         default:
+            unreachable("bad texture coord count for array");
+            break;
+         }
+      }
+
+      nir_instr_rewrite_src(&tex->instr,
+                            &tex->src[i].src,
+                            nir_src_for_ssa(projected));
+   }
+
+   /* Now move the later tex sources down the array so that the projector
+    * disappears.
+    */
+   nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
+                         NIR_SRC_INIT);
+   for (unsigned i = proj_index + 1; i < tex->num_srcs; i++) {
+      tex->src[i-1].src_type = tex->src[i].src_type;
+      nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
+   }
+   tex->num_srcs--;
+}
+
+static nir_ssa_def *
+get_texture_size(nir_builder *b, nir_tex_instr *tex)
+{
+   b->cursor = nir_before_instr(&tex->instr);
+
+   /* RECT textures should not be arrays: */
+   assert(!tex->is_array);
+
+   nir_tex_instr *txs;
+
+   txs = nir_tex_instr_create(b->shader, 1);
+   txs->op = nir_texop_txs;
+   txs->sampler_dim = GLSL_SAMPLER_DIM_RECT;
+   txs->sampler_index = tex->sampler_index;
+
+   /* only single src, the lod: */
+   txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
+   txs->src[0].src_type = nir_tex_src_lod;
+
+   nir_ssa_dest_init(&txs->instr, &txs->dest, 2, NULL);
+   nir_builder_instr_insert(b, &txs->instr);
+
+   return nir_i2f(b, &txs->dest.ssa);
+}
+
+static void
+lower_rect(nir_builder *b, nir_tex_instr *tex)
+{
+   nir_ssa_def *txs = get_texture_size(b, tex);
+   nir_ssa_def *scale = nir_frcp(b, txs);
+
+   /* Walk through the sources normalizing the requested arguments. */
+   for (unsigned i = 0; i < tex->num_srcs; i++) {
+      if (tex->src[i].src_type != nir_tex_src_coord)
+         continue;
+
+      nir_ssa_def *coords =
+         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
+      nir_instr_rewrite_src(&tex->instr,
+                            &tex->src[i].src,
+                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
+   }
+
+   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+}
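+
+/* A sketch of the effect (sizes hypothetical): a RECT fetch at texel (x, y)
+ * of a WxH texture becomes a normalized 2D fetch at (x * 1/W, y * 1/H),
+ * where W and H come from the txs emitted by get_texture_size() and the
+ * reciprocals from frcp; sampler_dim is then rewritten to
+ * GLSL_SAMPLER_DIM_2D.
+ */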
+
+static void
+saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
+{
+   b->cursor = nir_before_instr(&tex->instr);
+
+   /* Walk through the sources saturating the requested arguments. */
+   for (unsigned i = 0; i < tex->num_srcs; i++) {
+      if (tex->src[i].src_type != nir_tex_src_coord)
+         continue;
+
+      nir_ssa_def *src =
+         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
+
+      /* split src into components: */
+      nir_ssa_def *comp[4];
+
+      for (unsigned j = 0; j < tex->coord_components; j++)
+         comp[j] = nir_channel(b, src, j);
+
+      /* clamp requested components, array index does not get clamped: */
+      unsigned ncomp = tex->coord_components;
+      if (tex->is_array)
+         ncomp--;
+
+      for (unsigned j = 0; j < ncomp; j++) {
+         if ((1 << j) & sat_mask) {
+            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
+               /* non-normalized texture coords, so clamp to texture
+                * size rather than [0.0, 1.0]
+                */
+               nir_ssa_def *txs = get_texture_size(b, tex);
+               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
+               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
+            } else {
+               comp[j] = nir_fsat(b, comp[j]);
+            }
+         }
+      }
+
+      /* and move the result back into a single vecN: */
+      src = nir_vec(b, comp, tex->coord_components);
+
+      nir_instr_rewrite_src(&tex->instr,
+                            &tex->src[i].src,
+                            nir_src_for_ssa(src));
+   }
+}
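+
+/* A sketch of the effect: with saturate_s and saturate_t set for a sampler,
+ * a normalized coordinate (s, t) becomes (fsat(s), fsat(t)); for a RECT
+ * sampler the clamp is to [0, W] and [0, H] instead, using the txs size.
+ * Array indices are never clamped.
+ */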
+
+static bool
+nir_lower_tex_block(nir_block *block, void *void_state)
+{
+   lower_tex_state *state = void_state;
+   nir_builder *b = &state->b;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_tex)
+         continue;
+
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      bool lower_txp = !!(state->options->lower_txp & (1 << tex->sampler_dim));
+
+      /* mask of src coords to saturate (clamp): */
+      unsigned sat_mask = 0;
+
+      if ((1 << tex->sampler_index) & state->options->saturate_r)
+         sat_mask |= (1 << 2);    /* .z */
+      if ((1 << tex->sampler_index) & state->options->saturate_t)
+         sat_mask |= (1 << 1);    /* .y */
+      if ((1 << tex->sampler_index) & state->options->saturate_s)
+         sat_mask |= (1 << 0);    /* .x */
+
+      /* If we are clamping any coords, we must lower projector first
+       * as clamping happens *after* projection:
+       */
+      if (lower_txp || sat_mask)
+         project_src(b, tex);
+
+      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) &&
+          state->options->lower_rect)
+         lower_rect(b, tex);
+
+      if (sat_mask)
+         saturate_src(b, tex, sat_mask);
+   }
+
+   return true;
+}
+
+static void
+nir_lower_tex_impl(nir_function_impl *impl, lower_tex_state *state)
+{
+   nir_builder_init(&state->b, impl);
+
+   nir_foreach_block(impl, nir_lower_tex_block, state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+void
+nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
+{
+   lower_tex_state state;
+   state.options = options;
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_lower_tex_impl(overload->impl, &state);
+   }
+}
diff --git a/src/glsl/nir/nir_lower_tex_projector.c b/src/glsl/nir/nir_lower_tex_projector.c
deleted file mode 100644 (file)
index 8a482b1..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright © 2015 Broadcom
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/*
- * This lowering pass converts the coordinate division for texture projection
- * to be done in ALU instructions instead of asking the texture operation to
- * do so.
- */
-
-#include "nir.h"
-#include "nir_builder.h"
-
-static nir_ssa_def *
-channel(nir_builder *b, nir_ssa_def *def, int c)
-{
-   return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
-}
-
-static bool
-nir_lower_tex_projector_block(nir_block *block, void *void_state)
-{
-   nir_builder *b = void_state;
-
-   nir_foreach_instr_safe(block, instr) {
-      if (instr->type != nir_instr_type_tex)
-         continue;
-
-      nir_tex_instr *tex = nir_instr_as_tex(instr);
-      b->cursor = nir_before_instr(&tex->instr);
-
-      /* Find the projector in the srcs list, if present. */
-      int proj_index;
-      for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
-         if (tex->src[proj_index].src_type == nir_tex_src_projector)
-            break;
-      }
-      if (proj_index == tex->num_srcs)
-         continue;
-      nir_ssa_def *inv_proj =
-         nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
-
-      /* Walk through the sources projecting the arguments. */
-      for (int i = 0; i < tex->num_srcs; i++) {
-         switch (tex->src[i].src_type) {
-         case nir_tex_src_coord:
-         case nir_tex_src_comparitor:
-            break;
-         default:
-            continue;
-         }
-         nir_ssa_def *unprojected =
-            nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
-         nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
-
-         /* Array indices don't get projected, so make an new vector with the
-          * coordinate's array index untouched.
-          */
-         if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
-            switch (tex->coord_components) {
-            case 4:
-               projected = nir_vec4(b,
-                                    channel(b, projected, 0),
-                                    channel(b, projected, 1),
-                                    channel(b, projected, 2),
-                                    channel(b, unprojected, 3));
-               break;
-            case 3:
-               projected = nir_vec3(b,
-                                    channel(b, projected, 0),
-                                    channel(b, projected, 1),
-                                    channel(b, unprojected, 2));
-               break;
-            case 2:
-               projected = nir_vec2(b,
-                                    channel(b, projected, 0),
-                                    channel(b, unprojected, 1));
-               break;
-            default:
-               unreachable("bad texture coord count for array");
-               break;
-            }
-         }
-
-         nir_instr_rewrite_src(&tex->instr,
-                               &tex->src[i].src,
-                               nir_src_for_ssa(projected));
-      }
-
-      /* Now move the later tex sources down the array so that the projector
-       * disappears.
-       */
-      nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
-                            NIR_SRC_INIT);
-      for (int i = proj_index + 1; i < tex->num_srcs; i++) {
-         tex->src[i-1].src_type = tex->src[i].src_type;
-         nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
-      }
-      tex->num_srcs--;
-   }
-
-   return true;
-}
-
-static void
-nir_lower_tex_projector_impl(nir_function_impl *impl)
-{
-   nir_builder b;
-   nir_builder_init(&b, impl);
-
-   nir_foreach_block(impl, nir_lower_tex_projector_block, &b);
-
-   nir_metadata_preserve(impl, nir_metadata_block_index |
-                               nir_metadata_dominance);
-}
-
-void
-nir_lower_tex_projector(nir_shader *shader)
-{
-   nir_foreach_overload(shader, overload) {
-      if (overload->impl)
-         nir_lower_tex_projector_impl(overload->impl);
-   }
-}
diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c
new file mode 100644 (file)
index 0000000..131feef
--- /dev/null
@@ -0,0 +1,208 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#define MAX_COLORS 2  /* VARYING_SLOT_COL0/COL1 */
+
+typedef struct {
+   nir_builder   b;
+   nir_shader   *shader;
+   nir_variable *face;
+   struct {
+      nir_variable *front;        /* COLn */
+      nir_variable *back;         /* BFCn */
+   } colors[MAX_COLORS];
+   int colors_count;
+} lower_2side_state;
+
+
+/* Lowering pass for fragment shaders to emulate two-sided color.  For
+ * each COLOR input, a corresponding BCOLOR input is created, and a bcsel
+ * instruction is used to select the front or back color based on FACE.
+ */
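+
+/* Concretely (a sketch; slots are gl_varying_slot values), a read of
+ * VARYING_SLOT_COL0 becomes
+ *
+ *    face = load_input(FACE).x
+ *    col0 = bcsel(face < 0.0, load_input(BFC0), load_input(COL0))
+ *
+ * where the BFCn inputs are created by this pass, and a FACE input is added
+ * if the shader does not already have one.
+ */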
+
+static nir_variable *
+create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot)
+{
+   nir_variable *var = rzalloc(shader, nir_variable);
+
+   var->data.driver_location = drvloc;
+   var->type = glsl_vec4_type();
+   var->data.mode = nir_var_shader_in;
+   var->name = ralloc_asprintf(var, "in_%d", drvloc);
+   var->data.index = 0;
+   var->data.location = slot;
+
+   exec_list_push_tail(&shader->inputs, &var->node);
+
+   return var;
+}
+
+static nir_ssa_def *
+load_input(nir_builder *b, nir_variable *in)
+{
+   nir_intrinsic_instr *load;
+
+   load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+   load->num_components = 4;
+   load->const_index[0] = in->data.driver_location;
+   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+   nir_builder_instr_insert(b, &load->instr);
+
+   return &load->dest.ssa;
+}
+
+static int
+setup_inputs(lower_2side_state *state)
+{
+   int maxloc = -1;
+
+   /* find color/face inputs: */
+   foreach_list_typed(nir_variable, var, node, &state->shader->inputs) {
+      int loc = var->data.driver_location;
+
+      /* keep track of the last used driver-location; we'll be
+       * appending BFCn/FACE after the last existing input:
+       */
+      maxloc = MAX2(maxloc, loc);
+
+      switch (var->data.location) {
+      case VARYING_SLOT_COL0:
+      case VARYING_SLOT_COL1:
+         assert(state->colors_count < ARRAY_SIZE(state->colors));
+         state->colors[state->colors_count].front = var;
+         state->colors_count++;
+         break;
+      case VARYING_SLOT_FACE:
+         state->face = var;
+         break;
+      }
+   }
+
+   /* if we don't have any color inputs, nothing to do: */
+   if (state->colors_count == 0)
+      return -1;
+
+   /* if we don't already have one, insert a FACE input: */
+   if (!state->face) {
+      state->face = create_input(state->shader, ++maxloc, VARYING_SLOT_FACE);
+      state->face->data.interpolation = INTERP_QUALIFIER_FLAT;
+   }
+
+   /* add required back-face color inputs: */
+   for (int i = 0; i < state->colors_count; i++) {
+      gl_varying_slot slot;
+
+      if (state->colors[i].front->data.location == VARYING_SLOT_COL0)
+         slot = VARYING_SLOT_BFC0;
+      else
+         slot = VARYING_SLOT_BFC1;
+
+      state->colors[i].back = create_input(state->shader, ++maxloc, slot);
+   }
+
+   return 0;
+}
+
+static bool
+nir_lower_two_sided_color_block(nir_block *block, void *void_state)
+{
+   lower_2side_state *state = void_state;
+   nir_builder *b = &state->b;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+      if (intr->intrinsic != nir_intrinsic_load_input)
+         continue;
+
+      int idx;
+      for (idx = 0; idx < state->colors_count; idx++) {
+         unsigned drvloc =
+            state->colors[idx].front->data.driver_location;
+         if (intr->const_index[0] == drvloc) {
+            break;
+         }
+      }
+
+      if (idx == state->colors_count)
+         continue;
+
+      /* replace load_input(COLn) with
+       * bcsel(load_input(FACE) < 0, load_input(BFCn), load_input(COLn))
+       */
+      b->cursor = nir_before_instr(&intr->instr);
+      nir_ssa_def *face  = nir_channel(b, load_input(b, state->face), 0);
+      nir_ssa_def *front = load_input(b, state->colors[idx].front);
+      nir_ssa_def *back  = load_input(b, state->colors[idx].back);
+      nir_ssa_def *cond  = nir_flt(b, face, nir_imm_float(b, 0.0));
+      nir_ssa_def *color = nir_bcsel(b, cond, back, front);
+
+      assert(intr->dest.is_ssa);
+      nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(color));
+   }
+
+   return true;
+}
+
+static void
+nir_lower_two_sided_color_impl(nir_function_impl *impl,
+                               lower_2side_state *state)
+{
+   nir_builder *b = &state->b;
+
+   nir_builder_init(b, impl);
+
+   nir_foreach_block(impl, nir_lower_two_sided_color_block, state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+void
+nir_lower_two_sided_color(nir_shader *shader)
+{
+   lower_2side_state state = {
+      .shader = shader,
+   };
+
+   if (shader->stage != MESA_SHADER_FRAGMENT)
+      return;
+
+   if (setup_inputs(&state) != 0)
+      return;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_lower_two_sided_color_impl(overload->impl, &state);
+   }
+}
index 4ff2166..5971507 100644 (file)
@@ -625,8 +625,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
                nir_instr_remove(&intrin->instr);
 
                nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                        nir_src_for_ssa(&undef->def),
-                                        state->shader);
+                                        nir_src_for_ssa(&undef->def));
                continue;
             }
 
@@ -650,8 +649,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
             nir_instr_remove(&intrin->instr);
 
             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                     nir_src_for_ssa(&mov->dest.dest.ssa),
-                                     state->shader);
+                                     nir_src_for_ssa(&mov->dest.dest.ssa));
             break;
          }
 
index e6d522f..c08b721 100644 (file)
  * moves with partial writes.
  */
 
+struct vec_to_movs_state {
+   nir_function_impl *impl;
+   bool progress;
+};
+
 static bool
 src_matches_dest_reg(nir_dest *dest, nir_src *src)
 {
@@ -53,39 +58,167 @@ src_matches_dest_reg(nir_dest *dest, nir_src *src)
  * which ones have been processed.
  */
 static unsigned
-insert_mov(nir_alu_instr *vec, unsigned start_channel,
-            unsigned start_src_idx, void *mem_ctx)
+insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
 {
-   unsigned src_idx = start_src_idx;
-   assert(src_idx < nir_op_infos[vec->op].num_inputs);
+   assert(start_idx < nir_op_infos[vec->op].num_inputs);
 
-   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
-   nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mem_ctx);
-   nir_alu_dest_copy(&mov->dest, &vec->dest, mem_ctx);
+   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
+   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
+   nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
 
-   mov->dest.write_mask = (1u << start_channel);
-   mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0];
-   src_idx++;
+   mov->dest.write_mask = (1u << start_idx);
+   mov->src[0].swizzle[start_idx] = vec->src[start_idx].swizzle[0];
+   mov->src[0].negate = vec->src[start_idx].negate;
+   mov->src[0].abs = vec->src[start_idx].abs;
 
-   for (unsigned i = start_channel + 1; i < 4; i++) {
+   for (unsigned i = start_idx + 1; i < 4; i++) {
       if (!(vec->dest.write_mask & (1 << i)))
          continue;
 
-      if (nir_srcs_equal(vec->src[src_idx].src, vec->src[start_src_idx].src)) {
+      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
+          vec->src[i].negate == vec->src[start_idx].negate &&
+          vec->src[i].abs == vec->src[start_idx].abs) {
          mov->dest.write_mask |= (1 << i);
-         mov->src[0].swizzle[i] = vec->src[src_idx].swizzle[0];
+         mov->src[0].swizzle[i] = vec->src[i].swizzle[0];
       }
-      src_idx++;
    }
 
-   nir_instr_insert_before(&vec->instr, &mov->instr);
+   /* In some situations (if the vecN is involved in a phi-web), we can end
+    * up with a mov from a register to itself.  Some of those channels may end
+    * up doing nothing and there's no reason to have them as part of the mov.
+    */
+   if (src_matches_dest_reg(&mov->dest.dest, &mov->src[0].src) &&
+       !mov->src[0].abs && !mov->src[0].negate) {
+      for (unsigned i = 0; i < 4; i++) {
+         if (mov->src[0].swizzle[i] == i) {
+            mov->dest.write_mask &= ~(1 << i);
+         }
+      }
+   }
+
+   /* Stash the write mask: if the mov does nothing we free it below, and we
+    * must not read it after that.
+    */
+   unsigned channels_handled = mov->dest.write_mask;
+
+   /* Only emit the instruction if it actually does something */
+   if (channels_handled) {
+      nir_instr_insert_before(&vec->instr, &mov->instr);
+   } else {
+      ralloc_free(mov);
+   }
 
-   return mov->dest.write_mask;
+   return channels_handled;
 }
 
 static bool
-lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
+has_replicated_dest(nir_alu_instr *alu)
+{
+   return alu->op == nir_op_fdot_replicated2 ||
+          alu->op == nir_op_fdot_replicated3 ||
+          alu->op == nir_op_fdot_replicated4 ||
+          alu->op == nir_op_fdph_replicated;
+}
+
+/* Attempts to coalesce the "move" from the given source of the vec to the
+ * destination of the instruction generating the value. If, for whatever
+ * reason, we cannot coalesce the move, it does nothing and returns 0.  We
+ * can then call insert_mov as normal.
+ */
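+/* For example (a hypothetical snippet): if channel x of a vec4 comes from
+ *
+ *    ssa_1 = fadd ssa_a, ssa_b
+ *
+ * and ssa_1 has no other uses, we rewrite the fadd to write channel x of
+ * the vec4's destination register directly instead of emitting an imov for
+ * it.  This is only legal when there are no source modifiers and the
+ * generating instruction is per-component (or has a replicated destination).
+ */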
+static unsigned
+try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
+{
+   assert(start_idx < nir_op_infos[vec->op].num_inputs);
+
+   /* We will only even try if the source is SSA */
+   if (!vec->src[start_idx].src.is_ssa)
+      return 0;
+
+   assert(vec->src[start_idx].src.ssa);
+
+   /* If we are going to do a reswizzle, then the vecN operation must be the
+    * only use of the source value.  We also can't have any source modifiers.
+    */
+   nir_foreach_use(vec->src[start_idx].src.ssa, src) {
+      if (src->parent_instr != &vec->instr)
+         return 0;
+
+      nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
+      if (alu_src->abs || alu_src->negate)
+         return 0;
+   }
+
+   if (!list_empty(&vec->src[start_idx].src.ssa->if_uses))
+      return 0;
+
+   if (vec->src[start_idx].src.ssa->parent_instr->type != nir_instr_type_alu)
+      return 0;
+
+   nir_alu_instr *src_alu =
+      nir_instr_as_alu(vec->src[start_idx].src.ssa->parent_instr);
+
+   if (has_replicated_dest(src_alu)) {
+      /* The fdot instruction is special: It replicates its result to all
+       * components.  This means that we can always rewrite its destination
+       * and we don't need to swizzle anything.
+       */
+   } else {
+      /* If we are going to re-swizzle the instruction, it must be something
+       * we can actually re-swizzle, i.e. per-component.  The one exception
+       * is the fdotN instructions, which implicitly splat their result out
+       * to all channels and are handled above.
+       */
+      if (nir_op_infos[src_alu->op].output_size != 0)
+         return 0;
+
+      /* If we are going to reswizzle the instruction, we can't have any
+       * non-per-component sources either.
+       */
+      for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+         if (nir_op_infos[src_alu->op].input_sizes[j] != 0)
+            return 0;
+   }
+
+   /* Stash off all of the ALU instruction's swizzles. */
+   uint8_t swizzles[4][4];
+   for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+      for (unsigned i = 0; i < 4; i++)
+         swizzles[j][i] = src_alu->src[j].swizzle[i];
+
+   unsigned write_mask = 0;
+   for (unsigned i = start_idx; i < 4; i++) {
+      if (!(vec->dest.write_mask & (1 << i)))
+         continue;
+
+      if (!vec->src[i].src.is_ssa ||
+          vec->src[i].src.ssa != &src_alu->dest.dest.ssa)
+         continue;
+
+      /* At this point, the given vec source matches up with the ALU
+       * instruction so we can re-swizzle that component to match.
+       */
+      write_mask |= 1 << i;
+      if (has_replicated_dest(src_alu)) {
+         /* Since the destination is a single replicated value, we don't need
+          * to do any reswizzling
+          */
+      } else {
+         for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+            src_alu->src[j].swizzle[i] = swizzles[j][vec->src[i].swizzle[0]];
+      }
+
+      /* Clear the no longer needed vec source */
+      nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, NIR_SRC_INIT);
+   }
+
+   nir_instr_rewrite_dest(&src_alu->instr, &src_alu->dest.dest, vec->dest.dest);
+   src_alu->dest.write_mask = write_mask;
+
+   return write_mask;
+}
+
+static bool
+lower_vec_to_movs_block(nir_block *block, void *void_state)
 {
+   struct vec_to_movs_state *state = void_state;
+   nir_function_impl *impl = state->impl;
+   nir_shader *shader = impl->overload->function->shader;
+
    nir_foreach_instr_safe(block, instr) {
       if (instr->type != nir_instr_type_alu)
          continue;
@@ -101,8 +234,16 @@ lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
          continue; /* The loop */
       }
 
-      /* Since we insert multiple MOVs, we have to be non-SSA. */
-      assert(!vec->dest.dest.is_ssa);
+      if (vec->dest.dest.is_ssa) {
+         /* Since we insert multiple MOVs, we need a register destination. */
+         nir_register *reg = nir_local_reg_create(impl);
+         reg->num_components = vec->dest.dest.ssa.num_components;
+
+         nir_ssa_def_rewrite_uses(&vec->dest.dest.ssa, nir_src_for_reg(reg));
+
+         nir_instr_rewrite_dest(&vec->instr, &vec->dest.dest,
+                                nir_dest_for_reg(reg));
+      }
 
       unsigned finished_write_mask = 0;
 
@@ -110,46 +251,55 @@ lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
        * destination reg, in case other values we're populating in the dest
        * might overwrite them.
        */
-      for (unsigned i = 0, src_idx = 0; i < 4; i++) {
+      for (unsigned i = 0; i < 4; i++) {
          if (!(vec->dest.write_mask & (1 << i)))
             continue;
 
-         if (src_matches_dest_reg(&vec->dest.dest, &vec->src[src_idx].src)) {
-            finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
+         if (src_matches_dest_reg(&vec->dest.dest, &vec->src[i].src)) {
+            finished_write_mask |= insert_mov(vec, i, shader);
             break;
          }
-         src_idx++;
       }
 
       /* Now, emit MOVs for all the other src channels. */
-      for (unsigned i = 0, src_idx = 0; i < 4; i++) {
+      for (unsigned i = 0; i < 4; i++) {
          if (!(vec->dest.write_mask & (1 << i)))
             continue;
 
          if (!(finished_write_mask & (1 << i)))
-            finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
+            finished_write_mask |= try_coalesce(vec, i, shader);
 
-         src_idx++;
+         if (!(finished_write_mask & (1 << i)))
+            finished_write_mask |= insert_mov(vec, i, shader);
       }
 
       nir_instr_remove(&vec->instr);
       ralloc_free(vec);
+      state->progress = true;
    }
 
    return true;
 }
 
-static void
+static bool
 nir_lower_vec_to_movs_impl(nir_function_impl *impl)
 {
-   nir_foreach_block(impl, lower_vec_to_movs_block, ralloc_parent(impl));
+   struct vec_to_movs_state state = { impl, false };
+
+   nir_foreach_block(impl, lower_vec_to_movs_block, &state);
+
+   return state.progress;
 }
 
-void
+bool
 nir_lower_vec_to_movs(nir_shader *shader)
 {
+   bool progress = false;
+
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         nir_lower_vec_to_movs_impl(overload->impl);
+         progress = nir_lower_vec_to_movs_impl(overload->impl) || progress;
    }
+
+   return progress;
 }
diff --git a/src/glsl/nir/nir_move_vec_src_uses_to_dest.c b/src/glsl/nir/nir_move_vec_src_uses_to_dest.c
new file mode 100644 (file)
index 0000000..4c9032d
--- /dev/null
@@ -0,0 +1,197 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a pass that tries to move uses of vecN sources to their
+ * destinations.  This is kind of like an inverse copy-propagation pass.
+ * For instance, if you have
+ *
+ * ssa_1 = vec4(a, b, c, d)
+ * ssa_2 = fadd(a, b)
+ *
+ * This will be turned into
+ *
+ * ssa_1 = vec4(a, b, c, d)
+ * ssa_2 = fadd(ssa_1.x, ssa_1.y)
+ *
+ * While this is "worse" because it adds a bunch of unneeded dependencies, it
+ * actually makes it much easier for vec4-based backends to coalesce the MOV's
+ * that result from the vec4 operation because it doesn't have to worry about
+ * quite as many reads.
+ */
+
+/* Returns true if the given SSA def dominates the instruction.  An SSA def is
+ * considered to *not* dominate the instruction that defines it.
+ */
+static bool
+ssa_def_dominates_instr(nir_ssa_def *def, nir_instr *instr)
+{
+   if (instr->index <= def->parent_instr->index) {
+      return false;
+   } else if (def->parent_instr->block == instr->block) {
+      return def->parent_instr->index < instr->index;
+   } else {
+      return nir_block_dominates(def->parent_instr->block, instr->block);
+   }
+}
+
+static bool
+move_vec_src_uses_to_dest_block(nir_block *block, void *shader)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *vec = nir_instr_as_alu(instr);
+
+      switch (vec->op) {
+      case nir_op_vec2:
+      case nir_op_vec3:
+      case nir_op_vec4:
+         break;
+      default:
+         continue; /* The loop */
+      }
+
+      /* Can't handle non-SSA vec operations */
+      if (!vec->dest.dest.is_ssa)
+         continue;
+
+      /* Can't handle saturation */
+      if (vec->dest.saturate)
+         continue;
+
+      /* First, mark all of the sources we are going to consider for rewriting
+       * to the destination
+       */
+      int srcs_remaining = 0;
+      for (unsigned i = 0; i < nir_op_infos[vec->op].num_inputs; i++) {
+         /* We can't rewrite a source if it's not in SSA form */
+         if (!vec->src[i].src.is_ssa)
+            continue;
+
+         /* We can't rewrite a source if it has modifiers */
+         if (vec->src[i].abs || vec->src[i].negate)
+            continue;
+
+         srcs_remaining |= 1 << i;
+      }
+
+      /* We can't actually do anything with this instruction */
+      if (srcs_remaining == 0)
+         continue;
+
+      /* Walk the set bits of srcs_remaining, lowest first: the comma
+       * expression assigns i = ffs(...) - 1 before testing whether any
+       * source bits remain.
+       */
+      for (unsigned i; i = ffs(srcs_remaining) - 1, srcs_remaining;) {
+         int8_t swizzle[4] = { -1, -1, -1, -1 };
+
+         for (unsigned j = i; j < nir_op_infos[vec->op].num_inputs; j++) {
+            if (vec->src[j].src.ssa != vec->src[i].src.ssa)
+               continue;
+
+            /* Mark the given channel as having been handled */
+            srcs_remaining &= ~(1 << j);
+
+            /* Mark the appropriate channel as coming from src j */
+            swizzle[vec->src[j].swizzle[0]] = j;
+         }
+
+         nir_foreach_use_safe(vec->src[i].src.ssa, use) {
+            if (use->parent_instr == &vec->instr)
+               continue;
+
+            /* We need to dominate the use if we are going to rewrite it */
+            if (!ssa_def_dominates_instr(&vec->dest.dest.ssa, use->parent_instr))
+               continue;
+
+            /* For now, we'll just rewrite ALU instructions */
+            if (use->parent_instr->type != nir_instr_type_alu)
+               continue;
+
+            assert(use->is_ssa);
+
+            nir_alu_instr *use_alu = nir_instr_as_alu(use->parent_instr);
+
+            /* Figure out which source we're actually looking at */
+            nir_alu_src *use_alu_src = exec_node_data(nir_alu_src, use, src);
+            unsigned src_idx = use_alu_src - use_alu->src;
+            assert(src_idx < nir_op_infos[use_alu->op].num_inputs);
+
+            bool can_reswizzle = true;
+            for (unsigned j = 0; j < 4; j++) {
+               if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
+                  continue;
+
+               if (swizzle[use_alu_src->swizzle[j]] == -1) {
+                  can_reswizzle = false;
+                  break;
+               }
+            }
+
+            if (!can_reswizzle)
+               continue;
+
+            /* At this point, we have determined that the given use can be
+             * reswizzled to actually use the destination of the vecN operation.
+             * Go ahead and rewrite it as needed.
+             */
+            nir_instr_rewrite_src(use->parent_instr, use,
+                                  nir_src_for_ssa(&vec->dest.dest.ssa));
+            for (unsigned j = 0; j < 4; j++) {
+               if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
+                  continue;
+
+               use_alu_src->swizzle[j] = swizzle[use_alu_src->swizzle[j]];
+            }
+         }
+      }
+   }
+
+   return true;
+}
+
+static void
+nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl)
+{
+   nir_metadata_require(impl, nir_metadata_dominance);
+
+   nir_index_instrs(impl);
+   nir_foreach_block(impl, move_vec_src_uses_to_dest_block, shader);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+void
+nir_move_vec_src_uses_to_dest(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_move_vec_src_uses_to_dest_impl(shader, overload->impl);
+   }
+}
index 75b647f..7385576 100644 (file)
  * or 1.0.  This is based on the old GLSL IR based pass by Eric.
  */
 
-static nir_ssa_def *
-channel(nir_builder *b, nir_ssa_def *def, int c)
-{
-   return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
-}
+struct normalize_cubemap_state {
+   nir_builder b;
+   bool progress;
+};
 
 static bool
 normalize_cubemap_coords_block(nir_block *block, void *void_state)
 {
-   nir_builder *b = void_state;
+   struct normalize_cubemap_state *state = void_state;
+   nir_builder *b = &state->b;
 
    nir_foreach_instr(block, instr) {
       if (instr->type != nir_instr_type_tex)
@@ -63,9 +63,9 @@ normalize_cubemap_coords_block(nir_block *block, void *void_state)
          assert(orig_coord->num_components >= 3);
 
          nir_ssa_def *abs = nir_fabs(b, orig_coord);
-         nir_ssa_def *norm = nir_fmax(b, channel(b, abs, 0),
-                                         nir_fmax(b, channel(b, abs, 1),
-                                                     channel(b, abs, 2)));
+         nir_ssa_def *norm = nir_fmax(b, nir_channel(b, abs, 0),
+                                         nir_fmax(b, nir_channel(b, abs, 1),
+                                                     nir_channel(b, abs, 2)));
 
          nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm));
 
@@ -74,37 +74,47 @@ normalize_cubemap_coords_block(nir_block *block, void *void_state)
           */
          if (tex->coord_components == 4) {
             normalized = nir_vec4(b,
-                                  channel(b, normalized, 0),
-                                  channel(b, normalized, 1),
-                                  channel(b, normalized, 2),
-                                  channel(b, orig_coord, 3));
+                                  nir_channel(b, normalized, 0),
+                                  nir_channel(b, normalized, 1),
+                                  nir_channel(b, normalized, 2),
+                                  nir_channel(b, orig_coord, 3));
          }
 
          nir_instr_rewrite_src(&tex->instr,
                                &tex->src[i].src,
                                nir_src_for_ssa(normalized));
+
+         state->progress = true;
       }
    }
 
    return true;
 }
 
-static void
+static bool
 normalize_cubemap_coords_impl(nir_function_impl *impl)
 {
-   nir_builder b;
-   nir_builder_init(&b, impl);
+   struct normalize_cubemap_state state;
+   nir_builder_init(&state.b, impl);
+   state.progress = false;
 
-   nir_foreach_block(impl, normalize_cubemap_coords_block, &b);
+   nir_foreach_block(impl, normalize_cubemap_coords_block, &state);
 
    nir_metadata_preserve(impl, nir_metadata_block_index |
                                nir_metadata_dominance);
+
+   return state.progress;
 }
 
-void
+bool
 nir_normalize_cubemap_coords(nir_shader *shader)
 {
-   nir_foreach_overload(shader, overload)
+   bool progress = false;
+
+   nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         normalize_cubemap_coords_impl(overload->impl);
+         progress = normalize_cubemap_coords_impl(overload->impl) || progress;
+   }
+
+   return progress;
 }
index df5b7e2..f2d584f 100644 (file)
@@ -453,6 +453,14 @@ binop("fxor", tfloat, commutative,
 binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
              "{src}")
 
+binop_reduce("fdot_replicated", 4, tfloat, tfloat,
+             "{src0} * {src1}", "{src0} + {src1}", "{src}")
+
+opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], "",
+       "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "",
+       "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+
 binop("fmin", tfloat, "", "fminf(src0, src1)")
 binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
 binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
index 880408b..cafbd6d 100644 (file)
@@ -77,6 +77,7 @@ optimizations = [
    (('flrp', a, a, b), a),
    (('flrp', 0.0, a, b), ('fmul', a, b)),
    (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+   (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
    (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
    (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
    (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
@@ -241,6 +242,10 @@ late_optimizations = [
    (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
    (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
    (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+   (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+   (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+   (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
+   (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
 ]
 
 print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
index 85c09fc..007b81c 100644 (file)
@@ -80,8 +80,8 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
 
    nir_instr_insert_before(&instr->instr, &new_instr->instr);
 
-   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(&new_instr->def),
-                            mem_ctx);
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+                            nir_src_for_ssa(&new_instr->def));
 
    nir_instr_remove(&instr->instr);
    ralloc_free(instr);
index 864795c..64c94af 100644 (file)
@@ -272,8 +272,7 @@ nir_opt_cse_instr(nir_instr *instr, struct cse_state *state)
       if (nir_instrs_equal(instr, other)) {
          nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
          nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
-                                  nir_src_for_ssa(other_def),
-                                  state->mem_ctx);
+                                  nir_src_for_ssa(other_def));
          nir_instr_remove(instr);
          state->progress = true;
          return;
@@ -286,8 +285,7 @@ nir_opt_cse_instr(nir_instr *instr, struct cse_state *state)
          if (nir_instrs_equal(instr, other)) {
             nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
             nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
-                                     nir_src_for_ssa(other_def),
-                                     state->mem_ctx);
+                                     nir_src_for_ssa(other_def));
             nir_instr_remove(instr);
             state->progress = true;
             return;
diff --git a/src/glsl/nir/nir_opt_dead_cf.c b/src/glsl/nir/nir_opt_dead_cf.c
new file mode 100644 (file)
index 0000000..0d4819b
--- /dev/null
@@ -0,0 +1,358 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include "nir_control_flow.h"
+
+/*
+ * This file implements an optimization that deletes statically
+ * unreachable/dead code. In NIR, one way this can happen is if an if
+ * statement has a constant condition:
+ *
+ * if (true) {
+ *    ...
+ * }
+ *
+ * We delete the if statement and paste the contents of the always-executed
+ * branch into the surrounding control flow, possibly removing more code if
+ * the branch had a jump at the end.
+ *
+ * Another way is that control flow can end in a jump so that code after it
+ * never gets executed. In particular, this can happen after optimizing
+ * something like:
+ *
+ * if (true) {
+ *    ...
+ *    break;
+ * }
+ * ...
+ *
+ * We also consider the case where both branches of an if end in a jump, e.g.:
+ *
+ * if (...) {
+ *    break;
+ * } else {
+ *    continue;
+ * }
+ * ...
+ *
+ * Finally, we also handle removing useless loops, i.e. loops with no side
+ * effects and without any definitions that are used elsewhere. This case is a
+ * little different from the first two in that the code is actually run (it
+ * just never does anything), but there are similar issues with needing to
+ * be careful with restarting after deleting the cf_node (see dead_cf_list())
+ * so this is a convenient place to remove them.
+ */
+
+static void
+remove_after_cf_node(nir_cf_node *node)
+{
+   nir_cf_node *end = node;
+   while (!nir_cf_node_is_last(end))
+      end = nir_cf_node_next(end);
+
+   nir_cf_list list;
+   nir_cf_extract(&list, nir_after_cf_node(node), nir_after_cf_node(end));
+   nir_cf_delete(&list);
+}
+
+static void
+opt_constant_if(nir_if *if_stmt, bool condition)
+{
+   /* First, we need to remove any phi nodes after the if by rewriting uses to
+    * point to the correct source.
+    */
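+   /* E.g. (a sketch): given "if (true) { ... } else { ... }" followed by
+    * "ssa_4 = phi ssa_then, ssa_else", every use of ssa_4 is rewritten to
+    * use ssa_then, the source coming from the branch that is statically
+    * taken, and the phi itself is removed below.
+    */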
+   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&if_stmt->cf_node));
+   nir_block *last_block =
+      nir_cf_node_as_block(condition ? nir_if_last_then_node(if_stmt)
+                                     : nir_if_last_else_node(if_stmt));
+
+   nir_foreach_instr_safe(after, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_ssa_def *def = NULL;
+      nir_foreach_phi_src(phi, phi_src) {
+         if (phi_src->pred != last_block)
+            continue;
+
+         assert(phi_src->src.is_ssa);
+         def = phi_src->src.ssa;
+      }
+
+      assert(def);
+      assert(phi->dest.is_ssa);
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
+      nir_instr_remove(instr);
+   }
+
+   /* The control flow list we're about to paste in may include a jump at the
+    * end, and in that case we have to delete the rest of the control flow
+    * list after the if since it's unreachable and the validator will balk if
+    * we don't.
+    */
+
+   if (!exec_list_is_empty(&last_block->instr_list)) {
+      nir_instr *last_instr = nir_block_last_instr(last_block);
+      if (last_instr->type == nir_instr_type_jump)
+         remove_after_cf_node(&if_stmt->cf_node);
+   }
+
+   /* Finally, actually paste in the then or else branch and delete the if. */
+   struct exec_list *cf_list = condition ? &if_stmt->then_list
+                                         : &if_stmt->else_list;
+
+   nir_cf_list list;
+   nir_cf_extract(&list, nir_before_cf_list(cf_list),
+                  nir_after_cf_list(cf_list));
+   nir_cf_reinsert(&list, nir_after_cf_node(&if_stmt->cf_node));
+   nir_cf_node_remove(&if_stmt->cf_node);
+}
+
+static bool
+block_has_no_side_effects(nir_block *block, void *state)
+{
+   (void) state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_call)
+         return false;
+
+      /* Return instructions can cause us to skip over other side-effecting
+       * instructions after the loop, so consider them to have side effects
+       * here.
+       */
+
+      if (instr->type == nir_instr_type_jump &&
+          nir_instr_as_jump(instr)->type == nir_jump_return)
+         return false;
+
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (!(nir_intrinsic_infos[intrin->intrinsic].flags &
+            NIR_INTRINSIC_CAN_ELIMINATE))
+         return false;
+   }
+
+   return true;
+}
+
+static bool
+def_not_live_out(nir_ssa_def *def, void *state)
+{
+   nir_block *after = state;
+
+   return !BITSET_TEST(after->live_in, def->live_index);
+}
+
+/*
+ * Test if a loop is dead. A loop is dead if:
+ *
+ * 1) It has no side effects (i.e. intrinsics which could possibly affect the
+ * state of the program aside from producing an SSA value, indicated by a lack
+ * of NIR_INTRINSIC_CAN_ELIMINATE).
+ *
+ * 2) It has no phi nodes after it, since those indicate values inside the
+ * loop being used after the loop.
+ *
+ * 3) If there are no phi nodes after the loop, then the only way a value
+ * defined inside the loop can be used outside the loop is if its definition
+ * dominates the block after the loop. If none of the definitions that
+ * dominate the loop exit are used outside the loop, then the loop is dead
+ * and it can be deleted.
+ */
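+
+/* For instance (a sketch): a loop whose body only computes SSA values that
+ * are never used after the loop,
+ *
+ *    loop {
+ *       ssa_1 = fadd ssa_0, ssa_0
+ *       if ssa_cond { break; }
+ *    }
+ *
+ * satisfies all three conditions and is removed, even though its body would
+ * otherwise execute at run time.
+ */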
+
+static bool
+loop_is_dead(nir_loop *loop)
+{
+   nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
+   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
+
+   if (!exec_list_is_empty(&after->instr_list) &&
+       nir_block_first_instr(after)->type == nir_instr_type_phi)
+      return false;
+
+   if (!nir_foreach_block_in_cf_node(&loop->cf_node, block_has_no_side_effects,
+                                     NULL))
+      return false;
+
+   nir_function_impl *impl = nir_cf_node_get_function(&loop->cf_node);
+   nir_metadata_require(impl, nir_metadata_live_variables |
+                              nir_metadata_dominance);
+
+   for (nir_block *cur = after->imm_dom; cur != before; cur = cur->imm_dom) {
+      nir_foreach_instr(cur, instr) {
+         if (!nir_foreach_ssa_def(instr, def_not_live_out, after))
+            return false;
+      }
+   }
+
+   return true;
+}
+
+static bool
+dead_cf_block(nir_block *block)
+{
+   nir_if *following_if = nir_block_get_following_if(block);
+   if (following_if) {
+      nir_const_value *const_value =
+         nir_src_as_const_value(following_if->condition);
+
+      if (!const_value)
+         return false;
+
+      opt_constant_if(following_if, const_value->u[0] != 0);
+      return true;
+   }
+
+   nir_loop *following_loop = nir_block_get_following_loop(block);
+   if (!following_loop)
+      return false;
+
+   if (!loop_is_dead(following_loop))
+      return false;
+
+   nir_cf_node_remove(&following_loop->cf_node);
+   return true;
+}
+
+static bool
+ends_in_jump(nir_block *block)
+{
+   if (exec_list_is_empty(&block->instr_list))
+      return false;
+
+   nir_instr *instr = nir_block_last_instr(block);
+   return instr->type == nir_instr_type_jump;
+}
+
+static bool
+dead_cf_list(struct exec_list *list, bool *list_ends_in_jump)
+{
+   bool progress = false;
+   *list_ends_in_jump = false;
+
+   nir_cf_node *prev = NULL;
+
+   foreach_list_typed(nir_cf_node, cur, node, list) {
+      switch (cur->type) {
+      case nir_cf_node_block: {
+         nir_block *block = nir_cf_node_as_block(cur);
+         if (dead_cf_block(block)) {
+            /* We just deleted the if or loop after this block, so we may have
+             * deleted the block before or after it -- which one is an
+             * implementation detail. Therefore, to recover the place we were
+             * at, we have to use the previous cf_node.
+             */
+
+            if (prev) {
+               cur = nir_cf_node_next(prev);
+            } else {
+               cur = exec_node_data(nir_cf_node, exec_list_get_head(list),
+                                    node);
+            }
+
+            block = nir_cf_node_as_block(cur);
+
+            progress = true;
+         }
+
+         if (ends_in_jump(block)) {
+            *list_ends_in_jump = true;
+
+            if (!exec_node_is_tail_sentinel(cur->node.next)) {
+               remove_after_cf_node(cur);
+               return true;
+            }
+         }
+
+         break;
+      }
+
+      case nir_cf_node_if: {
+         nir_if *if_stmt = nir_cf_node_as_if(cur);
+         bool then_ends_in_jump, else_ends_in_jump;
+         progress |= dead_cf_list(&if_stmt->then_list, &then_ends_in_jump);
+         progress |= dead_cf_list(&if_stmt->else_list, &else_ends_in_jump);
+
+         if (then_ends_in_jump && else_ends_in_jump) {
+            *list_ends_in_jump = true;
+            nir_block *next = nir_cf_node_as_block(nir_cf_node_next(cur));
+            if (!exec_list_is_empty(&next->instr_list) ||
+                !exec_node_is_tail_sentinel(next->cf_node.node.next)) {
+               remove_after_cf_node(cur);
+               return true;
+            }
+         }
+
+         break;
+      }
+
+      case nir_cf_node_loop: {
+         nir_loop *loop = nir_cf_node_as_loop(cur);
+         bool dummy;
+         progress |= dead_cf_list(&loop->body, &dummy);
+
+         break;
+      }
+
+      default:
+         unreachable("unknown cf node type");
+      }
+
+      prev = cur;
+   }
+
+   return progress;
+}
+
+static bool
+opt_dead_cf_impl(nir_function_impl *impl)
+{
+   bool dummy;
+   bool progress = dead_cf_list(&impl->body, &dummy);
+
+   if (progress)
+      nir_metadata_preserve(impl, nir_metadata_none);
+
+   return progress;
+}
+
+bool
+nir_opt_dead_cf(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_overload(shader, overload)
+      if (overload->impl)
+         progress |= opt_dead_cf_impl(overload->impl);
+
+   return progress;
+}
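
Like the other nir_opt_* entry points touched in this series, nir_opt_dead_cf() reports whether it made progress, so callers can run it to a fixed point. A minimal sketch of such a driver loop (the optimize() wrapper itself is hypothetical, not part of this patch):

#include "nir.h"

/* Hypothetical wrapper: repeat until no pass makes further progress. */
static void
optimize(nir_shader *shader)
{
   bool progress;
   do {
      progress = false;
      progress |= nir_opt_dead_cf(shader);
      /* ... other bool-returning nir_opt_* passes would go here ... */
   } while (progress);
}
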
index a823adb..4f0f0da 100644 (file)
@@ -127,7 +127,7 @@ get_mul_for_src(nir_alu_src *src, int num_components,
     *   If we reuse swizzle in the loop, then output swizzle would be zyzz.
     */
    memcpy(swizzle_tmp, swizzle, 4*sizeof(uint8_t));
-   for (unsigned i = 0; i < num_components; i++)
+   for (int i = 0; i < num_components; i++)
       swizzle[i] = swizzle_tmp[src->swizzle[i]];
 
    return alu;
@@ -216,8 +216,7 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
          for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
             ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
       }
-      nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src],
-                       state->mem_ctx);
+      nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src], ffma);
 
       assert(add->dest.dest.is_ssa);
 
@@ -225,8 +224,7 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
                         add->dest.dest.ssa.num_components,
                         add->dest.dest.ssa.name);
       nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
-                               nir_src_for_ssa(&ffma->dest.dest.ssa),
-                               state->mem_ctx);
+                               nir_src_for_ssa(&ffma->dest.dest.ssa));
 
       nir_instr_insert_before(&add->instr, &ffma->instr);
       assert(list_empty(&add->dest.dest.ssa.uses));
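
The mechanical change in this hunk, and in the select and phi hunks that follow, is that nir_ssa_def_rewrite_uses() no longer takes an explicit ralloc context, and nir_alu_src_copy()/nir_src_copy() now take the owning instruction in its place. Before and after, taken directly from the diff:

/* Before: every caller had to thread a mem_ctx through. */
nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
                         nir_src_for_ssa(&ffma->dest.dest.ssa),
                         state->mem_ctx);

/* After: the context is recovered internally -- one fewer parameter,
 * and one fewer way for callers to pass the wrong context. */
nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
                         nir_src_for_ssa(&ffma->dest.dest.ssa));
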
index 26ec4ed..90902b9 100644 (file)
@@ -196,7 +196,7 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
 
       nir_phi_instr *phi = nir_instr_as_phi(instr);
       nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel);
-      nir_src_copy(&sel->src[0].src, &if_stmt->condition, state->mem_ctx);
+      nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel);
       /* Splat the condition to all channels */
       memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
 
@@ -206,7 +206,7 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
          assert(src->src.is_ssa);
 
          unsigned idx = src->pred == then_block ? 1 : 2;
-         nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx);
+         nir_src_copy(&sel->src[idx].src, &src->src, sel);
       }
 
       nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
@@ -214,8 +214,7 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
       sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
 
       nir_ssa_def_rewrite_uses(&phi->dest.ssa,
-                               nir_src_for_ssa(&sel->dest.dest.ssa),
-                               state->mem_ctx);
+                               nir_src_for_ssa(&sel->dest.dest.ssa));
 
       nir_instr_insert_before(&phi->instr, &sel->instr);
       nir_instr_remove(&phi->instr);
index 7896584..bf4a67e 100644 (file)
@@ -47,8 +47,6 @@ remove_phis_block(nir_block *block, void *state)
 {
    bool *progress = state;
 
-   void *mem_ctx = ralloc_parent(block);
-
    nir_foreach_instr_safe(block, instr) {
       if (instr->type != nir_instr_type_phi)
          break;
@@ -75,8 +73,7 @@ remove_phis_block(nir_block *block, void *state)
          continue;
 
       assert(phi->dest.is_ssa);
-      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def),
-                               mem_ctx);
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
       nir_instr_remove(instr);
 
       *progress = true;
index f591c4b..a19aa8b 100644 (file)
@@ -26,6 +26,7 @@
  */
 
 #include "nir.h"
+#include "shader_enums.h"
 #include <stdio.h>
 #include <stdlib.h>
 
@@ -37,6 +38,8 @@ print_tabs(unsigned num_tabs, FILE *fp)
 }
 
 typedef struct {
+   FILE *fp;
+   nir_shader *shader;
    /** map from nir_variable -> printable name */
    struct hash_table *ht;
 
@@ -45,11 +48,12 @@ typedef struct {
 
    /* an index used to make new non-conflicting names */
    unsigned index;
-} print_var_state;
+} print_state;
 
 static void
-print_register(nir_register *reg, FILE *fp)
+print_register(nir_register *reg, print_state *state)
 {
+   FILE *fp = state->fp;
    if (reg->name != NULL)
       fprintf(fp, "/* %s */ ", reg->name);
    if (reg->is_global)
@@ -61,90 +65,97 @@ print_register(nir_register *reg, FILE *fp)
 static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" };
 
 static void
-print_register_decl(nir_register *reg, FILE *fp)
+print_register_decl(nir_register *reg, print_state *state)
 {
+   FILE *fp = state->fp;
    fprintf(fp, "decl_reg %s ", sizes[reg->num_components]);
    if (reg->is_packed)
       fprintf(fp, "(packed) ");
-   print_register(reg, fp);
+   print_register(reg, state);
    if (reg->num_array_elems != 0)
       fprintf(fp, "[%u]", reg->num_array_elems);
    fprintf(fp, "\n");
 }
 
 static void
-print_ssa_def(nir_ssa_def *def, FILE *fp)
+print_ssa_def(nir_ssa_def *def, print_state *state)
 {
+   FILE *fp = state->fp;
    if (def->name != NULL)
       fprintf(fp, "/* %s */ ", def->name);
    fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index);
 }
 
 static void
-print_ssa_use(nir_ssa_def *def, FILE *fp)
+print_ssa_use(nir_ssa_def *def, print_state *state)
 {
+   FILE *fp = state->fp;
    if (def->name != NULL)
       fprintf(fp, "/* %s */ ", def->name);
    fprintf(fp, "ssa_%u", def->index);
 }
 
-static void print_src(nir_src *src, FILE *fp);
+static void print_src(nir_src *src, print_state *state);
 
 static void
-print_reg_src(nir_reg_src *src, FILE *fp)
+print_reg_src(nir_reg_src *src, print_state *state)
 {
-   print_register(src->reg, fp);
+   FILE *fp = state->fp;
+   print_register(src->reg, state);
    if (src->reg->num_array_elems != 0) {
       fprintf(fp, "[%u", src->base_offset);
       if (src->indirect != NULL) {
          fprintf(fp, " + ");
-         print_src(src->indirect, fp);
+         print_src(src->indirect, state);
       }
       fprintf(fp, "]");
    }
 }
 
 static void
-print_reg_dest(nir_reg_dest *dest, FILE *fp)
+print_reg_dest(nir_reg_dest *dest, print_state *state)
 {
-   print_register(dest->reg, fp);
+   FILE *fp = state->fp;
+   print_register(dest->reg, state);
    if (dest->reg->num_array_elems != 0) {
       fprintf(fp, "[%u", dest->base_offset);
       if (dest->indirect != NULL) {
          fprintf(fp, " + ");
-         print_src(dest->indirect, fp);
+         print_src(dest->indirect, state);
       }
       fprintf(fp, "]");
    }
 }
 
 static void
-print_src(nir_src *src, FILE *fp)
+print_src(nir_src *src, print_state *state)
 {
    if (src->is_ssa)
-      print_ssa_use(src->ssa, fp);
+      print_ssa_use(src->ssa, state);
    else
-      print_reg_src(&src->reg, fp);
+      print_reg_src(&src->reg, state);
 }
 
 static void
-print_dest(nir_dest *dest, FILE *fp)
+print_dest(nir_dest *dest, print_state *state)
 {
    if (dest->is_ssa)
-      print_ssa_def(&dest->ssa, fp);
+      print_ssa_def(&dest->ssa, state);
    else
-      print_reg_dest(&dest->reg, fp);
+      print_reg_dest(&dest->reg, state);
 }
 
 static void
-print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp)
+print_alu_src(nir_alu_instr *instr, unsigned src, print_state *state)
 {
+   FILE *fp = state->fp;
+
    if (instr->src[src].negate)
       fprintf(fp, "-");
    if (instr->src[src].abs)
       fprintf(fp, "abs(");
 
-   print_src(&instr->src[src].src, fp);
+   print_src(&instr->src[src].src, state);
 
    bool print_swizzle = false;
    for (unsigned i = 0; i < 4; i++) {
@@ -172,11 +183,12 @@ print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp)
 }
 
 static void
-print_alu_dest(nir_alu_dest *dest, FILE *fp)
+print_alu_dest(nir_alu_dest *dest, print_state *state)
 {
+   FILE *fp = state->fp;
    /* we're going to print the saturate modifier later, after the opcode */
 
-   print_dest(&dest->dest, fp);
+   print_dest(&dest->dest, state);
 
    if (!dest->dest.is_ssa &&
        dest->write_mask != (1 << dest->dest.reg.reg->num_components) - 1) {
@@ -188,9 +200,11 @@ print_alu_dest(nir_alu_dest *dest, FILE *fp)
 }
 
 static void
-print_alu_instr(nir_alu_instr *instr, FILE *fp)
+print_alu_instr(nir_alu_instr *instr, print_state *state)
 {
-   print_alu_dest(&instr->dest, fp);
+   FILE *fp = state->fp;
+
+   print_alu_dest(&instr->dest, state);
 
    fprintf(fp, " = %s", nir_op_infos[instr->op].name);
    if (instr->dest.saturate)
@@ -201,13 +215,15 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp)
       if (i != 0)
          fprintf(fp, ", ");
 
-      print_alu_src(instr, i, fp);
+      print_alu_src(instr, i, state);
    }
 }
 
 static void
-print_var_decl(nir_variable *var, print_var_state *state, FILE *fp)
+print_var_decl(nir_variable *var, print_state *state)
 {
+   FILE *fp = state->fp;
+
    fprintf(fp, "decl_var ");
 
    const char *const cent = (var->data.centroid) ? "centroid " : "";
@@ -215,15 +231,15 @@ print_var_decl(nir_variable *var, print_var_state *state, FILE *fp)
    const char *const inv = (var->data.invariant) ? "invariant " : "";
    const char *const mode[] = { "shader_in ", "shader_out ", "", "",
                                 "uniform ", "shader_storage", "system " };
-   const char *const interp[] = { "", "smooth", "flat", "noperspective" };
 
    fprintf(fp, "%s%s%s%s%s ",
-      cent, samp, inv, mode[var->data.mode], interp[var->data.interpolation]);
+      cent, samp, inv, mode[var->data.mode],
+      glsl_interp_qualifier_name(var->data.interpolation));
 
    glsl_print_type(var->type, fp);
 
    struct set_entry *entry = NULL;
-   if (state)
+   if (state->syms)
       entry = _mesa_set_search(state->syms, var->name);
 
    char *name;
@@ -241,22 +257,57 @@ print_var_decl(nir_variable *var, print_var_state *state, FILE *fp)
        var->data.mode == nir_var_shader_out ||
        var->data.mode == nir_var_uniform ||
        var->data.mode == nir_var_shader_storage) {
-      fprintf(fp, " (%u, %u)", var->data.location, var->data.driver_location);
+      const char *loc = NULL;
+      char buf[4];
+
+      switch (state->shader->stage) {
+      case MESA_SHADER_VERTEX:
+         if (var->data.mode == nir_var_shader_in)
+            loc = gl_vert_attrib_name(var->data.location);
+         else if (var->data.mode == nir_var_shader_out)
+            loc = gl_varying_slot_name(var->data.location);
+         break;
+      case MESA_SHADER_GEOMETRY:
+         if ((var->data.mode == nir_var_shader_in) ||
+             (var->data.mode == nir_var_shader_out))
+            loc = gl_varying_slot_name(var->data.location);
+         break;
+      case MESA_SHADER_FRAGMENT:
+         if (var->data.mode == nir_var_shader_in)
+            loc = gl_varying_slot_name(var->data.location);
+         else if (var->data.mode == nir_var_shader_out)
+            loc = gl_frag_result_name(var->data.location);
+         break;
+      case MESA_SHADER_TESS_CTRL:
+      case MESA_SHADER_TESS_EVAL:
+      case MESA_SHADER_COMPUTE:
+      default:
+         /* TODO */
+         break;
+      }
+
+      if (!loc) {
+         snprintf(buf, sizeof(buf), "%u", var->data.location);
+         loc = buf;
+      }
+
+      fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
    }
 
    fprintf(fp, "\n");
 
-   if (state) {
+   if (state->syms) {
       _mesa_set_add(state->syms, name);
       _mesa_hash_table_insert(state->ht, var, name);
    }
 }
 
 static void
-print_var(nir_variable *var, print_var_state *state, FILE *fp)
+print_var(nir_variable *var, print_state *state)
 {
+   FILE *fp = state->fp;
    const char *name;
-   if (state) {
+   if (state->ht) {
       struct hash_entry *entry = _mesa_hash_table_search(state->ht, var);
 
       assert(entry != NULL);
@@ -269,14 +320,15 @@ print_var(nir_variable *var, print_var_state *state, FILE *fp)
 }
 
 static void
-print_deref_var(nir_deref_var *deref, print_var_state *state, FILE *fp)
+print_deref_var(nir_deref_var *deref, print_state *state)
 {
-   print_var(deref->var, state, fp);
+   print_var(deref->var, state);
 }
 
 static void
-print_deref_array(nir_deref_array *deref, print_var_state *state, FILE *fp)
+print_deref_array(nir_deref_array *deref, print_state *state)
 {
+   FILE *fp = state->fp;
    fprintf(fp, "[");
    switch (deref->deref_array_type) {
    case nir_deref_array_type_direct:
@@ -285,7 +337,7 @@ print_deref_array(nir_deref_array *deref, print_var_state *state, FILE *fp)
    case nir_deref_array_type_indirect:
       if (deref->base_offset != 0)
          fprintf(fp, "%u + ", deref->base_offset);
-      print_src(&deref->indirect, fp);
+      print_src(&deref->indirect, state);
       break;
    case nir_deref_array_type_wildcard:
       fprintf(fp, "*");
@@ -296,13 +348,14 @@ print_deref_array(nir_deref_array *deref, print_var_state *state, FILE *fp)
 
 static void
 print_deref_struct(nir_deref_struct *deref, const struct glsl_type *parent_type,
-                   print_var_state *state, FILE *fp)
+                   print_state *state)
 {
+   FILE *fp = state->fp;
    fprintf(fp, ".%s", glsl_get_struct_elem_name(parent_type, deref->index));
 }
 
 static void
-print_deref(nir_deref_var *deref, print_var_state *state, FILE *fp)
+print_deref(nir_deref_var *deref, print_state *state)
 {
    nir_deref *tail = &deref->deref;
    nir_deref *pretail = NULL;
@@ -311,18 +364,18 @@ print_deref(nir_deref_var *deref, print_var_state *state, FILE *fp)
       case nir_deref_type_var:
          assert(pretail == NULL);
          assert(tail == &deref->deref);
-         print_deref_var(deref, state, fp);
+         print_deref_var(deref, state);
          break;
 
       case nir_deref_type_array:
          assert(pretail != NULL);
-         print_deref_array(nir_deref_as_array(tail), state, fp);
+         print_deref_array(nir_deref_as_array(tail), state);
          break;
 
       case nir_deref_type_struct:
          assert(pretail != NULL);
          print_deref_struct(nir_deref_as_struct(tail),
-                            pretail->type, state, fp);
+                            pretail->type, state);
          break;
 
       default:
@@ -335,13 +388,13 @@ print_deref(nir_deref_var *deref, print_var_state *state, FILE *fp)
 }
 
 static void
-print_intrinsic_instr(nir_intrinsic_instr *instr, print_var_state *state,
-                      FILE *fp)
+print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
 {
    unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+   FILE *fp = state->fp;
 
    if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
-      print_dest(&instr->dest, fp);
+      print_dest(&instr->dest, state);
       fprintf(fp, " = ");
    }
 
@@ -351,7 +404,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_var_state *state,
       if (i != 0)
          fprintf(fp, ", ");
 
-      print_src(&instr->src[i], fp);
+      print_src(&instr->src[i], state);
    }
 
    fprintf(fp, ") (");
@@ -362,7 +415,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_var_state *state,
       if (i != 0)
          fprintf(fp, ", ");
 
-      print_deref(instr->variables[i], state, fp);
+      print_deref(instr->variables[i], state);
    }
 
    fprintf(fp, ") (");
@@ -377,12 +430,44 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_var_state *state,
    }
 
    fprintf(fp, ")");
+
+   if (!state->shader)
+      return;
+
+   struct exec_list *var_list = NULL;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_uniform:
+   case nir_intrinsic_load_uniform_indirect:
+      var_list = &state->shader->uniforms;
+      break;
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_input_indirect:
+      var_list = &state->shader->inputs;
+      break;
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_output_indirect:
+      var_list = &state->shader->outputs;
+      break;
+   default:
+      return;
+   }
+
+   foreach_list_typed(nir_variable, var, node, var_list) {
+      if ((var->data.driver_location == instr->const_index[0]) &&
+          var->name) {
+         fprintf(fp, "\t/* %s */", var->name);
+         break;
+      }
+   }
 }
 
 static void
-print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
+print_tex_instr(nir_tex_instr *instr, print_state *state)
 {
-   print_dest(&instr->dest, fp);
+   FILE *fp = state->fp;
+
+   print_dest(&instr->dest, state);
 
    fprintf(fp, " = ");
 
@@ -417,6 +502,9 @@ print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
    case nir_texop_query_levels:
       fprintf(fp, "query_levels ");
       break;
+   case nir_texop_texture_samples:
+      fprintf(fp, "texture_samples ");
+      break;
 
    default:
       unreachable("Invalid texture operation");
@@ -424,7 +512,7 @@ print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
    }
 
    for (unsigned i = 0; i < instr->num_srcs; i++) {
-      print_src(&instr->src[i].src, fp);
+      print_src(&instr->src[i].src, state);
 
       fprintf(fp, " ");
 
@@ -487,7 +575,7 @@ print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
    }
 
    if (instr->sampler) {
-      print_deref(instr->sampler, state, fp);
+      print_deref(instr->sampler, state);
    } else {
       fprintf(fp, "%u", instr->sampler_index);
    }
@@ -496,29 +584,33 @@ print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
 }
 
 static void
-print_call_instr(nir_call_instr *instr, print_var_state *state, FILE *fp)
+print_call_instr(nir_call_instr *instr, print_state *state)
 {
+   FILE *fp = state->fp;
+
    fprintf(fp, "call %s ", instr->callee->function->name);
 
    for (unsigned i = 0; i < instr->num_params; i++) {
       if (i != 0)
          fprintf(fp, ", ");
 
-      print_deref(instr->params[i], state, fp);
+      print_deref(instr->params[i], state);
    }
 
    if (instr->return_deref != NULL) {
       if (instr->num_params != 0)
          fprintf(fp, ", ");
       fprintf(fp, "returning ");
-      print_deref(instr->return_deref, state, fp);
+      print_deref(instr->return_deref, state);
    }
 }
 
 static void
-print_load_const_instr(nir_load_const_instr *instr, unsigned tabs, FILE *fp)
+print_load_const_instr(nir_load_const_instr *instr, print_state *state)
 {
-   print_ssa_def(&instr->def, fp);
+   FILE *fp = state->fp;
+
+   print_ssa_def(&instr->def, state);
 
    fprintf(fp, " = load_const (");
 
@@ -539,8 +631,10 @@ print_load_const_instr(nir_load_const_instr *instr, unsigned tabs, FILE *fp)
 }
 
 static void
-print_jump_instr(nir_jump_instr *instr, FILE *fp)
+print_jump_instr(nir_jump_instr *instr, print_state *state)
 {
+   FILE *fp = state->fp;
+
    switch (instr->type) {
    case nir_jump_break:
       fprintf(fp, "break");
@@ -557,79 +651,83 @@ print_jump_instr(nir_jump_instr *instr, FILE *fp)
 }
 
 static void
-print_ssa_undef_instr(nir_ssa_undef_instr* instr, FILE *fp)
+print_ssa_undef_instr(nir_ssa_undef_instr* instr, print_state *state)
 {
-   print_ssa_def(&instr->def, fp);
+   FILE *fp = state->fp;
+   print_ssa_def(&instr->def, state);
    fprintf(fp, " = undefined");
 }
 
 static void
-print_phi_instr(nir_phi_instr *instr, FILE *fp)
+print_phi_instr(nir_phi_instr *instr, print_state *state)
 {
-   print_dest(&instr->dest, fp);
+   FILE *fp = state->fp;
+   print_dest(&instr->dest, state);
    fprintf(fp, " = phi ");
    nir_foreach_phi_src(instr, src) {
       if (&src->node != exec_list_get_head(&instr->srcs))
          fprintf(fp, ", ");
 
       fprintf(fp, "block_%u: ", src->pred->index);
-      print_src(&src->src, fp);
+      print_src(&src->src, state);
    }
 }
 
 static void
-print_parallel_copy_instr(nir_parallel_copy_instr *instr, FILE *fp)
+print_parallel_copy_instr(nir_parallel_copy_instr *instr, print_state *state)
 {
+   FILE *fp = state->fp;
    nir_foreach_parallel_copy_entry(instr, entry) {
       if (&entry->node != exec_list_get_head(&instr->entries))
          fprintf(fp, "; ");
 
-      print_dest(&entry->dest, fp);
+      print_dest(&entry->dest, state);
       fprintf(fp, " = ");
-      print_src(&entry->src, fp);
+      print_src(&entry->src, state);
    }
 }
 
 static void
-print_instr(const nir_instr *instr, print_var_state *state, unsigned tabs, FILE *fp)
+print_instr(const nir_instr *instr, print_state *state, unsigned tabs)
 {
+   FILE *fp = state->fp;
    print_tabs(tabs, fp);
 
    switch (instr->type) {
    case nir_instr_type_alu:
-      print_alu_instr(nir_instr_as_alu(instr), fp);
+      print_alu_instr(nir_instr_as_alu(instr), state);
       break;
 
    case nir_instr_type_call:
-      print_call_instr(nir_instr_as_call(instr), state, fp);
+      print_call_instr(nir_instr_as_call(instr), state);
       break;
 
    case nir_instr_type_intrinsic:
-      print_intrinsic_instr(nir_instr_as_intrinsic(instr), state, fp);
+      print_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
       break;
 
    case nir_instr_type_tex:
-      print_tex_instr(nir_instr_as_tex(instr), state, fp);
+      print_tex_instr(nir_instr_as_tex(instr), state);
       break;
 
    case nir_instr_type_load_const:
-      print_load_const_instr(nir_instr_as_load_const(instr), tabs, fp);
+      print_load_const_instr(nir_instr_as_load_const(instr), state);
       break;
 
    case nir_instr_type_jump:
-      print_jump_instr(nir_instr_as_jump(instr), fp);
+      print_jump_instr(nir_instr_as_jump(instr), state);
       break;
 
    case nir_instr_type_ssa_undef:
-      print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), fp);
+      print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
       break;
 
    case nir_instr_type_phi:
-      print_phi_instr(nir_instr_as_phi(instr), fp);
+      print_phi_instr(nir_instr_as_phi(instr), state);
       break;
 
    case nir_instr_type_parallel_copy:
-      print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), fp);
+      print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), state);
       break;
 
    default:
@@ -647,12 +745,14 @@ compare_block_index(const void *p1, const void *p2)
    return (int) block1->index - (int) block2->index;
 }
 
-static void print_cf_node(nir_cf_node *node, print_var_state *state,
-                          unsigned tabs, FILE *fp);
+static void print_cf_node(nir_cf_node *node, print_state *state,
+                          unsigned tabs);
 
 static void
-print_block(nir_block *block, print_var_state *state, unsigned tabs, FILE *fp)
+print_block(nir_block *block, print_state *state, unsigned tabs)
 {
+   FILE *fp = state->fp;
+
    print_tabs(tabs, fp);
    fprintf(fp, "block block_%u:\n", block->index);
 
@@ -680,7 +780,7 @@ print_block(nir_block *block, print_var_state *state, unsigned tabs, FILE *fp)
    free(preds);
 
    nir_foreach_instr(block, instr) {
-      print_instr(instr, state, tabs, fp);
+      print_instr(instr, state, tabs);
       fprintf(fp, "\n");
    }
 
@@ -694,51 +794,54 @@ print_block(nir_block *block, print_var_state *state, unsigned tabs, FILE *fp)
 }
 
 static void
-print_if(nir_if *if_stmt, print_var_state *state, unsigned tabs, FILE *fp)
+print_if(nir_if *if_stmt, print_state *state, unsigned tabs)
 {
+   FILE *fp = state->fp;
+
    print_tabs(tabs, fp);
    fprintf(fp, "if ");
-   print_src(&if_stmt->condition, fp);
+   print_src(&if_stmt->condition, state);
    fprintf(fp, " {\n");
    foreach_list_typed(nir_cf_node, node, node, &if_stmt->then_list) {
-      print_cf_node(node, state, tabs + 1, fp);
+      print_cf_node(node, state, tabs + 1);
    }
    print_tabs(tabs, fp);
    fprintf(fp, "} else {\n");
    foreach_list_typed(nir_cf_node, node, node, &if_stmt->else_list) {
-      print_cf_node(node, state, tabs + 1, fp);
+      print_cf_node(node, state, tabs + 1);
    }
    print_tabs(tabs, fp);
    fprintf(fp, "}\n");
 }
 
 static void
-print_loop(nir_loop *loop, print_var_state *state, unsigned tabs, FILE *fp)
+print_loop(nir_loop *loop, print_state *state, unsigned tabs)
 {
+   FILE *fp = state->fp;
+
    print_tabs(tabs, fp);
    fprintf(fp, "loop {\n");
    foreach_list_typed(nir_cf_node, node, node, &loop->body) {
-      print_cf_node(node, state, tabs + 1, fp);
+      print_cf_node(node, state, tabs + 1);
    }
    print_tabs(tabs, fp);
    fprintf(fp, "}\n");
 }
 
 static void
-print_cf_node(nir_cf_node *node, print_var_state *state, unsigned int tabs,
-              FILE *fp)
+print_cf_node(nir_cf_node *node, print_state *state, unsigned int tabs)
 {
    switch (node->type) {
    case nir_cf_node_block:
-      print_block(nir_cf_node_as_block(node), state, tabs, fp);
+      print_block(nir_cf_node_as_block(node), state, tabs);
       break;
 
    case nir_cf_node_if:
-      print_if(nir_cf_node_as_if(node), state, tabs, fp);
+      print_if(nir_cf_node_as_if(node), state, tabs);
       break;
 
    case nir_cf_node_loop:
-      print_loop(nir_cf_node_as_loop(node), state, tabs, fp);
+      print_loop(nir_cf_node_as_loop(node), state, tabs);
       break;
 
    default:
@@ -747,40 +850,42 @@ print_cf_node(nir_cf_node *node, print_var_state *state, unsigned int tabs,
 }
 
 static void
-print_function_impl(nir_function_impl *impl, print_var_state *state, FILE *fp)
+print_function_impl(nir_function_impl *impl, print_state *state)
 {
+   FILE *fp = state->fp;
+
    fprintf(fp, "\nimpl %s ", impl->overload->function->name);
 
    for (unsigned i = 0; i < impl->num_params; i++) {
       if (i != 0)
          fprintf(fp, ", ");
 
-      print_var(impl->params[i], state, fp);
+      print_var(impl->params[i], state);
    }
 
    if (impl->return_var != NULL) {
       if (impl->num_params != 0)
          fprintf(fp, ", ");
       fprintf(fp, "returning ");
-      print_var(impl->return_var, state, fp);
+      print_var(impl->return_var, state);
    }
 
    fprintf(fp, "{\n");
 
    foreach_list_typed(nir_variable, var, node, &impl->locals) {
       fprintf(fp, "\t");
-      print_var_decl(var, state, fp);
+      print_var_decl(var, state);
    }
 
    foreach_list_typed(nir_register, reg, node, &impl->registers) {
       fprintf(fp, "\t");
-      print_register_decl(reg, fp);
+      print_register_decl(reg, state);
    }
 
    nir_index_blocks(impl);
 
    foreach_list_typed(nir_cf_node, node, node, &impl->body) {
-      print_cf_node(node, state, 1, fp);
+      print_cf_node(node, state, 1);
    }
 
    fprintf(fp, "\tblock block_%u:\n}\n\n", impl->end_block->index);
@@ -788,8 +893,10 @@ print_function_impl(nir_function_impl *impl, print_var_state *state, FILE *fp)
 
 static void
 print_function_overload(nir_function_overload *overload,
-                        print_var_state *state, FILE *fp)
+                        print_state *state)
 {
+   FILE *fp = state->fp;
+
    fprintf(fp, "decl_overload %s ", overload->function->name);
 
    for (unsigned i = 0; i < overload->num_params; i++) {
@@ -823,22 +930,24 @@ print_function_overload(nir_function_overload *overload,
    fprintf(fp, "\n");
 
    if (overload->impl != NULL) {
-      print_function_impl(overload->impl, state, fp);
+      print_function_impl(overload->impl, state);
       return;
    }
 }
 
 static void
-print_function(nir_function *func, print_var_state *state, FILE *fp)
+print_function(nir_function *func, print_state *state)
 {
    foreach_list_typed(nir_function_overload, overload, node, &func->overload_list) {
-      print_function_overload(overload, state, fp);
+      print_function_overload(overload, state);
    }
 }
 
 static void
-init_print_state(print_var_state *state)
+init_print_state(print_state *state, nir_shader *shader, FILE *fp)
 {
+   state->fp = fp;
+   state->shader = shader;
    state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                        _mesa_key_pointer_equal);
    state->syms = _mesa_set_create(NULL, _mesa_key_hash_string,
@@ -847,7 +956,7 @@ init_print_state(print_var_state *state)
 }
 
 static void
-destroy_print_state(print_var_state *state)
+destroy_print_state(print_state *state)
 {
    _mesa_hash_table_destroy(state->ht, NULL);
    _mesa_set_destroy(state->syms, NULL);
@@ -856,35 +965,37 @@ destroy_print_state(print_var_state *state)
 void
 nir_print_shader(nir_shader *shader, FILE *fp)
 {
-   print_var_state state;
-   init_print_state(&state);
+   print_state state;
+   init_print_state(&state, shader, fp);
+
+   fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage));
 
    foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
-      print_var_decl(var, &state, fp);
+      print_var_decl(var, &state);
    }
 
    foreach_list_typed(nir_variable, var, node, &shader->inputs) {
-      print_var_decl(var, &state, fp);
+      print_var_decl(var, &state);
    }
 
    foreach_list_typed(nir_variable, var, node, &shader->outputs) {
-      print_var_decl(var, &state, fp);
+      print_var_decl(var, &state);
    }
 
    foreach_list_typed(nir_variable, var, node, &shader->globals) {
-      print_var_decl(var, &state, fp);
+      print_var_decl(var, &state);
    }
 
    foreach_list_typed(nir_variable, var, node, &shader->system_values) {
-      print_var_decl(var, &state, fp);
+      print_var_decl(var, &state);
    }
 
    foreach_list_typed(nir_register, reg, node, &shader->registers) {
-      print_register_decl(reg, fp);
+      print_register_decl(reg, &state);
    }
 
    foreach_list_typed(nir_function, func, node, &shader->functions) {
-      print_function(func, &state, fp);
+      print_function(func, &state);
    }
 
    destroy_print_state(&state);
@@ -893,5 +1004,9 @@ nir_print_shader(nir_shader *shader, FILE *fp)
 void
 nir_print_instr(const nir_instr *instr, FILE *fp)
 {
-   print_instr(instr, NULL, 0, fp);
+   print_state state = {
+      .fp = fp,
+   };
+   print_instr(instr, &state, 0);
+
 }
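
Both public entry points keep their signatures; only the internal plumbing moved into print_state. A hypothetical debugging helper using them (dump_shader is illustrative, not part of the patch):

#include <stdio.h>
#include "nir.h"

/* Hypothetical helper: dump a shader to stderr. With this patch the
 * output starts with the stage name and annotates shader in/out and
 * uniform locations symbolically instead of as raw numbers. */
static void
dump_shader(nir_shader *shader)
{
   nir_print_shader(shader, stderr);
}
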
index 4417e2a..d6783e7 100644 (file)
@@ -97,32 +97,39 @@ add_var_use_shader(nir_shader *shader, struct set *live)
    }
 }
 
-static void
+static bool
 remove_dead_vars(struct exec_list *var_list, struct set *live)
 {
+   bool progress = false;
+
    foreach_list_typed_safe(nir_variable, var, node, var_list) {
       struct set_entry *entry = _mesa_set_search(live, var);
       if (entry == NULL) {
          exec_node_remove(&var->node);
          ralloc_free(var);
+         progress = true;
       }
    }
+
+   return progress;
 }
 
-void
+bool
 nir_remove_dead_variables(nir_shader *shader)
 {
+   bool progress = false;
    struct set *live =
       _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
 
    add_var_use_shader(shader, live);
 
-   remove_dead_vars(&shader->globals, live);
+   progress = remove_dead_vars(&shader->globals, live) || progress;
 
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         remove_dead_vars(&overload->impl->locals, live);
+         progress = remove_dead_vars(&overload->impl->locals, live) || progress;
    }
 
    _mesa_set_destroy(live, NULL);
+   return progress;
 }
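
One subtlety worth calling out: progress is accumulated as `progress = remove_dead_vars(...) || progress;` rather than the other way around. A sketch of why the operand order matters:

/* Correct: the pass always runs; progress only accumulates. */
progress = remove_dead_vars(&shader->globals, live) || progress;

/* Wrong: once progress is true, || short-circuits and the pass
 * would silently be skipped. */
progress = progress || remove_dead_vars(&shader->globals, live);
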
index c33d6c3..bb15440 100644 (file)
@@ -81,7 +81,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
       swizzle = identity_swizzle;
    }
 
-   for (int i = 0; i < num_components; ++i)
+   for (unsigned i = 0; i < num_components; ++i)
       new_swizzle[i] = instr->src[src].swizzle[swizzle[i]];
 
    switch (value->type) {
@@ -107,7 +107,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
 
          assert(!instr->src[src].abs && !instr->src[src].negate);
 
-         for (int i = 0; i < num_components; ++i) {
+         for (unsigned i = 0; i < num_components; ++i) {
             if (state->variables[var->variable].swizzle[i] != new_swizzle[i])
                return false;
          }
@@ -135,7 +135,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
          state->variables[var->variable].abs = false;
          state->variables[var->variable].negate = false;
 
-         for (int i = 0; i < 4; ++i) {
+         for (unsigned i = 0; i < 4; ++i) {
             if (i < num_components)
                state->variables[var->variable].swizzle[i] = new_swizzle[i];
             else
@@ -367,7 +367,7 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
    nir_instr_insert_before(&instr->instr, &mov->instr);
 
    nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
-                            nir_src_for_ssa(&mov->dest.dest.ssa), mem_ctx);
+                            nir_src_for_ssa(&mov->dest.dest.ssa));
 
    /* We know this one has no more uses because we just rewrote them all,
     * so we can remove it.  The rest of the matched expression, however, we
index 5c163b5..f583178 100644 (file)
@@ -64,6 +64,7 @@
 struct split_var_copies_state {
    void *mem_ctx;
    void *dead_ctx;
+   bool progress;
 };
 
 /* Recursively constructs deref chains to split a copy instruction into
@@ -190,6 +191,7 @@ split_var_copy_instr(nir_intrinsic_instr *old_copy,
           * remove the old one later.
           */
          nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
+         state->progress = true;
       }
       break;
 
@@ -248,24 +250,31 @@ split_var_copies_block(nir_block *block, void *void_state)
    return true;
 }
 
-static void
+static bool
 split_var_copies_impl(nir_function_impl *impl)
 {
    struct split_var_copies_state state;
 
    state.mem_ctx = ralloc_parent(impl);
    state.dead_ctx = ralloc_context(NULL);
+   state.progress = false;
 
    nir_foreach_block(impl, split_var_copies_block, &state);
 
    ralloc_free(state.dead_ctx);
+
+   return state.progress;
 }
 
-void
+bool
 nir_split_var_copies(nir_shader *shader)
 {
+   bool progress = false;
+
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         split_var_copies_impl(overload->impl);
+         progress = split_var_copies_impl(overload->impl) || progress;
    }
+
+   return progress;
 }
index 69cfac1..01f0e9b 100644 (file)
@@ -138,6 +138,13 @@ glsl_get_sampler_result_type(const struct glsl_type *type)
    return (glsl_base_type)type->sampler_type;
 }
 
+unsigned
+glsl_get_record_location_offset(const struct glsl_type *type,
+                                unsigned length)
+{
+   return type->record_location_offset(length);
+}
+
 bool
 glsl_type_is_void(const glsl_type *type)
 {
index a2fa793..1a0cb1f 100644 (file)
@@ -27,6 +27,8 @@
 
 #pragma once
 
+#include <stdio.h>
+
 /* C wrapper around glsl_types.h */
 
 #include "../glsl_types.h"
@@ -37,8 +39,6 @@ extern "C" {
 struct glsl_type;
 #endif
 
-#include <stdio.h>
-
 void glsl_print_type(const struct glsl_type *type, FILE *fp);
 void glsl_print_struct(const struct glsl_type *type, FILE *fp);
 
@@ -71,6 +71,9 @@ const char *glsl_get_struct_elem_name(const struct glsl_type *type,
 enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type);
 enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type);
 
+unsigned glsl_get_record_location_offset(const struct glsl_type *type,
+                                         unsigned length);
+
 bool glsl_type_is_void(const struct glsl_type *type);
 bool glsl_type_is_vector(const struct glsl_type *type);
 bool glsl_type_is_scalar(const struct glsl_type *type);
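
glsl_get_record_location_offset() simply exposes the C++ method glsl_type::record_location_offset() to C callers such as NIR. A hedged usage sketch -- record_type and n are placeholders, and the semantics (location slots spanned by the first `length` members) follow from the wrapped method:

/* Sketch: how many location slots do the first n members of a record
 * consume, e.g. when assigning per-member locations? */
unsigned slots = glsl_get_record_location_offset(record_type, n);
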
index 9938c0e..1c9993a 100644 (file)
@@ -586,6 +586,7 @@ validate_block(nir_block *block, validate_state *state)
    }
 
    assert(block->successors[0] != NULL);
+   assert(block->successors[0] != block->successors[1]);
 
    for (unsigned i = 0; i < 2; i++) {
       if (block->successors[i] != NULL) {
diff --git a/src/glsl/nir/tests/control_flow_tests.cpp b/src/glsl/nir/tests/control_flow_tests.cpp
new file mode 100644 (file)
index 0000000..b9f90e6
--- /dev/null
+++ b/src/glsl/nir/tests/control_flow_tests.cpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <gtest/gtest.h>
+#include "nir.h"
+#include "nir_builder.h"
+
+class nir_cf_test : public ::testing::Test {
+protected:
+   nir_cf_test();
+   ~nir_cf_test();
+
+   nir_builder b;
+   nir_shader *shader;
+   nir_function_impl *impl;
+};
+
+nir_cf_test::nir_cf_test()
+{
+   static const nir_shader_compiler_options options = { };
+   shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, &options);
+   nir_function *func = nir_function_create(shader, "main");
+   nir_function_overload *overload = nir_function_overload_create(func);
+   impl = nir_function_impl_create(overload);
+
+   nir_builder_init(&b, impl);
+}
+
+nir_cf_test::~nir_cf_test()
+{
+   ralloc_free(shader);
+}
+
+TEST_F(nir_cf_test, delete_break_in_loop)
+{
+   /* Create IR:
+    *
+    * while (...) { break; }
+    */
+   nir_loop *loop = nir_loop_create(shader);
+   nir_cf_node_insert(nir_after_cf_list(&impl->body), &loop->cf_node);
+
+   b.cursor = nir_after_cf_list(&loop->body);
+
+   nir_jump_instr *jump = nir_jump_instr_create(shader, nir_jump_break);
+   nir_builder_instr_insert(&b, &jump->instr);
+
+   /* At this point, we should have:
+    *
+    * impl main {
+    *         block block_0:
+    *         // preds:
+    *         // succs: block_1
+    *         loop {
+    *                 block block_1:
+    *                 // preds: block_0
+    *                 break
+    *                 // succs: block_2
+    *         }
+    *         block block_2:
+    *         // preds: block_1
+    *         // succs: block_3
+    *         block block_3:
+    * }
+    */
+   nir_block *block_0 = nir_start_block(impl);
+   nir_block *block_1 = nir_cf_node_as_block(nir_loop_first_cf_node(loop));
+   nir_block *block_2 = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
+   nir_block *block_3 = impl->end_block;
+   ASSERT_EQ(nir_cf_node_block, block_0->cf_node.type);
+   ASSERT_EQ(nir_cf_node_block, block_1->cf_node.type);
+   ASSERT_EQ(nir_cf_node_block, block_2->cf_node.type);
+   ASSERT_EQ(nir_cf_node_block, block_3->cf_node.type);
+
+   /* Verify the successors and predecessors. */
+   EXPECT_EQ(block_1, block_0->successors[0]);
+   EXPECT_EQ(NULL,    block_0->successors[1]);
+   EXPECT_EQ(block_2, block_1->successors[0]);
+   EXPECT_EQ(NULL,    block_1->successors[1]);
+   EXPECT_EQ(block_3, block_2->successors[0]);
+   EXPECT_EQ(NULL,    block_2->successors[1]);
+   EXPECT_EQ(NULL,    block_3->successors[0]);
+   EXPECT_EQ(NULL,    block_3->successors[1]);
+   EXPECT_EQ(0,       block_0->predecessors->entries);
+   EXPECT_EQ(1,       block_1->predecessors->entries);
+   EXPECT_EQ(1,       block_2->predecessors->entries);
+   EXPECT_EQ(1,       block_3->predecessors->entries);
+   EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_0));
+   EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1));
+   EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2));
+
+   nir_print_shader(shader, stderr);
+
+   /* Now remove the break. */
+   nir_instr_remove(&jump->instr);
+
+   nir_print_shader(shader, stderr);
+
+   /* At this point, we should have:
+    *
+    * impl main {
+    *         block block_0:
+    *         // preds:
+    *         // succs: block_1
+    *         loop {
+    *                 block block_1:
+    *                 // preds: block_0 block_1
+    *                 // succs: block_1
+    *         }
+    *         block block_2:
+    *         // preds: block_1
+    *         // succs: block_3
+    *         block block_3:
+    * }
+    *
+    * Re-verify the predecessors and successors.
+    */
+   EXPECT_EQ(block_1, block_0->successors[0]);
+   EXPECT_EQ(NULL,    block_0->successors[1]);
+   EXPECT_EQ(block_1, block_1->successors[0]); /* back to itself */
+   EXPECT_EQ(block_2, block_1->successors[1]); /* fake successor */
+   EXPECT_EQ(block_3, block_2->successors[0]);
+   EXPECT_EQ(NULL,    block_2->successors[1]);
+   EXPECT_EQ(NULL,    block_3->successors[0]);
+   EXPECT_EQ(NULL,    block_3->successors[1]);
+   EXPECT_EQ(0,       block_0->predecessors->entries);
+   EXPECT_EQ(2,       block_1->predecessors->entries);
+   EXPECT_EQ(1,       block_2->predecessors->entries);
+   EXPECT_EQ(1,       block_3->predecessors->entries);
+   EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_0));
+   EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_1));
+   EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1));
+   EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2));
+
+   nir_metadata_require(impl, nir_metadata_dominance);
+}
index 5221417..184aaa1 100644 (file)
@@ -40,6 +40,7 @@
 #include "ir_basic_block.h"
 #include "ir_optimization.h"
 #include "glsl_types.h"
+#include "util/hash_table.h"
 
 namespace {
 
@@ -95,7 +96,8 @@ public:
       killed_all = false;
       mem_ctx = ralloc_context(0);
       this->acp = new(mem_ctx) exec_list;
-      this->kills = new(mem_ctx) exec_list;
+      this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
    }
    ~ir_constant_propagation_visitor()
    {
@@ -123,7 +125,7 @@ public:
     * List of kill_entry: The masks of variables whose values were
     * killed in this block.
     */
-   exec_list *kills;
+   hash_table *kills;
 
    bool progress;
 
@@ -263,11 +265,12 @@ ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir)
     * main() at link time, so they're irrelevant to us.
     */
    exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
    bool orig_killed_all = this->killed_all;
 
    this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    this->killed_all = false;
 
    visit_list_elements(this, &ir->body);
@@ -352,11 +355,12 @@ void
 ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
 {
    exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
    bool orig_killed_all = this->killed_all;
 
    this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    this->killed_all = false;
 
    /* Populate the initial acp with a constant of the original */
@@ -370,12 +374,14 @@ ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
       orig_acp->make_empty();
    }
 
-   exec_list *new_kills = this->kills;
+   hash_table *new_kills = this->kills;
    this->kills = orig_kills;
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
-   foreach_in_list(kill_entry, k, new_kills) {
+   hash_entry *htk;
+   hash_table_foreach(new_kills, htk) {
+      kill_entry *k = (kill_entry *) htk->data;
       kill(k->var, k->write_mask);
    }
 }
@@ -397,7 +403,7 @@ ir_visitor_status
 ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
 {
    exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
    bool orig_killed_all = this->killed_all;
 
    /* FINISHME: For now, the initial acp for loops is totally empty.
@@ -405,7 +411,8 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
     * cloned minus the killed entries after the first run through.
     */
    this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    this->killed_all = false;
 
    visit_list_elements(this, &ir->body_instructions);
@@ -414,12 +421,14 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
       orig_acp->make_empty();
    }
 
-   exec_list *new_kills = this->kills;
+   hash_table *new_kills = this->kills;
    this->kills = orig_kills;
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
-   foreach_in_list(kill_entry, k, new_kills) {
+   hash_entry *htk;
+   hash_table_foreach(new_kills, htk) {
+      kill_entry *k = (kill_entry *) htk->data;
       kill(k->var, k->write_mask);
    }
 
@@ -448,14 +457,15 @@ ir_constant_propagation_visitor::kill(ir_variable *var, unsigned write_mask)
    /* Add this writemask of the variable to the list of killed
     * variables in this block.
     */
-   foreach_in_list(kill_entry, entry, this->kills) {
-      if (entry->var == var) {
-        entry->write_mask |= write_mask;
-        return;
-      }
+   hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var);
+   if (kill_hash_entry) {
+      kill_entry *entry = (kill_entry *) kill_hash_entry->data;
+      entry->write_mask |= write_mask;
+      return;
    }
    /* Not already in the list.  Make new entry. */
-   this->kills->push_tail(new(this->mem_ctx) kill_entry(var, write_mask));
+   _mesa_hash_table_insert(this->kills, var,
+                           new(this->mem_ctx) kill_entry(var, write_mask));
 }
 
 /**
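
The point of the conversion: kill() used to walk an exec_list to find an existing entry for a variable, which is quadratic over a block's kills; keying the kill set on the ir_variable pointer turns each lookup into a single hash probe. The core of the new lookup, as it appears above:

/* O(1) pointer-keyed probe instead of a linear list scan. */
hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var);
if (kill_hash_entry) {
   ((kill_entry *) kill_hash_entry->data)->write_mask |= write_mask;
   return;
}
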
index 7aaaeed..cdfbc34 100644 (file)
 #include "ir_visitor.h"
 #include "ir_optimization.h"
 #include "glsl_types.h"
+#include "util/hash_table.h"
 
 namespace {
 
 struct assignment_entry {
-   exec_node link;
    int assignment_count;
    ir_variable *var;
    ir_constant *constval;
@@ -54,31 +54,32 @@ public:
    virtual ir_visitor_status visit_enter(ir_assignment *);
    virtual ir_visitor_status visit_enter(ir_call *);
 
-   exec_list list;
+   struct hash_table *ht;
 };
 
 } /* unnamed namespace */
 
 static struct assignment_entry *
-get_assignment_entry(ir_variable *var, exec_list *list)
+get_assignment_entry(ir_variable *var, struct hash_table *ht)
 {
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
    struct assignment_entry *entry;
 
-   foreach_list_typed(struct assignment_entry, entry, link, list) {
-      if (entry->var == var)
-        return entry;
+   if (hte) {
+      entry = (struct assignment_entry *) hte->data;
+   } else {
+      entry = (struct assignment_entry *) calloc(1, sizeof(*entry));
+      entry->var = var;
+      _mesa_hash_table_insert(ht, var, entry);
    }
 
-   entry = (struct assignment_entry *)calloc(1, sizeof(*entry));
-   entry->var = var;
-   list->push_head(&entry->link);
    return entry;
 }
 
 ir_visitor_status
 ir_constant_variable_visitor::visit(ir_variable *ir)
 {
-   struct assignment_entry *entry = get_assignment_entry(ir, &this->list);
+   struct assignment_entry *entry = get_assignment_entry(ir, this->ht);
    entry->our_scope = true;
    return visit_continue;
 }
@@ -97,7 +98,7 @@ ir_constant_variable_visitor::visit_enter(ir_assignment *ir)
    ir_constant *constval;
    struct assignment_entry *entry;
 
-   entry = get_assignment_entry(ir->lhs->variable_referenced(), &this->list);
+   entry = get_assignment_entry(ir->lhs->variable_referenced(), this->ht);
    assert(entry);
    entry->assignment_count++;
 
@@ -150,7 +151,7 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
         struct assignment_entry *entry;
 
         assert(var);
-        entry = get_assignment_entry(var, &this->list);
+        entry = get_assignment_entry(var, this->ht);
         entry->assignment_count++;
       }
    }
@@ -161,7 +162,7 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
       struct assignment_entry *entry;
 
       assert(var);
-      entry = get_assignment_entry(var, &this->list);
+      entry = get_assignment_entry(var, this->ht);
       entry->assignment_count++;
    }
 
@@ -177,20 +178,22 @@ do_constant_variable(exec_list *instructions)
    bool progress = false;
    ir_constant_variable_visitor v;
 
+   v.ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                  _mesa_key_pointer_equal);
    v.run(instructions);
 
-   while (!v.list.is_empty()) {
-
-      struct assignment_entry *entry;
-      entry = exec_node_data(struct assignment_entry, v.list.head, link);
+   struct hash_entry *hte;
+   hash_table_foreach(v.ht, hte) {
+      struct assignment_entry *entry = (struct assignment_entry *) hte->data;
 
       if (entry->assignment_count == 1 && entry->constval && entry->our_scope) {
         entry->var->constant_value = entry->constval;
         progress = true;
       }
-      entry->link.remove();
+      hte->data = NULL;
       free(entry);
    }
+   _mesa_hash_table_destroy(v.ht, NULL);
 
    return progress;
 }
index 0d4e3a8..03e5789 100644 (file)
@@ -62,6 +62,23 @@ optimize_dead_builtin_variables(exec_list *instructions,
        * information, so removing these variables from the user shader will
        * cause problems later.
        *
+       * For compute shaders, gl_GlobalInvocationID is computed from other
+       * built-ins, so we avoid removing the variables it depends on.
+       *
+       * We also avoid removing gl_GlobalInvocationID at this stage because it
+       * might be used by a linked shader. In this case it still needs to be
+       * initialized by the main function.
+       *
+       *    gl_GlobalInvocationID =
+       *       gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
+       *
+       * Similarly, we initialize gl_LocalInvocationIndex in the main function:
+       *
+       *    gl_LocalInvocationIndex =
+       *       gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
+       *       gl_LocalInvocationID.y * gl_WorkGroupSize.x +
+       *       gl_LocalInvocationID.x;
+       *
        * Matrix uniforms with "Transpose" are not eliminated because there's
        * an optimization pass that can turn references to the regular matrix
        * into references to the transpose matrix.  Eliminating the transpose
@@ -73,6 +90,11 @@ optimize_dead_builtin_variables(exec_list *instructions,
        */
       if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0
           || strcmp(var->name, "gl_Vertex") == 0
+          || strcmp(var->name, "gl_WorkGroupID") == 0
+          || strcmp(var->name, "gl_WorkGroupSize") == 0
+          || strcmp(var->name, "gl_LocalInvocationID") == 0
+          || strcmp(var->name, "gl_GlobalInvocationID") == 0
+          || strcmp(var->name, "gl_LocalInvocationIndex") == 0
           || strstr(var->name, "Transpose") != NULL)
          continue;
 
index e4bf874..2cb7f41 100644 (file)
@@ -119,11 +119,8 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
              * layouts, do not eliminate it.
              */
             if (entry->var->is_in_buffer_block()) {
-               const glsl_type *const block_type =
-                  entry->var->is_interface_instance()
-                  ? entry->var->type : entry->var->get_interface_type();
-
-               if (block_type->interface_packing != GLSL_INTERFACE_PACKING_PACKED)
+               if (entry->var->get_interface_type()->interface_packing !=
+                   GLSL_INTERFACE_PACKING_PACKED)
                   continue;
             }
 
index 7f2ee6c..a7a219c 100644 (file)
@@ -274,6 +274,7 @@ ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
    case ir_tex:
    case ir_lod:
    case ir_query_levels:
+   case ir_texture_samples:
       break;
    case ir_txb:
       if (do_graft(&ir->lod_info.bias))
index c06541a..64f5463 100644 (file)
@@ -40,8 +40,7 @@ extern void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog);
 
 extern void
-build_program_resource_list(struct gl_context *ctx,
-                            struct gl_shader_program *shProg);
+build_program_resource_list(struct gl_shader_program *shProg);
 
 extern void
 linker_error(struct gl_shader_program *prog, const char *fmt, ...)
diff --git a/src/glsl/shader_enums.c b/src/glsl/shader_enums.c
new file mode 100644 (file)
index 0000000..c196b79
--- /dev/null
@@ -0,0 +1,205 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "glsl/shader_enums.h"
+#include "util/macros.h"
+
+#define ENUM(x) [x] = #x
+#define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? names[(val)] : "UNKNOWN")
+
+const char * gl_shader_stage_name(gl_shader_stage stage)
+{
+   static const char *names[] = {
+      ENUM(MESA_SHADER_VERTEX),
+      ENUM(MESA_SHADER_TESS_CTRL),
+      ENUM(MESA_SHADER_TESS_EVAL),
+      ENUM(MESA_SHADER_GEOMETRY),
+      ENUM(MESA_SHADER_FRAGMENT),
+      ENUM(MESA_SHADER_COMPUTE),
+   };
+   return NAME(stage);
+}
+
+const char * gl_vert_attrib_name(gl_vert_attrib attrib)
+{
+   static const char *names[] = {
+      ENUM(VERT_ATTRIB_POS),
+      ENUM(VERT_ATTRIB_WEIGHT),
+      ENUM(VERT_ATTRIB_NORMAL),
+      ENUM(VERT_ATTRIB_COLOR0),
+      ENUM(VERT_ATTRIB_COLOR1),
+      ENUM(VERT_ATTRIB_FOG),
+      ENUM(VERT_ATTRIB_COLOR_INDEX),
+      ENUM(VERT_ATTRIB_EDGEFLAG),
+      ENUM(VERT_ATTRIB_TEX0),
+      ENUM(VERT_ATTRIB_TEX1),
+      ENUM(VERT_ATTRIB_TEX2),
+      ENUM(VERT_ATTRIB_TEX3),
+      ENUM(VERT_ATTRIB_TEX4),
+      ENUM(VERT_ATTRIB_TEX5),
+      ENUM(VERT_ATTRIB_TEX6),
+      ENUM(VERT_ATTRIB_TEX7),
+      ENUM(VERT_ATTRIB_POINT_SIZE),
+      ENUM(VERT_ATTRIB_GENERIC0),
+      ENUM(VERT_ATTRIB_GENERIC1),
+      ENUM(VERT_ATTRIB_GENERIC2),
+      ENUM(VERT_ATTRIB_GENERIC3),
+      ENUM(VERT_ATTRIB_GENERIC4),
+      ENUM(VERT_ATTRIB_GENERIC5),
+      ENUM(VERT_ATTRIB_GENERIC6),
+      ENUM(VERT_ATTRIB_GENERIC7),
+      ENUM(VERT_ATTRIB_GENERIC8),
+      ENUM(VERT_ATTRIB_GENERIC9),
+      ENUM(VERT_ATTRIB_GENERIC10),
+      ENUM(VERT_ATTRIB_GENERIC11),
+      ENUM(VERT_ATTRIB_GENERIC12),
+      ENUM(VERT_ATTRIB_GENERIC13),
+      ENUM(VERT_ATTRIB_GENERIC14),
+      ENUM(VERT_ATTRIB_GENERIC15),
+   };
+   return NAME(attrib);
+}
+
+const char * gl_varying_slot_name(gl_varying_slot slot)
+{
+   static const char *names[] = {
+      ENUM(VARYING_SLOT_POS),
+      ENUM(VARYING_SLOT_COL0),
+      ENUM(VARYING_SLOT_COL1),
+      ENUM(VARYING_SLOT_FOGC),
+      ENUM(VARYING_SLOT_TEX0),
+      ENUM(VARYING_SLOT_TEX1),
+      ENUM(VARYING_SLOT_TEX2),
+      ENUM(VARYING_SLOT_TEX3),
+      ENUM(VARYING_SLOT_TEX4),
+      ENUM(VARYING_SLOT_TEX5),
+      ENUM(VARYING_SLOT_TEX6),
+      ENUM(VARYING_SLOT_TEX7),
+      ENUM(VARYING_SLOT_PSIZ),
+      ENUM(VARYING_SLOT_BFC0),
+      ENUM(VARYING_SLOT_BFC1),
+      ENUM(VARYING_SLOT_EDGE),
+      ENUM(VARYING_SLOT_CLIP_VERTEX),
+      ENUM(VARYING_SLOT_CLIP_DIST0),
+      ENUM(VARYING_SLOT_CLIP_DIST1),
+      ENUM(VARYING_SLOT_PRIMITIVE_ID),
+      ENUM(VARYING_SLOT_LAYER),
+      ENUM(VARYING_SLOT_VIEWPORT),
+      ENUM(VARYING_SLOT_FACE),
+      ENUM(VARYING_SLOT_PNTC),
+      ENUM(VARYING_SLOT_TESS_LEVEL_OUTER),
+      ENUM(VARYING_SLOT_TESS_LEVEL_INNER),
+      ENUM(VARYING_SLOT_VAR0),
+      ENUM(VARYING_SLOT_VAR1),
+      ENUM(VARYING_SLOT_VAR2),
+      ENUM(VARYING_SLOT_VAR3),
+      ENUM(VARYING_SLOT_VAR4),
+      ENUM(VARYING_SLOT_VAR5),
+      ENUM(VARYING_SLOT_VAR6),
+      ENUM(VARYING_SLOT_VAR7),
+      ENUM(VARYING_SLOT_VAR8),
+      ENUM(VARYING_SLOT_VAR9),
+      ENUM(VARYING_SLOT_VAR10),
+      ENUM(VARYING_SLOT_VAR11),
+      ENUM(VARYING_SLOT_VAR12),
+      ENUM(VARYING_SLOT_VAR13),
+      ENUM(VARYING_SLOT_VAR14),
+      ENUM(VARYING_SLOT_VAR15),
+      ENUM(VARYING_SLOT_VAR16),
+      ENUM(VARYING_SLOT_VAR17),
+      ENUM(VARYING_SLOT_VAR18),
+      ENUM(VARYING_SLOT_VAR19),
+      ENUM(VARYING_SLOT_VAR20),
+      ENUM(VARYING_SLOT_VAR21),
+      ENUM(VARYING_SLOT_VAR22),
+      ENUM(VARYING_SLOT_VAR23),
+      ENUM(VARYING_SLOT_VAR24),
+      ENUM(VARYING_SLOT_VAR25),
+      ENUM(VARYING_SLOT_VAR26),
+      ENUM(VARYING_SLOT_VAR27),
+      ENUM(VARYING_SLOT_VAR28),
+      ENUM(VARYING_SLOT_VAR29),
+      ENUM(VARYING_SLOT_VAR30),
+      ENUM(VARYING_SLOT_VAR31),
+   };
+   return NAME(slot);
+}
+
+const char * gl_system_value_name(gl_system_value sysval)
+{
+   static const char *names[] = {
+     ENUM(SYSTEM_VALUE_VERTEX_ID),
+     ENUM(SYSTEM_VALUE_INSTANCE_ID),
+     ENUM(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE),
+     ENUM(SYSTEM_VALUE_BASE_VERTEX),
+     ENUM(SYSTEM_VALUE_INVOCATION_ID),
+     ENUM(SYSTEM_VALUE_FRONT_FACE),
+     ENUM(SYSTEM_VALUE_SAMPLE_ID),
+     ENUM(SYSTEM_VALUE_SAMPLE_POS),
+     ENUM(SYSTEM_VALUE_SAMPLE_MASK_IN),
+     ENUM(SYSTEM_VALUE_TESS_COORD),
+     ENUM(SYSTEM_VALUE_VERTICES_IN),
+     ENUM(SYSTEM_VALUE_PRIMITIVE_ID),
+     ENUM(SYSTEM_VALUE_TESS_LEVEL_OUTER),
+     ENUM(SYSTEM_VALUE_TESS_LEVEL_INNER),
+     ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID),
+     ENUM(SYSTEM_VALUE_WORK_GROUP_ID),
+     ENUM(SYSTEM_VALUE_VERTEX_CNT),
+   };
+   return NAME(sysval);
+}
+
+const char * glsl_interp_qualifier_name(enum glsl_interp_qualifier qual)
+{
+   static const char *names[] = {
+      ENUM(INTERP_QUALIFIER_NONE),
+      ENUM(INTERP_QUALIFIER_SMOOTH),
+      ENUM(INTERP_QUALIFIER_FLAT),
+      ENUM(INTERP_QUALIFIER_NOPERSPECTIVE),
+   };
+   return NAME(qual);
+}
+
+const char * gl_frag_result_name(gl_frag_result result)
+{
+   static const char *names[] = {
+      ENUM(FRAG_RESULT_DEPTH),
+      ENUM(FRAG_RESULT_STENCIL),
+      ENUM(FRAG_RESULT_COLOR),
+      ENUM(FRAG_RESULT_SAMPLE_MASK),
+      ENUM(FRAG_RESULT_DATA0),
+      ENUM(FRAG_RESULT_DATA1),
+      ENUM(FRAG_RESULT_DATA2),
+      ENUM(FRAG_RESULT_DATA3),
+      ENUM(FRAG_RESULT_DATA4),
+      ENUM(FRAG_RESULT_DATA5),
+      ENUM(FRAG_RESULT_DATA6),
+      ENUM(FRAG_RESULT_DATA7),
+   };
+   return NAME(result);
+}
index 9bb163f..99acc64 100644 (file)
@@ -43,6 +43,8 @@ typedef enum
    MESA_SHADER_COMPUTE = 5,
 } gl_shader_stage;
 
+const char * gl_shader_stage_name(gl_shader_stage stage);
+
 #define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1)
 
 
@@ -91,6 +93,8 @@ typedef enum
    VERT_ATTRIB_MAX = 33
 } gl_vert_attrib;
 
+const char * gl_vert_attrib_name(gl_vert_attrib attrib);
+
 /**
  * Symbolic constants to help iterate over
  * specific blocks of vertex attributes.
@@ -193,8 +197,43 @@ typedef enum
    VARYING_SLOT_TESS_LEVEL_OUTER, /* Only appears as TCS output. */
    VARYING_SLOT_TESS_LEVEL_INNER, /* Only appears as TCS output. */
    VARYING_SLOT_VAR0, /* First generic varying slot */
+   /* The remaining slots exist simply for the benefit of gl_varying_slot_name()
+    * and are not to be construed as an upper bound:
+    */
+   VARYING_SLOT_VAR1,
+   VARYING_SLOT_VAR2,
+   VARYING_SLOT_VAR3,
+   VARYING_SLOT_VAR4,
+   VARYING_SLOT_VAR5,
+   VARYING_SLOT_VAR6,
+   VARYING_SLOT_VAR7,
+   VARYING_SLOT_VAR8,
+   VARYING_SLOT_VAR9,
+   VARYING_SLOT_VAR10,
+   VARYING_SLOT_VAR11,
+   VARYING_SLOT_VAR12,
+   VARYING_SLOT_VAR13,
+   VARYING_SLOT_VAR14,
+   VARYING_SLOT_VAR15,
+   VARYING_SLOT_VAR16,
+   VARYING_SLOT_VAR17,
+   VARYING_SLOT_VAR18,
+   VARYING_SLOT_VAR19,
+   VARYING_SLOT_VAR20,
+   VARYING_SLOT_VAR21,
+   VARYING_SLOT_VAR22,
+   VARYING_SLOT_VAR23,
+   VARYING_SLOT_VAR24,
+   VARYING_SLOT_VAR25,
+   VARYING_SLOT_VAR26,
+   VARYING_SLOT_VAR27,
+   VARYING_SLOT_VAR28,
+   VARYING_SLOT_VAR29,
+   VARYING_SLOT_VAR30,
+   VARYING_SLOT_VAR31,
 } gl_varying_slot;
 
+const char * gl_varying_slot_name(gl_varying_slot slot);
 
 /**
  * Bitflags for varying slots.
@@ -238,6 +277,8 @@ typedef enum
 #define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID)
 #define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS)
 #define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_MASK_IN)
+#define SYSTEM_BIT_LOCAL_INVOCATION_ID ((uint64_t)1 << SYSTEM_VALUE_LOCAL_INVOCATION_ID)
+
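
An aside on the SYSTEM_BIT_* pattern: the (uint64_t) cast is what keeps the shift well-defined once the value index passes 31, as it does for SYSTEM_VALUE_LOCAL_INVOCATION_ID. A small sketch with made-up indices:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for gl_system_value indices. */
enum { SYS_SAMPLE_ID = 20, SYS_LOCAL_INVOCATION_ID = 34 };

#define SYS_BIT(v) ((uint64_t)1 << (v))

int main(void)
{
   uint64_t used = SYS_BIT(SYS_LOCAL_INVOCATION_ID);

   /* A plain (1 << 34) would overflow a 32-bit int; the cast avoids that. */
   printf("sample-id: %d, local-invocation-id: %d\n",
          !!(used & SYS_BIT(SYS_SAMPLE_ID)),
          !!(used & SYS_BIT(SYS_LOCAL_INVOCATION_ID)));
   return 0;
}
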
 /**
  * If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index will be
  * one of these values.  If a NIR variable's mode is nir_var_system_value, it
@@ -363,9 +404,25 @@ typedef enum
    SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */
    /*@}*/
 
+   /**
+    * \name Compute shader system values
+    */
+   /*@{*/
+   SYSTEM_VALUE_LOCAL_INVOCATION_ID,
+   SYSTEM_VALUE_WORK_GROUP_ID,
+   SYSTEM_VALUE_NUM_WORK_GROUPS,
+   /*@}*/
+
+   /**
+    * Driver internal vertex-count, used (for example) for drivers to
+    * calculate stride for stream-out outputs.  Not externally visible.
+    */
+   SYSTEM_VALUE_VERTEX_CNT,
+
    SYSTEM_VALUE_MAX             /**< Number of values */
 } gl_system_value;
 
+const char * gl_system_value_name(gl_system_value sysval);
 
 /**
  * The possible interpolation qualifiers that can be applied to a fragment
@@ -383,6 +440,8 @@ enum glsl_interp_qualifier
    INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */
 };
 
+const char * glsl_interp_qualifier_name(enum glsl_interp_qualifier qual);
+
 /**
  * Fragment program results
  */
@@ -399,8 +458,19 @@ typedef enum
    /* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n]
     * or ARB_fragment_program fragment.color[n]) color results.  If
     * any are written, FRAG_RESULT_COLOR will not be written.
+    * FRAG_RESULT_DATA1 and up are simply for the benefit of
+    * gl_frag_result_name() and not to be construed as an upper bound.
     */
    FRAG_RESULT_DATA0 = 4,
+   FRAG_RESULT_DATA1,
+   FRAG_RESULT_DATA2,
+   FRAG_RESULT_DATA3,
+   FRAG_RESULT_DATA4,
+   FRAG_RESULT_DATA5,
+   FRAG_RESULT_DATA6,
+   FRAG_RESULT_DATA7,
 } gl_frag_result;
 
+const char * gl_frag_result_name(gl_frag_result result);
+
 #endif /* SHADER_ENUMS_H */
index 6ff9553..1af50d6 100644 (file)
@@ -68,7 +68,7 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
 }
 
 void
-_mesa_shader_debug(struct gl_context *, GLenum, GLuint *id,
+_mesa_shader_debug(struct gl_context *, GLenum, GLuint *,
                    const char *, int)
 {
 }
@@ -107,7 +107,7 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
 
    ralloc_free(shProg->UniformBlocks);
    shProg->UniformBlocks = NULL;
-   shProg->NumUniformBlocks = 0;
+   shProg->NumBufferInterfaceBlocks = 0;
    for (i = 0; i < MESA_SHADER_STAGES; i++) {
       ralloc_free(shProg->UniformBlockStageIndex[i]);
       shProg->UniformBlockStageIndex[i] = NULL;
index b02a9e3..bdc78c0 100644 (file)
@@ -10,7 +10,7 @@ AM_CPPFLAGS = \
        -I$(top_srcdir)/include/GL/internal \
        $(DEFINES) \
        $(LIBDRM_CFLAGS) \
-       $(X11_CFLAGS)
+       $(X11_INCLUDES)
 
 TESTS = glx-test
 check_PROGRAMS = glx-test
index 160a255..307e05d 100644 (file)
@@ -50,19 +50,14 @@ AM_CPPFLAGS =                                                       \
 
 include Makefile.sources
 
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
 glapi_gen_mapi_deps := \
        mapi_abi.py \
        $(wildcard glapi/gen/*.xml) \
        $(wildcard glapi/gen/*.py)
 
-# $(1): path to an XML file
-# $(2): name of the printer
-define glapi_gen_mapi
-@$(MKDIR_P) $(dir $@)
-$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/mapi_abi.py \
-       --mode lib --printer $(2) $(1) > $@
-endef
-
 if HAVE_SHARED_GLAPI
 BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h
 
@@ -93,7 +88,9 @@ shared_glapi_test_LDADD = \
 endif
 
 shared-glapi/glapi_mapi_tmp.h : glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-       $(call glapi_gen_mapi,$<,shared-glapi)
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer shared-glapi \
+               $(srcdir)/glapi/gen/gl_and_es_API.xml > $@
 
 if HAVE_OPENGL
 noinst_LTLIBRARIES = glapi/libglapi.la
@@ -185,7 +182,9 @@ endif
 endif
 
 es1api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-       $(call glapi_gen_mapi,$<,es1api)
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer es1api \
+               $(srcdir)/glapi/gen/gl_and_es_API.xml > $@
 
 if HAVE_OPENGL_ES2
 TESTS += es2api/ABI-check
@@ -229,6 +228,8 @@ endif
 endif
 
 es2api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-       $(call glapi_gen_mapi,$<,es2api)
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer es2api \
+               $(srcdir)/glapi/gen/gl_and_es_API.xml > $@
 
 include $(top_srcdir)/install-lib-links.mk
index 5c03b04..38faccc 100644 (file)
@@ -46,13 +46,6 @@ __asm__(".text\n"
 
 #ifndef MAPI_MODE_BRIDGE
 
-__asm__("x86_64_current_tls:\n\t"
-       "movq " ENTRY_CURRENT_TABLE "@GOTTPOFF(%rip), %rax\n\t"
-       "ret");
-
-extern unsigned long
-x86_64_current_tls();
-
 #include <string.h>
 #include "u_execmem.h"
 
@@ -90,7 +83,8 @@ entry_generate(int slot)
    char *code;
    mapi_func entry;
 
-   addr = x86_64_current_tls();
+   __asm__("movq " ENTRY_CURRENT_TABLE "@GOTTPOFF(%%rip), %0"
+           : "=r" (addr));
    if ((addr >> 32) != 0xffffffff)
       return NULL;
    addr &= 0xffffffff;
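
The change above trades the hand-written out-of-line x86_64_current_tls helper for GCC extended inline asm with an "=r" output constraint, letting the compiler pick the destination register. A minimal sketch of the constraint mechanics, reading %rsp instead of the TLS offset so it runs on any x86-64 GCC/Clang setup:

#include <stdio.h>

int main(void)
{
   unsigned long value;

   /* "=r" tells the compiler to allocate an output register for %0. */
   __asm__("movq %%rsp, %0" : "=r" (value));

   printf("stack pointer: %#lx\n", value);
   return 0;
}
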
index 59839a0..55ad764 100644 (file)
    <enum name="MAX_FRAMEBUFFER_LAYERS"                     value="0x9317" />
    <enum name="MAX_FRAMEBUFFER_SAMPLES"                    value="0x9318" />
 
-    <function name="FramebufferParameteri">
+    <function name="FramebufferParameteri" es2="3.1">
        <param name="target" type="GLenum"/>
        <param name="pname"  type="GLenum"/>
        <param name="param"  type="GLint" />
     </function>
 
-    <function name="GetFramebufferParameteriv">
+    <function name="GetFramebufferParameteriv" es2="3.1">
        <param name="target" type="GLenum" />
        <param name="pname"  type="GLenum" />
        <param name="params" type="GLint *" output="true" />
diff --git a/src/mapi/glapi/gen/ARB_shader_storage_buffer_object.xml b/src/mapi/glapi/gen/ARB_shader_storage_buffer_object.xml
new file mode 100644 (file)
index 0000000..6901bdf
--- /dev/null
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
+
+<OpenGLAPI>
+
+<category name="GL_ARB_shader_storage_buffer_object" number="137">
+
+<enum name="SHADER_STORAGE_BUFFER" value="0x90D2" />
+<enum name="SHADER_STORAGE_BUFFER_BINDING" value="0x90D3" />
+<enum name="SHADER_STORAGE_BUFFER_START" value="0x90D4" />
+<enum name="SHADER_STORAGE_BUFFER_SIZE" value="0x90D5" />
+<enum name="MAX_VERTEX_SHADER_STORAGE_BLOCKS" value="0x90D6" />
+<enum name="MAX_GEOMETRY_SHADER_STORAGE_BLOCKS" value="0x90D7" />
+<enum name="MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS" value="0x90D8" />
+<enum name="MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS" value="0x90D9" />
+<enum name="MAX_FRAGMENT_SHADER_STORAGE_BLOCKS" value="0x90DA" />
+<enum name="MAX_COMPUTE_SHADER_STORAGE_BLOCKS" value="0x90DB" />
+<enum name="MAX_COMBINED_SHADER_STORAGE_BLOCKS" value="0x90DC" />
+<enum name="MAX_SHADER_STORAGE_BUFFER_BINDINGS" value="0x90DD" />
+<enum name="MAX_SHADER_STORAGE_BLOCK_SIZE" value="0x90DE" />
+<enum name="SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT" value="0x90DF" />
+<enum name="SHADER_STORAGE_BARRIER_BIT" value="0x2000" />
+<enum name="MAX_COMBINED_SHADER_OUTPUT_RESOURCES" value="0x8F39" />
+
+<!-- Duplicated with GL3x.xml: BindBufferRange, BindBufferBase,
+     GetIntegeri_v -->
+
+<function name="ShaderStorageBlockBinding">
+    <param name="program" type="GLuint" />
+    <param name="shaderStorageBlockIndex" type="GLuint" />
+    <param name="shaderStorageBlockBinding" type="GLuint" />
+</function>
+
+</category>
+
+</OpenGLAPI>
index dee5027..dd48c83 100644 (file)
 </category>
 
 <category name="4.3">
-  <enum name="DEPTH_STENCIL_TEXTURE_MODE"              value="0x90EA"/>
+  <enum name="SHADER_STORAGE_BARRIER_BIT"                value="0x2000" />
+  <enum name="MAX_COMBINED_SHADER_OUTPUT_RESOURCES"      value="0x8F39" />
+  <enum name="SHADER_STORAGE_BUFFER"                     value="0x90D2"/>
+  <enum name="SHADER_STORAGE_BUFFER_BINDING"             value="0x90D3"/>
+  <enum name="SHADER_STORAGE_BUFFER_START"               value="0x90D4"/>
+  <enum name="SHADER_STORAGE_BUFFER_SIZE"                value="0x90D5"/>
+  <enum name="MAX_VERTEX_SHADER_STORAGE_BLOCKS"          value="0x90D6" />
+  <enum name="MAX_GEOMETRY_SHADER_STORAGE_BLOCKS"        value="0x90D7" />
+  <enum name="MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS"    value="0x90D8" />
+  <enum name="MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS" value="0x90D9" />
+  <enum name="MAX_FRAGMENT_SHADER_STORAGE_BLOCKS"        value="0x90DA" />
+  <enum name="MAX_COMPUTE_SHADER_STORAGE_BLOCKS"         value="0x90DB" />
+  <enum name="MAX_COMBINED_SHADER_STORAGE_BLOCKS"        value="0x90DC" />
+  <enum name="MAX_SHADER_STORAGE_BUFFER_BINDINGS"        value="0x90DD" />
+  <enum name="MAX_SHADER_STORAGE_BLOCK_SIZE"             value="0x90DE" />
+  <enum name="SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT"    value="0x90DF" />
+  <enum name="DEPTH_STENCIL_TEXTURE_MODE"                value="0x90EA"/>
 </category>
 
 <category name="4.5">
index 9224de2..a5a26a6 100644 (file)
@@ -20,7 +20,7 @@ XORG_INDENT_FLAGS = -linux -bad -bap -blf -bli0 -cbi0 -cdw -nce -cs -i4 -lc80 -p
 
 MESA_DIR = $(top_builddir)/src/mesa
 MESA_GLAPI_DIR = $(top_builddir)/src/mapi/glapi
-MESA_MAPI_DIR = $(top_builddir)/src/mapi
+MESA_MAPI_DIR = $(top_srcdir)/src/mapi
 MESA_GLX_DIR = $(top_builddir)/src/glx
 
 MESA_GLAPI_OUTPUTS = \
@@ -153,6 +153,7 @@ API_XML = \
        ARB_shader_atomic_counters.xml \
        ARB_shader_image_load_store.xml \
        ARB_shader_subroutine.xml \
+       ARB_shader_storage_buffer_object.xml \
        ARB_sync.xml \
        ARB_tessellation_shader.xml \
        ARB_texture_barrier.xml \
@@ -210,7 +211,7 @@ COMMON = $(API_XML) \
 
 COMMON_GLX = $(COMMON) glX_API.xml glX_XML.py glX_proto_common.py
 
-PYTHON_GEN = $(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
 
 ######################################################################
 
@@ -242,65 +243,65 @@ $(XORG_GLAPI_DIR)/%.h: $(MESA_GLAPI_DIR)/%.h
 ######################################################################
 
 $(MESA_GLAPI_DIR)/glapi_mapi_tmp.h: $(MESA_MAPI_DIR)/mapi_abi.py $(COMMON)
-       $(PYTHON_GEN) $< \
+       $(PYTHON_GEN) $(MESA_MAPI_DIR)/mapi_abi.py \
                --printer glapi --mode lib $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glprocs.h: gl_procs.py $(COMMON)
-       $(PYTHON_GEN) $< -c -f $(srcdir)/gl_and_es_API.xml > $@
+       $(PYTHON_GEN) $(srcdir)/gl_procs.py -c -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapitemp.h: gl_apitemp.py $(COMMON)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+       $(PYTHON_GEN) $(srcdir)/gl_apitemp.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapitable.h: gl_table.py $(COMMON)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+       $(PYTHON_GEN) $(srcdir)/gl_table.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapi_gentable.c: gl_gentable.py $(COMMON)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+       $(PYTHON_GEN) $(srcdir)/gl_gentable.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 ######################################################################
 
 $(MESA_GLAPI_DIR)/glapi_x86.S: gl_x86_asm.py $(COMMON)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+       $(PYTHON_GEN) $(srcdir)/gl_x86_asm.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapi_x86-64.S: gl_x86-64_asm.py $(COMMON)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+       $(PYTHON_GEN) $(srcdir)/gl_x86-64_asm.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapi_sparc.S: gl_SPARC_asm.py $(COMMON)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+       $(PYTHON_GEN) $(srcdir)/gl_SPARC_asm.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 ######################################################################
 
 $(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+       $(PYTHON_GEN) $(srcdir)/gl_enums.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_DIR)/main/api_exec.c: gl_genexec.py apiexec.py $(COMMON)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+       $(PYTHON_GEN) $(srcdir)/gl_genexec.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml -m remap_table > $@
+       $(PYTHON_GEN) $(srcdir)/gl_table.py -f $(srcdir)/gl_and_es_API.xml -m remap_table > $@
 
 $(MESA_DIR)/main/remap_helper.h: remap_helper.py $(COMMON)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+       $(PYTHON_GEN) $(srcdir)/remap_helper.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 ######################################################################
 
 $(MESA_GLX_DIR)/indirect.c: glX_proto_send.py $(COMMON_GLX)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m proto \
+       $(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m proto \
          | $(INDENT) $(INDENT_FLAGS) > $@
 
 $(MESA_GLX_DIR)/indirect.h: glX_proto_send.py $(COMMON_GLX)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m init_h > $@
+       $(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m init_h > $@
 
 $(MESA_GLX_DIR)/indirect_init.c: glX_proto_send.py $(COMMON_GLX)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m init_c > $@
+       $(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m init_c > $@
 
 $(MESA_GLX_DIR)/indirect_size.h $(XORG_GLX_DIR)/indirect_size.h: glX_proto_size.py $(COMMON_GLX)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_h --only-set \
+       $(PYTHON_GEN) $(srcdir)/glX_proto_size.py -f $(srcdir)/gl_API.xml -m size_h --only-set \
            --header-tag _INDIRECT_SIZE_H_ \
          | $(INDENT) $(INDENT_FLAGS) > $@
 
 $(MESA_GLX_DIR)/indirect_size.c: glX_proto_size.py $(COMMON_GLX)
-       $(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_c --only-set \
+       $(PYTHON_GEN) $(srcdir)/glX_proto_size.py -f $(srcdir)/gl_API.xml -m size_c --only-set \
          | $(INDENT) $(INDENT_FLAGS) > $@
 
 ######################################################################
index 3a0eb18..58ec08b 100644 (file)
@@ -151,8 +151,8 @@ functions = {
 
     # OpenGL 4.3 / GL_ARB_framebuffer_no_attachments.  Mesa can expose the
     # extension with OpenGL 3.0.
-    "FramebufferParameteri": exec_info(compatibility=30, core=31),
-    "GetFramebufferParameteri": exec_info(compatibility=30, core=31),
+    "FramebufferParameteri": exec_info(compatibility=30, core=31, es2=31),
+    "GetFramebufferParameteri": exec_info(compatibility=30, core=31, es2=31),
 
     # OpenGL 4.5 / GL_ARB_direct_state_access.   Mesa can expose the extension
     # with core profile.
index f0dcdca..ec83cd4 100644 (file)
 
 <xi:include href="ARB_program_interface_query.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
-<!-- ARB extensions #135...#138 -->
+<!-- ARB extensions #135...#136 -->
+
+<xi:include href="ARB_shader_storage_buffer_object.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+
+<!-- ARB extensions #138 -->
 
 <xi:include href="ARB_texture_buffer_range.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
index 26d8e7b..6c66779 100644 (file)
@@ -88,6 +88,7 @@ header = """/**
 #include "main/matrix.h"
 #include "main/multisample.h"
 #include "main/objectlabel.h"
+#include "main/objectpurge.h"
 #include "main/performance_monitor.h"
 #include "main/pipelineobj.h"
 #include "main/pixel.h"
index eb4a3da..69506f2 100644 (file)
@@ -90,37 +90,24 @@ CLEANFILES = \
        program/program_parse.tab.h \
        main/git_sha1.h.tmp
 
-GET_HASH_GEN = main/get_hash_generator.py
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
 
-main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py   \
-                $(GET_HASH_GEN)
-       $(AM_V_GEN)set -e;                                              \
-       $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/$(GET_HASH_GEN)            \
-               -f $< > $@.tmp;                                         \
-       mv $@.tmp $@;
+main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py \
+                 main/get_hash_generator.py
+       $(PYTHON_GEN) $(srcdir)/main/get_hash_generator.py \
+               -f $(srcdir)/../mapi/glapi/gen/gl_and_es_API.xml > $@
 
-main/format_info.h: main/formats.csv                                    \
+main/format_info.h: main/formats.csv \
                     main/format_parser.py main/format_info.py
-       $(AM_V_GEN)set -e;                                              \
-       $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/main/format_info.py        \
-                   $< > $@.tmp;                                         \
-       mv $@.tmp $@;
+       $(PYTHON_GEN) $(srcdir)/main/format_info.py $(srcdir)/main/formats.csv > $@
 
-main/format_pack.c: main/format_pack.py main/formats.csv               \
+main/format_pack.c: main/format_pack.py main/formats.csv \
                     main/format_parser.py
-       $(AM_V_GEN)set -e;                                              \
-       $(PYTHON2) $(PYTHON_FLAGS)                                      \
-                       $(srcdir)/main/format_pack.py                   \
-                       $(srcdir)/main/formats.csv                      \
-               | $(INDENT) $(INDENT_FLAGS) > $@;
+       $(PYTHON_GEN) $(srcdir)/main/format_pack.py $(srcdir)/main/formats.csv > $@
 
 main/format_unpack.c: main/format_unpack.py main/formats.csv   \
                       main/format_parser.py
-       $(AM_V_GEN)set -e;                                              \
-       $(PYTHON2) $(PYTHON_FLAGS)                                      \
-                       $(srcdir)/main/format_unpack.py                 \
-                       $(srcdir)/main/formats.csv                      \
-               | $(INDENT) $(INDENT_FLAGS) > $@;
+       $(PYTHON_GEN) $(srcdir)/main/format_unpack.py $(srcdir)/main/formats.csv > $@
 
 main/formats.c: main/format_info.h
 
@@ -201,13 +188,17 @@ libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS)
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = gl.pc
 
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+YACC_GEN = $(AM_V_GEN)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_GEN)$(LEX) $(LFLAGS)
+
 program/lex.yy.c: program/program_lexer.l
-       $(AM_V_at)$(MKDIR_P) program
-       $(AM_V_GEN) $(LEX) --never-interactive --outfile=$@ $<
+       $(MKDIR_GEN)
+       $(LEX_GEN) -o $@ $(srcdir)/program/program_lexer.l
 
 program/program_parse.tab.c program/program_parse.tab.h: program/program_parse.y
-       $(AM_V_at)$(MKDIR_P) program
-       $(AM_V_GEN) $(YACC) -p "_mesa_program_" -v -d --output=program/program_parse.tab.c $<
+       $(MKDIR_GEN)
+       $(YACC_GEN) -o $@ -p "_mesa_program_" --defines=$(builddir)/program/program_parse.tab.h $(srcdir)/program/program_parse.y
 
 if GEN_ASM_OFFSETS
 matypes.h: $(gen_matypes_SOURCES)
index ed9848c..0915594 100644 (file)
@@ -134,6 +134,8 @@ MAIN_FILES = \
        main/multisample.h \
        main/objectlabel.c \
        main/objectlabel.h \
+       main/objectpurge.c \
+       main/objectpurge.h \
        main/pack.c \
        main/pack.h \
        main/pbo.c \
@@ -523,7 +525,9 @@ PROGRAM_FILES = \
        program/sampler.h \
        program/string_to_uint_map.cpp \
        program/symbol_table.c \
-       program/symbol_table.h
+       program/symbol_table.h \
+       ../glsl/shader_enums.c \
+       ../glsl/shader_enums.h
 
 PROGRAM_NIR_FILES = \
        program/prog_to_nir.c \
index bde544e..e27489d 100644 (file)
@@ -599,7 +599,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       /* Save the shader state from ctx->Shader (instead of ctx->_Shader) so
        * that we don't have to worry about the current pipeline state.
        */
-      for (i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
+      for (i = 0; i < MESA_SHADER_STAGES; i++) {
          _mesa_reference_shader_program(ctx, &save->Shader[i],
                                         ctx->Shader.CurrentProgram[i]);
       }
@@ -949,7 +949,9 @@ _mesa_meta_end(struct gl_context *ctx)
          GL_TESS_EVALUATION_SHADER,
          GL_GEOMETRY_SHADER,
          GL_FRAGMENT_SHADER,
+         GL_COMPUTE_SHADER,
       };
+      STATIC_ASSERT(MESA_SHADER_STAGES == ARRAY_SIZE(targets));
 
       bool any_shader;
 
@@ -975,7 +977,7 @@ _mesa_meta_end(struct gl_context *ctx)
       }
 
       any_shader = false;
-      for (i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
+      for (i = 0; i < MESA_SHADER_STAGES; i++) {
          /* It is safe to call _mesa_use_shader_program even if the extension
           * necessary for that program state is not supported.  In that case,
           * the saved program object must be NULL and the currently bound
index fe43915..23fa209 100644 (file)
@@ -494,8 +494,10 @@ _mesa_meta_and_swrast_BlitFramebuffer(struct gl_context *ctx,
 bool
 _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
                                          struct gl_texture_image *src_tex_image,
+                                         struct gl_renderbuffer *src_renderbuffer,
                                          int src_x, int src_y, int src_z,
                                          struct gl_texture_image *dst_tex_image,
+                                         struct gl_renderbuffer *dst_renderbuffer,
                                          int dst_x, int dst_y, int dst_z,
                                          int src_width, int src_height);
 
index 71d18de..a41fe42 100644 (file)
@@ -187,8 +187,8 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
                                "   vec2 tex_coord = texCoords - s_0_offset;\n"
                                "\n"
                                "   tex_coord *= scale;\n"
-                               "   clamp(tex_coord.x, 0.0f, scale.x * src_width - 1.0f);\n"
-                               "   clamp(tex_coord.y, 0.0f, scale.y * src_height - 1.0f);\n"
+                               "   tex_coord.x = clamp(tex_coord.x, 0.0f, scale.x * src_width - 1.0f);\n"
+                               "   tex_coord.y = clamp(tex_coord.y, 0.0f, scale.y * src_height - 1.0f);\n"
                                "   interp = fract(tex_coord);\n"
                                "   tex_coord = ivec2(tex_coord) * scale_inv;\n"
                                "\n"
index 149ed18..33490ee 100644 (file)
 #include "mtypes.h"
 #include "meta.h"
 
+/**
+ * Create a texture image that wraps a renderbuffer.
+ */
+static struct gl_texture_image *
+wrap_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
+{
+   GLenum texTarget;
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+
+   if (rb->NumSamples > 1)
+      texTarget = GL_TEXTURE_2D_MULTISAMPLE;
+   else
+      texTarget = GL_TEXTURE_2D;
+
+   /* Texture ID is not significant since it never goes into the hash table */
+   texObj = ctx->Driver.NewTextureObject(ctx, 0, texTarget);
+   assert(texObj);
+   if (!texObj)
+      return NULL;
+
+   texImage = _mesa_get_tex_image(ctx, texObj, texTarget, 0);
+   assert(texImage);
+   if (!texImage)
+      return NULL;
+
+   if (!ctx->Driver.BindRenderbufferTexImage(ctx, rb, texImage)) {
+      _mesa_problem(ctx, "Failed to create texture from renderbuffer");
+      return NULL;
+   }
+
+   if (ctx->Driver.FinishRenderTexture && !rb->NeedsFinishRenderTexture) {
+      rb->NeedsFinishRenderTexture = true;
+      ctx->Driver.FinishRenderTexture(ctx, rb);
+   }
+
+   return texImage;
+}
+
+
 /* This function makes a texture view without bothering with all of the API
  * checks.  Most of them are the same for CopyTexSubImage so checking would
  * be redundant.  The one major difference is that we don't check for
@@ -112,11 +152,15 @@ make_view(struct gl_context *ctx, struct gl_texture_image *tex_image,
 bool
 _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
                                          struct gl_texture_image *src_tex_image,
+                                         struct gl_renderbuffer *src_renderbuffer,
                                          int src_x, int src_y, int src_z,
                                          struct gl_texture_image *dst_tex_image,
+                                         struct gl_renderbuffer *dst_renderbuffer,
                                          int dst_x, int dst_y, int dst_z,
                                          int src_width, int src_height)
 {
+   mesa_format src_format, dst_format;
+   GLint src_internal_format, dst_internal_format;
    GLuint src_view_texture = 0;
    struct gl_texture_image *src_view_tex_image;
    GLuint fbos[2];
@@ -124,15 +168,37 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
    GLbitfield mask;
    GLenum status, attachment;
 
-   if (_mesa_is_format_compressed(dst_tex_image->TexFormat))
+   if (src_renderbuffer) {
+      src_format = src_renderbuffer->Format;
+      src_internal_format = src_renderbuffer->InternalFormat;
+   } else {
+      assert(src_tex_image);
+      src_format = src_tex_image->TexFormat;
+      src_internal_format = src_tex_image->InternalFormat;
+   }
+
+   if (dst_renderbuffer) {
+      dst_format = dst_renderbuffer->Format;
+      dst_internal_format = dst_renderbuffer->InternalFormat;
+   } else {
+      assert(dst_tex_image);
+      dst_format = dst_tex_image->TexFormat;
+      dst_internal_format = dst_tex_image->InternalFormat;
+   }
+
+   if (_mesa_is_format_compressed(src_format))
       return false;
 
-   if (_mesa_is_format_compressed(src_tex_image->TexFormat))
+   if (_mesa_is_format_compressed(dst_format))
       return false;
 
-   if (src_tex_image->InternalFormat == dst_tex_image->InternalFormat) {
+   if (src_internal_format == dst_internal_format) {
       src_view_tex_image = src_tex_image;
    } else {
+      if (src_renderbuffer) {
+         assert(src_tex_image == NULL);
+         src_tex_image = wrap_renderbuffer(ctx, src_renderbuffer);
+      }
       if (!make_view(ctx, src_tex_image, &src_view_tex_image, &src_view_texture,
                      dst_tex_image->InternalFormat))
          goto cleanup;
@@ -145,7 +211,7 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
    _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]);
    _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]);
 
-   switch (_mesa_get_format_base_format(src_tex_image->TexFormat)) {
+   switch (_mesa_get_format_base_format(src_format)) {
    case GL_DEPTH_COMPONENT:
       attachment = GL_DEPTH_ATTACHMENT;
       mask = GL_DEPTH_BUFFER_BIT;
@@ -165,15 +231,32 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
       _mesa_ReadBuffer(GL_COLOR_ATTACHMENT0);
    }
 
-   _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, attachment,
-                             src_view_tex_image, src_z);
+   if (src_view_tex_image) {
+   /* Prefer the tex image because, even if we have a renderbuffer, we may
+       * have had to wrap it in a texture view.
+       */
+      _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, attachment,
+                                src_view_tex_image, src_z);
+   } else {
+      _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER,
+                                    attachment,
+                                    GL_RENDERBUFFER,
+                                    src_renderbuffer->Name);
+   }
 
    status = _mesa_CheckFramebufferStatus(GL_READ_FRAMEBUFFER);
    if (status != GL_FRAMEBUFFER_COMPLETE)
       goto meta_end;
 
-   _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, attachment,
-                             dst_tex_image, dst_z);
+   if (dst_renderbuffer) {
+      _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER,
+                                    attachment,
+                                    GL_RENDERBUFFER,
+                                    dst_renderbuffer->Name);
+   } else {
+      _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, attachment,
+                                dst_tex_image, dst_z);
+   }
 
    status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER);
    if (status != GL_FRAMEBUFFER_COMPLETE)
@@ -205,5 +288,9 @@ meta_end:
 cleanup:
    _mesa_DeleteTextures(1, &src_view_texture);
 
+   /* If we got a renderbuffer source, delete the temporary texture */
+   if (src_renderbuffer && src_tex_image)
+      ctx->Driver.DeleteTexture(ctx, src_tex_image->TexObject);
+
    return success;
 }
index 0655f05..5dc40a2 100644 (file)
@@ -163,7 +163,6 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
    const GLuint maxLevel = texObj->MaxLevel;
    const GLint maxLevelSave = texObj->MaxLevel;
    const GLboolean genMipmapSave = texObj->GenerateMipmap;
-   const GLuint currentTexUnitSave = ctx->Texture.CurrentUnit;
    const GLboolean use_glsl_version = ctx->Extensions.ARB_vertex_shader &&
                                       ctx->Extensions.ARB_fragment_shader;
    GLenum faceTarget;
@@ -202,8 +201,12 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
    samplerSave = ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler ?
       ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler->Name : 0;
 
-   if (currentTexUnitSave != 0)
-      _mesa_BindTexture(target, texObj->Name);
+   /* We may have been called from glGenerateTextureMipmap with CurrentUnit
+    * still set to 0, so we don't know whether we can skip binding the texture.
+    * Assume that _mesa_BindTexture will be fast if we're rebinding the same
+    * texture.
+    */
+   _mesa_BindTexture(target, texObj->Name);
 
    if (!mipmap->Sampler) {
       _mesa_GenSamplers(1, &mipmap->Sampler);
index 16d8f5d..181dde9 100644 (file)
 #include "uniforms.h"
 #include "varray.h"
 
+static bool
+need_signed_unsigned_int_conversion(mesa_format mesaFormat,
+                                    GLenum format, GLenum type)
+{
+   const GLenum mesaFormatType = _mesa_get_format_datatype(mesaFormat);
+   const bool is_format_integer = _mesa_is_enum_format_integer(format);
+   return (mesaFormatType == GL_INT &&
+           is_format_integer &&
+           (type == GL_UNSIGNED_INT ||
+            type == GL_UNSIGNED_SHORT ||
+            type == GL_UNSIGNED_BYTE)) ||
+          (mesaFormatType == GL_UNSIGNED_INT &&
+           is_format_integer &&
+           (type == GL_INT ||
+            type == GL_SHORT ||
+            type == GL_BYTE));
+}
+
 static struct gl_texture_image *
-create_texture_for_pbo(struct gl_context *ctx, bool create_pbo,
-                       GLenum pbo_target, int width, int height,
+create_texture_for_pbo(struct gl_context *ctx,
+                       bool create_pbo, GLenum pbo_target,
+                       int dims, int width, int height, int depth,
                        GLenum format, GLenum type, const void *pixels,
                        const struct gl_pixelstore_attrib *packing,
                        GLuint *tmp_pbo, GLuint *tmp_tex)
@@ -73,13 +92,18 @@ create_texture_for_pbo(struct gl_context *ctx, bool create_pbo,
       return NULL;
 
    /* Account for SKIP_PIXELS, SKIP_ROWS, ALIGNMENT, and SKIP_IMAGES */
-   pixels = _mesa_image_address3d(packing, pixels,
-                                  width, height, format, type, 0, 0, 0);
+   uint32_t first_pixel = _mesa_image_offset(dims, packing, width, height,
+                                             format, type,
+                                             0, 0, 0);
+   uint32_t last_pixel =  _mesa_image_offset(dims, packing, width, height,
+                                             format, type,
+                                             depth-1, height-1, width);
    row_stride = _mesa_image_row_stride(packing, width, format, type);
 
    if (_mesa_is_bufferobj(packing->BufferObj)) {
       *tmp_pbo = 0;
       buffer_obj = packing->BufferObj;
+      first_pixel += (intptr_t)pixels;
    } else {
       bool is_pixel_pack = pbo_target == GL_PIXEL_PACK_BUFFER;
 
@@ -97,14 +121,18 @@ create_texture_for_pbo(struct gl_context *ctx, bool create_pbo,
        * data to avoid unnecessary data copying in _mesa_BufferData().
        */
       if (is_pixel_pack)
-         _mesa_BufferData(pbo_target, row_stride * height, NULL,
+         _mesa_BufferData(pbo_target,
+                          last_pixel - first_pixel,
+                          NULL,
                           GL_STREAM_READ);
       else
-         _mesa_BufferData(pbo_target, row_stride * height, pixels,
+         _mesa_BufferData(pbo_target,
+                          last_pixel - first_pixel,
+                          (char *)pixels + first_pixel,
                           GL_STREAM_DRAW);
 
       buffer_obj = packing->BufferObj;
-      pixels = NULL;
+      first_pixel = 0;
 
       _mesa_BindBuffer(pbo_target, 0);
    }
@@ -119,14 +147,21 @@ create_texture_for_pbo(struct gl_context *ctx, bool create_pbo,
 
    internal_format = _mesa_get_format_base_format(pbo_format);
 
+   /* The texture is addressed as a single very tall 2D image, so we
+    * need to pack the individual image slices together, taking the
+    * inter-image padding into account.
+    */
+   int image_height = packing->ImageHeight == 0 ? height : packing->ImageHeight;
+   int full_height = image_height * (depth - 1) + height;
+
    tex_image = _mesa_get_tex_image(ctx, tex_obj, tex_obj->Target, 0);
-   _mesa_init_teximage_fields(ctx, tex_image, width, height, 1,
+   _mesa_init_teximage_fields(ctx, tex_image, width, full_height, 1,
                               0, internal_format, pbo_format);
 
    read_only = pbo_target == GL_PIXEL_UNPACK_BUFFER;
    if (!ctx->Driver.SetTextureStorageForBufferObject(ctx, tex_obj,
                                                      buffer_obj,
-                                                     (intptr_t)pixels,
+                                                     first_pixel,
                                                      row_stride,
                                                      read_only)) {
       _mesa_DeleteTextures(1, tmp_tex);
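
To make the tall-texture math in the hunk above concrete, here is the full_height computation with made-up numbers (height 16, depth 4, ImageHeight 20); only the last slice skips its trailing padding:

#include <stdio.h>

int main(void)
{
   int height = 16, depth = 4;
   int pack_image_height = 20; /* stand-in for packing->ImageHeight */

   int image_height = pack_image_height == 0 ? height : pack_image_height;
   int full_height = image_height * (depth - 1) + height;

   /* Slices start at rows 0, 20, 40, 60; the last needs only 16 rows. */
   printf("full_height = %d\n", full_height); /* 20 * 3 + 16 = 76 */
   return 0;
}
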
@@ -147,7 +182,7 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
                            const struct gl_pixelstore_attrib *packing)
 {
    GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 };
-   int full_height, image_height;
+   int image_height;
    struct gl_texture_image *pbo_tex_image;
    GLenum status;
    bool success = false;
@@ -166,16 +201,22 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
    if (ctx->_ImageTransferState)
       return false;
 
+   /* This function relies on BlitFramebuffer to fill in the pixel data for
+    * glTex[Sub]Image*D, but BlitFramebuffer doesn't support signed-to-unsigned
+    * or unsigned-to-signed integer conversions.
+    */
+   if (need_signed_unsigned_int_conversion(tex_image->TexFormat, format, type))
+      return false;
+
    /* For arrays, use a tall (height * depth) 2D texture but taking into
     * account the inter-image padding specified with the image height packing
     * property.
     */
    image_height = packing->ImageHeight == 0 ? height : packing->ImageHeight;
-   full_height = image_height * (depth - 1) + height;
 
    pbo_tex_image = create_texture_for_pbo(ctx, create_pbo,
                                           GL_PIXEL_UNPACK_BUFFER,
-                                          width, full_height,
+                                          dims, width, height, depth,
                                           format, type, pixels, packing,
                                           &pbo, &pbo_tex);
    if (!pbo_tex_image)
@@ -250,24 +291,6 @@ fail:
    return success;
 }
 
-static bool
-need_signed_unsigned_int_conversion(mesa_format rbFormat,
-                                    GLenum format, GLenum type)
-{
-   const GLenum srcType = _mesa_get_format_datatype(rbFormat);
-   const bool is_dst_format_integer = _mesa_is_enum_format_integer(format);
-   return (srcType == GL_INT &&
-           is_dst_format_integer &&
-           (type == GL_UNSIGNED_INT ||
-            type == GL_UNSIGNED_SHORT ||
-            type == GL_UNSIGNED_BYTE)) ||
-          (srcType == GL_UNSIGNED_INT &&
-           is_dst_format_integer &&
-           (type == GL_INT ||
-            type == GL_SHORT ||
-            type == GL_BYTE));
-}
-
 bool
 _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
                               struct gl_texture_image *tex_image,
@@ -277,7 +300,7 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
                               const struct gl_pixelstore_attrib *packing)
 {
    GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 };
-   int full_height, image_height;
+   int image_height;
    struct gl_texture_image *pbo_tex_image;
    struct gl_renderbuffer *rb = NULL;
    GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format);
@@ -324,10 +347,9 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
     * property.
     */
    image_height = packing->ImageHeight == 0 ? height : packing->ImageHeight;
-   full_height = image_height * (depth - 1) + height;
 
    pbo_tex_image = create_texture_for_pbo(ctx, false, GL_PIXEL_PACK_BUFFER,
-                                          width, full_height * depth,
+                                          dims, width, height, depth,
                                           format, type, pixels, packing,
                                           &pbo, &pbo_tex);
    if (!pbo_tex_image)
index b307f10..2973a06 100644 (file)
@@ -34,6 +34,7 @@ AM_CFLAGS = \
        -I$(top_srcdir)/src/gallium/auxiliary \
        $(LIBDRM_CFLAGS) \
        $(DEFINES) \
+       -DSYSCONFDIR=\"$(sysconfdir)\" \
        $(VISIBILITY_CFLAGS)
 
 noinst_LTLIBRARIES = \
index 43d90d9..1246bec 100644 (file)
 
 
 uint64_t
-driParseDebugString( const char * debug, 
-                    const struct dri_debug_control * control  )
+driParseDebugString(const char *debug,
+                    const struct dri_debug_control *control)
 {
    uint64_t flag = 0;
 
-   if ( debug != NULL ) {
-      while( control->string != NULL ) {
-        if ( !strcmp( debug, "all" ) ||
-             strstr( debug, control->string ) != NULL ) {
-           flag |= control->flag;
-        }
-
-        control++;
+   if (debug != NULL) {
+      for (; control->string != NULL; control++) {
+         if (!strcmp(debug, "all")) {
+            flag |= control->flag;
+
+         } else {
+            const char *s = debug;
+            unsigned n;
+
+            for (; n = strcspn(s, ", "), *s; s += MAX2(1, n)) {
+               if (strlen(control->string) == n &&
+                   !strncmp(control->string, s, n))
+                  flag |= control->flag;
+            }
+         }
       }
    }
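
The rewritten loop above matches whole comma- or space-separated tokens rather than bare substrings, so a flag like "tex" no longer matches inside a longer name. A self-contained sketch of the strcspn() scan, with a hypothetical debug string:

#include <stdio.h>
#include <string.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   const char *debug = "tex,fall state";
   const char *wanted = "fall";
   const char *s = debug;
   unsigned n;

   /* strcspn() returns the length of the next token; MAX2(1, n) steps
    * past the delimiter when the scan lands on one. */
   for (; n = strcspn(s, ", "), *s; s += MAX2(1, n)) {
      if (strlen(wanted) == n && !strncmp(wanted, s, n))
         printf("matched '%s'\n", wanted);
   }
   return 0;
}
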
 
index f17693e..b8ab480 100644 (file)
@@ -935,9 +935,13 @@ static void parseOneConfigFile (XML_Parser p) {
 #undef BUF_SIZE
 }
 
+#ifndef SYSCONFDIR
+#define SYSCONFDIR "/etc"
+#endif
+
 void driParseConfigFiles (driOptionCache *cache, const driOptionCache *info,
                          int screenNum, const char *driverName) {
-    char *filenames[2] = {"/etc/drirc", NULL};
+    char *filenames[2] = { SYSCONFDIR "/drirc", NULL};
     char *home;
     uint32_t i;
     struct OptConfData userData;
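
The SYSCONFDIR substitution works because adjacent C string literals are concatenated at compile time, so the macro expands directly into the path literal. A tiny sketch:

#include <stdio.h>

#ifndef SYSCONFDIR
#define SYSCONFDIR "/etc"
#endif

int main(void)
{
   /* SYSCONFDIR "/drirc" becomes the single literal "/etc/drirc". */
   puts(SYSCONFDIR "/drirc");
   return 0;
}
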
index a6f1652..dfd8fb8 100644 (file)
@@ -67,7 +67,7 @@ CLEANFILES = \
        $(MOS)
 
 # Default target options.h
-options.h: LOCALEDIR := .
+LOCALEDIR := .
 options.h: t_options.h $(MOS)
        $(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/gen_xmlpool.py $(srcdir)/t_options.h $(LOCALEDIR) $(LANGS) > options.h
 
index feecc01..c4efa76 100644 (file)
@@ -128,6 +128,8 @@ intel_batchbuffer_advance(struct intel_context *intel)
       abort();
    }
    batch->total = 0;
+#else
+   (void) intel;
 #endif
 }
 
index 1aa06c1..5cbf763 100644 (file)
@@ -60,11 +60,6 @@ target_to_target(GLenum target)
    }
 }
 
-/**
- * @param for_bo Indicates that the caller is
- *        intel_miptree_create_for_bo(). If true, then do not create
- *        \c stencil_mt.
- */
 struct intel_mipmap_tree *
 intel_miptree_create_layout(struct intel_context *intel,
                             GLenum target,
@@ -73,8 +68,7 @@ intel_miptree_create_layout(struct intel_context *intel,
                             GLuint last_level,
                             GLuint width0,
                             GLuint height0,
-                            GLuint depth0,
-                            bool for_bo)
+                            GLuint depth0)
 {
    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
    if (!mt)
@@ -181,8 +175,7 @@ intel_miptree_create(struct intel_context *intel,
 
    mt = intel_miptree_create_layout(intel, target, format,
                                      first_level, last_level, width0,
-                                     height0, depth0,
-                                     false);
+                                     height0, depth0);
    /*
     * pitch == 0 || height == 0  indicates the null texture
     */
@@ -262,8 +255,7 @@ intel_miptree_create_for_bo(struct intel_context *intel,
 
    mt = intel_miptree_create_layout(intel, GL_TEXTURE_2D, format,
                                     0, 0,
-                                    width, height, 1,
-                                    true);
+                                    width, height, 1);
    if (!mt) {
       free(region);
       return mt;
@@ -723,8 +715,7 @@ intel_miptree_map_raw(struct intel_context *intel, struct intel_mipmap_tree *mt)
 }
 
 void
-intel_miptree_unmap_raw(struct intel_context *intel,
-                        struct intel_mipmap_tree *mt)
+intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
 {
    drm_intel_bo_unmap(mt->region->bo);
 }
@@ -772,13 +763,9 @@ intel_miptree_map_gtt(struct intel_context *intel,
 }
 
 static void
-intel_miptree_unmap_gtt(struct intel_context *intel,
-                       struct intel_mipmap_tree *mt,
-                       struct intel_miptree_map *map,
-                       unsigned int level,
-                       unsigned int slice)
+intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
 {
-   intel_miptree_unmap_raw(intel, mt);
+   intel_miptree_unmap_raw(mt);
 }
 
 static void
@@ -833,7 +820,7 @@ intel_miptree_unmap_blit(struct intel_context *intel,
 {
    struct gl_context *ctx = &intel->ctx;
 
-   intel_miptree_unmap_raw(intel, map->mt);
+   intel_miptree_unmap_raw(map->mt);
 
    if (map->mode & GL_MAP_WRITE_BIT) {
       bool ok = intel_miptree_blit(intel,
@@ -949,7 +936,7 @@ intel_miptree_unmap(struct intel_context *intel,
    if (map->mt) {
       intel_miptree_unmap_blit(intel, mt, map, level, slice);
    } else {
-      intel_miptree_unmap_gtt(intel, mt, map, level, slice);
+      intel_miptree_unmap_gtt(mt);
    }
 
    intel_miptree_release_map(mt, level, slice);
index 77b1f54..2520b30 100644 (file)
@@ -240,8 +240,7 @@ intel_miptree_create_layout(struct intel_context *intel,
                             GLuint last_level,
                             GLuint width0,
                             GLuint height0,
-                            GLuint depth0,
-                            bool for_bo);
+                            GLuint depth0);
 
 struct intel_mipmap_tree *
 intel_miptree_create_for_bo(struct intel_context *intel,
@@ -285,6 +284,10 @@ intel_miptree_check_level_layer(struct intel_mipmap_tree *mt,
                                 uint32_t level,
                                 uint32_t layer)
 {
+   (void) mt;
+   (void) level;
+   (void) layer;
+
    assert(level >= mt->first_level);
    assert(level <= mt->last_level);
    assert(layer < mt->level[level].depth);
@@ -340,14 +343,11 @@ intel_miptree_copy_teximage(struct intel_context *intel,
  */
 void i915_miptree_layout(struct intel_mipmap_tree *mt);
 void i945_miptree_layout(struct intel_mipmap_tree *mt);
-void brw_miptree_layout(struct intel_context *intel,
-                       struct intel_mipmap_tree *mt);
 
 void *intel_miptree_map_raw(struct intel_context *intel,
                             struct intel_mipmap_tree *mt);
 
-void intel_miptree_unmap_raw(struct intel_context *intel,
-                             struct intel_mipmap_tree *mt);
+void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);
 
 void
 intel_miptree_map(struct intel_context *intel,
index 5962dad..6c2ad6c 100644 (file)
 #define HAVE_LINE_STRIPS 1
 #define HAVE_TRIANGLES   1
 #define HAVE_TRI_STRIPS  1
-#define HAVE_TRI_STRIP_1 0      /* has it, template can't use it yet */
 #define HAVE_TRI_FANS    1
 #define HAVE_POLYGONS    1
-#define HAVE_QUADS       0
-#define HAVE_QUAD_STRIPS 0
 
 #define HAVE_ELTS        0
 
-static uint32_t hw_prim[GL_POLYGON + 1] = {
+static const uint32_t hw_prim[GL_POLYGON + 1] = {
    0,
    PRIM3D_LINELIST,
    PRIM3D_LINESTRIP,
@@ -251,7 +248,7 @@ intel_run_render(struct gl_context * ctx, struct tnl_pipeline_stage *stage)
          continue;
 
       intel_render_tab_verts[prim & PRIM_MODE_MASK] (ctx, start,
-                                                     start + length, prim);
+                                                     length, prim);
    }
 
    tnl->Driver.Render.Finish(ctx);
index 5ab60d1..63ef08b 100644 (file)
@@ -241,8 +241,7 @@ intel_set_texture_image_region(struct gl_context *ctx,
 
    intel_image->mt = intel_miptree_create_layout(intel, target, image->TexFormat,
                                                  0, 0,
-                                                 width, height, 1,
-                                                 true);
+                                                 width, height, 1);
    if (intel_image->mt == NULL)
        return;
    intel_region_reference(&intel_image->mt->region, region);
index f11ef2e..4083d69 100644 (file)
@@ -102,7 +102,7 @@ intel_blit_texsubimage(struct gl_context * ctx,
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
    }
 
-   intel_miptree_unmap_raw(intel, temp_mt);
+   intel_miptree_unmap_raw(temp_mt);
 
    bool ret;
 
index dfdad75..cc3ecaf 100644 (file)
@@ -21,7 +21,7 @@ i965_FILES = \
        brw_conditional_render.c \
        brw_context.c \
        brw_context.h \
-       brw_cs.cpp \
+       brw_cs.c \
        brw_cs.h \
        brw_cubemap_normalize.cpp \
        brw_curbe.c \
@@ -62,6 +62,7 @@ i965_FILES = \
        brw_fs_sel_peephole.cpp \
        brw_fs_surface_builder.cpp \
        brw_fs_surface_builder.h \
+       brw_fs_validate.cpp \
        brw_fs_vector_splitting.cpp \
        brw_fs_visitor.cpp \
        brw_gs.c \
@@ -115,6 +116,7 @@ i965_FILES = \
        brw_urb.c \
        brw_util.c \
        brw_util.h \
+       brw_vec4_builder.h \
        brw_vec4_copy_propagation.cpp \
        brw_vec4.cpp \
        brw_vec4_cse.cpp \
@@ -128,6 +130,8 @@ i965_FILES = \
        brw_vec4_nir.cpp \
        brw_vec4_gs_nir.cpp \
        brw_vec4_reg_allocate.cpp \
+       brw_vec4_surface_builder.cpp \
+       brw_vec4_surface_builder.h \
        brw_vec4_visitor.cpp \
        brw_vec4_vp.cpp \
        brw_vec4_vs_visitor.cpp \
@@ -163,6 +167,7 @@ i965_FILES = \
        gen6_wm_state.c \
        gen7_blorp.cpp \
        gen7_blorp.h \
+       gen7_cs_state.c \
        gen7_disable.c \
        gen7_gs_state.c \
        gen7_misc_state.c \
index b188fc7..508f1f0 100644 (file)
@@ -311,7 +311,7 @@ gen7_disable_hw_binding_tables(struct brw_context *brw)
 /**
  * Enable hardware binding tables and set up the binding table pool.
  */
-void
+static void
 gen7_enable_hw_binding_tables(struct brw_context *brw)
 {
    if (!brw->use_resource_streamer)
index eac1f00..df2969d 100644 (file)
@@ -144,7 +144,9 @@ brw_blorp_surface_info::compute_tile_offsets(uint32_t *tile_x,
 {
    uint32_t mask_x, mask_y;
 
-   intel_miptree_get_tile_masks(mt, &mask_x, &mask_y, map_stencil_as_y_tiled);
+   intel_get_tile_masks(mt->tiling, mt->tr_mode, mt->cpp,
+                        map_stencil_as_y_tiled,
+                        &mask_x, &mask_y);
 
    *tile_x = x_offset & mask_x;
    *tile_y = y_offset & mask_y;
index 205c905..ba11d3d 100644 (file)
@@ -215,6 +215,10 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
    struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
    struct intel_texture_image *intel_image = intel_texture_image(dst_image);
 
+   /* No pixel transfer operations (zoom, bias, mapping), just a blit */
+   if (brw->ctx._ImageTransferState)
+      return false;
+
    /* Sync up the state of window system buffers.  We need to do this before
     * we go looking at the src renderbuffer's miptree.
     */
index f981388..17a745d 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * Copyright 2009, 2012 Intel Corporation.
  * All Rights Reserved.
@@ -8,7 +7,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/glheader.h"
 #include "main/mtypes.h"
index 5693ab5..fe991a4 100644 (file)
 #include "brw_draw.h"
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
 #include "brw_defines.h"
 
 
 static void
-brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
+brw_emit_gpgpu_walker(struct brw_context *brw)
 {
    const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
 
+   const GLuint *num_groups = brw->compute.num_work_groups;
+   uint32_t indirect_flag;
+
+   if (brw->compute.num_work_groups_bo == NULL) {
+      indirect_flag = 0;
+   } else {
+      GLintptr indirect_offset = brw->compute.num_work_groups_offset;
+      drm_intel_bo *bo = brw->compute.num_work_groups_bo;
+
+      indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE;
+
+      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
+                            I915_GEM_DOMAIN_VERTEX, 0,
+                            indirect_offset + 0);
+      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
+                            I915_GEM_DOMAIN_VERTEX, 0,
+                            indirect_offset + 4);
+      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
+                            I915_GEM_DOMAIN_VERTEX, 0,
+                            indirect_offset + 8);
+   }
+
    const unsigned simd_size = prog_data->simd_size;
    unsigned group_size = prog_data->local_size[0] *
       prog_data->local_size[1] * prog_data->local_size[2];
@@ -52,7 +75,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
 
    uint32_t dwords = brw->gen < 8 ? 11 : 15;
    BEGIN_BATCH(dwords);
-   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2));
+   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
    OUT_BATCH(0);
    if (brw->gen >= 8) {
       OUT_BATCH(0);                     /* Indirect Data Length */
@@ -83,7 +106,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
 
 
 static void
-brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
+brw_dispatch_compute_common(struct gl_context *ctx)
 {
    struct brw_context *brw = brw_context(ctx);
    int estimated_buffer_space_needed;
@@ -117,7 +140,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
    brw->no_batch_wrap = true;
    brw_upload_compute_state(brw);
 
-   brw_emit_gpgpu_walker(brw, num_groups);
+   brw_emit_gpgpu_walker(brw);
 
    brw->no_batch_wrap = false;
 
@@ -155,9 +178,39 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
     */
 }
 
+static void
+brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   brw->compute.num_work_groups_bo = NULL;
+   brw->compute.num_work_groups = num_groups;
+   ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
+
+   brw_dispatch_compute_common(ctx);
+}
+
+static void
+brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
+{
+   struct brw_context *brw = brw_context(ctx);
+   static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
+   struct gl_buffer_object *indirect_buffer = ctx->DispatchIndirectBuffer;
+   drm_intel_bo *bo =
+      intel_bufferobj_buffer(brw,
+                             intel_buffer_object(indirect_buffer),
+                             indirect, 3 * sizeof(GLuint));
+
+   brw->compute.num_work_groups_bo = bo;
+   brw->compute.num_work_groups_offset = indirect;
+   brw->compute.num_work_groups = indirect_group_counts;
+   ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
+
+   brw_dispatch_compute_common(ctx);
+}
 
 void
 brw_init_compute_functions(struct dd_function_table *functions)
 {
    functions->DispatchCompute = brw_dispatch_compute;
+   functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
 }
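
A note on the indirect path added above: brw_dispatch_compute_indirect() only records the buffer object and byte offset, and the actual X/Y/Z group counts are read back on the GPU by the brw_load_register_mem() writes to GEN7_GPGPU_DISPATCHDIM{X,Y,Z} when the walker is emitted. On the API side, the three GLuints live in a GL_DISPATCH_INDIRECT_BUFFER. A minimal client-side sketch, assuming a GL 4.3 context; the buffer name and group counts are illustrative:

    /* Fill a dispatch-indirect buffer with the {x, y, z} group-count
     * triple that the indirect dispatch path will consume on the GPU. */
    GLuint bo;
    const GLuint groups[3] = { 64, 64, 1 };
    glGenBuffers(1, &bo);
    glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, bo);
    glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(groups), groups,
                 GL_STATIC_DRAW);
    glDispatchComputeIndirect(0); /* byte offset 0 into the bound buffer */
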
index 907b2a0..9dcdaf5 100644 (file)
@@ -323,6 +323,15 @@ brw_initialize_context_constants(struct brw_context *brw)
 
    ctx->Const.StripTextureBorder = true;
 
+   ctx->Const.MaxUniformBlockSize = 65536;
+   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_program_constants *prog = &ctx->Const.Program[i];
+      prog->MaxUniformBlocks = 12;
+      prog->MaxCombinedUniformComponents =
+         prog->MaxUniformComponents +
+         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
+   }
+
    ctx->Const.MaxDualSourceDrawBuffers = 1;
    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
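
For scale: with the 64 kB MaxUniformBlockSize and 12 blocks per stage set above, the UBO term alone contributes 65536 / 4 * 12 = 196608 components per stage (uniform components are 32-bit scalars, hence the division by 4). A quick check of the formula, assuming an illustrative MaxUniformComponents of 1024:

    unsigned max_block_size = 65536;   /* bytes, MaxUniformBlockSize */
    unsigned max_blocks = 12;          /* MaxUniformBlocks */
    unsigned default_comps = 1024;     /* assumed stage MaxUniformComponents */
    unsigned combined = default_comps + max_block_size / 4 * max_blocks;
    /* combined == 197632 components */
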
@@ -558,9 +567,33 @@ brw_initialize_context_constants(struct brw_context *brw)
     * However, unaligned accesses are slower, so enforce buffer alignment.
     */
    ctx->Const.UniformBufferOffsetAlignment = 16;
+
+   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
+    * that we can safely have the CPU and GPU writing the same SSBO on
+    * non-cache-coherent systems (our Atom CPUs). With UBOs, the GPU never
+    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
+    * be updating disjoint regions of the buffer simultaneously, and that
+    * breaks if those regions share a cacheline.
+    */
+   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
    ctx->Const.TextureBufferOffsetAlignment = 16;
    ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
 
+   /* FIXME: Tessellation stages are not yet supported in i965, so
+    * MaxCombinedShaderStorageBlocks doesn't take them into account.
+    */
+   ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = 12;
+   ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = 12;
+   ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0;
+   ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0;
+   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = 12;
+   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = 12;
+   ctx->Const.MaxCombinedShaderStorageBlocks = 12 * 3;
+   ctx->Const.MaxShaderStorageBufferBindings = 36;
+
+   if (_mesa_extension_override_enables.ARB_compute_shader)
+      ctx->Const.MaxShaderStorageBufferBindings += 12;
+
    if (brw->gen >= 6) {
       ctx->Const.MaxVarying = 32;
       ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
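
A consequence of the 64-byte requirement above: clients that sub-allocate several SSBO ranges out of one buffer must round each bind offset up to the queried alignment. A hedged client-side sketch, where ssbo and range_size stand for a pre-created buffer object and binding size:

    GLint align = 0;
    glGetIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &align);
    /* Round a raw sub-allocation offset (1000 is illustrative) up to the
     * implementation's alignment; 64 bytes on this driver. */
    GLintptr offset = (1000 + align - 1) & ~(GLintptr)(align - 1);
    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, ssbo, offset, range_size);
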
index 49ff428..2479182 100644 (file)
@@ -194,7 +194,6 @@ enum brw_state_id {
    BRW_STATE_GS_CONSTBUF,
    BRW_STATE_PROGRAM_CACHE,
    BRW_STATE_STATE_BASE_ADDRESS,
-   BRW_STATE_VUE_MAP_VS,
    BRW_STATE_VUE_MAP_GEOM_OUT,
    BRW_STATE_TRANSFORM_FEEDBACK,
    BRW_STATE_RASTERIZER_DISCARD,
@@ -214,6 +213,7 @@ enum brw_state_id {
    BRW_STATE_SAMPLER_STATE_TABLE,
    BRW_STATE_VS_ATTRIB_WORKAROUNDS,
    BRW_STATE_COMPUTE_PROGRAM,
+   BRW_STATE_CS_WORK_GROUPS,
    BRW_NUM_STATE_BITS
 };
 
@@ -276,7 +276,6 @@ enum brw_state_id {
 #define BRW_NEW_GS_CONSTBUF             (1ull << BRW_STATE_GS_CONSTBUF)
 #define BRW_NEW_PROGRAM_CACHE           (1ull << BRW_STATE_PROGRAM_CACHE)
 #define BRW_NEW_STATE_BASE_ADDRESS      (1ull << BRW_STATE_STATE_BASE_ADDRESS)
-#define BRW_NEW_VUE_MAP_VS              (1ull << BRW_STATE_VUE_MAP_VS)
 #define BRW_NEW_VUE_MAP_GEOM_OUT        (1ull << BRW_STATE_VUE_MAP_GEOM_OUT)
 #define BRW_NEW_TRANSFORM_FEEDBACK      (1ull << BRW_STATE_TRANSFORM_FEEDBACK)
 #define BRW_NEW_RASTERIZER_DISCARD      (1ull << BRW_STATE_RASTERIZER_DISCARD)
@@ -296,6 +295,7 @@ enum brw_state_id {
 #define BRW_NEW_SAMPLER_STATE_TABLE     (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
 #define BRW_NEW_VS_ATTRIB_WORKAROUNDS   (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
 #define BRW_NEW_COMPUTE_PROGRAM         (1ull << BRW_STATE_COMPUTE_PROGRAM)
+#define BRW_NEW_CS_WORK_GROUPS          (1ull << BRW_STATE_CS_WORK_GROUPS)
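
State atoms consume these bits through their .dirty masks. An atom that re-uploads the gl_NumWorkGroups surface would key off the new flag roughly as below; this is a hedged sketch of the i965 tracked-state convention, and the atom and emit-function names are illustrative rather than the ones this series adds:

    static void
    emit_work_groups_surface(struct brw_context *brw)
    {
       /* ... upload surface state for brw->compute.num_work_groups_bo ... */
    }

    const struct brw_tracked_state brw_cs_work_groups_surface = {
       .dirty = {
          .mesa = 0,
          .brw = BRW_NEW_CS_WORK_GROUPS,
       },
       .emit = emit_work_groups_surface,
    };
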
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
@@ -504,6 +504,16 @@ struct brw_cs_prog_data {
    GLuint dispatch_grf_start_reg_16;
    unsigned local_size[3];
    unsigned simd_size;
+   bool uses_barrier;
+   bool uses_num_work_groups;
+
+   struct {
+      /** @{
+       * Surface indices for the CS-specific surfaces.
+       */
+      uint32_t work_groups_start;
+      /** @} */
+   } binding_table;
 };
 
 /**
@@ -546,6 +556,17 @@ struct brw_vue_map {
    GLbitfield64 slots_valid;
 
    /**
+    * Is this VUE map for a separate shader pipeline?
+    *
+    * Separable programs (GL_ARB_separate_shader_objects) can be mixed and
+    * matched without the linker having a chance to dead-code-eliminate
+    * unused varyings.
+    *
+    * This means that we have to use a fixed slot layout, based on the output's
+    * location field, rather than assigning slots in a compact contiguous block.
+    */
+   bool separate;
+
+   /**
     * Map from gl_varying_slot value to VUE slot.  For gl_varying_slots that are
     * not stored in a slot (because they are not written, or because
     * additional processing is applied before storing them in the VUE), the
@@ -590,7 +611,8 @@ static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
 
 void brw_compute_vue_map(const struct brw_device_info *devinfo,
                          struct brw_vue_map *vue_map,
-                         GLbitfield64 slots_valid);
+                         GLbitfield64 slots_valid,
+                         bool separate_shader);
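
The practical effect of the new flag: with separate == false the compiler packs only the varyings actually written into consecutive VUE slots, while with separate == true each varying is pinned to a slot derived from its location so that independently compiled stages agree on the layout. A simplified sketch of the two policies; this is illustrative, not the actual Mesa slot assignment, and the +2 stands in for the VUE header slots:

    #include <stdbool.h>

    int assign_vue_slot(bool separate, int location, int *next_slot)
    {
       if (separate)
          return 2 + location;      /* fixed layout keyed on location */
       else
          return (*next_slot)++;    /* compact: next free slot */
    }
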
 
 
 /**
@@ -753,7 +775,8 @@ struct brw_vs_prog_data {
                             12 + /* ubo */                              \
                             BRW_MAX_ABO +                               \
                             BRW_MAX_IMAGES +                            \
-                            2 /* shader time, pull constants */)
+                            2 + /* shader time, pull constants */       \
+                            1 /* cs num work groups */)
 
 #define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
 
@@ -787,6 +810,11 @@ struct brw_gs_prog_data
 
    bool include_primitive_id;
 
+   /**
+    * The number of vertices emitted, if constant; otherwise -1.
+    */
+   int static_vertex_count;
+
    int invocations;
 
    /**
@@ -1242,6 +1270,17 @@ struct brw_context
    } draw;
 
    struct {
+      /**
+       * For gl_NumWorkGroups: if num_work_groups_bo is non-NULL, then it is
+       * an indirect call, and num_work_groups_offset is valid. Otherwise,
+       * num_work_groups is set based on glDispatchCompute.
+       */
+      drm_intel_bo *num_work_groups_bo;
+      GLintptr num_work_groups_offset;
+      const GLuint *num_work_groups;
+   } compute;
+
+   struct {
       struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
       struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
 
@@ -1368,16 +1407,8 @@ struct brw_context
    } curbe;
 
    /**
-    * Layout of vertex data exiting the vertex shader.
-    *
-    * BRW_NEW_VUE_MAP_VS is flagged when this VUE map changes.
-    */
-   struct brw_vue_map vue_map_vs;
-
-   /**
     * Layout of vertex data exiting the geometry portion of the pipeline.
-    * This comes from the geometry shader if one exists, otherwise from the
-    * vertex shader.
+    * This comes from the last enabled shader stage (GS, DS, or VS).
     *
     * BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
     */
@@ -1523,7 +1554,7 @@ struct brw_context
 
    int num_atoms[BRW_NUM_PIPELINES];
    const struct brw_tracked_state render_atoms[60];
-   const struct brw_tracked_state compute_atoms[4];
+   const struct brw_tracked_state compute_atoms[7];
 
    /* If (INTEL_DEBUG & DEBUG_BATCH) */
    struct {
@@ -1784,6 +1815,12 @@ void brw_create_constant_surface(struct brw_context *brw,
                                  uint32_t size,
                                  uint32_t *out_offset,
                                  bool dword_pitch);
+void brw_create_buffer_surface(struct brw_context *brw,
+                               drm_intel_bo *bo,
+                               uint32_t offset,
+                               uint32_t size,
+                               uint32_t *out_offset,
+                               bool dword_pitch);
 void brw_update_buffer_texture_surface(struct gl_context *ctx,
                                        unsigned unit,
                                        uint32_t *surf_offset);
@@ -2064,11 +2101,6 @@ void gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
 uint32_t get_hw_prim_for_gl_prim(int mode);
 
 void
-brw_setup_vue_key_clip_info(struct brw_context *brw,
-                            struct brw_vue_prog_key *key,
-                            bool program_uses_clip_distance);
-
-void
 gen6_upload_push_constants(struct brw_context *brw,
                            const struct gl_program *prog,
                            const struct brw_stage_prog_data *prog_data,
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
new file mode 100644 (file)
index 0000000..cb3fae6
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2014 - 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/ralloc.h"
+#include "brw_context.h"
+#include "brw_cs.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "brw_shader.h"
+#include "intel_mipmap_tree.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+
+bool
+brw_cs_prog_data_compare(const void *in_a, const void *in_b)
+{
+   const struct brw_cs_prog_data *a =
+      (const struct brw_cs_prog_data *)in_a;
+   const struct brw_cs_prog_data *b =
+      (const struct brw_cs_prog_data *)in_b;
+
+   /* Compare the base structure. */
+   if (!brw_stage_prog_data_compare(&a->base, &b->base))
+      return false;
+
+   /* Compare the rest of the structure. */
+   const unsigned offset = sizeof(struct brw_stage_prog_data);
+   if (memcmp(((char *) a) + offset, ((char *) b) + offset,
+              sizeof(struct brw_cs_prog_data) - offset))
+      return false;
+
+   return true;
+}
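
The compare above relies on brw_stage_prog_data being the first member of brw_cs_prog_data: the base holds pointers that memcmp would compare by address rather than by content, so it gets a field-aware compare, while everything after it is plain data and a single memcmp over the tail suffices (padding bytes compare equal because the prog_data is memset at creation). The same idiom in isolation, with assumed toy types:

    #include <stdbool.h>
    #include <stddef.h>
    #include <string.h>

    struct base    { int *ptr; };                    /* needs deep compare */
    struct derived { struct base base; int a, b; };  /* tail is plain data */

    static bool tails_equal(const struct derived *x, const struct derived *y)
    {
       const size_t off = sizeof(struct base);
       return memcmp((const char *)x + off, (const char *)y + off,
                     sizeof(struct derived) - off) == 0;
    }
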
+
+static bool
+brw_codegen_cs_prog(struct brw_context *brw,
+                    struct gl_shader_program *prog,
+                    struct brw_compute_program *cp,
+                    struct brw_cs_prog_key *key)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const GLuint *program;
+   void *mem_ctx = ralloc_context(NULL);
+   GLuint program_size;
+   struct brw_cs_prog_data prog_data;
+   bool start_busy = false;
+   double start_time = 0;
+
+   struct brw_shader *cs =
+      (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+   assert(cs);
+
+   memset(&prog_data, 0, sizeof(prog_data));
+
+   /* Allocate the references to the uniforms that will end up in the
+    * prog_data associated with the compiled program, and which will be freed
+    * by the state cache.
+    */
+   int param_count = cs->base.num_uniform_components +
+                     cs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+
+   /* The backend also sometimes adds params for texture size. */
+   param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
+   prog_data.base.param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.pull_param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.image_param =
+      rzalloc_array(NULL, struct brw_image_param, cs->base.NumImages);
+   prog_data.base.nr_params = param_count;
+   prog_data.base.nr_image_params = cs->base.NumImages;
+
+   if (unlikely(brw->perf_debug)) {
+      start_busy = (brw->batch.last_bo &&
+                    drm_intel_bo_busy(brw->batch.last_bo));
+      start_time = get_time();
+   }
+
+   program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
+                         &cp->program, prog, &program_size);
+   if (program == NULL) {
+      ralloc_free(mem_ctx);
+      return false;
+   }
+
+   if (unlikely(brw->perf_debug) && cs) {
+      if (cs->compiled_once) {
+         _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles");
+      }
+      cs->compiled_once = true;
+
+      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+         perf_debug("CS compile took %.03f ms and stalled the GPU\n",
+                    (get_time() - start_time) * 1000);
+      }
+   }
+
+   if (prog_data.base.total_scratch) {
+      brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo,
+                         prog_data.base.total_scratch * brw->max_cs_threads);
+   }
+
+   if (unlikely(INTEL_DEBUG & DEBUG_CS))
+      fprintf(stderr, "\n");
+
+   brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
+                    key, sizeof(*key),
+                    program, program_size,
+                    &prog_data, sizeof(prog_data),
+                    &brw->cs.base.prog_offset, &brw->cs.prog_data);
+   ralloc_free(mem_ctx);
+
+   return true;
+}
+
+
+static void
+brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key)
+{
+   struct gl_context *ctx = &brw->ctx;
+   /* BRW_NEW_COMPUTE_PROGRAM */
+   const struct brw_compute_program *cp =
+      (struct brw_compute_program *) brw->compute_program;
+   const struct gl_program *prog = (struct gl_program *) cp;
+
+   memset(key, 0, sizeof(*key));
+
+   /* _NEW_TEXTURE */
+   brw_populate_sampler_prog_key_data(ctx, prog, brw->cs.base.sampler_count,
+                                      &key->tex);
+
+   /* The unique compute program ID */
+   key->program_string_id = cp->id;
+}
+
+
+void
+brw_upload_cs_prog(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   struct brw_cs_prog_key key;
+   struct brw_compute_program *cp = (struct brw_compute_program *)
+      brw->compute_program;
+
+   if (!cp)
+      return;
+
+   if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM))
+      return;
+
+   brw->cs.base.sampler_count =
+      _mesa_fls(ctx->ComputeProgram._Current->Base.SamplersUsed);
+
+   brw_cs_populate_key(brw, &key);
+
+   if (!brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG,
+                         &key, sizeof(key),
+                         &brw->cs.base.prog_offset, &brw->cs.prog_data)) {
+      bool success =
+         brw_codegen_cs_prog(brw,
+                             ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE],
+                             cp, &key);
+      (void) success;
+      assert(success);
+   }
+   brw->cs.base.prog_data = &brw->cs.prog_data->base;
+}
+
+
+bool
+brw_cs_precompile(struct gl_context *ctx,
+                  struct gl_shader_program *shader_prog,
+                  struct gl_program *prog)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_cs_prog_key key;
+
+   struct gl_compute_program *cp = (struct gl_compute_program *) prog;
+   struct brw_compute_program *bcp = brw_compute_program(cp);
+
+   memset(&key, 0, sizeof(key));
+   key.program_string_id = bcp->id;
+
+   brw_setup_tex_for_precompile(brw, &key.tex, prog);
+
+   uint32_t old_prog_offset = brw->cs.base.prog_offset;
+   struct brw_cs_prog_data *old_prog_data = brw->cs.prog_data;
+
+   bool success = brw_codegen_cs_prog(brw, shader_prog, bcp, &key);
+
+   brw->cs.base.prog_offset = old_prog_offset;
+   brw->cs.prog_data = old_prog_data;
+
+   return success;
+}
index b83d49a..746fb05 100644 (file)
@@ -50,6 +50,10 @@ brw_cs_emit(struct brw_context *brw,
             struct gl_shader_program *prog,
             unsigned *final_assembly_size);
 
+unsigned
+brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
+                                    unsigned dispatch_width);
+
 #ifdef __cplusplus
 }
 #endif
index f9dcdc7..a8cde20 100644 (file)
 #define _3DPRIM_TRIFAN            0x06
 #define _3DPRIM_QUADLIST          0x07
 #define _3DPRIM_QUADSTRIP         0x08
-#define _3DPRIM_LINELIST_ADJ      0x09
-#define _3DPRIM_LINESTRIP_ADJ     0x0A
-#define _3DPRIM_TRILIST_ADJ       0x0B
-#define _3DPRIM_TRISTRIP_ADJ      0x0C
+#define _3DPRIM_LINELIST_ADJ      0x09 /* G45+ */
+#define _3DPRIM_LINESTRIP_ADJ     0x0A /* G45+ */
+#define _3DPRIM_TRILIST_ADJ       0x0B /* G45+ */
+#define _3DPRIM_TRISTRIP_ADJ      0x0C /* G45+ */
 #define _3DPRIM_TRISTRIP_REVERSE  0x0D
 #define _3DPRIM_POLYGON           0x0E
 #define _3DPRIM_RECTLIST          0x0F
@@ -78,7 +78,7 @@
 #define _3DPRIM_LINESTRIP_CONT    0x12
 #define _3DPRIM_LINESTRIP_BF      0x13
 #define _3DPRIM_LINESTRIP_CONT_BF 0x14
-#define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
+#define _3DPRIM_TRIFAN_NOSTIPPLE  0x16
 #endif
 
 /* We use this offset to be able to pass native primitive types in struct
@@ -981,6 +981,7 @@ enum opcode {
    SHADER_OPCODE_TG4_LOGICAL,
    SHADER_OPCODE_TG4_OFFSET,
    SHADER_OPCODE_TG4_OFFSET_LOGICAL,
+   SHADER_OPCODE_SAMPLEINFO,
 
    /**
     * Combines multiple sources of size 1 into a larger virtual GRF.
@@ -1068,6 +1069,7 @@ enum opcode {
    FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+   FS_OPCODE_GET_BUFFER_SIZE,
    FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
    FS_OPCODE_DISCARD_JUMP,
    FS_OPCODE_SET_SAMPLE_ID,
@@ -1085,6 +1087,9 @@ enum opcode {
    VS_OPCODE_PULL_CONSTANT_LOAD,
    VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
    VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+
+   VS_OPCODE_GET_BUFFER_SIZE,
+
    VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
 
    /**
@@ -1513,6 +1518,7 @@ enum brw_message_target {
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4      8
 #define GEN5_SAMPLER_MESSAGE_LOD                 9
 #define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10
+#define GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO   11
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C    16
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO   17
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
@@ -1696,7 +1702,13 @@ enum brw_message_target {
 
 #define BRW_URB_OPCODE_WRITE_HWORD  0
 #define BRW_URB_OPCODE_WRITE_OWORD  1
-#define GEN8_URB_OPCODE_SIMD8_WRITE  7
+#define BRW_URB_OPCODE_READ_HWORD   2
+#define BRW_URB_OPCODE_READ_OWORD   3
+#define GEN7_URB_OPCODE_ATOMIC_MOV  4
+#define GEN7_URB_OPCODE_ATOMIC_INC  5
+#define GEN8_URB_OPCODE_ATOMIC_ADD  6
+#define GEN8_URB_OPCODE_SIMD8_WRITE 7
+#define GEN8_URB_OPCODE_SIMD8_READ  8
 
 #define BRW_URB_SWIZZLE_NONE          0
 #define BRW_URB_SWIZZLE_INTERLEAVE    1
@@ -1784,6 +1796,8 @@ enum brw_message_target {
 /* DW3: PS */
 
 #define _3DSTATE_SAMPLER_STATE_POINTERS_VS     0x782B /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_HS     0x782C /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_DS     0x782D /* GEN7+ */
 #define _3DSTATE_SAMPLER_STATE_POINTERS_GS     0x782E /* GEN7+ */
 #define _3DSTATE_SAMPLER_STATE_POINTERS_PS     0x782F /* GEN7+ */
 
@@ -1867,6 +1881,8 @@ enum brw_message_target {
 #define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES                (5*128)
 
 #define _3DSTATE_PUSH_CONSTANT_ALLOC_VS         0x7912 /* GEN7+ */
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_HS         0x7913 /* GEN7+ */
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_DS         0x7914 /* GEN7+ */
 #define _3DSTATE_PUSH_CONSTANT_ALLOC_GS         0x7915 /* GEN7+ */
 #define _3DSTATE_PUSH_CONSTANT_ALLOC_PS         0x7916 /* GEN7+ */
 # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT         16
@@ -1950,6 +1966,11 @@ enum brw_message_target {
 # define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK         INTEL_MASK(25, 16)
 # define GEN6_GS_ENABLE                                        (1 << 15)
 
+/* Gen8+ DW8 */
+# define GEN8_GS_STATIC_OUTPUT                          (1 << 30)
+# define GEN8_GS_STATIC_VERTEX_COUNT_SHIFT              16
+# define GEN8_GS_STATIC_VERTEX_COUNT_MASK               INTEL_MASK(26, 16)
+
 /* Gen8+ DW9 */
 # define GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT          21
 # define GEN8_GS_URB_OUTPUT_LENGTH_SHIFT                16
@@ -1969,8 +1990,76 @@ enum brw_message_target {
 #define GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES           (62*16)
 
 #define _3DSTATE_HS                             0x781B /* GEN7+ */
+/* DW1 */
+# define GEN7_HS_SAMPLER_COUNT_MASK                     INTEL_MASK(29, 27)
+# define GEN7_HS_SAMPLER_COUNT_SHIFT                    27
+# define GEN7_HS_BINDING_TABLE_ENTRY_COUNT_MASK         INTEL_MASK(25, 18)
+# define GEN7_HS_BINDING_TABLE_ENTRY_COUNT_SHIFT        18
+# define GEN7_HS_FLOATING_POINT_MODE_IEEE_754           (0 << 16)
+# define GEN7_HS_FLOATING_POINT_MODE_ALT                (1 << 16)
+# define GEN7_HS_MAX_THREADS_SHIFT                      0
+/* DW2 */
+# define GEN7_HS_ENABLE                                 (1 << 31)
+# define GEN7_HS_STATISTICS_ENABLE                      (1 << 29)
+# define GEN8_HS_MAX_THREADS_SHIFT                      8
+# define GEN7_HS_INSTANCE_COUNT_MASK                    INTEL_MASK(3, 0)
+# define GEN7_HS_INSTANCE_COUNT_SHIFT                   0
+/* DW5 */
+# define GEN7_HS_SINGLE_PROGRAM_FLOW                    (1 << 27)
+# define GEN7_HS_VECTOR_MASK_ENABLE                     (1 << 26)
+# define HSW_HS_ACCESSES_UAV                            (1 << 25)
+# define GEN7_HS_INCLUDE_VERTEX_HANDLES                 (1 << 24)
+# define GEN7_HS_DISPATCH_START_GRF_MASK                INTEL_MASK(23, 19)
+# define GEN7_HS_DISPATCH_START_GRF_SHIFT               19
+# define GEN7_HS_URB_READ_LENGTH_MASK                   INTEL_MASK(16, 11)
+# define GEN7_HS_URB_READ_LENGTH_SHIFT                  11
+# define GEN7_HS_URB_ENTRY_READ_OFFSET_MASK             INTEL_MASK(9, 4)
+# define GEN7_HS_URB_ENTRY_READ_OFFSET_SHIFT            4
+
 #define _3DSTATE_TE                             0x781C /* GEN7+ */
+/* DW1 */
+# define GEN7_TE_PARTITIONING_SHIFT                     12
+# define GEN7_TE_OUTPUT_TOPOLOGY_SHIFT                  8
+# define GEN7_TE_DOMAIN_SHIFT                           4
+//# define GEN7_TE_MODE_SW                                (1 << 1)
+# define GEN7_TE_ENABLE                                 (1 << 0)
+
 #define _3DSTATE_DS                             0x781D /* GEN7+ */
+/* DW2 */
+# define GEN7_DS_SINGLE_DOMAIN_POINT_DISPATCH           (1 << 31)
+# define GEN7_DS_VECTOR_MASK_ENABLE                     (1 << 30)
+# define GEN7_DS_SAMPLER_COUNT_MASK                     INTEL_MASK(29, 27)
+# define GEN7_DS_SAMPLER_COUNT_SHIFT                    27
+# define GEN7_DS_BINDING_TABLE_ENTRY_COUNT_MASK         INTEL_MASK(25, 18)
+# define GEN7_DS_BINDING_TABLE_ENTRY_COUNT_SHIFT        18
+# define GEN7_DS_FLOATING_POINT_MODE_IEEE_754           (0 << 16)
+# define GEN7_DS_FLOATING_POINT_MODE_ALT                (1 << 16)
+# define HSW_DS_ACCESSES_UAV                            (1 << 14)
+/* DW4 */
+# define GEN7_DS_DISPATCH_START_GRF_MASK                INTEL_MASK(24, 20)
+# define GEN7_DS_DISPATCH_START_GRF_SHIFT               20
+# define GEN7_DS_URB_READ_LENGTH_MASK                   INTEL_MASK(17, 11)
+# define GEN7_DS_URB_READ_LENGTH_SHIFT                  11
+# define GEN7_DS_URB_ENTRY_READ_OFFSET_MASK             INTEL_MASK(9, 4)
+# define GEN7_DS_URB_ENTRY_READ_OFFSET_SHIFT            4
+/* DW5 */
+# define GEN7_DS_MAX_THREADS_SHIFT                      25
+# define HSW_DS_MAX_THREADS_SHIFT                       21
+# define GEN7_DS_STATISTICS_ENABLE                      (1 << 10)
+# define GEN7_DS_SIMD8_DISPATCH_ENABLE                  (1 << 3)
+# define GEN7_DS_COMPUTE_W_COORDINATE_ENABLE            (1 << 2)
+# define GEN7_DS_CACHE_DISABLE                          (1 << 1)
+# define GEN7_DS_ENABLE                                 (1 << 0)
+/* Gen8+ DW8 */
+# define GEN8_DS_URB_ENTRY_OUTPUT_OFFSET_MASK           INTEL_MASK(26, 21)
+# define GEN8_DS_URB_ENTRY_OUTPUT_OFFSET_SHIFT          21
+# define GEN8_DS_URB_OUTPUT_LENGTH_MASK                 INTEL_MASK(20, 16)
+# define GEN8_DS_URB_OUTPUT_LENGTH_SHIFT                16
+# define GEN8_DS_USER_CLIP_DISTANCE_MASK                INTEL_MASK(15, 8)
+# define GEN8_DS_USER_CLIP_DISTANCE_SHIFT               8
+# define GEN8_DS_USER_CULL_DISTANCE_MASK                INTEL_MASK(7, 0)
+# define GEN8_DS_USER_CULL_DISTANCE_SHIFT               0
+
 
 #define _3DSTATE_CLIP                          0x7812 /* GEN6+ */
 /* DW1 */
@@ -2268,6 +2357,21 @@ enum brw_pixel_shader_computed_depth_mode {
    BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
 };
 
+enum brw_pixel_shader_coverage_mask_mode {
+   BRW_PSICMS_OFF     = 0, /* PS does not use input coverage masks. */
+   BRW_PSICMS_NORMAL  = 1, /* Input Coverage masks based on outer conservatism
+                            * and factors in SAMPLE_MASK.  If Pixel is
+                            * conservatively covered, all samples are enabled.
+                            */
+
+   BRW_PSICMS_INNER   = 2, /* Input Coverage masks based on inner conservatism
+                            * and factors in SAMPLE_MASK.  If Pixel is
+                            * conservatively *FULLY* covered, all samples are
+                            * enabled.
+                            */
+   BRW_PSICMS_DEPTH   = 3,
+};
+
 #define _3DSTATE_PS_EXTRA                       0x784F /* GEN8+ */
 /* DW1 */
 # define GEN8_PSX_PIXEL_SHADER_VALID                    (1 << 31)
@@ -2285,6 +2389,7 @@ enum brw_pixel_shader_computed_depth_mode {
 # define GEN9_PSX_SHADER_PULLS_BARY                     (1 << 3)
 # define GEN8_PSX_SHADER_HAS_UAV                        (1 << 2)
 # define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK       (1 << 1)
+# define GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT     0
 
 enum brw_wm_barycentric_interp_mode {
    BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC                = 0,
@@ -2660,14 +2765,24 @@ enum brw_wm_barycentric_interp_mode {
 # define MEDIA_VFE_STATE_CURBE_ALLOC_SHIFT      0
 # define MEDIA_VFE_STATE_CURBE_ALLOC_MASK       INTEL_MASK(15, 0)
 
+#define MEDIA_CURBE_LOAD                        0x7001
 #define MEDIA_INTERFACE_DESCRIPTOR_LOAD         0x7002
+/* GEN7 DW4, GEN8+ DW5 */
+# define MEDIA_CURBE_READ_LENGTH_SHIFT          16
+# define MEDIA_CURBE_READ_LENGTH_MASK           INTEL_MASK(31, 16)
+# define MEDIA_CURBE_READ_OFFSET_SHIFT          0
+# define MEDIA_CURBE_READ_OFFSET_MASK           INTEL_MASK(15, 0)
 /* GEN7 DW5, GEN8+ DW6 */
+# define MEDIA_BARRIER_ENABLE_SHIFT             21
+# define MEDIA_BARRIER_ENABLE_MASK              INTEL_MASK(21, 21)
 # define MEDIA_GPGPU_THREAD_COUNT_SHIFT         0
 # define MEDIA_GPGPU_THREAD_COUNT_MASK          INTEL_MASK(7, 0)
 # define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT    0
 # define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK     INTEL_MASK(9, 0)
 #define MEDIA_STATE_FLUSH                       0x7004
 #define GPGPU_WALKER                            0x7105
+/* GEN7 DW0 */
+# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE   (1 << 10)
 /* GEN8+ DW2 */
 # define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT     0
 # define GPGPU_WALKER_INDIRECT_LENGTH_MASK      INTEL_MASK(15, 0)
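
For readers new to these headers: INTEL_MASK(high, low) produces a mask over the inclusive bit range, so MEDIA_BARRIER_ENABLE_MASK above is just bit 21. A standalone equivalent, valid for ranges narrower than 32 bits:

    #define MASK(high, low) (((1u << ((high) - (low) + 1)) - 1) << (low))
    /* MASK(21, 21) == 0x00200000, MASK(15, 0) == 0x0000ffff */
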
index 16c125d..6517249 100644 (file)
@@ -314,7 +314,7 @@ static const struct brw_device_info brw_device_info_chv = {
    .max_wm_threads = 64 * 6,                        \
    .max_cs_threads = 56,                            \
    .urb = {                                         \
-      .size = 192,                                  \
+      .size = 384,                                  \
       .min_vs_entries = 64,                         \
       .max_vs_entries = 1856,                       \
       .max_hs_entries = 672,                        \
@@ -324,6 +324,7 @@ static const struct brw_device_info brw_device_info_chv = {
 
 static const struct brw_device_info brw_device_info_skl_gt1 = {
    GEN9_FEATURES, .gt = 1,
+   .urb.size = 192,
 };
 
 static const struct brw_device_info brw_device_info_skl_gt2 = {
index 1075c5a..db23a18 100644 (file)
@@ -412,6 +412,22 @@ static const char *const gen7_gateway_subfuncid[8] = {
    [BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE] = "mmio read/write",
 };
 
+static const char *const gen4_dp_read_port_msg_type[4] = {
+   [0b00] = "OWord Block Read",
+   [0b01] = "OWord Dual Block Read",
+   [0b10] = "Media Block Read",
+   [0b11] = "DWord Scattered Read",
+};
+
+static const char *const g45_dp_read_port_msg_type[8] = {
+   [0b000] = "OWord Block Read",
+   [0b010] = "OWord Dual Block Read",
+   [0b100] = "Media Block Read",
+   [0b110] = "DWord Scattered Read",
+   [0b001] = "Render Target UNORM Read",
+   [0b011] = "AVC Loop Filter Read",
+};
+
 static const char *const dp_write_port_msg_type[8] = {
    [0b000] = "OWord block write",
    [0b001] = "OWord dual block write",
@@ -556,15 +572,15 @@ static const char *const gen5_urb_opcode[] = {
 };
 
 static const char *const gen7_urb_opcode[] = {
-   [0] = "write HWord",
-   [1] = "write OWord",
-   [2] = "read HWord",
-   [3] = "read OWord",
-   [4] = "atomic mov",  /* Gen7+ */
-   [5] = "atomic inc",  /* Gen7+ */
-   [6] = "atomic add",  /* Gen8+ */
-   [7] = "SIMD8 write", /* Gen8+ */
-   [8] = "SIMD8 read",  /* Gen8+ */
+   [BRW_URB_OPCODE_WRITE_HWORD] = "write HWord",
+   [BRW_URB_OPCODE_WRITE_OWORD] = "write OWord",
+   [BRW_URB_OPCODE_READ_HWORD] = "read HWord",
+   [BRW_URB_OPCODE_READ_OWORD] = "read OWord",
+   [GEN7_URB_OPCODE_ATOMIC_MOV] = "atomic mov",  /* Gen7+ */
+   [GEN7_URB_OPCODE_ATOMIC_INC] = "atomic inc",  /* Gen7+ */
+   [GEN8_URB_OPCODE_ATOMIC_ADD] = "atomic add",  /* Gen8+ */
+   [GEN8_URB_OPCODE_SIMD8_WRITE] = "SIMD8 write", /* Gen8+ */
+   [GEN8_URB_OPCODE_SIMD8_READ] = "SIMD8 read",  /* Gen8+ */
    /* [9-15] - reserved */
 };
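
Indexing the decode table by the named enumerants (C designated initializers) instead of bare 0..8 keeps each string attached to its constant: if an opcode value ever moves in brw_defines.h, the table follows automatically instead of silently mislabelling the disassembly. The idiom in miniature:

    enum op { OP_WRITE = 0, OP_READ = 2 };
    static const char *const op_name[] = {
       [OP_WRITE] = "write",
       [OP_READ]  = "read",   /* op_name[1] is implicitly NULL */
    };
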
 
@@ -601,6 +617,7 @@ static const char *const gen5_sampler_msg_type[] = {
    [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4]      = "gather4",
    [GEN5_SAMPLER_MESSAGE_LOD]                 = "lod",
    [GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO]      = "resinfo",
+   [GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO]   = "sampleinfo",
    [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C]    = "gather4_c",
    [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO]   = "gather4_po",
    [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c",
@@ -1444,10 +1461,17 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
                       brw_inst_dp_msg_type(devinfo, inst),
                       devinfo->gen >= 7 ? 0 : brw_inst_dp_write_commit(devinfo, inst));
             } else {
-               format(file, " (%ld, %ld, %ld)",
-                      brw_inst_binding_table_index(devinfo, inst),
-                      brw_inst_dp_read_msg_control(devinfo, inst),
-                      brw_inst_dp_read_msg_type(devinfo, inst));
+               bool is_965 = devinfo->gen == 4 && !devinfo->is_g4x;
+               err |= control(file, "DP read message type",
+                              is_965 ? gen4_dp_read_port_msg_type :
+                                       g45_dp_read_port_msg_type,
+                              brw_inst_dp_read_msg_type(devinfo, inst),
+                              &space);
+
+               format(file, " MsgCtrl = 0x%lx",
+                      brw_inst_dp_read_msg_control(devinfo, inst));
+
+               format(file, " Surface = %ld", brw_inst_binding_table_index(devinfo, inst));
             }
             break;
 
index e5de420..6a75e06 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include <sys/errno.h>
 
index f994726..695973b 100644 (file)
@@ -1,5 +1,4 @@
- /**************************************************************************
- *
+/*
  * Copyright 2005 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef BRW_DRAW_H
 #define BRW_DRAW_H
index 21d8f1e..a0ae015 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/glheader.h"
 #include "main/bufferobj.h"
index 67f0b45..b798931 100644 (file)
@@ -802,7 +802,7 @@ set_3src_control_index(const struct brw_device_info *devinfo,
    if (devinfo->gen >= 9 || devinfo->is_cherryview)
       uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
 
-   for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
+   for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
       if (gen8_3src_control_index_table[i] == uncompacted) {
          brw_compact_inst_set_3src_control_index(dst, i);
         return true;
@@ -836,7 +836,7 @@ set_3src_source_index(const struct brw_device_info *devinfo,
          (brw_inst_bits(src, 104, 104) << 44);  /* 1b */
    }
 
-   for (int i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
+   for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
       if (gen8_3src_source_index_table[i] == uncompacted) {
          brw_compact_inst_set_3src_source_index(dst, i);
         return true;
index 4d39762..dc699bb 100644 (file)
@@ -95,7 +95,7 @@ brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
                         enum brw_reg_type type, unsigned file)
 {
    if (file == BRW_IMMEDIATE_VALUE) {
-      const static int imm_hw_types[] = {
+      static const int imm_hw_types[] = {
          [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
          [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
          [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
@@ -117,7 +117,7 @@ brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
       return imm_hw_types[type];
    } else {
       /* Non-immediate registers */
-      const static int hw_types[] = {
+      static const int hw_types[] = {
          [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
          [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
          [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
@@ -146,8 +146,9 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
 {
    const struct brw_device_info *devinfo = p->devinfo;
 
-   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
-       dest.file != BRW_MESSAGE_REGISTER_FILE)
+   if (dest.file == BRW_MESSAGE_REGISTER_FILE)
+      assert((dest.nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+   else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(dest.nr < 128);
 
    gen7_convert_mrf_to_grf(p, &dest);
@@ -235,6 +236,15 @@ validate_reg(const struct brw_device_info *devinfo,
        reg.file == BRW_ARF_NULL)
       return;
 
+   /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
+    *
+    *    "Swizzling is not allowed when an accumulator is used as an implicit
+    *    source or an explicit source in an instruction."
+    */
+   if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+       reg.nr == BRW_ARF_ACCUMULATOR)
+      assert(reg.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
+
    assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
    hstride = hstride_for_reg[reg.hstride];
 
@@ -300,7 +310,9 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
 {
    const struct brw_device_info *devinfo = p->devinfo;
 
-   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+   if (reg.file == BRW_MESSAGE_REGISTER_FILE)
+      assert((reg.nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+   else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
    gen7_convert_mrf_to_grf(p, &reg);
@@ -443,6 +455,14 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
    if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
+   /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
+    *
+    *    "Accumulator registers may be accessed explicitly as src0
+    *    operands only."
+    */
+   assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+          reg.nr != BRW_ARF_ACCUMULATOR);
+
    gen7_convert_mrf_to_grf(p, &reg);
    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
 
@@ -2465,7 +2485,7 @@ void brw_urb_WRITE(struct brw_codegen *p,
 
    insn = next_insn(p, BRW_OPCODE_SEND);
 
-   assert(msg_length < BRW_MAX_MRF);
+   assert(msg_length < BRW_MAX_MRF(devinfo->gen));
 
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
index 76530a4..e620301 100644 (file)
@@ -42,6 +42,7 @@
 #include "brw_eu.h"
 #include "brw_wm.h"
 #include "brw_fs.h"
+#include "brw_cs.h"
 #include "brw_cfg.h"
 #include "brw_dead_control_flow.h"
 #include "main/uniforms.h"
@@ -797,6 +798,7 @@ fs_inst::regs_read(int arg) const
       break;
 
    case CS_OPCODE_CS_TERMINATE:
+   case SHADER_OPCODE_BARRIER:
       return 1;
 
    default:
@@ -878,9 +880,11 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
    case SHADER_OPCODE_TXL:
    case SHADER_OPCODE_TXS:
    case SHADER_OPCODE_LOD:
+   case SHADER_OPCODE_SAMPLEINFO:
       return 1;
    case FS_OPCODE_FB_WRITE:
       return 2;
+   case FS_OPCODE_GET_BUFFER_SIZE:
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
    case SHADER_OPCODE_GEN4_SCRATCH_READ:
       return 1;
@@ -1394,6 +1398,9 @@ fs_visitor::assign_curb_setup()
         }
       }
    }
+
+   /* This may be updated in assign_urb_setup or assign_vs_urb_setup. */
+   this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
 }
 
 void
@@ -1434,7 +1441,8 @@ fs_visitor::calculate_urb_setup()
           */
          struct brw_vue_map prev_stage_vue_map;
          brw_compute_vue_map(devinfo, &prev_stage_vue_map,
-                             key->input_slots_valid);
+                             key->input_slots_valid,
+                             shader_prog->SeparateShader);
          int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
          assert(prev_stage_vue_map.num_slots <= first_slot + 32);
          for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
@@ -1508,8 +1516,7 @@ fs_visitor::assign_urb_setup()
    }
 
    /* Each attribute is 4 setup channels, each of which is half a reg. */
-   this->first_non_payload_grf =
-      urb_start + prog_data->num_varying_inputs * 2;
+   this->first_non_payload_grf += prog_data->num_varying_inputs * 2;
 }
 
 void
@@ -1524,8 +1531,7 @@ fs_visitor::assign_vs_urb_setup()
       count++;
 
    /* Each attribute is 4 regs. */
-   this->first_non_payload_grf =
-      payload.num_regs + prog_data->curb_read_length + count * 4;
+   this->first_non_payload_grf += count * 4;
 
    unsigned vue_entries =
       MAX2(count, vs_prog_data->base.vue_map.num_slots);
@@ -1565,7 +1571,10 @@ fs_visitor::assign_vs_urb_setup()
 
             inst->src[i].file = HW_REG;
             inst->src[i].fixed_hw_reg =
-               retype(brw_vec8_grf(grf, 0), inst->src[i].type);
+               stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+                                  inst->src[i].subreg_offset),
+                      inst->exec_size * inst->src[i].stride,
+                      inst->exec_size, inst->src[i].stride);
          }
       }
    }
@@ -2811,7 +2820,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
 {
    int write_len = inst->regs_written;
    int first_write_grf = inst->dst.reg;
-   bool needs_dep[BRW_MAX_MRF];
+   bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
    assert(write_len < (int)sizeof(needs_dep) - 1);
 
    memset(needs_dep, false, sizeof(needs_dep));
@@ -2882,7 +2891,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
 {
    int write_len = inst->regs_written;
    int first_write_grf = inst->dst.reg;
-   bool needs_dep[BRW_MAX_MRF];
+   bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
    assert(write_len < (int)sizeof(needs_dep) - 1);
 
    memset(needs_dep, false, sizeof(needs_dep));
@@ -3212,7 +3221,8 @@ fs_visitor::lower_integer_multiplication()
              * schedule multi-component multiplications much better.
              */
 
-            if (inst->conditional_mod && inst->dst.is_null()) {
+            fs_reg orig_dst = inst->dst;
+            if (orig_dst.is_null() || orig_dst.file == MRF) {
                inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
                                   inst->dst.type);
             }
@@ -3278,10 +3288,9 @@ fs_visitor::lower_integer_multiplication()
 
             ibld.ADD(dst, low, high);
 
-            if (inst->conditional_mod) {
-               fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
+            if (inst->conditional_mod || orig_dst.file == MRF) {
                set_condmod(inst->conditional_mod,
-                           ibld.MOV(null, inst->dst));
+                           ibld.MOV(orig_dst, inst->dst));
             }
          }
 
@@ -4749,10 +4758,19 @@ fs_visitor::setup_cs_payload()
    assert(devinfo->gen >= 7);
 
    payload.num_regs = 1;
+
+   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+      const unsigned local_id_dwords =
+         brw_cs_prog_local_id_payload_dwords(prog, dispatch_width);
+      assert((local_id_dwords & 0x7) == 0);
+      const unsigned local_id_regs = local_id_dwords / 8;
+      payload.local_invocation_id_reg = payload.num_regs;
+      payload.num_regs += local_id_regs;
+   }
 }
 
 void
-fs_visitor::assign_binding_table_offsets()
+fs_visitor::assign_fs_binding_table_offsets()
 {
    assert(stage == MESA_SHADER_FRAGMENT);
    brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
@@ -4769,6 +4787,20 @@ fs_visitor::assign_binding_table_offsets()
 }
 
 void
+fs_visitor::assign_cs_binding_table_offsets()
+{
+   assert(stage == MESA_SHADER_COMPUTE);
+   brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
+   uint32_t next_binding_table_offset = 0;
+
+   /* May not be used if the gl_NumWorkGroups variable is not accessed. */
+   prog_data->binding_table.work_groups_start = next_binding_table_offset;
+   next_binding_table_offset++;
+
+   assign_common_binding_table_offsets(next_binding_table_offset);
+}
+
+void
 fs_visitor::calculate_register_pressure()
 {
    invalidate_live_intervals();
@@ -4789,6 +4821,9 @@ fs_visitor::calculate_register_pressure()
 void
 fs_visitor::optimize()
 {
+   /* Start by validating the shader we currently have. */
+   validate();
+
    /* bld is the common builder object pointing at the end of the program we
     * used to translate it into i965 IR.  For the optimization and lowering
     * passes coming next, any code added after the end of the program without
@@ -4805,7 +4840,10 @@ fs_visitor::optimize()
    assign_constant_locations();
    demote_pull_constants();
 
+   validate();
+
    split_virtual_grfs();
+   validate();
 
 #define OPT(pass, args...) ({                                           \
       pass_num++;                                                       \
@@ -4819,6 +4857,8 @@ fs_visitor::optimize()
          backend_shader::dump_instructions(filename);                   \
       }                                                                 \
                                                                         \
+      validate();                                                       \
+                                                                        \
       progress = progress || this_progress;                             \
       this_progress;                                                    \
    })
@@ -4880,6 +4920,8 @@ fs_visitor::optimize()
    OPT(lower_integer_multiplication);
 
    lower_uniform_pull_constant_loads();
+
+   validate();
 }
 
 /**
@@ -5011,8 +5053,10 @@ fs_visitor::run_fs(bool do_rep_send)
 
    assert(stage == MESA_SHADER_FRAGMENT);
 
+   sanity_param_count = prog->Parameters->NumParameters;
+
    if (prog_data->map_entries == NULL)
-      assign_binding_table_offsets();
+      assign_fs_binding_table_offsets();
 
    if (devinfo->gen >= 6)
       setup_payload_gen6();
@@ -5093,7 +5137,7 @@ fs_visitor::run_cs()
 
    sanity_param_count = prog->Parameters->NumParameters;
 
-   assign_common_binding_table_offsets(0);
+   assign_cs_binding_table_offsets();
 
    setup_cs_payload();
 
@@ -5141,15 +5185,6 @@ brw_wm_fs_emit(struct brw_context *brw,
                struct gl_shader_program *prog,
                unsigned *final_assembly_size)
 {
-   bool start_busy = false;
-   double start_time = 0;
-
-   if (unlikely(brw->perf_debug)) {
-      start_busy = (brw->batch.last_bo &&
-                    drm_intel_bo_busy(brw->batch.last_bo));
-      start_time = get_time();
-   }
-
    struct brw_shader *shader = NULL;
    if (prog)
       shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
@@ -5227,93 +5262,127 @@ brw_wm_fs_emit(struct brw_context *brw,
    if (simd16_cfg)
       prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16);
 
-   if (unlikely(brw->perf_debug) && shader) {
-      if (shader->compiled_once)
-         brw_wm_debug_recompile(brw, prog, key);
-      shader->compiled_once = true;
-
-      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
-         perf_debug("FS compile took %.03f ms and stalled the GPU\n",
-                    (get_time() - start_time) * 1000);
-      }
-   }
-
    return g.get_assembly(final_assembly_size);
 }
 
-extern "C" bool
-brw_fs_precompile(struct gl_context *ctx,
-                  struct gl_shader_program *shader_prog,
-                  struct gl_program *prog)
+fs_reg *
+fs_visitor::emit_cs_local_invocation_id_setup()
 {
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_wm_prog_key key;
-
-   struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
-   struct brw_fragment_program *bfp = brw_fragment_program(fp);
-   bool program_uses_dfdy = fp->UsesDFdy;
-
-   memset(&key, 0, sizeof(key));
+   assert(stage == MESA_SHADER_COMPUTE);
 
-   if (brw->gen < 6) {
-      if (fp->UsesKill)
-         key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+   fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
 
-      if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
-         key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+   struct brw_reg src =
+      brw_vec8_grf(payload.local_invocation_id_reg, 0);
+   src = retype(src, BRW_REGISTER_TYPE_UD);
+   bld.MOV(*reg, src);
+   src.nr += dispatch_width / 8;
+   bld.MOV(offset(*reg, bld, 1), src);
+   src.nr += dispatch_width / 8;
+   bld.MOV(offset(*reg, bld, 2), src);
 
-      /* Just assume depth testing. */
-      key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
-      key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
-   }
+   return reg;
+}
 
-   if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
-                                         BRW_FS_VARYING_INPUT_MASK) > 16)
-      key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+fs_reg *
+fs_visitor::emit_cs_work_group_id_setup()
+{
+   assert(stage == MESA_SHADER_COMPUTE);
 
-   brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
+   fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
 
-   if (fp->Base.InputsRead & VARYING_BIT_POS) {
-      key.drawable_height = ctx->DrawBuffer->Height;
-   }
+   struct brw_reg r0_1(retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD));
+   struct brw_reg r0_6(retype(brw_vec1_grf(0, 6), BRW_REGISTER_TYPE_UD));
+   struct brw_reg r0_7(retype(brw_vec1_grf(0, 7), BRW_REGISTER_TYPE_UD));
 
-   key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
-         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
-         BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
+   bld.MOV(*reg, r0_1);
+   bld.MOV(offset(*reg, bld, 1), r0_6);
+   bld.MOV(offset(*reg, bld, 2), r0_7);
 
-   if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
-      key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
-                          key.nr_color_regions > 1;
-   }
+   return reg;
+}
 
-   key.program_string_id = bfp->id;
+const unsigned *
+brw_cs_emit(struct brw_context *brw,
+            void *mem_ctx,
+            const struct brw_cs_prog_key *key,
+            struct brw_cs_prog_data *prog_data,
+            struct gl_compute_program *cp,
+            struct gl_shader_program *prog,
+            unsigned *final_assembly_size)
+{
+   struct brw_shader *shader =
+      (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
 
-   uint32_t old_prog_offset = brw->wm.base.prog_offset;
-   struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+   if (unlikely(INTEL_DEBUG & DEBUG_CS))
+      brw_dump_ir("compute", prog, &shader->base, &cp->Base);
 
-   bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
+   prog_data->local_size[0] = cp->LocalSize[0];
+   prog_data->local_size[1] = cp->LocalSize[1];
+   prog_data->local_size[2] = cp->LocalSize[2];
+   unsigned local_workgroup_size =
+      cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
 
-   brw->wm.base.prog_offset = old_prog_offset;
-   brw->wm.prog_data = old_prog_data;
+   cfg_t *cfg = NULL;
+   const char *fail_msg = NULL;
 
-   return success;
-}
+   int st_index = -1;
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
 
-void
-brw_setup_tex_for_precompile(struct brw_context *brw,
-                             struct brw_sampler_prog_key_data *tex,
-                             struct gl_program *prog)
-{
-   const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
-   unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
-   for (unsigned i = 0; i < sampler_count; i++) {
-      if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
-         /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
-         tex->swizzles[i] =
-            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+   /* Now the main event: Visit the shader IR and generate our CS IR for it.
+    */
+   fs_visitor v8(brw->intelScreen->compiler, brw,
+                 mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+                 &cp->Base, 8, st_index);
+   if (!v8.run_cs()) {
+      fail_msg = v8.fail_msg;
+   } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
+      cfg = v8.cfg;
+      prog_data->simd_size = 8;
+   }
+
+   fs_visitor v16(brw->intelScreen->compiler, brw,
+                  mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+                  &cp->Base, 16, st_index);
+   if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
+       !fail_msg && !v8.simd16_unsupported &&
+       local_workgroup_size <= 16 * brw->max_cs_threads) {
+      /* Try a SIMD16 compile */
+      v16.import_uniforms(&v8);
+      if (!v16.run_cs()) {
+         perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
+         if (!cfg) {
+            fail_msg =
+               "Couldn't generate SIMD16 program and not "
+               "enough threads for SIMD8";
+         }
       } else {
-         /* Color sampler: assume no swizzling. */
-         tex->swizzles[i] = SWIZZLE_XYZW;
+         cfg = v16.cfg;
+         prog_data->simd_size = 16;
       }
    }
+
+   if (unlikely(cfg == NULL)) {
+      assert(fail_msg);
+      prog->LinkStatus = false;
+      ralloc_strcat(&prog->InfoLog, fail_msg);
+      _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
+                    fail_msg);
+      return NULL;
+   }
+
+   fs_generator g(brw->intelScreen->compiler, brw,
+                  mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+                  v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
+   if (INTEL_DEBUG & DEBUG_CS) {
+      char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
+                                   prog->Label ? prog->Label : "unnamed",
+                                   prog->Name);
+      g.enable_debug(name);
+   }
+
+   g.generate_code(cfg, prog_data->simd_size);
+
+   return g.get_assembly(final_assembly_size);
 }
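
A quick standalone restatement of the SIMD-width policy in brw_cs_emit() above: a width is viable only when one hardware thread group can cover the whole local workgroup, and SIMD16 is preferred whenever it both compiles and fits. A minimal sketch, where pick_cs_simd_size, simd8_compiled and simd16_compiled are illustrative names rather than driver API:

   /* Returns the chosen SIMD width, or 0 when no width is viable (the
    * caller would then report a link failure, as the code above does). */
   static unsigned
   pick_cs_simd_size(unsigned local_workgroup_size, unsigned max_cs_threads,
                     bool simd8_compiled, bool simd16_compiled)
   {
      /* SIMD16 covers twice as many invocations per hardware thread, so it
       * fits larger workgroups and is tried as the upgrade path. */
      if (simd16_compiled && local_workgroup_size <= 16 * max_cs_threads)
         return 16;
      if (simd8_compiled && local_workgroup_size <= 8 * max_cs_threads)
         return 8;
      return 0;
   }
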
index 0a89d2e..a8b6726 100644 (file)
@@ -128,7 +128,8 @@ public:
    bool run_cs();
    void optimize();
    void allocate_registers();
-   void assign_binding_table_offsets();
+   void assign_fs_binding_table_offsets();
+   void assign_cs_binding_table_offsets();
    void setup_payload_gen4();
    void setup_payload_gen6();
    void setup_vs_payload();
@@ -151,6 +152,7 @@ public:
    void invalidate_live_intervals();
    void calculate_live_intervals();
    void calculate_register_pressure();
+   void validate();
    bool opt_algebraic();
    bool opt_redundant_discard_jumps();
    bool opt_cse();
@@ -255,6 +257,8 @@ public:
                        nir_ssa_undef_instr *instr);
    void nir_emit_intrinsic(const brw::fs_builder &bld,
                            nir_intrinsic_instr *instr);
+   void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
+                             int op, nir_intrinsic_instr *instr);
    void nir_emit_texture(const brw::fs_builder &bld,
                          nir_tex_instr *instr);
    void nir_emit_jump(const brw::fs_builder &bld,
@@ -275,6 +279,8 @@ public:
    void emit_fb_writes();
    void emit_urb_writes();
    void emit_cs_terminate();
+   fs_reg *emit_cs_local_invocation_id_setup();
+   fs_reg *emit_cs_work_group_id_setup();
 
    void emit_barrier();
 
@@ -364,6 +370,7 @@ public:
       uint8_t sample_pos_reg;
       uint8_t sample_mask_in_reg;
       uint8_t barycentric_coord_reg[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
+      uint8_t local_invocation_id_reg;
 
       /** The number of thread payload registers the hardware will supply. */
       uint8_t num_regs;
@@ -427,6 +434,9 @@ private:
                         struct brw_reg *src);
    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
                      struct brw_reg sampler_index);
+   void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
+                                 struct brw_reg src,
+                                 struct brw_reg surf_index);
    void generate_math_gen6(fs_inst *inst,
                            struct brw_reg dst,
                            struct brw_reg src0,
@@ -514,6 +524,3 @@ private:
 
 bool brw_do_channel_expressions(struct exec_list *instructions);
 bool brw_do_vector_splitting(struct exec_list *instructions);
-void brw_setup_tex_for_precompile(struct brw_context *brw,
-                                  struct brw_sampler_prog_key_data *tex,
-                                  struct gl_program *prog);
index a8883a3..277b6cc 100644 (file)
@@ -379,6 +379,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
    }
 
    case ir_binop_ubo_load:
+   case ir_unop_get_buffer_size:
       unreachable("not yet supported");
 
    case ir_triop_fma:
@@ -430,6 +431,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
    case ir_triop_vector_insert:
    case ir_quadop_bitfield_insert:
    case ir_quadop_vector:
+   case ir_unop_ssbo_unsized_array_length:
       unreachable("should have been lowered");
 
    case ir_unop_unpack_half_2x16_split_x:
index 5445ad5..230b0ca 100644 (file)
@@ -279,6 +279,7 @@ static bool
 can_change_source_types(fs_inst *inst)
 {
    return !inst->src[0].abs && !inst->src[0].negate &&
+          inst->dst.type == inst->src[0].type &&
           (inst->opcode == BRW_OPCODE_MOV ||
            (inst->opcode == BRW_OPCODE_SEL &&
             inst->predicate != BRW_PREDICATE_NONE &&
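
The dst/src type equality added to can_change_source_types() above matters because a MOV between different register types is a converting move rather than a bit copy, so rewriting its source type would change its semantics. A C++ analogy of the distinction, purely illustrative:

   #include <cstring>

   static void converting_vs_raw_move(void)
   {
      int i = 42;
      float f = static_cast<float>(i); /* converting MOV: representation changes */
      float g;
      std::memcpy(&g, &i, sizeof g);   /* raw copy: only the bits move */
      (void) f; (void) g;
   }
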
index c86ca04..6f8b75e 100644 (file)
@@ -48,13 +48,15 @@ static uint32_t brw_file_from_reg(fs_reg *reg)
 }
 
 static struct brw_reg
-brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg)
+brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
 {
    struct brw_reg brw_reg;
 
    switch (reg->file) {
-   case GRF:
    case MRF:
+      assert((reg->reg & ~(1 << 7)) < BRW_MAX_MRF(gen));
+      /* Fallthrough */
+   case GRF:
       if (reg->stride == 0) {
          brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, 0);
       } else if (inst->exec_size < 8) {
@@ -418,7 +420,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst)
    brw_fb_WRITE(p,
                 16 /* dispatch_width */,
                 brw_message_reg(inst->base_mrf),
-                brw_reg_from_fs_reg(inst, &inst->src[0]),
+                brw_reg_from_fs_reg(inst, &inst->src[0], devinfo->gen),
                 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
                 inst->target,
                 inst->mlen,
@@ -542,6 +544,50 @@ fs_generator::generate_math_g45(fs_inst *inst,
 }
 
 void
+fs_generator::generate_get_buffer_size(fs_inst *inst,
+                                       struct brw_reg dst,
+                                       struct brw_reg src,
+                                       struct brw_reg surf_index)
+{
+   assert(devinfo->gen >= 7);
+   assert(surf_index.file == BRW_IMMEDIATE_VALUE);
+
+   uint32_t simd_mode;
+   int rlen = 4;
+
+   switch (inst->exec_size) {
+   case 8:
+      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+      break;
+   case 16:
+      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+      break;
+   default:
+      unreachable("Invalid width for texture instruction");
+   }
+
+   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
+      rlen = 8;
+      dst = vec16(dst);
+   }
+
+   brw_SAMPLE(p,
+              retype(dst, BRW_REGISTER_TYPE_UW),
+              inst->base_mrf,
+              src,
+              surf_index.dw1.ud,
+              0,
+              GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
+              rlen, /* response length */
+              inst->mlen,
+              inst->header_size > 0,
+              simd_mode,
+              BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+   brw_mark_surface_used(prog_data, surf_index.dw1.ud);
+}
+
+void
 fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
                            struct brw_reg sampler_index)
 {
@@ -646,6 +692,9 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
             msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
          }
          break;
+      case SHADER_OPCODE_SAMPLEINFO:
+         msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
+         break;
       default:
         unreachable("not reached");
       }
@@ -1533,7 +1582,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
 
       for (unsigned int i = 0; i < inst->sources; i++) {
-        src[i] = brw_reg_from_fs_reg(inst, &inst->src[i]);
+        src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen);
 
         /* The accumulator result appears to get used for the
          * conditional modifier generation.  When negating a UD
@@ -1545,7 +1594,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
                inst->src[i].type != BRW_REGISTER_TYPE_UD ||
                !inst->src[i].negate);
       }
-      dst = brw_reg_from_fs_reg(inst, &inst->dst);
+      dst = brw_reg_from_fs_reg(inst, &inst->dst, devinfo->gen);
 
       brw_set_default_predicate_control(p, inst->predicate);
       brw_set_default_predicate_inverse(p, inst->predicate_inverse);
@@ -1555,6 +1604,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       brw_set_default_acc_write_control(p, inst->writes_accumulator);
       brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
 
+      assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
+      assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
+
       switch (inst->exec_size) {
       case 1:
       case 2:
@@ -1908,6 +1960,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          src[0].subnr = 4 * type_sz(src[0].type);
          brw_MOV(p, dst, stride(src[0], 8, 4, 1));
          break;
+      case FS_OPCODE_GET_BUFFER_SIZE:
+         generate_get_buffer_size(inst, dst, src[0], src[1]);
+         break;
       case SHADER_OPCODE_TEX:
       case FS_OPCODE_TXB:
       case SHADER_OPCODE_TXD:
@@ -1920,6 +1975,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       case SHADER_OPCODE_LOD:
       case SHADER_OPCODE_TG4:
       case SHADER_OPCODE_TG4_OFFSET:
+      case SHADER_OPCODE_SAMPLEINFO:
         generate_tex(inst, dst, src[0], src[1]);
         break;
       case FS_OPCODE_DDX_COARSE:
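
The RESINFO message emitted by generate_get_buffer_size() above scales its response length with the execution size, since the sampler returns its result per 8-channel group. A sketch of that mapping, reusing the enum names visible in the hunk (the helper itself is hypothetical):

   #include <cassert>
   #include <cstdint>

   static void
   resinfo_shape(unsigned exec_size, uint32_t *simd_mode, int *rlen)
   {
      if (exec_size == 16) {
         *simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         *rlen = 8;   /* two GRFs per returned component at SIMD16 */
      } else {
         assert(exec_size == 8);
         *simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
         *rlen = 4;   /* one GRF per returned component at SIMD8 */
      }
   }
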
index da8d47f..61d0c89 100644 (file)
 #include "brw_fs.h"
 #include "brw_fs_surface_builder.h"
 #include "brw_nir.h"
+#include "brw_fs_surface_builder.h"
 
 using namespace brw;
+using namespace brw::surface_access;
 
 void
 fs_visitor::emit_nir_code()
@@ -338,6 +340,20 @@ emit_system_values_block(nir_block *block, void *void_visitor)
                                  BRW_REGISTER_TYPE_D));
          break;
 
+      case nir_intrinsic_load_local_invocation_id:
+         assert(v->stage == MESA_SHADER_COMPUTE);
+         reg = &v->nir_system_values[SYSTEM_VALUE_LOCAL_INVOCATION_ID];
+         if (reg->file == BAD_FILE)
+            *reg = *v->emit_cs_local_invocation_id_setup();
+         break;
+
+      case nir_intrinsic_load_work_group_id:
+         assert(v->stage == MESA_SHADER_COMPUTE);
+         reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];
+         if (reg->file == BAD_FILE)
+            *reg = *v->emit_cs_work_group_id_setup();
+         break;
+
       default:
          break;
       }
@@ -1437,6 +1453,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_image_samples:
+      /* The driver does not support multi-sampled images. */
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
+      break;
+
    case nir_intrinsic_load_front_face:
       bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
               *emit_frontfacing_interpolation());
@@ -1445,35 +1466,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
    case nir_intrinsic_load_vertex_id:
       unreachable("should be lowered by lower_vertex_id()");
 
-   case nir_intrinsic_load_vertex_id_zero_base: {
-      fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
-      assert(vertex_id.file != BAD_FILE);
-      dest.type = vertex_id.type;
-      bld.MOV(dest, vertex_id);
-      break;
-   }
-
-   case nir_intrinsic_load_base_vertex: {
-      fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
-      assert(base_vertex.file != BAD_FILE);
-      dest.type = base_vertex.type;
-      bld.MOV(dest, base_vertex);
-      break;
-   }
-
-   case nir_intrinsic_load_instance_id: {
-      fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
-      assert(instance_id.file != BAD_FILE);
-      dest.type = instance_id.type;
-      bld.MOV(dest, instance_id);
-      break;
-   }
-
-   case nir_intrinsic_load_sample_mask_in: {
-      fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
-      assert(sample_mask_in.file != BAD_FILE);
-      dest.type = sample_mask_in.type;
-      bld.MOV(dest, sample_mask_in);
+   case nir_intrinsic_load_vertex_id_zero_base:
+   case nir_intrinsic_load_base_vertex:
+   case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_sample_mask_in:
+   case nir_intrinsic_load_sample_id: {
+      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+      fs_reg val = nir_system_values[sv];
+      assert(val.file != BAD_FILE);
+      dest.type = val.type;
+      bld.MOV(dest, val);
       break;
    }
 
@@ -1486,14 +1488,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_load_sample_id: {
-      fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
-      assert(sample_id.file != BAD_FILE);
-      dest.type = sample_id.type;
-      bld.MOV(dest, sample_id);
-      break;
-   }
-
    case nir_intrinsic_load_uniform_indirect:
       has_indirect = true;
       /* fallthrough */
@@ -1546,7 +1540,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           */
          brw_mark_surface_used(prog_data,
                                stage_prog_data->binding_table.ubo_start +
-                               shader_prog->NumUniformBlocks - 1);
+                               shader_prog->NumBufferInterfaceBlocks - 1);
       }
 
       if (has_indirect) {
@@ -1583,6 +1577,68 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_load_ssbo_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_load_ssbo: {
+      assert(devinfo->gen >= 7);
+
+      nir_const_value *const_uniform_block =
+         nir_src_as_const_value(instr->src[0]);
+
+      fs_reg surf_index;
+      if (const_uniform_block) {
+         unsigned index = stage_prog_data->binding_table.ubo_start +
+                          const_uniform_block->u[0];
+         surf_index = fs_reg(index);
+         brw_mark_surface_used(prog_data, index);
+      } else {
+         surf_index = vgrf(glsl_type::uint_type);
+         bld.ADD(surf_index, get_nir_src(instr->src[0]),
+                 fs_reg(stage_prog_data->binding_table.ubo_start));
+         surf_index = bld.emit_uniformize(surf_index);
+
+         /* Assume this may touch any UBO. It would be nice to provide
+          * a tighter bound, but the array information is already lowered away.
+          */
+         brw_mark_surface_used(prog_data,
+                               stage_prog_data->binding_table.ubo_start +
+                               shader_prog->NumBufferInterfaceBlocks - 1);
+      }
+
+      /* Get the offset to read from */
+      fs_reg offset_reg = vgrf(glsl_type::uint_type);
+      unsigned const_offset_bytes = 0;
+      if (has_indirect) {
+         bld.MOV(offset_reg, get_nir_src(instr->src[1]));
+      } else {
+         const_offset_bytes = instr->const_index[0];
+         bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+      }
+
+      /* Read the vector */
+      for (int i = 0; i < instr->num_components; i++) {
+         fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+                                                1 /* dims */, 1 /* size */,
+                                                BRW_PREDICATE_NONE);
+         read_result.type = dest.type;
+         bld.MOV(dest, read_result);
+         dest = offset(dest, bld, 1);
+
+         /* Vector components are stored contiguously in memory */
+         if (i < instr->num_components - 1) {
+            if (!has_indirect) {
+               const_offset_bytes += 4;
+               bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+            } else {
+               bld.ADD(offset_reg, offset_reg, brw_imm_ud(4));
+            }
+         }
+      }
+
+      break;
+   }
+
    case nir_intrinsic_load_input_indirect:
       has_indirect = true;
       /* fallthrough */
@@ -1717,6 +1773,75 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_store_ssbo_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_store_ssbo: {
+      assert(devinfo->gen >= 7);
+
+      /* Block index */
+      fs_reg surf_index;
+      nir_const_value *const_uniform_block =
+         nir_src_as_const_value(instr->src[1]);
+      if (const_uniform_block) {
+         unsigned index = stage_prog_data->binding_table.ubo_start +
+                          const_uniform_block->u[0];
+         surf_index = fs_reg(index);
+         brw_mark_surface_used(prog_data, index);
+      } else {
+         surf_index = vgrf(glsl_type::uint_type);
+         bld.ADD(surf_index, get_nir_src(instr->src[1]),
+                  fs_reg(stage_prog_data->binding_table.ubo_start));
+         surf_index = bld.emit_uniformize(surf_index);
+
+         brw_mark_surface_used(prog_data,
+                               stage_prog_data->binding_table.ubo_start +
+                               shader_prog->NumBufferInterfaceBlocks - 1);
+      }
+
+      /* Offset */
+      fs_reg offset_reg = vgrf(glsl_type::uint_type);
+      unsigned const_offset_bytes = 0;
+      if (has_indirect) {
+         bld.MOV(offset_reg, get_nir_src(instr->src[2]));
+      } else {
+         const_offset_bytes = instr->const_index[0];
+         bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+      }
+
+      /* Value */
+      fs_reg val_reg = get_nir_src(instr->src[0]);
+
+      /* Writemask */
+      unsigned writemask = instr->const_index[1];
+
+      /* Write each component present in the writemask */
+      unsigned skipped_channels = 0;
+      for (int i = 0; i < instr->num_components; i++) {
+         int component_mask = 1 << i;
+         if (writemask & component_mask) {
+            if (skipped_channels) {
+               if (!has_indirect) {
+                  const_offset_bytes += 4 * skipped_channels;
+                  bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+               } else {
+                  bld.ADD(offset_reg, offset_reg,
+                           brw_imm_ud(4 * skipped_channels));
+               }
+               skipped_channels = 0;
+            }
+
+            emit_untyped_write(bld, surf_index, offset_reg,
+                               offset(val_reg, bld, i),
+                               1 /* dims */, 1 /* size */,
+                               BRW_PREDICATE_NONE);
+         }
+
+         skipped_channels++;
+      }
+      break;
+   }
+
    case nir_intrinsic_store_output_indirect:
       has_indirect = true;
       /* fallthrough */
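
The writemask walk in the store_ssbo case above advances the byte offset only when it reaches an enabled channel, folding any run of disabled channels into a single adjustment. A host-side sketch of the same bookkeeping (walk_writemask is an illustrative name, not driver code):

   #include <cstdio>

   static void
   walk_writemask(unsigned num_components, unsigned writemask, unsigned base)
   {
      unsigned offset = base, skipped = 0;
      for (unsigned i = 0; i < num_components; i++) {
         if (writemask & (1u << i)) {
            offset += 4 * skipped;   /* jump over the disabled channels */
            skipped = 0;
            std::printf("component %u -> byte offset %u\n", i, offset);
         }
         skipped++;                  /* also counts the channel just written */
      }
   }

For writemask 0b1011 and base 0 this writes components 0, 1 and 3 at byte offsets 0, 4 and 12; each enabled component i always lands at base + 4 * i.
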
@@ -1737,7 +1862,103 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
    case nir_intrinsic_barrier:
       emit_barrier();
+      if (stage == MESA_SHADER_COMPUTE)
+         ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
+      break;
+
+   case nir_intrinsic_load_local_invocation_id:
+   case nir_intrinsic_load_work_group_id: {
+      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+      fs_reg val = nir_system_values[sv];
+      assert(val.file != BAD_FILE);
+      dest.type = val.type;
+      for (unsigned i = 0; i < 3; i++)
+         bld.MOV(offset(dest, bld, i), offset(val, bld, i));
+      break;
+   }
+
+   case nir_intrinsic_ssbo_atomic_add:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_min:
+      if (dest.type == BRW_REGISTER_TYPE_D)
+         nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
+      else
+         nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_max:
+      if (dest.type == BRW_REGISTER_TYPE_D)
+         nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
+      else
+         nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_and:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_or:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_OR, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_xor:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_XOR, instr);
       break;
+   case nir_intrinsic_ssbo_atomic_exchange:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_MOV, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+      nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
+      break;
+
+   case nir_intrinsic_get_buffer_size: {
+      nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+      unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+      int reg_width = dispatch_width / 8;
+
+      assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+      /* Set LOD = 0 */
+      fs_reg source = fs_reg(0);
+
+      int mlen = 1 * reg_width;
+      fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
+                                  BRW_REGISTER_TYPE_UD);
+      bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
+
+      fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + ubo_index);
+      fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
+                               src_payload, surf_index);
+      inst->header_size = 0;
+      inst->mlen = mlen;
+      break;
+   }
+
+   case nir_intrinsic_load_num_work_groups: {
+      assert(devinfo->gen >= 7);
+      assert(stage == MESA_SHADER_COMPUTE);
+
+      struct brw_cs_prog_data *cs_prog_data =
+         (struct brw_cs_prog_data *) prog_data;
+      const unsigned surface =
+         cs_prog_data->binding_table.work_groups_start;
+
+      cs_prog_data->uses_num_work_groups = true;
+
+      fs_reg surf_index = fs_reg(surface);
+      brw_mark_surface_used(prog_data, surface);
+
+      /* Read the 3 GLuint components of gl_NumWorkGroups */
+      for (unsigned i = 0; i < 3; i++) {
+         fs_reg read_result =
+            emit_untyped_read(bld, surf_index,
+                              fs_reg(i << 2),
+                              1 /* dims */, 1 /* size */,
+                              BRW_PREDICATE_NONE);
+         read_result.type = dest.type;
+         bld.MOV(dest, read_result);
+         dest = offset(dest, bld, 1);
+      }
+      break;
+   }
 
    default:
       unreachable("unknown intrinsic");
@@ -1745,6 +1966,52 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 }
 
 void
+fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
+                                 int op, nir_intrinsic_instr *instr)
+{
+   fs_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+      dest = get_nir_dest(instr->dest);
+
+   fs_reg surface;
+   nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+   if (const_surface) {
+      unsigned surf_index = stage_prog_data->binding_table.ubo_start +
+                            const_surface->u[0];
+      surface = fs_reg(surf_index);
+      brw_mark_surface_used(prog_data, surf_index);
+   } else {
+      surface = vgrf(glsl_type::uint_type);
+      bld.ADD(surface, get_nir_src(instr->src[0]),
+              fs_reg(stage_prog_data->binding_table.ubo_start));
+
+      /* Assume this may touch any UBO. This is the same as we do for other
+       * UBO/SSBO accesses with a non-constant surface index.
+       */
+      brw_mark_surface_used(prog_data,
+                            stage_prog_data->binding_table.ubo_start +
+                            shader_prog->NumBufferInterfaceBlocks - 1);
+   }
+
+   fs_reg offset = get_nir_src(instr->src[1]);
+   fs_reg data1 = get_nir_src(instr->src[2]);
+   fs_reg data2;
+   if (op == BRW_AOP_CMPWR)
+      data2 = get_nir_src(instr->src[3]);
+
+   /* Emit the actual atomic operation */
+
+   fs_reg atomic_result =
+      surface_access::emit_untyped_atomic(bld, surface, offset,
+                                          data1, data2,
+                                          1 /* dims */, 1 /* rsize */,
+                                          op,
+                                          BRW_PREDICATE_NONE);
+   dest.type = atomic_result.type;
+   bld.MOV(dest, atomic_result);
+}
+
+void
 fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
 {
    uint32_t set = instr->sampler_set;
@@ -1885,6 +2152,16 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
    case nir_texop_txf_ms: op = ir_txf_ms; break;
    case nir_texop_txl: op = ir_txl; break;
    case nir_texop_txs: op = ir_txs; break;
+   case nir_texop_texture_samples: {
+      fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
+      fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, dst,
+                               bld.vgrf(BRW_REGISTER_TYPE_D, 1),
+                               sampler_reg);
+      inst->mlen = 1;
+      inst->header_size = 1;
+      inst->base_mrf = -1;
+      return;
+   }
    default:
       unreachable("unknown texture opcode");
    }
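
The SSBO atomic dispatch earlier in this file pairs each NIR intrinsic with a BRW_AOP_* opcode, letting the destination type select the signed or unsigned min/max variant. Condensed into one helper (aop_for_ssbo_atomic is an illustrative name; the case pairs are the ones from the switch above):

   static int
   aop_for_ssbo_atomic(nir_intrinsic_op op, bool is_signed)
   {
      switch (op) {
      case nir_intrinsic_ssbo_atomic_add:       return BRW_AOP_ADD;
      case nir_intrinsic_ssbo_atomic_min:       return is_signed ? BRW_AOP_IMIN
                                                                 : BRW_AOP_UMIN;
      case nir_intrinsic_ssbo_atomic_max:       return is_signed ? BRW_AOP_IMAX
                                                                 : BRW_AOP_UMAX;
      case nir_intrinsic_ssbo_atomic_and:       return BRW_AOP_AND;
      case nir_intrinsic_ssbo_atomic_or:        return BRW_AOP_OR;
      case nir_intrinsic_ssbo_atomic_xor:       return BRW_AOP_XOR;
      case nir_intrinsic_ssbo_atomic_exchange:  return BRW_AOP_MOV;
      case nir_intrinsic_ssbo_atomic_comp_swap: return BRW_AOP_CMPWR;
      default:                                  unreachable("not an SSBO atomic");
      }
   }
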
index 6eb9889..6900cee 100644 (file)
@@ -30,6 +30,8 @@
 #include "glsl/glsl_types.h"
 #include "glsl/ir_optimization.h"
 
+#define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
+
 using namespace brw;
 
 static void
@@ -478,7 +480,7 @@ get_used_mrfs(fs_visitor *v, bool *mrf_used)
 {
    int reg_width = v->dispatch_width / 8;
 
-   memset(mrf_used, 0, BRW_MAX_MRF * sizeof(bool));
+   memset(mrf_used, 0, BRW_MAX_MRF(v->devinfo->gen) * sizeof(bool));
 
    foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
       if (inst->dst.file == MRF) {
@@ -509,11 +511,11 @@ static void
 setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g,
                             int first_mrf_node, int *first_used_mrf)
 {
-   bool mrf_used[BRW_MAX_MRF];
+   bool mrf_used[BRW_MAX_MRF(v->devinfo->gen)];
    get_used_mrfs(v, mrf_used);
 
-   *first_used_mrf = BRW_MAX_MRF;
-   for (int i = 0; i < BRW_MAX_MRF; i++) {
+   *first_used_mrf = BRW_MAX_MRF(v->devinfo->gen);
+   for (int i = 0; i < BRW_MAX_MRF(v->devinfo->gen); i++) {
       /* Mark each MRF reg node as being allocated to its physical register.
        *
        * The alternative would be to have per-physical-register classes, which
@@ -593,7 +595,7 @@ fs_visitor::assign_regs(bool allow_spilling)
 
    setup_payload_interference(g, payload_node_count, first_payload_node);
    if (devinfo->gen >= 7) {
-      int first_used_mrf = BRW_MAX_MRF;
+      int first_used_mrf = BRW_MAX_MRF(devinfo->gen);
       setup_mrf_hack_interference(this, g, first_mrf_hack_node,
                                   &first_used_mrf);
 
@@ -616,7 +618,7 @@ fs_visitor::assign_regs(bool allow_spilling)
              * register early enough in the register file that we don't
              * conflict with any used MRF hack registers.
              */
-            reg -= BRW_MAX_MRF - first_used_mrf;
+            reg -= BRW_MAX_MRF(devinfo->gen) - first_used_mrf;
 
             ra_set_node_reg(g, inst->src[0].reg, reg);
             break;
@@ -649,7 +651,7 @@ fs_visitor::assign_regs(bool allow_spilling)
    }
 
    /* Debug of register spilling: Go spill everything. */
-   if (unlikely(INTEL_DEBUG & DEBUG_SPILL)) {
+   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_FS)) {
       int reg = choose_spill_reg(g);
 
       if (reg != -1) {
@@ -727,7 +729,7 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst,
       unspill_inst->regs_written = reg_size;
 
       if (!gen7_read) {
-         unspill_inst->base_mrf = 14;
+         unspill_inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
          unspill_inst->mlen = 1; /* header contains offset */
       }
 
@@ -741,9 +743,9 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src,
                        uint32_t spill_offset, int count)
 {
    int reg_size = 1;
-   int spill_base_mrf = 14;
+   int spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
    if (dispatch_width == 16 && count % 2 == 0) {
-      spill_base_mrf = 13;
+      spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen);
       reg_size = 2;
    }
 
@@ -843,7 +845,8 @@ fs_visitor::spill_reg(int spill_reg)
    int size = alloc.sizes[spill_reg];
    unsigned int spill_offset = last_scratch;
    assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
-   int spill_base_mrf = dispatch_width > 8 ? 13 : 14;
+   int spill_base_mrf = dispatch_width > 8 ? FIRST_SPILL_MRF(devinfo->gen) :
+                                             FIRST_SPILL_MRF(devinfo->gen) + 1;
 
    /* Spills may use MRFs 13-15 in the SIMD16 case.  Our texturing is done
     * using up to 11 MRFs starting from either m1 or m2, and fb writes can use
@@ -853,10 +856,10 @@ fs_visitor::spill_reg(int spill_reg)
     * SIMD16 mode, because we'd stomp the FB writes.
     */
    if (!spilled_any_registers) {
-      bool mrf_used[BRW_MAX_MRF];
+      bool mrf_used[BRW_MAX_MRF(devinfo->gen)];
       get_used_mrfs(this, mrf_used);
 
-      for (int i = spill_base_mrf; i < BRW_MAX_MRF; i++) {
+      for (int i = spill_base_mrf; i < BRW_MAX_MRF(devinfo->gen); i++) {
          if (mrf_used[i]) {
             fail("Register spilling not supported with m%d used", i);
           return;
index 727e8d1..534d849 100644 (file)
@@ -313,12 +313,42 @@ namespace {
 
    namespace image_validity {
       /**
+       * Check whether the bound image is suitable for untyped access.
+       */
+      brw_predicate
+      emit_untyped_image_check(const fs_builder &bld, const fs_reg &image,
+                               brw_predicate pred)
+      {
+         const brw_device_info *devinfo = bld.shader->devinfo;
+         const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET);
+
+         if (devinfo->gen == 7 && !devinfo->is_haswell) {
+            /* Check whether the first stride component (i.e. the Bpp value)
+             * is greater than four, which on Gen7 indicates that a surface of
+             * type RAW has been bound for untyped access.  Reading from or
+             * writing to a surface of a type other than RAW using untyped
+             * surface messages causes a hang on IVB and VLV.
+             */
+            set_predicate(pred,
+                          bld.CMP(bld.null_reg_ud(), stride, fs_reg(4),
+                                  BRW_CONDITIONAL_G));
+
+            return BRW_PREDICATE_NORMAL;
+         } else {
+            /* More recent generations handle the format mismatch
+             * gracefully.
+             */
+            return pred;
+         }
+      }
+
+      /**
        * Check whether there is an image bound at the given index and write
        * the comparison result to f0.0.  Returns an appropriate predication
        * mode to use on subsequent image operations.
        */
       brw_predicate
-      emit_surface_check(const fs_builder &bld, const fs_reg &image)
+      emit_typed_atomic_check(const fs_builder &bld, const fs_reg &image)
       {
          const brw_device_info *devinfo = bld.shader->devinfo;
          const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
@@ -895,7 +925,9 @@ namespace brw {
              * surface read on the result,
              */
             const brw_predicate pred =
-               emit_bounds_check(bld, image, saddr, dims);
+               emit_untyped_image_check(bld, image,
+                                        emit_bounds_check(bld, image,
+                                                          saddr, dims));
 
             /* and they don't know about surface coordinates, we need to
              * convert them to a raw memory offset.
@@ -905,7 +937,7 @@ namespace brw {
             tmp = emit_untyped_read(bld, image, laddr, 1, size, pred);
 
             /* An out of bounds surface access should give zero as result. */
-            for (unsigned c = 0; c < 4; ++c)
+            for (unsigned c = 0; c < size; ++c)
                set_predicate(pred, bld.SEL(offset(tmp, bld, c),
                                            offset(tmp, bld, c), fs_reg(0)));
          }
@@ -1041,7 +1073,9 @@ namespace brw {
                 * the surface write on the result,
                 */
                const brw_predicate pred =
-                  emit_bounds_check(bld, image, saddr, dims);
+                  emit_untyped_image_check(bld, image,
+                                           emit_bounds_check(bld, image,
+                                                             saddr, dims));
 
                /* and, phew, they don't know about surface coordinates, we
                 * need to convert them to a raw memory offset.
@@ -1072,7 +1106,7 @@ namespace brw {
          using namespace image_coordinates;
          using namespace surface_access;
          /* Avoid performing an atomic operation on an unbound surface. */
-         const brw_predicate pred = emit_surface_check(bld, image);
+         const brw_predicate pred = emit_typed_atomic_check(bld, image);
 
          /* Transform the image coordinates into actual surface coordinates. */
          const fs_reg saddr =
diff --git a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
new file mode 100644 (file)
index 0000000..d0e04f3
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_fs_validate.cpp
+ *
+ * Implements a pass that validates various invariants of the IR.  The current
+ * pass only validates that uses of GRFs are sane.  More can be added later.
+ */
+
+#include "brw_fs.h"
+#include "brw_cfg.h"
+
+#define fsv_assert(cond) \
+   if (!(cond)) { \
+      fprintf(stderr, "ASSERT: FS validation failed!\n"); \
+      dump_instruction(inst, stderr); \
+      fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, #cond); \
+      abort(); \
+   }
+
+void
+fs_visitor::validate()
+{
+   foreach_block_and_inst (block, fs_inst, inst, cfg) {
+      if (inst->dst.file == GRF) {
+         fsv_assert(inst->dst.reg_offset + inst->regs_written <=
+                    alloc.sizes[inst->dst.reg]);
+      }
+
+      for (unsigned i = 0; i < inst->sources; i++) {
+         if (inst->src[i].file == GRF) {
+            fsv_assert(inst->src[i].reg_offset + inst->regs_read(i) <=
+                       (int)alloc.sizes[inst->src[i].reg]);
+         }
+      }
+   }
+}
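
The diff only adds the pass and its declaration; one plausible way to wire it up (an assumption, not something shown here) is to run validate() after every transform in debug builds, so a broken rewrite is caught at the pass that introduced it:

   /* Hypothetical wrapper around an optimization pass; `progress` and the
    * RUN_AND_VALIDATE name are illustrative, not part of this change. */
   #define RUN_AND_VALIDATE(pass)                  \
      do {                                         \
         bool this_progress = pass();              \
         validate();                               \
         progress = progress || this_progress;     \
      } while (0)
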
index 96d4f37..6000e35 100644 (file)
@@ -43,6 +43,7 @@
 #include "glsl/ir_visitor.h"
 #include "glsl/ir_rvalue_visitor.h"
 #include "glsl/glsl_types.h"
+#include "util/hash_table.h"
 
 static bool debug = false;
 
@@ -72,7 +73,8 @@ public:
    ir_vector_reference_visitor(void)
    {
       this->mem_ctx = ralloc_context(NULL);
-      this->variable_list.make_empty();
+      this->ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    }
 
    ~ir_vector_reference_visitor(void)
@@ -89,7 +91,7 @@ public:
    variable_entry *get_variable_entry(ir_variable *var);
 
    /* List of variable_entry */
-   exec_list variable_list;
+   struct hash_table *ht;
 
    void *mem_ctx;
 };
@@ -104,6 +106,7 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
 
    switch (var->data.mode) {
    case ir_var_uniform:
+   case ir_var_shader_storage:
    case ir_var_shader_in:
    case ir_var_shader_out:
    case ir_var_system_value:
@@ -119,13 +122,12 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
       break;
    }
 
-   foreach_in_list(variable_entry, entry, &variable_list) {
-      if (entry->var == var)
-        return entry;
-   }
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+   if (hte)
+      return (struct variable_entry *) hte->data;
 
    variable_entry *entry = new(mem_ctx) variable_entry(var);
-   this->variable_list.push_tail(entry);
+   _mesa_hash_table_insert(ht, var, entry);
    return entry;
 }
 
@@ -195,9 +197,9 @@ ir_vector_reference_visitor::visit_enter(ir_function_signature *ir)
 
 class ir_vector_splitting_visitor : public ir_rvalue_visitor {
 public:
-   ir_vector_splitting_visitor(exec_list *vars)
+   ir_vector_splitting_visitor(struct hash_table *vars)
    {
-      this->variable_list = vars;
+      this->ht = vars;
    }
 
    virtual ir_visitor_status visit_leave(ir_assignment *);
@@ -205,7 +207,7 @@ public:
    void handle_rvalue(ir_rvalue **rvalue);
    variable_entry *get_splitting_entry(ir_variable *var);
 
-   exec_list *variable_list;
+   struct hash_table *ht;
 };
 
 variable_entry *
@@ -216,13 +218,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
    if (!var->type->is_vector())
       return NULL;
 
-   foreach_in_list(variable_entry, entry, variable_list) {
-      if (entry->var == var) {
-        return entry;
-      }
-   }
-
-   return NULL;
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+   return hte ? (struct variable_entry *) hte->data : NULL;
 }
 
 void
@@ -329,12 +326,15 @@ ir_vector_splitting_visitor::visit_leave(ir_assignment *ir)
 bool
 brw_do_vector_splitting(exec_list *instructions)
 {
+   struct hash_entry *hte;
+
    ir_vector_reference_visitor refs;
 
    visit_list_elements(&refs, instructions);
 
    /* Trim out variables we can't split. */
-   foreach_in_list_safe(variable_entry, entry, &refs.variable_list) {
+   hash_table_foreach(refs.ht, hte) {
+      struct variable_entry *entry = (struct variable_entry *) hte->data;
       if (debug) {
         fprintf(stderr, "vector %s@%p: whole_access %d\n",
                  entry->var->name, (void *) entry->var,
@@ -342,11 +342,11 @@ brw_do_vector_splitting(exec_list *instructions)
       }
 
       if (entry->whole_vector_access) {
-        entry->remove();
+         _mesa_hash_table_remove(refs.ht, hte);
       }
    }
 
-   if (refs.variable_list.is_empty())
+   if (refs.ht->entries == 0)
       return false;
 
    void *mem_ctx = ralloc_context(NULL);
@@ -354,7 +354,8 @@ brw_do_vector_splitting(exec_list *instructions)
    /* Replace the decls of the vectors to be split with their split
     * components.
     */
-   foreach_in_list(variable_entry, entry, &refs.variable_list) {
+   hash_table_foreach(refs.ht, hte) {
+      struct variable_entry *entry = (struct variable_entry *) hte->data;
       const struct glsl_type *type;
       type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
 
@@ -378,7 +379,7 @@ brw_do_vector_splitting(exec_list *instructions)
       entry->var->remove();
    }
 
-   ir_vector_splitting_visitor split(&refs.variable_list);
+   ir_vector_splitting_visitor split(refs.ht);
    visit_list_elements(&split, instructions);
 
    ralloc_free(mem_ctx);
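
The exec_list-to-hash-table conversion above turns each per-variable lookup from a linear scan of the entry list into a constant-time probe. Its core usage pattern, reduced to the same _mesa_* helpers that appear in the hunks (a sketch, not additional pass code):

   struct hash_table *ht =
      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
                              _mesa_key_pointer_equal);

   /* Find-or-insert keyed on the ir_variable pointer. */
   struct hash_entry *he = _mesa_hash_table_search(ht, var);
   variable_entry *entry = he ? (variable_entry *) he->data : NULL;
   if (!entry) {
      entry = new(mem_ctx) variable_entry(var);
      _mesa_hash_table_insert(ht, var, entry);
   }

   /* Iterate all entries, as brw_do_vector_splitting() now does. */
   struct hash_entry *hte;
   hash_table_foreach(ht, hte)
      process((variable_entry *) hte->data);   /* process() is illustrative */
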
index 504673f..47d7ae4 100644 (file)
@@ -783,8 +783,8 @@ fs_visitor::emit_fb_writes()
 void
 fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
 {
-   const struct brw_vue_prog_key *key =
-      (const struct brw_vue_prog_key *) this->key;
+   const struct brw_vs_prog_key *key =
+      (const struct brw_vs_prog_key *) this->key;
 
    for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
       this->userplane[i] = fs_reg(UNIFORM, uniforms);
@@ -806,11 +806,11 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
 {
    struct brw_vue_prog_data *vue_prog_data =
       (struct brw_vue_prog_data *) prog_data;
-   const struct brw_vue_prog_key *key =
-      (const struct brw_vue_prog_key *) this->key;
+   const struct brw_vs_prog_key *key =
+      (const struct brw_vs_prog_key *) this->key;
 
    /* Bail unless some sort of legacy clipping is enabled */
-   if (!key->userclip_active || prog->UsesClipDistanceOut)
+   if (key->nr_userclip_plane_consts == 0)
       return;
 
    /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
@@ -840,7 +840,9 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
    const fs_builder abld = bld.annotate("user clip distances");
 
    this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type);
+   this->output_components[VARYING_SLOT_CLIP_DIST0] = 4;
    this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type);
+   this->output_components[VARYING_SLOT_CLIP_DIST1] = 4;
 
    for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
       fs_reg u = userplane[i];
@@ -937,9 +939,6 @@ fs_visitor::emit_urb_writes()
          unreachable("unexpected scalar vs output");
          break;
 
-      case BRW_VARYING_SLOT_PAD:
-         break;
-
       default:
          /* gl_Position is always in the vue map, but isn't always written by
           * the shader.  Other varyings (clip distances) get added to the vue
@@ -949,7 +948,8 @@ fs_visitor::emit_urb_writes()
           * slot for writing we flush a mlen 5 urb write, otherwise we just
           * advance the urb_offset.
           */
-         if (this->outputs[varying].file == BAD_FILE) {
+         if (varying == BRW_VARYING_SLOT_PAD ||
+             this->outputs[varying].file == BAD_FILE) {
             if (length > 0)
                flush = true;
             else
@@ -972,8 +972,10 @@ fs_visitor::emit_urb_writes()
                sources[length++] = reg;
             }
          } else {
-            for (int i = 0; i < 4; i++)
+            for (unsigned i = 0; i < output_components[varying]; i++)
                sources[length++] = offset(this->outputs[varying], bld, i);
+            for (unsigned i = output_components[varying]; i < 4; i++)
+               sources[length++] = fs_reg(0);
          }
          break;
       }
@@ -1041,12 +1043,14 @@ fs_visitor::emit_barrier()
 
    fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
 
+   const fs_builder pbld = bld.exec_all().group(8, 0);
+
    /* Clear the message payload */
-   bld.exec_all().MOV(payload, fs_reg(0u));
+   pbld.MOV(payload, fs_reg(0u));
 
    /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */
    fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD));
-   bld.exec_all().AND(component(payload, 2), r0_2, fs_reg(0x0f000000u));
+   pbld.AND(component(payload, 2), r0_2, fs_reg(0x0f000000u));
 
    /* Emit a gateway "barrier" message using the payload we set up, followed
     * by a wait instruction.
@@ -1076,8 +1080,10 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
       key_tex = &((const brw_wm_prog_key *) key)->tex;
       break;
    case MESA_SHADER_VERTEX:
+      key_tex = &((const brw_vs_prog_key *) key)->tex;
+      break;
    case MESA_SHADER_GEOMETRY:
-      key_tex = &((const brw_vue_prog_key *) key)->tex;
+      key_tex = &((const brw_gs_prog_key *) key)->tex;
       break;
    case MESA_SHADER_COMPUTE:
       key_tex = &((const brw_cs_prog_key*) key)->tex;
index 4ad6521..0119a90 100644 (file)
@@ -66,8 +66,6 @@ brw_compile_gs_prog(struct brw_context *brw,
    struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
    int param_count = gs->num_uniform_components * 4;
 
-   /* We also upload clip plane data as uniforms */
-   param_count += MAX_CLIP_PLANES * 4;
    param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
 
    c.prog_data.base.base.param =
@@ -79,6 +77,11 @@ brw_compile_gs_prog(struct brw_context *brw,
    c.prog_data.base.base.nr_params = param_count;
    c.prog_data.base.base.nr_image_params = gs->NumImages;
 
+   if (brw->gen >= 8) {
+      c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
+         nir_gs_count_vertices(gp->program.Base.nir);
+   }
+
    if (brw->gen >= 7) {
       if (gp->program.OutputType == GL_POINTS) {
          /* When the output type is points, the geometry shader may output data
@@ -125,17 +128,9 @@ brw_compile_gs_prog(struct brw_context *brw,
 
    GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
 
-   /* In order for legacy clipping to work, we need to populate the clip
-    * distance varying slots whenever clipping is enabled, even if the vertex
-    * shader doesn't write to gl_ClipDistance.
-    */
-   if (c.key.base.userclip_active) {
-      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
-      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
-   }
-
    brw_compute_vue_map(brw->intelScreen->devinfo,
-                       &c.prog_data.base.vue_map, outputs_written);
+                       &c.prog_data.base.vue_map, outputs_written,
+                       prog ? prog->SeparateShader : false);
 
    /* Compute the output vertex size.
     *
@@ -257,8 +252,22 @@ brw_compile_gs_prog(struct brw_context *brw,
    c.prog_data.output_topology =
       get_hw_prim_for_gl_prim(gp->program.OutputType);
 
+   /* The GLSL linker will have already matched up GS inputs and the outputs
+    * of prior stages.  The driver does extend VS outputs in some cases, but
+    * only for legacy OpenGL or Gen4-5 hardware, neither of which offers
+    * geometry shader support.  So we can safely ignore that.
+    *
+    * For SSO pipelines, we use a fixed VUE map layout based on variable
+    * locations, so we can rely on rendezvous-by-location making this work.
+    *
+    * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+    * written by previous stages and shows up via payload magic.
+    */
+   GLbitfield64 inputs_read =
+      gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID;
    brw_compute_vue_map(brw->intelScreen->devinfo,
-                       &c.input_vue_map, c.key.input_varyings);
+                       &c.input_vue_map, inputs_read,
+                       prog->SeparateShader);
 
    /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
     * need to program a URB read length of ceiling(num_slots / 2).
@@ -317,8 +326,7 @@ brw_gs_state_dirty(struct brw_context *brw)
    return brw_state_dirty(brw,
                           _NEW_TEXTURE,
                           BRW_NEW_GEOMETRY_PROGRAM |
-                          BRW_NEW_TRANSFORM_FEEDBACK |
-                          BRW_NEW_VUE_MAP_VS);
+                          BRW_NEW_TRANSFORM_FEEDBACK);
 }
 
 static void
@@ -333,16 +341,11 @@ brw_gs_populate_key(struct brw_context *brw,
 
    memset(key, 0, sizeof(*key));
 
-   key->base.program_string_id = gp->id;
-   brw_setup_vue_key_clip_info(brw, &key->base,
-                               gp->program.Base.UsesClipDistanceOut);
+   key->program_string_id = gp->id;
 
    /* _NEW_TEXTURE */
    brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
-                                      &key->base.tex);
-
-   /* BRW_NEW_VUE_MAP_VS */
-   key->input_varyings = brw->vue_map_vs.slots_valid;
+                                      &key->tex);
 }
 
 void
@@ -361,11 +364,6 @@ brw_upload_gs_prog(struct brw_context *brw)
 
    if (gp == NULL) {
       /* No geometry shader.  Vertex data just passes straight through. */
-      if (brw->ctx.NewDriverState & BRW_NEW_VUE_MAP_VS) {
-         brw->vue_map_geom_out = brw->vue_map_vs;
-         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-      }
-
       if (brw->gen == 6 &&
           (brw->ctx.NewDriverState & BRW_NEW_TRANSFORM_FEEDBACK)) {
          gen6_brw_upload_ff_gs_prog(brw);
@@ -392,12 +390,6 @@ brw_upload_gs_prog(struct brw_context *brw)
       (void)success;
    }
    brw->gs.base.prog_data = &brw->gs.prog_data->base.base;
-
-   if (memcmp(&brw->gs.prog_data->base.vue_map, &brw->vue_map_geom_out,
-              sizeof(brw->vue_map_geom_out)) != 0) {
-      brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
-      brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-   }
 }
 
 bool
@@ -416,12 +408,8 @@ brw_gs_precompile(struct gl_context *ctx,
 
    memset(&key, 0, sizeof(key));
 
-   brw_vue_setup_prog_key_for_precompile(ctx, &key.base, bgp->id, &gp->Base);
-
-   /* Assume that the set of varyings coming in from the vertex shader exactly
-    * matches what the geometry shader requires.
-    */
-   key.input_varyings = gp->Base.InputsRead;
+   brw_setup_tex_for_precompile(brw, &key.tex, prog);
+   key.program_string_id = bgp->id;
 
    success = brw_codegen_gs_prog(brw, shader_prog, bgp, &key);
 
index 46eff1d..c5132ba 100644 (file)
@@ -39,6 +39,9 @@
 extern "C" {
 #endif
 
+/** Maximum SEND message length */
+#define BRW_MAX_MSG_LENGTH 15
+
 /* brw_context.h has a forward declaration of brw_inst, so name the struct. */
 typedef struct brw_inst {
    uint64_t data[2];
index 966a410..96dd633 100644 (file)
@@ -161,7 +161,7 @@ public:
                     const src_reg &src1 = src_reg(),
                     const src_reg &src2 = src_reg());
 
-   struct brw_reg get_dst(void);
+   struct brw_reg get_dst(unsigned gen);
    struct brw_reg get_src(const struct brw_vue_prog_data *prog_data, int i);
 
    dst_reg dst;
@@ -175,7 +175,8 @@ public:
 
    bool is_send_from_grf();
    unsigned regs_read(unsigned arg) const;
-   bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask);
+   bool can_reswizzle(const struct brw_device_info *devinfo, int dst_writemask,
+                      int swizzle, int swizzle_mask);
    void reswizzle(int dst_writemask, int swizzle);
    bool can_do_source_mods(const struct brw_device_info *devinfo);
 
index 7a5f983..d571ecd 100644 (file)
@@ -48,6 +48,7 @@ public:
 
 private:
    void emit(ir_variable *, ir_rvalue *);
+   ir_variable *temp(void *ctx, const glsl_type *type, const char *name);
 };
 
 /**
@@ -60,6 +61,17 @@ lower_texture_grad_visitor::emit(ir_variable *var, ir_rvalue *value)
    base_ir->insert_before(assign(var, value));
 }
 
+/**
+ * Emit a temporary variable declaration
+ */
+ir_variable *
+lower_texture_grad_visitor::temp(void *ctx, const glsl_type *type, const char *name)
+{
+   ir_variable *var = new(ctx) ir_variable(type, name, ir_var_temporary);
+   base_ir->insert_before(var);
+   return var;
+}
+
 static const glsl_type *
 txs_type(const glsl_type *type)
 {
@@ -144,28 +156,179 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir)
       new(mem_ctx) ir_variable(grad_type, "dPdy", ir_var_temporary);
    emit(dPdy, mul(size, ir->lod_info.grad.dPdy));
 
-   /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
-   ir_rvalue *rho;
-   if (dPdx->type->is_scalar()) {
-      rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
-                              expr(ir_unop_abs, dPdy));
-   } else {
-      rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
-                              expr(ir_unop_sqrt, dot(dPdy, dPdy)));
-   }
-
-   /* lambda_base = log2(rho).  We're ignoring GL state biases for now.
-    *
-    * For cube maps the result of these formulas is giving us a value of rho
-    * that is twice the value we should use, so divide it by 2 or,
-    * alternatively, remove one unit from the result of the log2 computation.
-    */
    ir->op = ir_txl;
    if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
-      ir->lod_info.lod = expr(ir_binop_add,
-                              expr(ir_unop_log2, rho),
-                              new(mem_ctx) ir_constant(-1.0f));
+      /* Cubemap texture lookups first generate a texture coordinate normalized
+       * to [-1, 1] on the appropriate face, which is determined by the
+       * component with the largest magnitude and its sign. The texture
+       * coordinate is the quotient of the remaining two coordinates and the
+       * absolute value of that largest-magnitude component. Because of this
+       * division, the derivative of the texel coordinate must be computed
+       * using the quotient rule. The high-level GLSL code is as follows:
+       *
+       * Step 1: selection
+       *
+       * vec3 abs_p, Q, dQdx, dQdy;
+       * abs_p = abs(ir->coordinate);
+       * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
+       *    Q = ir->coordinate.yzx;
+       *    dQdx = ir->lod_info.grad.dPdx.yzx;
+       *    dQdy = ir->lod_info.grad.dPdy.yzx;
+       * }
+       * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
+       *    Q = ir->coordinate.xzy;
+       *    dQdx = ir->lod_info.grad.dPdx.xzy;
+       *    dQdy = ir->lod_info.grad.dPdy.xzy;
+       * }
+       * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
+       *    Q = ir->coordinate;
+       *    dQdx = ir->lod_info.grad.dPdx;
+       *    dQdy = ir->lod_info.grad.dPdy;
+       * }
+       *
+       * Step 2: use the quotient rule to compute the derivatives. The texel
+       * coordinate normalized to [-1, 1] is given by Q.xy / (sign(Q.z) * Q.z).
+       * We only care about the magnitudes of the derivatives, which the sign
+       * does not affect, so we drop it from the computation.
+       *
+       * vec2 dx, dy;
+       * float recip;
+       *
+       * recip = 1.0 / Q.z;
+       * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
+       * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
+       *
+       * Step 3: compute LOD. At this point we have the derivatives of the
+       * texture coordinates normalized to [-1, 1]. We take the LOD to be
+       *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
+       *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
+       *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy, dy))) * L)
+       *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy, dy))))
+       *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy, dy)))
+       * where L is the dimension of the cubemap. The code is:
+       *
+       * float M, result;
+       * M = max(dot(dx, dx), dot(dy, dy));
+       * L = textureSize(sampler, 0).x;
+       * result = -1.0 + 0.5 * log2(L * L * M);
+       */
+
+/* Helpers to make code more human readable. */
+#define EMIT(instr) base_ir->insert_before(instr)
+#define THEN(irif, instr) irif->then_instructions.push_tail(instr)
+#define CLONE(x) x->clone(mem_ctx, NULL)
+
+      ir_variable *abs_p = temp(mem_ctx, glsl_type::vec3_type, "abs_p");
+
+      EMIT(assign(abs_p, swizzle_for_size(abs(CLONE(ir->coordinate)), 3)));
+
+      ir_variable *Q = temp(mem_ctx, glsl_type::vec3_type, "Q");
+      ir_variable *dQdx = temp(mem_ctx, glsl_type::vec3_type, "dQdx");
+      ir_variable *dQdy = temp(mem_ctx, glsl_type::vec3_type, "dQdy");
+
+      /* unmodified dPdx, dPdy values */
+      ir_rvalue *dPdx = ir->lod_info.grad.dPdx;
+      ir_rvalue *dPdy = ir->lod_info.grad.dPdy;
+
+      /* 1. compute selector */
+
+      /* if (abs_p.x >= max(abs_p.y, abs_p.z))  ... */
+      ir_if *branch_x =
+         new(mem_ctx) ir_if(gequal(swizzle_x(abs_p),
+                                   max2(swizzle_y(abs_p), swizzle_z(abs_p))));
+
+      /* Q = p.yzx;
+       * dQdx = dPdx.yzx;
+       * dQdy = dPdy.yzx;
+       */
+      int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0);
+      THEN(branch_x, assign(Q, swizzle(CLONE(ir->coordinate), yzx, 3)));
+      THEN(branch_x, assign(dQdx, swizzle(CLONE(dPdx), yzx, 3)));
+      THEN(branch_x, assign(dQdy, swizzle(CLONE(dPdy), yzx, 3)));
+      EMIT(branch_x);
+
+      /* if (abs_p.y >= max(abs_p.x, abs_p.z)) */
+      ir_if *branch_y =
+         new(mem_ctx) ir_if(gequal(swizzle_y(abs_p),
+                                   max2(swizzle_x(abs_p), swizzle_z(abs_p))));
+
+      /* Q = p.xzy;
+       * dQdx = dPdx.xzy;
+       * dQdy = dPdy.xzy;
+       */
+      int xzy = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Z, SWIZZLE_Y, 0);
+      THEN(branch_y, assign(Q, swizzle(CLONE(ir->coordinate), xzy, 3)));
+      THEN(branch_y, assign(dQdx, swizzle(CLONE(dPdx), xzy, 3)));
+      THEN(branch_y, assign(dQdy, swizzle(CLONE(dPdy), xzy, 3)));
+      EMIT(branch_y);
+
+      /* if (abs_p.z >= max(abs_p.x, abs_p.y)) */
+      ir_if *branch_z =
+         new(mem_ctx) ir_if(gequal(swizzle_z(abs_p),
+                            max2(swizzle_x(abs_p), swizzle_y(abs_p))));
+
+      /* Q = p;
+       * dQdx = dPdx;
+       * dQdy = dPdy;
+       */
+      THEN(branch_z, assign(Q, swizzle_for_size(CLONE(ir->coordinate), 3)));
+      THEN(branch_z, assign(dQdx, CLONE(dPdx)));
+      THEN(branch_z, assign(dQdy, CLONE(dPdy)));
+      EMIT(branch_z);
+
+      /* 2. quotient rule */
+      ir_variable *recip = temp(mem_ctx, glsl_type::float_type, "recip");
+      EMIT(assign(recip, div(new(mem_ctx) ir_constant(1.0f), swizzle_z(Q))));
+
+      ir_variable *dx = temp(mem_ctx, glsl_type::vec2_type, "dx");
+      ir_variable *dy = temp(mem_ctx, glsl_type::vec2_type, "dy");
+
+      /* tmp = Q.xy * recip;
+       * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
+       * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
+       */
+      ir_variable *tmp = temp(mem_ctx, glsl_type::vec2_type, "tmp");
+      EMIT(assign(tmp, mul(swizzle_xy(Q), recip)));
+      EMIT(assign(dx, mul(recip, sub(swizzle_xy(dQdx),
+                                     mul(tmp, swizzle_z(dQdx))))));
+      EMIT(assign(dy, mul(recip, sub(swizzle_xy(dQdy),
+                                     mul(tmp, swizzle_z(dQdy))))));
+
+      /* M = max(dot(dx, dx), dot(dy, dy)); */
+      ir_variable *M = temp(mem_ctx, glsl_type::float_type, "M");
+      EMIT(assign(M, max2(dot(dx, dx), dot(dy, dy))));
+
+      /* 'size' holds the result of textureSize() at LOD 0 */
+      ir_variable *L = temp(mem_ctx, glsl_type::float_type, "L");
+      EMIT(assign(L, swizzle_x(size)));
+
+      ir_variable *result = temp(mem_ctx, glsl_type::float_type, "result");
+
+      /* result = -1.0 + 0.5 * log2(L * L * M); */
+      EMIT(assign(result,
+                  add(new(mem_ctx) ir_constant(-1.0f),
+                      mul(new(mem_ctx) ir_constant(0.5f),
+                          expr(ir_unop_log2, mul(mul(L, L), M))))));
+
+      /* 3. final assignment of parameters to textureLod call */
+      ir->lod_info.lod = new (mem_ctx) ir_dereference_variable(result);
+
+#undef THEN
+#undef EMIT
+
    } else {
+      /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
+      ir_rvalue *rho;
+      if (dPdx->type->is_scalar()) {
+         rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
+                    expr(ir_unop_abs, dPdy));
+      } else {
+         rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
+                    expr(ir_unop_sqrt, dot(dPdy, dPdy)));
+      }
+
+      /* lambda_base = log2(rho).  We're ignoring GL state biases for now. */
       ir->lod_info.lod = expr(ir_unop_log2, rho);
    }
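
For reference, the scale factor computed by the two branches above (equation 3.20 of the GL 3.0 specification) is:

\[
\rho = \begin{cases}
   \max(|dPdx|,\ |dPdy|) & \text{scalar coordinate} \\
   \max\left(\sqrt{dPdx \cdot dPdx},\ \sqrt{dPdy \cdot dPdy}\right) & \text{vector coordinate}
\end{cases}
\qquad
\lambda_{base} = \log_2 \rho
\]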
 
index f5ecbb5..eb20173 100644 (file)
@@ -204,7 +204,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances)
 }
 
 static void
-get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
+get_fast_clear_rect(struct gl_framebuffer *fb,
                     struct intel_renderbuffer *irb, struct rect *rect)
 {
    unsigned int x_align, y_align;
@@ -226,7 +226,7 @@ get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
        * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
        * with X alignment multiplied by 16 and Y alignment multiplied by 32.
        */
-      intel_get_non_msrt_mcs_alignment(brw, irb->mt, &x_align, &y_align);
+      intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align);
       x_align *= 16;
       y_align *= 32;
 
@@ -516,7 +516,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
          irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
          irb->need_downsample = true;
          fast_clear_buffers |= 1 << index;
-         get_fast_clear_rect(brw, fb, irb, &fast_clear_rect);
+         get_fast_clear_rect(fb, irb, &fast_clear_rect);
          break;
 
       case REP_CLEAR:
@@ -653,7 +653,7 @@ get_resolve_rect(struct brw_context *brw,
     * by 8 and 16 and 8 and 8 for SKL.
     */
 
-   intel_get_non_msrt_mcs_alignment(brw, mt, &x_align, &y_align);
+   intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align);
    if (brw->gen >= 9) {
       x_scaledown = x_align * 8;
       y_scaledown = y_align * 8;
index aa6df16..cbbb919 100644 (file)
@@ -202,7 +202,7 @@ setup_bounding_rect(GLuint prog, const struct blit_dims *dims)
 
 /**
  * Setup uniforms telling the destination width, height and the offset. These
- * are needed to unnoormalize the input coordinates and to correctly translate
+ * are needed to unnormalize the input coordinates and to correctly translate
  * between destination and source that may have differing offsets.
  */
 static void
index 2751152..7d17edb 100644 (file)
@@ -174,13 +174,17 @@ brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
    uint32_t tile_mask_x = 0, tile_mask_y = 0;
 
    if (depth_mt) {
-      intel_miptree_get_tile_masks(depth_mt, &tile_mask_x, &tile_mask_y, false);
+      intel_get_tile_masks(depth_mt->tiling, depth_mt->tr_mode,
+                           depth_mt->cpp, false,
+                           &tile_mask_x, &tile_mask_y);
 
       if (intel_miptree_level_has_hiz(depth_mt, depth_level)) {
          uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
-         intel_miptree_get_tile_masks(depth_mt->hiz_buf->mt,
-                                      &hiz_tile_mask_x, &hiz_tile_mask_y,
-                                      false);
+         intel_get_tile_masks(depth_mt->hiz_buf->mt->tiling,
+                              depth_mt->hiz_buf->mt->tr_mode,
+                              depth_mt->hiz_buf->mt->cpp,
+                              false, &hiz_tile_mask_x,
+                              &hiz_tile_mask_y);
 
          /* Each HiZ row represents 2 rows of pixels */
          hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
@@ -200,9 +204,11 @@ brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
          tile_mask_y |= 63;
       } else {
          uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
-         intel_miptree_get_tile_masks(stencil_mt,
-                                      &stencil_tile_mask_x,
-                                      &stencil_tile_mask_y, false);
+         intel_get_tile_masks(stencil_mt->tiling,
+                              stencil_mt->tr_mode,
+                              stencil_mt->cpp,
+                              false, &stencil_tile_mask_x,
+                              &stencil_tile_mask_y);
 
          tile_mask_x |= stencil_tile_mask_x;
          tile_mask_y |= stencil_tile_mask_y;
index 4c8602a..3585509 100644 (file)
@@ -61,6 +61,8 @@ nir_optimize(nir_shader *nir, bool is_scalar)
       nir_validate_shader(nir);
       progress |= nir_opt_constant_folding(nir);
       nir_validate_shader(nir);
+      progress |= nir_opt_dead_cf(nir);
+      nir_validate_shader(nir);
       progress |= nir_opt_remove_phis(nir);
       nir_validate_shader(nir);
       progress |= nir_opt_undef(nir);
@@ -110,11 +112,19 @@ brw_process_nir(nir_shader *nir,
                 gl_shader_stage stage, bool is_scalar)
 {
    bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
+   static const nir_lower_tex_options tex_options = {
+      .lower_txp = ~0,
+   };
+
+   if (stage == MESA_SHADER_GEOMETRY) {
+      nir_lower_gs_intrinsics(nir);
+      nir_validate_shader(nir);
+   }
 
    nir_lower_global_vars_to_local(nir);
    nir_validate_shader(nir);
 
-   nir_lower_tex_projector(nir);
+   nir_lower_tex(nir, &tex_options);
    nir_validate_shader(nir);
 
    nir_normalize_cubemap_coords(nir);
@@ -203,10 +213,13 @@ brw_process_nir(nir_shader *nir,
       nir_print_shader(nir, stderr);
    }
 
-   nir_convert_from_ssa(nir, is_scalar);
+   nir_convert_from_ssa(nir, true);
    nir_validate_shader(nir);
 
    if (!is_scalar) {
+      nir_move_vec_src_uses_to_dest(nir);
+      nir_validate_shader(nir);
+
       nir_lower_vec_to_movs(nir);
       nir_validate_shader(nir);
    }
index 7ee3cb6..a2aef8a 100644 (file)
@@ -193,6 +193,14 @@ brw_emit_depth_stall_flushes(struct brw_context *brw)
 {
    assert(brw->gen >= 6 && brw->gen <= 9);
 
+   /* Starting on BDW, these pipe controls are unnecessary.
+    *
+    *   WM HW will internally manage the draining pipe and flushing of the caches
+    *   when this command is issued. The PIPE_CONTROL restrictions are removed.
+    */
+   if (brw->gen >= 8)
+      return;
+
    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
index 5a54cd3..fa59338 100644 (file)
@@ -588,3 +588,22 @@ brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
       _mesa_print_program(prog);
    }
 }
+
+void
+brw_setup_tex_for_precompile(struct brw_context *brw,
+                             struct brw_sampler_prog_key_data *tex,
+                             struct gl_program *prog)
+{
+   const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
+   unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
+   for (unsigned i = 0; i < sampler_count; i++) {
+      if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
+         /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
+         tex->swizzles[i] =
+            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+      } else {
+         /* Color sampler: assume no swizzling. */
+         tex->swizzles[i] = SWIZZLE_XYZW;
+      }
+   }
+}
index eaa7e4e..72d68d8 100644 (file)
@@ -78,27 +78,9 @@ struct brw_sampler_prog_key_data {
 };
 
 
-struct brw_vue_prog_key {
-   unsigned program_string_id;
-
-   /**
-    * True if at least one clip flag is enabled, regardless of whether the
-    * shader uses clip planes or gl_ClipDistance.
-    */
-   bool userclip_active:1;
-
-   /**
-    * How many user clipping planes are being uploaded to the vertex shader as
-    * push constants.
-    */
-   unsigned nr_userclip_plane_consts:4;
-
-   struct brw_sampler_prog_key_data tex;
-};
-
 /** The program key for Vertex Shaders. */
 struct brw_vs_prog_key {
-   struct brw_vue_prog_key base;
+   unsigned program_string_id;
 
    /*
     * Per-attribute workaround flags
@@ -110,6 +92,15 @@ struct brw_vs_prog_key {
    bool clamp_vertex_color:1;
 
    /**
+    * How many user clipping planes are being uploaded to the vertex shader as
+    * push constants.
+    *
+    * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
+    * clip distances.
+    */
+   unsigned nr_userclip_plane_consts:4;
+
+   /**
     * For pre-Gen6 hardware, a bitfield indicating which texture coordinates
     * are going to be replaced with point coordinates (as a consequence of a
     * call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)).  Because
@@ -118,14 +109,16 @@ struct brw_vs_prog_key {
     * the VUE, even if they aren't written by the vertex shader.
     */
    uint8_t point_coord_replace;
+
+   struct brw_sampler_prog_key_data tex;
 };
 
 /** The program key for Geometry Shaders. */
 struct brw_gs_prog_key
 {
-   struct brw_vue_prog_key base;
+   unsigned program_string_id;
 
-   uint64_t input_varyings;
+   struct brw_sampler_prog_key_data tex;
 };
 
 /** The program key for Fragment/Pixel Shaders. */
@@ -159,6 +152,10 @@ struct brw_wm_prog_key {
 extern "C" {
 #endif
 
+void brw_setup_tex_for_precompile(struct brw_context *brw,
+                                  struct brw_sampler_prog_key_data *tex,
+                                  struct gl_program *prog);
+
 void brw_populate_sampler_prog_key_data(struct gl_context *ctx,
                                        const struct gl_program *prog,
                                         unsigned sampler_count,
index 31806f7..87e7e01 100644 (file)
@@ -70,7 +70,7 @@ struct brw_device_info;
 #define GEN7_MRF_HACK_START 112
 
 /** Number of message register file registers */
-#define BRW_MAX_MRF 16
+#define BRW_MAX_MRF(gen) (gen == 6 ? 24 : 16)
 
 #define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
 #define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
@@ -344,10 +344,12 @@ brw_reg(unsigned file,
    struct brw_reg reg;
    if (file == BRW_GENERAL_REGISTER_FILE)
       assert(nr < BRW_MAX_GRF);
-   else if (file == BRW_MESSAGE_REGISTER_FILE)
-      assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
    else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
       assert(nr <= BRW_ARF_TIMESTAMP);
+   /* Asserting on the MRF register number requires knowing the hardware gen
+    * (gen6 has 24 MRF registers), which we don't know here, so we assert
+    * for that in the generators and in brw_eu_emit.c instead.
+    */
 
    reg.type = type;
    reg.file = file;
@@ -808,7 +810,6 @@ brw_mask_reg(unsigned subnr)
 static inline struct brw_reg
 brw_message_reg(unsigned nr)
 {
-   assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
    return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
 }
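
A gen-aware version of the dropped checks, of the kind the comment above says now lives in the generators and in brw_eu_emit.c, could look like the following sketch (the helper name and placement are hypothetical; only the BRW_MAX_MRF(gen) macro and the compression bit come from the diff):

    /* Hypothetical sketch: re-check an MRF register number once the hardware
     * generation is known.  Mirrors the shape of the removed asserts.
     */
    static inline void
    validate_mrf_nr(const struct brw_device_info *devinfo, unsigned nr)
    {
       assert((nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
    }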
 
index 2021bb3..c2db5f6 100644 (file)
@@ -88,13 +88,11 @@ brw_emit_sampler_state(struct brw_context *brw,
                        unsigned min_lod,
                        unsigned max_lod,
                        int lod_bias,
-                       unsigned base_level,
                        unsigned shadow_function,
                        bool non_normalized_coordinates,
                        uint32_t border_color_offset)
 {
    ss[0] = BRW_SAMPLER_LOD_PRECLAMP_ENABLE |
-           SET_FIELD(base_level, BRW_SAMPLER_BASE_MIPLEVEL) |
            SET_FIELD(mip_filter, BRW_SAMPLER_MIP_FILTER) |
            SET_FIELD(mag_filter, BRW_SAMPLER_MAG_FILTER) |
            SET_FIELD(min_filter, BRW_SAMPLER_MIN_FILTER);
@@ -491,7 +489,6 @@ brw_update_sampler_state(struct brw_context *brw,
    const unsigned max_lod = U_FIXED(CLAMP(sampler->MaxLod, 0, 13), lod_bits);
    const int lod_bias =
       S_FIXED(CLAMP(tex_unit_lod_bias + sampler->LodBias, -16, 15), lod_bits);
-   const unsigned base_level = U_FIXED(0, 1);
 
    /* Upload the border color if necessary.  If not, just point it at
     * offset 0 (the start of the batch) - the color should be ignored,
@@ -515,7 +512,7 @@ brw_update_sampler_state(struct brw_context *brw,
                           max_anisotropy,
                           address_rounding,
                           wrap_s, wrap_t, wrap_r,
-                          min_lod, max_lod, lod_bias, base_level,
+                          min_lod, max_lod, lod_bias,
                           shadow_function,
                           non_normalized_coords,
                           border_color_offset);
index b49961f..4e43e5c 100644 (file)
@@ -762,7 +762,7 @@ fs_instruction_scheduler::calculate_deps()
     * GRF registers.
     */
    schedule_node *last_grf_write[grf_count * 16];
-   schedule_node *last_mrf_write[BRW_MAX_MRF];
+   schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)];
    schedule_node *last_conditional_mod[2] = { NULL, NULL };
    schedule_node *last_accumulator_write = NULL;
    /* Fixed HW registers are assumed to be separate from the virtual
@@ -1035,7 +1035,7 @@ void
 vec4_instruction_scheduler::calculate_deps()
 {
    schedule_node *last_grf_write[grf_count];
-   schedule_node *last_mrf_write[BRW_MAX_MRF];
+   schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)];
    schedule_node *last_conditional_mod = NULL;
    schedule_node *last_accumulator_write = NULL;
    /* Fixed HW registers are assumed to be separate from the virtual
index 4ef2777..1060d93 100644 (file)
@@ -98,6 +98,15 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
    nir_options->lower_sub = true;
    nir_options->lower_fdiv = true;
 
+   /* In the vec4 backend, our dpN instruction replicates its result to all
+    * the components of a vec4.  We would like NIR to give us replicated fdot
+    * instructions because it can optimize better for us.
+    *
+    * For the FS backend, it should be lowered away by the scalarizing pass so
+    * we should never see fdot anyway.
+    */
+   nir_options->fdot_replicates = true;
+
    /* We want the GLSL compiler to emit code that uses condition codes */
    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
       compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
@@ -108,41 +117,35 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
       compiler->glsl_compiler_options[i].EmitNoNoise = true;
       compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
       compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
-      compiler->glsl_compiler_options[i].EmitNoIndirectOutput =
-        (i == MESA_SHADER_FRAGMENT);
-      compiler->glsl_compiler_options[i].EmitNoIndirectTemp =
-        (i == MESA_SHADER_FRAGMENT);
       compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
       compiler->glsl_compiler_options[i].LowerClipDistance = true;
 
+      bool is_scalar;
+      switch (i) {
+      case MESA_SHADER_FRAGMENT:
+      case MESA_SHADER_COMPUTE:
+         is_scalar = true;
+         break;
+      case MESA_SHADER_VERTEX:
+         is_scalar = compiler->scalar_vs;
+         break;
+      default:
+         is_scalar = false;
+         break;
+      }
+
+      compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
+      compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
+      compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;
+
       /* !ARB_gpu_shader5 */
       if (devinfo->gen < 7)
          compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
-   }
 
-   compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true;
-   compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
-
-   if (compiler->scalar_vs || brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
-      if (compiler->scalar_vs) {
-         /* If we're using the scalar backend for vertex shaders, we need to
-          * configure these accordingly.
-          */
-         compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
-         compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
-         compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false;
-      }
-
-      compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options;
-   }
-
-   if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
-      compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].NirOptions = nir_options;
+      if (is_scalar || brw_env_var_as_boolean("INTEL_USE_NIR", true))
+         compiler->glsl_compiler_options[i].NirOptions = nir_options;
    }
 
-   compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions = nir_options;
-   compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions = nir_options;
-
    return compiler;
 }
 
@@ -196,6 +199,7 @@ is_scalar_shader_stage(struct brw_context *brw, int stage)
 {
    switch (stage) {
    case MESA_SHADER_FRAGMENT:
+   case MESA_SHADER_COMPUTE:
       return true;
    case MESA_SHADER_VERTEX:
       return brw->intelScreen->compiler->scalar_vs;
@@ -323,9 +327,6 @@ process_glsl_ir(gl_shader_stage stage,
                                         options, ctx->Const.NativeIntegers) || progress;
    } while (progress);
 
-   if (options->NirOptions != NULL)
-      lower_output_reads(stage, shader->ir);
-
    validate_ir_tree(shader->ir);
 
    /* Now that we've finished altering the linked IR, reparent any live IR back
@@ -623,6 +624,8 @@ brw_instruction_name(enum opcode op)
       return "tg4_offset";
    case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
       return "tg4_offset_logical";
+   case SHADER_OPCODE_SAMPLEINFO:
+      return "sampleinfo";
 
    case SHADER_OPCODE_SHADER_TIME_ADD:
       return "shader_time_add";
@@ -697,6 +700,9 @@ brw_instruction_name(enum opcode op)
    case FS_OPCODE_PIXEL_Y:
       return "pixel_y";
 
+   case FS_OPCODE_GET_BUFFER_SIZE:
+      return "fs_get_buffer_size";
+
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
       return "uniform_pull_const";
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
@@ -745,6 +751,9 @@ brw_instruction_name(enum opcode op)
    case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
       return "set_simd4x2_header_gen9";
 
+   case VS_OPCODE_GET_BUFFER_SIZE:
+      return "vs_get_buffer_size";
+
    case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
       return "unpack_flags_simd4x2";
 
index 78a1f87..3b7a433 100644 (file)
@@ -99,6 +99,7 @@ extern const struct brw_tracked_state brw_indices;
 extern const struct brw_tracked_state brw_vertices;
 extern const struct brw_tracked_state brw_index_buffer;
 extern const struct brw_tracked_state brw_cs_state;
+extern const struct brw_tracked_state gen7_cs_push_constants;
 extern const struct brw_tracked_state gen6_binding_table_pointers;
 extern const struct brw_tracked_state gen6_blend_state;
 extern const struct brw_tracked_state gen6_cc_state_pointers;
@@ -157,6 +158,7 @@ extern const struct brw_tracked_state gen8_sf_clip_viewport;
 extern const struct brw_tracked_state gen8_vertices;
 extern const struct brw_tracked_state gen8_vf_topology;
 extern const struct brw_tracked_state gen8_vs_state;
+extern const struct brw_tracked_state brw_cs_work_groups_surface;
 
 static inline bool
 brw_state_dirty(struct brw_context *brw, GLuint mesa_flags, uint64_t brw_flags)
@@ -177,10 +179,6 @@ void brw_upload_invariant_state(struct brw_context *brw);
 uint32_t
 brw_depthbuffer_format(struct brw_context *brw);
 
-/* gen8_misc_state.c */
-void gen8_upload_state_base_address(struct brw_context *brw);
-
-
 /***********************************************************************
  * brw_state.c
  */
@@ -315,7 +313,6 @@ void brw_emit_sampler_state(struct brw_context *brw,
                             unsigned min_lod,
                             unsigned max_lod,
                             int lod_bias,
-                            unsigned base_level,
                             unsigned shadow_function,
                             bool non_normalized_coordinates,
                             uint32_t border_color_offset);
@@ -355,7 +352,7 @@ void gen6_init_vtable_surface_functions(struct brw_context *brw);
 /* brw_vs_surface_state.c */
 void
 brw_upload_pull_constants(struct brw_context *brw,
-                          GLbitfield brw_new_constbuf,
+                          GLbitfield64 brw_new_constbuf,
                           const struct gl_program *prog,
                           struct brw_stage_state *stage_state,
                           const struct brw_stage_prog_data *prog_data,
@@ -377,7 +374,6 @@ void gen7_update_binding_table_from_array(struct brw_context *brw,
                                           gl_shader_stage stage,
                                           const uint32_t* binding_table,
                                           int num_surfaces);
-void gen7_enable_hw_binding_tables(struct brw_context *brw);
 void gen7_disable_hw_binding_tables(struct brw_context *brw);
 void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
 
index b6f4d59..0c974c4 100644 (file)
@@ -376,13 +376,13 @@ dump_sdc(struct brw_context *brw, uint32_t offset)
 static void dump_sampler_state(struct brw_context *brw,
                               uint32_t offset, uint32_t size)
 {
-   int i;
+   unsigned i;
    uint32_t *samp = brw->batch.bo->virtual + offset;
 
    for (i = 0; i < size / 16; i++) {
       char name[20];
 
-      sprintf(name, "WM SAMP%d", i);
+      sprintf(name, "WM SAMP%u", i);
       batch_out(brw, name, offset, 0, "filtering\n");
       batch_out(brw, name, offset, 1, "wrapping, lod\n");
       batch_out(brw, name, offset, 2, "default color pointer\n");
index 9de42ce..46687e3 100644 (file)
@@ -258,7 +258,10 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
 {
    &brw_state_base_address,
    &brw_cs_image_surfaces,
+   &gen7_cs_push_constants,
    &brw_cs_abo_surfaces,
+   &brw_texture_surfaces,
+   &brw_cs_work_groups_surface,
    &brw_cs_state,
 };
 
@@ -348,7 +351,10 @@ static const struct brw_tracked_state *gen8_compute_atoms[] =
 {
    &gen8_state_base_address,
    &brw_cs_image_surfaces,
+   &gen7_cs_push_constants,
    &brw_cs_abo_surfaces,
+   &brw_texture_surfaces,
+   &brw_cs_work_groups_surface,
    &brw_cs_state,
 };
 
@@ -480,6 +486,7 @@ void brw_init_state( struct brw_context *brw )
    ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
    ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
    ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
+   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
    ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
    ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
    ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
@@ -589,7 +596,6 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
    DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
    DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
-   DEFINE_BIT(BRW_NEW_VUE_MAP_VS),
    DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
    DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
    DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
@@ -609,6 +615,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
    DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
    DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
+   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
    {0, 0, 0}
 };
 
@@ -644,6 +651,21 @@ brw_upload_programs(struct brw_context *brw,
       else
          brw_upload_gs_prog(brw);
 
+      /* Update the VUE map for data exiting the GS stage of the pipeline.
+       * This comes from the last enabled shader stage.
+       */
+      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
+      bool old_separate = brw->vue_map_geom_out.separate;
+      if (brw->geometry_program)
+         brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
+      else
+         brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
+
+      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
+      if (old_slots != brw->vue_map_geom_out.slots_valid ||
+          old_separate != brw->vue_map_geom_out.separate)
+         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
+
       brw_upload_wm_prog(brw);
    } else if (pipeline == BRW_COMPUTE_PIPELINE) {
       brw_upload_cs_prog(brw);
@@ -795,7 +817,7 @@ brw_pipeline_state_finished(struct brw_context *brw,
                             enum brw_pipeline pipeline)
 {
    /* Save all dirty state into the other pipelines */
-   for (int i = 0; i < BRW_NUM_PIPELINES; i++) {
+   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
       if (i != pipeline) {
          brw->state.pipelines[i].mesa |= brw->NewGLState;
          brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
index 5e528b5..1d62f2f 100644 (file)
@@ -61,6 +61,8 @@ src_reg::src_reg(register_file file, int reg, const glsl_type *type)
       this->swizzle = brw_swizzle_for_size(type->vector_elements);
    else
       this->swizzle = BRW_SWIZZLE_XYZW;
+   if (type)
+      this->type = brw_type_for_base_type(type);
 }
 
 /** Generic unset register constructor. */
@@ -329,6 +331,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case SHADER_OPCODE_TXS:
    case SHADER_OPCODE_TG4:
    case SHADER_OPCODE_TG4_OFFSET:
+   case SHADER_OPCODE_SAMPLEINFO:
+   case VS_OPCODE_GET_BUFFER_SIZE:
       return inst->header_size;
    default:
       unreachable("not reached");
@@ -938,10 +942,18 @@ vec4_visitor::opt_set_dependency_control()
 }
 
 bool
-vec4_instruction::can_reswizzle(int dst_writemask,
+vec4_instruction::can_reswizzle(const struct brw_device_info *devinfo,
+                                int dst_writemask,
                                 int swizzle,
                                 int swizzle_mask)
 {
+   /* Gen6 MATH instructions cannot execute in align16 mode, so swizzling
+    * and writemasking are not allowed.
+    */
+   if (devinfo->gen == 6 && is_math() &&
+       (swizzle != BRW_SWIZZLE_XYZW || dst_writemask != WRITEMASK_XYZW))
+      return false;
+
    /* If this instruction sets anything not referenced by swizzle, then we'd
     * totally break it when we reswizzle.
     */
@@ -951,6 +963,14 @@ vec4_instruction::can_reswizzle(int dst_writemask,
    if (mlen > 0)
       return false;
 
+   /* We can't use swizzles on the accumulator, and that's really the only
+    * HW_REG we would care to reswizzle, so just disallow them all.
+    */
+   for (int i = 0; i < 3; i++) {
+      if (src[i].file == HW_REG)
+         return false;
+   }
+
    return true;
 }
 
@@ -1010,6 +1030,28 @@ vec4_visitor::opt_register_coalesce()
          inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
         continue;
 
+      /* Remove no-op MOVs */
+      if (inst->dst.file == inst->src[0].file &&
+          inst->dst.reg == inst->src[0].reg &&
+          inst->dst.reg_offset == inst->src[0].reg_offset) {
+         bool is_nop_mov = true;
+
+         for (unsigned c = 0; c < 4; c++) {
+            if ((inst->dst.writemask & (1 << c)) == 0)
+               continue;
+
+            if (BRW_GET_SWZ(inst->src[0].swizzle, c) != c) {
+               is_nop_mov = false;
+               break;
+            }
+         }
+
+         if (is_nop_mov) {
+            inst->remove(block);
+            continue;
+         }
+      }
+
       bool to_mrf = (inst->dst.file == MRF);
 
       /* Can't coalesce this GRF if someone else was going to
@@ -1054,8 +1096,19 @@ vec4_visitor::opt_register_coalesce()
                }
             }
 
+            /* We can't handle saturation on the instruction we want to
+             * coalesce away if the register types do not match.  But if
+             * scan_inst is a non-type-converting 'mov', we can fix the
+             * types later.
+             */
+            if (inst->saturate &&
+                inst->dst.type != scan_inst->dst.type &&
+                !(scan_inst->opcode == BRW_OPCODE_MOV &&
+                  scan_inst->dst.type == scan_inst->src[0].type))
+               break;
+
             /* If we can't handle the swizzle, bail. */
-            if (!scan_inst->can_reswizzle(inst->dst.writemask,
+            if (!scan_inst->can_reswizzle(devinfo, inst->dst.writemask,
                                           inst->src[0].swizzle,
                                           chans_needed)) {
                break;
@@ -1087,11 +1140,13 @@ vec4_visitor::opt_register_coalesce()
         if (interfered)
            break;
 
-         /* If somebody else writes our destination here, we can't coalesce
-          * before that.
+         /* If somebody else writes the same channels of our destination here,
+          * we can't coalesce before that.
           */
-         if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written))
-           break;
+         if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
+             (inst->dst.writemask & scan_inst->dst.writemask) != 0) {
+            break;
+         }
 
          /* Check for reads of the register we're trying to coalesce into.  We
           * can't go rewriting instructions above that to put some other value
@@ -1129,6 +1184,16 @@ vec4_visitor::opt_register_coalesce()
               scan_inst->dst.file = inst->dst.file;
               scan_inst->dst.reg = inst->dst.reg;
               scan_inst->dst.reg_offset = inst->dst.reg_offset;
+               if (inst->saturate &&
+                   inst->dst.type != scan_inst->dst.type) {
+                  /* If we have reached this point, scan_inst is a non
+                   * type-converting 'mov' and we can modify its register types
+                   * to match the ones in inst. Otherwise, we could have an
+                   * incorrect saturation result.
+                   */
+                  scan_inst->dst.type = inst->dst.type;
+                  scan_inst->src[0].type = inst->src[0].type;
+               }
               scan_inst->saturate |= inst->saturate;
            }
            scan_inst = (vec4_instruction *)scan_inst->next;
@@ -1719,7 +1784,7 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
 }
 
 bool
-vec4_visitor::run(gl_clip_plane *clip_planes)
+vec4_visitor::run()
 {
    bool use_vec4_nir =
       compiler->glsl_compiler_options[stage].NirOptions != NULL;
@@ -1748,9 +1813,6 @@ vec4_visitor::run(gl_clip_plane *clip_planes)
    }
    base_ir = NULL;
 
-   if (key->userclip_active && !prog->UsesClipDistanceOut)
-      setup_uniform_clipplane_values(clip_planes);
-
    emit_thread_end();
 
    calculate_cfg();
@@ -1834,7 +1896,7 @@ vec4_visitor::run(gl_clip_plane *clip_planes)
 
    setup_payload();
 
-   if (false) {
+   if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) {
       /* Debug of register spilling: Go spill everything. */
       const int grf_count = alloc.count;
       float spill_costs[alloc.count];
@@ -1899,16 +1961,8 @@ brw_vs_emit(struct brw_context *brw,
             struct gl_shader_program *prog,
             unsigned *final_assembly_size)
 {
-   bool start_busy = false;
-   double start_time = 0;
    const unsigned *assembly = NULL;
 
-   if (unlikely(brw->perf_debug)) {
-      start_busy = (brw->batch.last_bo &&
-                    drm_intel_bo_busy(brw->batch.last_bo));
-      start_time = get_time();
-   }
-
    struct brw_shader *shader = NULL;
    if (prog)
       shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
@@ -1977,9 +2031,10 @@ brw_vs_emit(struct brw_context *brw,
       prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
 
       vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
-                        vp, prog, mem_ctx, st_index,
+                        vp, prog, brw_select_clip_planes(&brw->ctx),
+                        mem_ctx, st_index,
                         !_mesa_is_gles3(&brw->ctx));
-      if (!v.run(brw_select_clip_planes(&brw->ctx))) {
+      if (!v.run()) {
          if (prog) {
             prog->LinkStatus = false;
             ralloc_strcat(&prog->InfoLog, v.fail_msg);
@@ -1997,30 +2052,7 @@ brw_vs_emit(struct brw_context *brw,
       assembly = g.generate_assembly(v.cfg, final_assembly_size);
    }
 
-   if (unlikely(brw->perf_debug) && shader) {
-      if (shader->compiled_once) {
-         brw_vs_debug_recompile(brw, prog, key);
-      }
-      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
-         perf_debug("VS compile took %.03f ms and stalled the GPU\n",
-                    (get_time() - start_time) * 1000);
-      }
-      shader->compiled_once = true;
-   }
-
    return assembly;
 }
 
-
-void
-brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
-                                      struct brw_vue_prog_key *key,
-                                      GLuint id, struct gl_program *prog)
-{
-   struct brw_context *brw = brw_context(ctx);
-   key->program_string_id = id;
-
-   brw_setup_tex_for_precompile(brw, &key->tex, prog);
-}
-
 } /* extern "C" */
index 673a29e..ac9bd4a 100644 (file)
@@ -52,11 +52,6 @@ extern "C" {
 extern "C" {
 #endif
 
-void
-brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
-                                      struct brw_vue_prog_key *key,
-                                      GLuint id, struct gl_program *prog);
-
 #ifdef __cplusplus
 } /* extern "C" */
 
@@ -76,7 +71,7 @@ public:
    vec4_visitor(const struct brw_compiler *compiler,
                 void *log_data,
                 struct gl_program *prog,
-                const struct brw_vue_prog_key *key,
+                const struct brw_sampler_prog_key_data *key,
                 struct brw_vue_prog_data *prog_data,
                struct gl_shader_program *shader_prog,
                 gl_shader_stage stage,
@@ -100,7 +95,7 @@ public:
       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
    }
 
-   const struct brw_vue_prog_key * const key;
+   const struct brw_sampler_prog_key_data * const key_tex;
    struct brw_vue_prog_data * const prog_data;
    unsigned int sanity_param_count;
 
@@ -173,10 +168,9 @@ public:
 
    struct hash_table *variable_ht;
 
-   bool run(gl_clip_plane *clip_planes);
+   bool run();
    void fail(const char *msg, ...);
 
-   void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
    virtual void setup_vec4_uniform_value(unsigned param_offset,
                                          const gl_constant_value *values,
                                          unsigned n);
@@ -359,9 +353,8 @@ public:
 
    void emit_ndc_computation();
    void emit_psiz_and_flags(dst_reg reg);
-   void emit_clip_distances(dst_reg reg, int offset);
    vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
-   void emit_urb_slot(dst_reg reg, int varying);
+   virtual void emit_urb_slot(dst_reg reg, int varying);
 
    void emit_shader_time_begin();
    void emit_shader_time_end();
@@ -430,6 +423,8 @@ public:
    virtual void nir_emit_alu(nir_alu_instr *instr);
    virtual void nir_emit_jump(nir_jump_instr *instr);
    virtual void nir_emit_texture(nir_tex_instr *instr);
+   virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
+   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
 
    dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
    dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
@@ -566,6 +561,12 @@ private:
                                          struct brw_reg offset);
    void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
                                          struct brw_reg dst);
+
+   void generate_get_buffer_size(vec4_instruction *inst,
+                                 struct brw_reg dst,
+                                 struct brw_reg src,
+                                 struct brw_reg index);
+
    void generate_unpack_flags(struct brw_reg dst);
 
    const struct brw_compiler *compiler;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
new file mode 100644 (file)
index 0000000..a90cadb
--- /dev/null
@@ -0,0 +1,602 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_BUILDER_H
+#define BRW_VEC4_BUILDER_H
+
+#include "brw_ir_vec4.h"
+#include "brw_ir_allocator.h"
+#include "brw_context.h"
+
+namespace brw {
+   /**
+    * Toolbox to assemble a VEC4 IR program out of individual instructions.
+    *
+    * This object is meant to have an interface consistent with
+    * brw::fs_builder.  They cannot be fully interchangeable because
+    * brw::fs_builder generates scalar code while brw::vec4_builder generates
+    * vector code.
+    */
+   class vec4_builder {
+   public:
+      /** Type used in this IR to represent a source of an instruction. */
+      typedef brw::src_reg src_reg;
+
+      /** Type used in this IR to represent the destination of an instruction. */
+      typedef brw::dst_reg dst_reg;
+
+      /** Type used in this IR to represent an instruction. */
+      typedef vec4_instruction instruction;
+
+      /**
+       * Construct a vec4_builder that inserts instructions into \p shader.
+       */
+      vec4_builder(backend_shader *shader) :
+         shader(shader), block(NULL), cursor(NULL),
+         force_writemask_all(false),
+         annotation()
+      {
+      }
+
+      /**
+       * Construct a vec4_builder that inserts instructions into \p shader
+       * before instruction \p inst in basic block \p block.  The default
+       * execution controls and debug annotation are initialized from the
+       * instruction passed as argument.
+       */
+      vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
+         shader(shader), block(block), cursor(inst)
+      {
+         annotation.str = inst->annotation;
+         annotation.ir = inst->ir;
+      }
+
+      /**
+       * Construct a vec4_builder that inserts instructions before \p cursor
+       * in basic block \p block, inheriting other code generation parameters
+       * from this.
+       */
+      vec4_builder
+      at(bblock_t *block, exec_node *cursor) const
+      {
+         vec4_builder bld = *this;
+         bld.block = block;
+         bld.cursor = cursor;
+         return bld;
+      }
+
+      /**
+       * Construct a vec4_builder appending instructions at the end of the
+       * instruction list of the shader, inheriting other code generation
+       * parameters from this.
+       */
+      vec4_builder
+      at_end() const
+      {
+         return at(NULL, (exec_node *)&shader->instructions.tail);
+      }
+
+      /**
+       * Construct a builder with per-channel control flow execution masking
+       * disabled if \p b is true.  If control flow execution masking is
+       * already disabled this has no effect.
+       */
+      vec4_builder
+      exec_all(bool b = true) const
+      {
+         vec4_builder bld = *this;
+         if (b)
+            bld.force_writemask_all = true;
+         return bld;
+      }
+
+      /**
+       * Construct a builder with the given debug annotation info.
+       */
+      vec4_builder
+      annotate(const char *str, const void *ir = NULL) const
+      {
+         vec4_builder bld = *this;
+         bld.annotation.str = str;
+         bld.annotation.ir = ir;
+         return bld;
+      }
+
+      /**
+       * Get the SIMD width in use.
+       */
+      unsigned
+      dispatch_width() const
+      {
+         return 8;
+      }
+
+      /**
+       * Allocate a virtual register of natural vector size (four for this IR)
+       * and SIMD width.  \p n gives the amount of space to allocate in
+       * dispatch_width units (which is just enough space for four logical
+       * components in this IR).
+       */
+      dst_reg
+      vgrf(enum brw_reg_type type, unsigned n = 1) const
+      {
+         assert(dispatch_width() <= 32);
+
+         if (n > 0)
+            return retype(dst_reg(GRF, shader->alloc.allocate(
+                                     n * DIV_ROUND_UP(type_sz(type), 4))),
+                           type);
+         else
+            return retype(null_reg_ud(), type);
+      }
+
+      /**
+       * Create a null register of floating type.
+       */
+      dst_reg
+      null_reg_f() const
+      {
+         return dst_reg(retype(brw_null_vec(dispatch_width()),
+                               BRW_REGISTER_TYPE_F));
+      }
+
+      /**
+       * Create a null register of signed integer type.
+       */
+      dst_reg
+      null_reg_d() const
+      {
+         return dst_reg(retype(brw_null_vec(dispatch_width()),
+                               BRW_REGISTER_TYPE_D));
+      }
+
+      /**
+       * Create a null register of unsigned integer type.
+       */
+      dst_reg
+      null_reg_ud() const
+      {
+         return dst_reg(retype(brw_null_vec(dispatch_width()),
+                               BRW_REGISTER_TYPE_UD));
+      }
+
+      /**
+       * Insert an instruction into the program.
+       */
+      instruction *
+      emit(const instruction &inst) const
+      {
+         return emit(new(shader->mem_ctx) instruction(inst));
+      }
+
+      /**
+       * Create and insert a nullary control instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode) const
+      {
+         return emit(instruction(opcode));
+      }
+
+      /**
+       * Create and insert a nullary instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst) const
+      {
+         return emit(instruction(opcode, dst));
+      }
+
+      /**
+       * Create and insert a unary instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
+      {
+         switch (opcode) {
+         case SHADER_OPCODE_RCP:
+         case SHADER_OPCODE_RSQ:
+         case SHADER_OPCODE_SQRT:
+         case SHADER_OPCODE_EXP2:
+         case SHADER_OPCODE_LOG2:
+         case SHADER_OPCODE_SIN:
+         case SHADER_OPCODE_COS:
+            return fix_math_instruction(
+               emit(instruction(opcode, dst,
+                                fix_math_operand(src0))));
+
+         default:
+            return emit(instruction(opcode, dst, src0));
+         }
+      }
+
+      /**
+       * Create and insert a binary instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+           const src_reg &src1) const
+      {
+         switch (opcode) {
+         case SHADER_OPCODE_POW:
+         case SHADER_OPCODE_INT_QUOTIENT:
+         case SHADER_OPCODE_INT_REMAINDER:
+            return fix_math_instruction(
+               emit(instruction(opcode, dst,
+                                fix_math_operand(src0),
+                                fix_math_operand(src1))));
+
+         default:
+            return emit(instruction(opcode, dst, src0, src1));
+         }
+      }
+
+      /**
+       * Create and insert a ternary instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+           const src_reg &src1, const src_reg &src2) const
+      {
+         switch (opcode) {
+         case BRW_OPCODE_BFE:
+         case BRW_OPCODE_BFI2:
+         case BRW_OPCODE_MAD:
+         case BRW_OPCODE_LRP:
+            return emit(instruction(opcode, dst,
+                                    fix_3src_operand(src0),
+                                    fix_3src_operand(src1),
+                                    fix_3src_operand(src2)));
+
+         default:
+            return emit(instruction(opcode, dst, src0, src1, src2));
+         }
+      }
+
+      /**
+       * Insert a preallocated instruction into the program.
+       */
+      instruction *
+      emit(instruction *inst) const
+      {
+         inst->force_writemask_all = force_writemask_all;
+         inst->annotation = annotation.str;
+         inst->ir = annotation.ir;
+
+         if (block)
+            static_cast<instruction *>(cursor)->insert_before(block, inst);
+         else
+            cursor->insert_before(inst);
+
+         return inst;
+      }
+
+      /**
+       * Select \p src0 if the comparison of both sources with the given
+       * conditional mod evaluates to true, otherwise select \p src1.
+       *
+       * Generally useful to get the minimum or maximum of two values.
+       */
+      void
+      emit_minmax(const dst_reg &dst, const src_reg &src0,
+                  const src_reg &src1, brw_conditional_mod mod) const
+      {
+         if (shader->devinfo->gen >= 6) {
+            set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
+                                 fix_unsigned_negate(src1)));
+         } else {
+            CMP(null_reg_d(), src0, src1, mod);
+            set_predicate(BRW_PREDICATE_NORMAL,
+                          SEL(dst, src0, src1));
+         }
+      }
+
+      /**
+       * Copy any live channel from \p src to the first channel of the result.
+       */
+      src_reg
+      emit_uniformize(const src_reg &src) const
+      {
+         const vec4_builder ubld = exec_all();
+         const dst_reg chan_index =
+            writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
+         const dst_reg dst = vgrf(src.type);
+
+         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
+         ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index));
+
+         return src_reg(dst);
+      }
+
+      /**
+       * Assorted arithmetic ops.
+       * @{
+       */
+#define ALU1(op)                                        \
+      instruction *                                     \
+      op(const dst_reg &dst, const src_reg &src0) const \
+      {                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0);       \
+      }
+
+#define ALU2(op)                                                        \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
+      {                                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
+      }
+
+#define ALU2_ACC(op)                                                    \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
+      {                                                                 \
+         instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
+         inst->writes_accumulator = true;                               \
+         return inst;                                                   \
+      }
+
+#define ALU3(op)                                                        \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
+         const src_reg &src2) const                                     \
+      {                                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
+      }
+
+      ALU2(ADD)
+      ALU2_ACC(ADDC)
+      ALU2(AND)
+      ALU2(ASR)
+      ALU2(AVG)
+      ALU3(BFE)
+      ALU2(BFI1)
+      ALU3(BFI2)
+      ALU1(BFREV)
+      ALU1(CBIT)
+      ALU2(CMPN)
+      ALU3(CSEL)
+      ALU2(DP2)
+      ALU2(DP3)
+      ALU2(DP4)
+      ALU2(DPH)
+      ALU1(F16TO32)
+      ALU1(F32TO16)
+      ALU1(FBH)
+      ALU1(FBL)
+      ALU1(FRC)
+      ALU2(LINE)
+      ALU1(LZD)
+      ALU2(MAC)
+      ALU2_ACC(MACH)
+      ALU3(MAD)
+      ALU1(MOV)
+      ALU2(MUL)
+      ALU1(NOT)
+      ALU2(OR)
+      ALU2(PLN)
+      ALU1(RNDD)
+      ALU1(RNDE)
+      ALU1(RNDU)
+      ALU1(RNDZ)
+      ALU2(SAD2)
+      ALU2_ACC(SADA2)
+      ALU2(SEL)
+      ALU2(SHL)
+      ALU2(SHR)
+      ALU2_ACC(SUBB)
+      ALU2(XOR)
+
+#undef ALU3
+#undef ALU2_ACC
+#undef ALU2
+#undef ALU1
+      /** @} */
+
+      /**
+       * CMP: Sets the low bit of the destination channels with the result
+       * of the comparison, while the upper bits are undefined, and updates
+       * the flag register with the packed 16 bits of the result.
+       */
+      instruction *
+      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
+          brw_conditional_mod condition) const
+      {
+         /* Take the instruction:
+          *
+          * CMP null<d> src0<f> src1<f>
+          *
+          * Original gen4 does type conversion to the destination type
+          * before comparison, producing garbage results for floating
+          * point comparisons.
+          *
+          * The destination type doesn't matter on newer generations,
+          * so we set the type to match src0 so we can compact the
+          * instruction.
+          */
+         return set_condmod(condition,
+                            emit(BRW_OPCODE_CMP, retype(dst, src0.type),
+                                 fix_unsigned_negate(src0),
+                                 fix_unsigned_negate(src1)));
+      }
+
+      /**
+       * Gen4 predicated IF.
+       */
+      instruction *
+      IF(brw_predicate predicate) const
+      {
+         return set_predicate(predicate, emit(BRW_OPCODE_IF));
+      }
+
+      /**
+       * Gen6 IF with embedded comparison.
+       */
+      instruction *
+      IF(const src_reg &src0, const src_reg &src1,
+         brw_conditional_mod condition) const
+      {
+         assert(shader->devinfo->gen == 6);
+         return set_condmod(condition,
+                            emit(BRW_OPCODE_IF,
+                                 null_reg_d(),
+                                 fix_unsigned_negate(src0),
+                                 fix_unsigned_negate(src1)));
+      }
+
+      /**
+       * Emit a linear interpolation instruction.
+       */
+      instruction *
+      LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
+          const src_reg &a) const
+      {
+         if (shader->devinfo->gen >= 6) {
+            /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
+             * we need to reorder the operands.
+             */
+            return emit(BRW_OPCODE_LRP, dst, a, y, x);
+
+         } else {
+            /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
+            const dst_reg y_times_a = vgrf(dst.type);
+            const dst_reg one_minus_a = vgrf(dst.type);
+            const dst_reg x_times_one_minus_a = vgrf(dst.type);
+
+            MUL(y_times_a, y, a);
+            ADD(one_minus_a, negate(a), src_reg(1.0f));
+            MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
+            return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
+         }
+      }
+
+      backend_shader *shader;
+
+   protected:
+      /**
+       * Workaround for negation of UD registers.  See comment in
+       * fs_generator::generate_code() for the details.
+       */
+      src_reg
+      fix_unsigned_negate(const src_reg &src) const
+      {
+         if (src.type == BRW_REGISTER_TYPE_UD && src.negate) {
+            dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
+            MOV(temp, src);
+            return src_reg(temp);
+         } else {
+            return src;
+         }
+      }
+
+      /**
+       * Workaround for register access modes not supported by the ternary
+       * instruction encoding.
+       */
+      src_reg
+      fix_3src_operand(const src_reg &src) const
+      {
+         /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
+          * able to use vertical stride of zero to replicate the vec4 uniform, like
+          *
+          *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
+          *
+          * But you can't, since vertical stride is always four in three-source
+          * instructions. Instead, insert a MOV instruction to do the replication so
+          * that the three-source instruction can consume it.
+          */
+
+         /* The MOV is only needed if the source is a uniform or immediate. */
+         if (src.file != UNIFORM && src.file != IMM)
+            return src;
+
+         if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
+            return src;
+
+         const dst_reg expanded = vgrf(src.type);
+         emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
+         return src_reg(expanded);
+      }
+
+      /**
+       * Workaround for register access modes not supported by the math
+       * instruction.
+       */
+      src_reg
+      fix_math_operand(const src_reg &src) const
+      {
+         /* The gen6 math instruction ignores the source modifiers --
+          * swizzle, abs, negate, and at least some parts of the register
+          * region description.
+          *
+          * Rather than trying to enumerate all these cases, *always* expand the
+          * operand to a temp GRF for gen6.
+          *
+          * For gen7, keep the operand as-is, except if immediate, which gen7 still
+          * can't use.
+          */
+         if (shader->devinfo->gen == 6 ||
+             (shader->devinfo->gen == 7 && src.file == IMM)) {
+            const dst_reg tmp = vgrf(src.type);
+            MOV(tmp, src);
+            return src_reg(tmp);
+         } else {
+            return src;
+         }
+      }
+
+      /**
+       * Workaround other weirdness of the math instruction.
+       */
+      instruction *
+      fix_math_instruction(instruction *inst) const
+      {
+         if (shader->devinfo->gen == 6 &&
+             inst->dst.writemask != WRITEMASK_XYZW) {
+            const dst_reg tmp = vgrf(inst->dst.type);
+            MOV(inst->dst, src_reg(tmp));
+            inst->dst = tmp;
+
+         } else if (shader->devinfo->gen < 6) {
+            const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2);
+            inst->base_mrf = 1;
+            inst->mlen = sources;
+         }
+
+         return inst;
+      }
+
+      bblock_t *block;
+      exec_node *cursor;
+
+      bool force_writemask_all;
+
+      /** Debug annotation info. */
+      struct {
+         const char *str;
+         const void *ir;
+      } annotation;
+   };
+}
+
+#endif
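As a sanity check on the operand order used by the lrp() helper above, a scalar sketch (illustrative only, not part of the patch) of the identity emit(LRP, dst, a, y, x) == x*(1-a) + y*a:

    /* Illustrative sketch, not part of the patch. */
    #include <cassert>

    static float lrp_hw(float op0, float op1, float op2)
    {
       return op1 * op0 + op2 * (1.0f - op0);   /* what the LRP instruction does */
    }

    static float lerp(float x, float y, float a)
    {
       return x * (1.0f - a) + y * a;           /* the pre-gen6 MUL/ADD expansion */
    }

    int main()
    {
       /* LRP(a, y, x) must equal lerp(x, y, a) for the reordering to be right. */
       for (float a = 0.0f; a <= 1.0f; a += 0.25f)
          assert(lrp_hw(a, 8.0f, 2.0f) == lerp(2.0f, 8.0f, a));
       return 0;
    }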
index 5a15eb8..610caef 100644 (file)
@@ -202,6 +202,13 @@ try_constant_propagate(const struct brw_device_info *devinfo,
         return true;
       }
       break;
+   case GS_OPCODE_SET_WRITE_OFFSET:
+      /* This is just a multiply by a constant with special strides.
+       * The generator will handle immediates in both arguments (generating
+       * a single MOV of the product).  So feel free to propagate in src0.
+       */
+      inst->src[arg] = value;
+      return true;
 
    case BRW_OPCODE_CMP:
       if (arg == 1) {
@@ -249,10 +256,25 @@ try_constant_propagate(const struct brw_device_info *devinfo,
 }
 
 static bool
+can_change_source_types(vec4_instruction *inst)
+{
+   return inst->dst.type == inst->src[0].type &&
+      !inst->src[0].abs && !inst->src[0].negate && !inst->saturate &&
+      (inst->opcode == BRW_OPCODE_MOV ||
+       (inst->opcode == BRW_OPCODE_SEL &&
+        inst->dst.type == inst->src[1].type &&
+        inst->predicate != BRW_PREDICATE_NONE &&
+        !inst->src[1].abs && !inst->src[1].negate));
+}
+
+static bool
 try_copy_propagate(const struct brw_device_info *devinfo,
                    vec4_instruction *inst,
                    int arg, struct copy_entry *entry)
 {
+   /* Build up the value we are propagating as if it were the source of a
+    * single MOV.
+    */
    /* For constant propagation, we only handle the same constant
     * across all 4 channels.  Some day, we should handle the 8-bit
     * float vector format, which would let us constant propagate
@@ -279,9 +301,9 @@ try_copy_propagate(const struct brw_device_info *devinfo,
    for (int i = 0; i < 4; i++) {
       s[i] = BRW_GET_SWZ(entry->value[i]->swizzle, i);
    }
-   value.swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
-                                       BRW_SWIZZLE4(s[0], s[1], s[2], s[3]));
+   value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
 
+   /* Check that we can propagate that value */
    if (value.file != UNIFORM &&
        value.file != GRF &&
        value.file != ATTR)
@@ -292,13 +314,6 @@ try_copy_propagate(const struct brw_device_info *devinfo,
       return false;
    }
 
-   if (inst->src[arg].abs) {
-      value.negate = false;
-      value.abs = true;
-   }
-   if (inst->src[arg].negate)
-      value.negate = !value.negate;
-
    bool has_source_modifiers = value.negate || value.abs;
 
    /* gen6 math and gen7+ SENDs from GRFs ignore source modifiers on
@@ -308,14 +323,20 @@ try_copy_propagate(const struct brw_device_info *devinfo,
         value.swizzle != BRW_SWIZZLE_XYZW) && !inst->can_do_source_mods(devinfo))
       return false;
 
-   if (has_source_modifiers && value.type != inst->src[arg].type)
+   if (has_source_modifiers &&
+       value.type != inst->src[arg].type &&
+       !can_change_source_types(inst))
       return false;
 
    if (has_source_modifiers &&
        inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE)
       return false;
 
-   if (inst->is_3src() && value.file == UNIFORM)
+   unsigned composed_swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
+                                                   value.swizzle);
+   if (inst->is_3src() &&
+       value.file == UNIFORM &&
+       !brw_is_single_value_swizzle(composed_swizzle))
       return false;
 
    if (inst->is_send_from_grf())
@@ -362,7 +383,26 @@ try_copy_propagate(const struct brw_device_info *devinfo,
       }
    }
 
-   value.type = inst->src[arg].type;
+   /* Build the final value */
+   if (inst->src[arg].abs) {
+      value.negate = false;
+      value.abs = true;
+   }
+   if (inst->src[arg].negate)
+      value.negate = !value.negate;
+
+   value.swizzle = composed_swizzle;
+   if (has_source_modifiers &&
+       value.type != inst->src[arg].type) {
+      assert(can_change_source_types(inst));
+      for (int i = 0; i < 3; i++) {
+         inst->src[i].type = value.type;
+      }
+      inst->dst.type = value.type;
+   } else {
+      value.type = inst->src[arg].type;
+   }
+
    inst->src[arg] = value;
    return true;
 }
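The argument order of brw_compose_swizzle() above is easy to get backwards. A small standalone sketch (illustrative only, not part of the patch; compose() is a stand-in for the real helper, written under the assumption that channel c of the result is inner[outer[c]]):

    #include <cstdio>

    /* Stand-in for brw_compose_swizzle(): channel c of the result comes from
     * inner[outer[c]] (assumed semantics, written out longhand).
     */
    static void
    compose(const int outer[4], const int inner[4], int out[4])
    {
       for (int c = 0; c < 4; c++)
          out[c] = inner[outer[c]];
    }

    int main()
    {
       const int value_swz[4] = { 1, 1, 2, 3 };   /* copy was made with .yyzw */
       const int inst_swz[4]  = { 3, 2, 1, 0 };   /* instruction reads .wzyx  */
       int composed[4];
       compose(inst_swz, value_swz, composed);
       printf("%d %d %d %d\n", composed[0], composed[1],
              composed[2], composed[3]);          /* 3 2 1 1, i.e. .wzyy */
       return 0;
    }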
index 92050b9..dcacc90 100644 (file)
@@ -34,7 +34,7 @@ extern "C" {
 namespace brw {
 
 struct brw_reg
-vec4_instruction::get_dst(void)
+vec4_instruction::get_dst(unsigned gen)
 {
    struct brw_reg brw_reg;
 
@@ -46,6 +46,7 @@ vec4_instruction::get_dst(void)
       break;
 
    case MRF:
+      assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(gen));
       brw_reg = brw_message_reg(dst.reg + dst.reg_offset);
       brw_reg = retype(brw_reg, dst.type);
       brw_reg.dw1.bits.writemask = dst.writemask;
@@ -286,6 +287,9 @@ vec4_generator::generate_tex(vec4_instruction *inst,
             msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
          }
          break;
+      case SHADER_OPCODE_SAMPLEINFO:
+         msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
+         break;
       default:
         unreachable("should not get here: invalid vec4 texture opcode");
       }
@@ -486,7 +490,7 @@ vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst)
    brw_push_insn_state(p);
    brw_set_default_access_mode(p, BRW_ALIGN_1);
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, get_element_ud(inst->get_dst(), 0),
+   brw_MOV(p, get_element_ud(inst->get_dst(devinfo->gen), 0),
            get_element_ud(inst->get_src(this->prog_data, 0), 0));
    brw_set_default_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
@@ -501,7 +505,7 @@ vec4_generator::generate_gs_thread_end(vec4_instruction *inst)
                  inst->base_mrf, /* starting mrf reg nr */
                  src,
                  BRW_URB_WRITE_EOT | inst->urb_write_flags,
-                 devinfo->gen >= 8 ? 2 : 1,/* message len */
+                 inst->mlen,
                  0,              /* response len */
                  0,              /* urb destination offset */
                  BRW_URB_SWIZZLE_INTERLEAVE);
@@ -537,8 +541,13 @@ vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
           src1.file == BRW_IMMEDIATE_VALUE &&
           src1.type == BRW_REGISTER_TYPE_UD &&
           src1.dw1.ud <= USHRT_MAX);
-   brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
-           retype(src1, BRW_REGISTER_TYPE_UW));
+   if (src0.file == IMM) {
+      brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3),
+              brw_imm_ud(src0.dw1.ud * src1.dw1.ud));
+   } else {
+      brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
+              retype(src1, BRW_REGISTER_TYPE_UW));
+   }
    brw_set_default_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
 }
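A minimal sketch of the strength reduction this hunk performs (illustrative only, not part of the patch; the operand values are made up): once constant propagation may place an immediate in src0, a MUL of two immediates folds into a single MOV of their product:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
       /* Both operands of GS_OPCODE_SET_WRITE_OFFSET known at compile time... */
       const uint32_t src0 = 2;    /* e.g. a constant-propagated count      */
       const uint32_t src1 = 16;   /* e.g. an immediate per-vertex multiplier */
       /* ...so instead of MUL dst, imm(2), imm(16) the generator can emit: */
       printf("MOV dst, imm(%u)\n", src0 * src1);
       return 0;
    }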
@@ -1029,6 +1038,32 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
 }
 
 void
+vec4_generator::generate_get_buffer_size(vec4_instruction *inst,
+                                         struct brw_reg dst,
+                                         struct brw_reg src,
+                                         struct brw_reg surf_index)
+{
+   assert(devinfo->gen >= 7);
+   assert(surf_index.type == BRW_REGISTER_TYPE_UD &&
+          surf_index.file == BRW_IMMEDIATE_VALUE);
+
+   brw_SAMPLE(p,
+              dst,
+              inst->base_mrf,
+              src,
+              surf_index.dw1.ud,
+              0,
+              GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
+              1, /* response length */
+              inst->mlen,
+              inst->header_size > 0,
+              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
+              BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+   brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
+}
+
+void
 vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg surf_index,
@@ -1122,7 +1157,7 @@ vec4_generator::generate_code(const cfg_t *cfg)
       for (unsigned int i = 0; i < 3; i++) {
         src[i] = inst->get_src(this->prog_data, i);
       }
-      dst = inst->get_dst();
+      dst = inst->get_dst(devinfo->gen);
 
       brw_set_default_predicate_control(p, inst->predicate);
       brw_set_default_predicate_inverse(p, inst->predicate_inverse);
@@ -1131,6 +1166,9 @@ vec4_generator::generate_code(const cfg_t *cfg)
       brw_set_default_mask_control(p, inst->force_writemask_all);
       brw_set_default_acc_write_control(p, inst->writes_accumulator);
 
+      assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
+      assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
+
       unsigned pre_emit_nr_insn = p->nr_insn;
 
       if (dst.width == BRW_WIDTH_4) {
@@ -1374,6 +1412,7 @@ vec4_generator::generate_code(const cfg_t *cfg)
       case SHADER_OPCODE_TXS:
       case SHADER_OPCODE_TG4:
       case SHADER_OPCODE_TG4_OFFSET:
+      case SHADER_OPCODE_SAMPLEINFO:
          generate_tex(inst, dst, src[0], src[1]);
          break;
 
@@ -1401,6 +1440,11 @@ vec4_generator::generate_code(const cfg_t *cfg)
          generate_set_simd4x2_header_gen9(inst, dst);
          break;
 
+      case VS_OPCODE_GET_BUFFER_SIZE:
+         generate_get_buffer_size(inst, dst, src[0], src[1]);
+         break;
+
       case GS_OPCODE_URB_WRITE:
          generate_gs_urb_write(inst);
          break;
index 8a8dd57..4f4e1e1 100644 (file)
@@ -92,16 +92,25 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    src_reg src;
 
    switch (instr->intrinsic) {
-   case nir_intrinsic_emit_vertex: {
+   case nir_intrinsic_emit_vertex_with_counter: {
+      this->vertex_count =
+         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
       int stream_id = instr->const_index[0];
       gs_emit_vertex(stream_id);
       break;
    }
 
-   case nir_intrinsic_end_primitive:
+   case nir_intrinsic_end_primitive_with_counter:
+      this->vertex_count =
+         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
       gs_end_primitive();
       break;
 
+   case nir_intrinsic_set_vertex_count:
+      this->vertex_count =
+         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
+      break;
+
    case nir_intrinsic_load_invocation_id: {
       src_reg invocation_id =
          src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
index 019efec..d2edc57 100644 (file)
@@ -42,7 +42,7 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
                                  bool no_spills,
                                  int shader_time_index)
    : vec4_visitor(compiler, log_data,
-                  &c->gp->program.Base, &c->key.base,
+                  &c->gp->program.Base, &c->key.tex,
                   &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx,
                   no_spills, shader_time_index),
      c(c)
@@ -234,17 +234,35 @@ vec4_gs_visitor::emit_thread_end()
     */
    int base_mrf = 1;
 
+   bool static_vertex_count = c->prog_data.static_vertex_count != -1;
+
+   /* If the previous instruction was a URB write, we don't need to issue
+    * a second one - we can just set the EOT bit on the previous write.
+    *
+    * Skip this on Gen8+ unless there's a static vertex count, as we also
+    * need to write the vertex count out, and combining the two may not be
+    * possible (or at least not straightforward).
+    */
+   vec4_instruction *last = (vec4_instruction *) instructions.get_tail();
+   if (last && last->opcode == GS_OPCODE_URB_WRITE &&
+       !(INTEL_DEBUG & DEBUG_SHADER_TIME) &&
+       devinfo->gen >= 8 && static_vertex_count) {
+      last->urb_write_flags = BRW_URB_WRITE_EOT | last->urb_write_flags;
+      return;
+   }
+
    current_annotation = "thread end";
    dst_reg mrf_reg(MRF, base_mrf);
    src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
    vec4_instruction *inst = emit(MOV(mrf_reg, r0));
    inst->force_writemask_all = true;
-   emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
+   if (devinfo->gen < 8 || !static_vertex_count)
+      emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
       emit_shader_time_end();
    inst = emit(GS_OPCODE_THREAD_END);
    inst->base_mrf = base_mrf;
-   inst->mlen = 1;
+   inst->mlen = devinfo->gen >= 8 && !static_vertex_count ? 2 : 1;
 }
 
 
@@ -284,7 +302,7 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
    /* We need to increment Global Offset by 1 to make room for Broadwell's
     * extra "Vertex Count" payload at the beginning of the URB entry.
     */
-   if (devinfo->gen >= 8)
+   if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
       inst->offset++;
 
    inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
@@ -421,7 +439,7 @@ vec4_gs_visitor::emit_control_data_bits()
     * URB entry.  Since this is an OWord message, Global Offset is counted
     * in 128-bit units, so we must set it to 2.
     */
-   if (devinfo->gen >= 8)
+   if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
       inst->offset = 2;
    inst->base_mrf = base_mrf;
    inst->mlen = 2;
@@ -484,90 +502,75 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
    if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
       return;
 
-   /* To ensure that we don't output more vertices than the shader specified
-    * using max_vertices, do the logic inside a conditional of the form "if
-    * (vertex_count < MAX)"
+   /* If we're outputting 32 control data bits or less, then we can wait
+    * until the shader is over to output them all.  Otherwise we need to
+    * output them as we go.  Now is the time to do it, since we're about to
+    * output the vertex_count'th vertex, so it's guaranteed that the
+    * control data bits associated with the (vertex_count - 1)th vertex are
+    * correct.
     */
-   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count,
-            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
-   {
-      /* If we're outputting 32 control data bits or less, then we can wait
-       * until the shader is over to output them all.  Otherwise we need to
-       * output them as we go.  Now is the time to do it, since we're about to
-       * output the vertex_count'th vertex, so it's guaranteed that the
-       * control data bits associated with the (vertex_count - 1)th vertex are
-       * correct.
+   if (c->control_data_header_size_bits > 32) {
+      this->current_annotation = "emit vertex: emit control data bits";
+      /* Only emit control data bits if we've finished accumulating a batch
+       * of 32 bits.  This is the case when:
+       *
+       *     (vertex_count * bits_per_vertex) % 32 == 0
+       *
+       * (in other words, when the last 5 bits of vertex_count *
+       * bits_per_vertex are 0).  Assuming bits_per_vertex == 2^n for some
+       * integer n (which is always the case, since bits_per_vertex is
+       * always 1 or 2), this is equivalent to requiring that the last 5-n
+       * bits of vertex_count are 0:
+       *
+       *     vertex_count & (2^(5-n) - 1) == 0
+       *
+       * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
+       * equivalent to:
+       *
+       *     vertex_count & (32 / bits_per_vertex - 1) == 0
        */
-      if (c->control_data_header_size_bits > 32) {
-         this->current_annotation = "emit vertex: emit control data bits";
-         /* Only emit control data bits if we've finished accumulating a batch
-          * of 32 bits.  This is the case when:
-          *
-          *     (vertex_count * bits_per_vertex) % 32 == 0
-          *
-          * (in other words, when the last 5 bits of vertex_count *
-          * bits_per_vertex are 0).  Assuming bits_per_vertex == 2^n for some
-          * integer n (which is always the case, since bits_per_vertex is
-          * always 1 or 2), this is equivalent to requiring that the last 5-n
-          * bits of vertex_count are 0:
-          *
-          *     vertex_count & (2^(5-n) - 1) == 0
-          *
-          * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
-          * equivalent to:
-          *
-          *     vertex_count & (32 / bits_per_vertex - 1) == 0
+      vec4_instruction *inst =
+         emit(AND(dst_null_d(), this->vertex_count,
+                  (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
+      inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+      emit(IF(BRW_PREDICATE_NORMAL));
+      {
+         /* If vertex_count is 0, then no control data bits have been
+          * accumulated yet, so we skip emitting them.
           */
-         vec4_instruction *inst =
-            emit(AND(dst_null_d(), this->vertex_count,
-                     (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
-         inst->conditional_mod = BRW_CONDITIONAL_Z;
-
+         emit(CMP(dst_null_d(), this->vertex_count, 0u,
+                  BRW_CONDITIONAL_NEQ));
          emit(IF(BRW_PREDICATE_NORMAL));
-         {
-            /* If vertex_count is 0, then no control data bits have been
-             * accumulated yet, so we skip emitting them.
-             */
-            emit(CMP(dst_null_d(), this->vertex_count, 0u,
-                     BRW_CONDITIONAL_NEQ));
-            emit(IF(BRW_PREDICATE_NORMAL));
-            emit_control_data_bits();
-            emit(BRW_OPCODE_ENDIF);
-
-            /* Reset control_data_bits to 0 so we can start accumulating a new
-             * batch.
-             *
-             * Note: in the case where vertex_count == 0, this neutralizes the
-             * effect of any call to EndPrimitive() that the shader may have
-             * made before outputting its first vertex.
-             */
-            inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
-            inst->force_writemask_all = true;
-         }
+         emit_control_data_bits();
          emit(BRW_OPCODE_ENDIF);
-      }
-
-      this->current_annotation = "emit vertex: vertex data";
-      emit_vertex();
 
-      /* In stream mode we have to set control data bits for all vertices
-       * unless we have disabled control data bits completely (which we do
-       * do for GL_POINTS outputs that don't use streams).
-       */
-      if (c->control_data_header_size_bits > 0 &&
-          c->prog_data.control_data_format ==
-             GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
-          this->current_annotation = "emit vertex: Stream control data bits";
-          set_stream_control_data_bits(stream_id);
+         /* Reset control_data_bits to 0 so we can start accumulating a new
+          * batch.
+          *
+          * Note: in the case where vertex_count == 0, this neutralizes the
+          * effect of any call to EndPrimitive() that the shader may have
+          * made before outputting its first vertex.
+          */
+         inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+         inst->force_writemask_all = true;
       }
+      emit(BRW_OPCODE_ENDIF);
+   }
 
-      this->current_annotation = "emit vertex: increment vertex count";
-      emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
-               src_reg(1u)));
+   this->current_annotation = "emit vertex: vertex data";
+   emit_vertex();
+
+   /* In stream mode we have to set control data bits for all vertices
+    * unless we have disabled control data bits completely (which we do for
+    * GL_POINTS outputs that don't use streams).
+    */
+   if (c->control_data_header_size_bits > 0 &&
+       c->prog_data.control_data_format ==
+          GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
+       this->current_annotation = "emit vertex: Stream control data bits";
+       set_stream_control_data_bits(stream_id);
    }
-   emit(BRW_OPCODE_ENDIF);
 
    this->current_annotation = NULL;
 }
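A quick standalone check (illustrative only, not part of the patch) that the batching condition derived in the comment above really is equivalent to (vertex_count * bits_per_vertex) % 32 == 0 for the two possible bits_per_vertex values:

    #include <cassert>

    int main()
    {
       for (unsigned bpv = 1; bpv <= 2; bpv++)      /* bits_per_vertex */
          for (unsigned vc = 0; vc < 4096; vc++)    /* vertex_count    */
             assert(((vc & (32 / bpv - 1)) == 0) ==
                    ((vc * bpv) % 32 == 0));
       return 0;
    }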
@@ -575,7 +578,22 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
 void
 vec4_gs_visitor::visit(ir_emit_vertex *ir)
 {
+   /* To ensure that we don't output more vertices than the shader specified
+    * using max_vertices, do the logic inside a conditional of the form "if
+    * (vertex_count < MAX)"
+    */
+   unsigned num_output_vertices = c->gp->program.VerticesOut;
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+
    gs_emit_vertex(ir->stream_id());
+
+   this->current_annotation = "emit vertex: increment vertex count";
+   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+            src_reg(1u)));
+
+   emit(BRW_OPCODE_ENDIF);
 }
 
 void
@@ -679,7 +697,7 @@ brw_gs_emit(struct brw_context *brw,
 
          vec4_gs_visitor v(brw->intelScreen->compiler, brw,
                            c, prog, mem_ctx, true /* no_spills */, st_index);
-         if (v.run(NULL /* clip planes */)) {
+         if (v.run()) {
             return generate_assembly(brw, prog, &c->gp->program.Base,
                                      &c->prog_data.base, mem_ctx, v.cfg,
                                      final_assembly_size);
@@ -727,7 +745,7 @@ brw_gs_emit(struct brw_context *brw,
                                c, prog, mem_ctx, false /* no_spills */,
                                st_index);
 
-   if (!gs->run(NULL /* clip planes */)) {
+   if (!gs->run()) {
       prog->LinkStatus = false;
       ralloc_strcat(&prog->InfoLog, gs->fail_msg);
    } else {
index d5a24d8..9d56f9a 100644 (file)
 
 #include "brw_nir.h"
 #include "brw_vec4.h"
+#include "brw_vec4_builder.h"
+#include "brw_vec4_surface_builder.h"
 #include "glsl/ir_uniform.h"
 
+using namespace brw;
+using namespace brw::surface_access;
+
 namespace brw {
 
 void
@@ -58,25 +63,24 @@ vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
       unreachable("should be lowered by lower_vertex_id().");
 
    case nir_intrinsic_load_vertex_id_zero_base:
-      reg = &this->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
+      reg = &nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
       if (reg->file == BAD_FILE)
-         *reg =
-            *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
-                                             glsl_type::int_type);
+         *reg = *make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+                                           glsl_type::int_type);
       break;
 
    case nir_intrinsic_load_base_vertex:
-      reg = &this->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
+      reg = &nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
       if (reg->file == BAD_FILE)
-         *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
-                                                 glsl_type::int_type);
+         *reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
+                                           glsl_type::int_type);
       break;
 
    case nir_intrinsic_load_instance_id:
-      reg = &this->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
+      reg = &nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
       if (reg->file == BAD_FILE)
-         *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
-                                                 glsl_type::int_type);
+         *reg = *make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
+                                           glsl_type::int_type);
       break;
 
    default:
@@ -142,7 +146,7 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
          }
 
          assert(uniforms < uniform_array_size);
-         this->uniform_size[uniforms] = type_size_vec4(var->type);
+         uniform_size[uniforms] = type_size_vec4(var->type);
 
          if (strncmp(var->name, "gl_", 3) == 0)
             nir_setup_builtin_uniform(var);
@@ -158,7 +162,7 @@ vec4_visitor::nir_setup_uniforms(nir_shader *shader)
              strcmp(var->name, "parameters") == 0);
 
       assert(uniforms < uniform_array_size);
-      this->uniform_size[uniforms] = type_size_vec4(var->type);
+      uniform_size[uniforms] = type_size_vec4(var->type);
 
       struct gl_program_parameter_list *plist = prog->Parameters;
       for (unsigned p = 0; p < plist->NumParameters; p++) {
@@ -243,10 +247,10 @@ vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
        * ParameterValues directly, since unlike brw_fs.cpp, we never
        * add new state references during compile.
        */
-      int index = _mesa_add_state_reference(this->prog->Parameters,
+      int index = _mesa_add_state_reference(prog->Parameters,
                                            (gl_state_index *)slots[i].tokens);
       gl_constant_value *values =
-         &this->prog->Parameters->ParameterValues[index][0];
+         &prog->Parameters->ParameterValues[index][0];
 
       assert(uniforms < uniform_array_size);
 
@@ -254,7 +258,7 @@ vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
          stage_prog_data->param[uniforms * 4 + j] =
             &values[GET_SWZ(slots[i].swizzle, j)];
 
-      this->uniform_vector_size[uniforms] =
+      uniform_vector_size[uniforms] =
          (var->type->is_scalar() || var->type->is_vector() ||
           var->type->is_matrix() ? var->type->vector_elements : 4);
 
@@ -344,7 +348,7 @@ vec4_visitor::nir_emit_block(nir_block *block)
 void
 vec4_visitor::nir_emit_instr(nir_instr *instr)
 {
-   this->base_ir = instr;
+   base_ir = instr;
 
    switch (instr->type) {
    case nir_instr_type_load_const:
@@ -367,6 +371,10 @@ vec4_visitor::nir_emit_instr(nir_instr *instr)
       nir_emit_texture(nir_instr_as_tex(instr));
       break;
 
+   case nir_instr_type_ssa_undef:
+      nir_emit_undef(nir_instr_as_ssa_undef(instr));
+      break;
+
    default:
       fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n");
       break;
@@ -393,9 +401,14 @@ dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg,
 dst_reg
 vec4_visitor::get_nir_dest(nir_dest dest)
 {
-   assert(!dest.is_ssa);
-   return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
-                              dest.reg.indirect);
+   if (dest.is_ssa) {
+      dst_reg dst = dst_reg(GRF, alloc.allocate(1));
+      nir_ssa_values[dest.ssa.index] = dst;
+      return dst;
+   } else {
+      return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
+                                 dest.reg.indirect);
+   }
 }
 
 dst_reg
@@ -450,7 +463,7 @@ void
 vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
 {
    dst_reg reg = dst_reg(GRF, alloc.allocate(1));
-   reg.type =  BRW_REGISTER_TYPE_F;
+   reg.type =  BRW_REGISTER_TYPE_D;
 
    unsigned remaining = brw_writemask_for_size(instr->def.num_components);
 
@@ -471,7 +484,7 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
       }
 
       reg.writemask = writemask;
-      emit(MOV(reg, src_reg(instr->value.f[i])));
+      emit(MOV(reg, src_reg(instr->value.i[i])));
 
       remaining &= ~writemask;
    }
@@ -530,33 +543,271 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       break;
    }
 
-   case nir_intrinsic_load_vertex_id:
-      unreachable("should be lowered by lower_vertex_id()");
+   case nir_intrinsic_get_buffer_size: {
+      nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+      unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+
+      assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+      src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+                                   ubo_index);
+      dst_reg result_dst = get_nir_dest(instr->dest);
+      vec4_instruction *inst = new(mem_ctx)
+         vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst);
+
+      inst->base_mrf = 2;
+      inst->mlen = 1; /* always at least one */
+      inst->src[1] = src_reg(surf_index);
+
+      /* MRF for the first parameter */
+      src_reg lod = src_reg(0);
+      int param_base = inst->base_mrf;
+      int writemask = WRITEMASK_X;
+      emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
+
+      emit(inst);
+      break;
+   }
+
+   case nir_intrinsic_store_ssbo_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_store_ssbo: {
+      assert(devinfo->gen >= 7);
+
+      /* Block index */
+      src_reg surf_index;
+      nir_const_value *const_uniform_block =
+         nir_src_as_const_value(instr->src[1]);
+      if (const_uniform_block) {
+         unsigned index = prog_data->base.binding_table.ubo_start +
+                          const_uniform_block->u[0];
+         surf_index = src_reg(index);
+         brw_mark_surface_used(&prog_data->base, index);
+      } else {
+         surf_index = src_reg(this, glsl_type::uint_type);
+         emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
+                  src_reg(prog_data->base.binding_table.ubo_start)));
+         surf_index = emit_uniformize(surf_index);
+
+         brw_mark_surface_used(&prog_data->base,
+                               prog_data->base.binding_table.ubo_start +
+                               shader_prog->NumBufferInterfaceBlocks - 1);
+      }
+
+      /* Offset */
+      src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+      unsigned const_offset_bytes = 0;
+      if (has_indirect) {
+         emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1)));
+      } else {
+         const_offset_bytes = instr->const_index[0];
+         emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+      }
+
+      /* Value */
+      src_reg val_reg = get_nir_src(instr->src[0], 4);
+
+      /* Writemask */
+      unsigned write_mask = instr->const_index[1];
+
+      /* IvyBridge does not have a native SIMD4x2 untyped write message, so
+       * untyped writes will use SIMD8 mode. In order to hide this and keep
+       * symmetry across typed and untyped messages and across hardware
+       * platforms, the current implementation of the untyped messages will
+       * transparently convert the SIMD4x2 payload into an equivalent SIMD8
+       * payload by transposing it and enabling only channel X on the SEND
+       * instruction.
+       *
+       * The above works well for full vector writes, but not for partial
+       * writes where we want to write some channels and not others, like when
+       * we have code such as v.xyw = vec3(1,2,4). Because the untyped write
+       * messages are quite restrictive with regard to the channel enables we
+       * can configure in the message descriptor (not all combinations are
+       * allowed), we cannot simply implement these scenarios with a single
+       * message while keeping the aforementioned symmetry in the
+       * implementation. For now we have decided that it is better to keep the
+       * symmetry to reduce complexity, so in situations such as the one
+       * described we end up emitting two untyped write messages (one for xy
+       * and another for w).
+       *
+       * The code below packs consecutive channels into a single write message,
+       * detects gaps in the vector write and, if needed, sends a second
+       * message with the remaining channels. If in the future we decide that
+       * we want to emit a single message at the expense of losing the
+       * symmetry in the implementation, we can:
+       *
+       * 1) For IvyBridge: Only use the red channel of the untyped write SIMD8
+       *    message payload. In this mode we can write up to 8 offsets and dwords
+       *    to the red channel only (for the two vec4s in the SIMD4x2 execution)
+       *    and select which of the 8 channels carry data to write by setting the
+       *    appropriate writemask in the dst register of the SEND instruction.
+       *    It would require writing a new generator opcode specifically for
+       *    IvyBridge, since we would need to prepare a SIMD8 payload that
+       *    could use any channel, not just X.
+       *
+       * 2) For Haswell+: Simply send a single write message but set the writemask
+       *    on the dst of the SEND instruction to select the channels we want to
+       *    write. It would require modifying the current messages to receive
+       *    and honor the writemask provided.
+       */
+      const vec4_builder bld = vec4_builder(this).at_end()
+                               .annotate(current_annotation, base_ir);
+
+      int swizzle[4] = { 0, 0, 0, 0 };
+      int num_channels = 0;
+      unsigned skipped_channels = 0;
+      int num_components = instr->num_components;
+      for (int i = 0; i < num_components; i++) {
+         /* Check if this channel needs to be written. If so, record the
+          * channel we need to take the data from in the swizzle array
+          */
+         int component_mask = 1 << i;
+         int write_test = write_mask & component_mask;
+         if (write_test)
+            swizzle[num_channels++] = i;
+
+         /* If we don't have to write this channel it means we have a gap in the
+          * vector, so write the channels we accumulated until now, if any. Do
+          * the same if this was the last component in the vector.
+          */
+         if (!write_test || i == num_components - 1) {
+            if (num_channels > 0) {
+               /* We have channels to write, so update the offset we need to
+                * write at to skip the channels we skipped, if any.
+                */
+               if (skipped_channels > 0) {
+                  if (!has_indirect) {
+                     const_offset_bytes += 4 * skipped_channels;
+                     offset_reg = src_reg(const_offset_bytes);
+                  } else {
+                     emit(ADD(dst_reg(offset_reg), offset_reg,
+                              brw_imm_ud(4 * skipped_channels)));
+                  }
+               }
+
+               /* Swizzle the data register so we take the data from the channels
+                * we need to write and send the write message. This will write
+                * num_channels consecutive dwords starting at offset.
+                */
+               val_reg.swizzle =
+                  BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+               emit_untyped_write(bld, surf_index, offset_reg, val_reg,
+                                  1 /* dims */, num_channels /* size */,
+                                  BRW_PREDICATE_NONE);
+
+               /* If we have to do a second write we will have to update the
+                * offset so that we jump over the channels we have just
+                * written.
+                */
+               skipped_channels = num_channels;
+
+               /* Restart the count for the next write message */
+               num_channels = 0;
+            }
+
+            /* We did not write the current channel, so increase skipped count */
+            skipped_channels++;
+         }
+      }
 
-   case nir_intrinsic_load_vertex_id_zero_base: {
-      src_reg vertex_id =
-         src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
-      assert(vertex_id.file != BAD_FILE);
-      dest = get_nir_dest(instr->dest, vertex_id.type);
-      emit(MOV(dest, vertex_id));
       break;
    }
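A simplified extraction of the run-splitting loop above (illustrative only, not part of the patch; the swizzle bookkeeping and message emission are dropped), showing which untyped write messages a partial writemask produces:

    #include <cstdio>

    int main()
    {
       const unsigned write_mask = 0xb;   /* v.xyw = ..., i.e. a gap at z */
       const int num_components = 4;

       int num_channels = 0;
       unsigned offset_bytes = 0, skipped_channels = 0;
       for (int i = 0; i < num_components; i++) {
          const bool write_test = (write_mask & (1 << i)) != 0;
          if (write_test)
             num_channels++;
          if (!write_test || i == num_components - 1) {
             if (num_channels > 0) {
                offset_bytes += 4 * skipped_channels;  /* jump over the gap */
                printf("untyped write: %d channel(s) at +%u bytes\n",
                       num_channels, offset_bytes);
                skipped_channels = num_channels;
                num_channels = 0;
             }
             skipped_channels++;   /* the current channel was not written */
          }
       }
       /* Prints one 2-channel write at +0 and one 1-channel write at +12. */
       return 0;
    }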
 
-   case nir_intrinsic_load_base_vertex: {
-      src_reg base_vertex =
-         src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]);
-      assert(base_vertex.file != BAD_FILE);
-      dest = get_nir_dest(instr->dest, base_vertex.type);
-      emit(MOV(dest, base_vertex));
+   case nir_intrinsic_load_ssbo_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_load_ssbo: {
+      assert(devinfo->gen >= 7);
+
+      nir_const_value *const_uniform_block =
+         nir_src_as_const_value(instr->src[0]);
+
+      src_reg surf_index;
+      if (const_uniform_block) {
+         unsigned index = prog_data->base.binding_table.ubo_start +
+                          const_uniform_block->u[0];
+         surf_index = src_reg(index);
+
+         brw_mark_surface_used(&prog_data->base, index);
+      } else {
+         surf_index = src_reg(this, glsl_type::uint_type);
+         emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1),
+                  src_reg(prog_data->base.binding_table.ubo_start)));
+         surf_index = emit_uniformize(surf_index);
+
+         /* Assume this may touch any UBO. It would be nice to provide
+          * a tighter bound, but the array information is already lowered away.
+          */
+         brw_mark_surface_used(&prog_data->base,
+                               prog_data->base.binding_table.ubo_start +
+                               shader_prog->NumBufferInterfaceBlocks - 1);
+      }
+
+      src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+      unsigned const_offset_bytes = 0;
+      if (has_indirect) {
+         emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1)));
+      } else {
+         const_offset_bytes = instr->const_index[0];
+         emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+      }
+
+      /* Read the vector */
+      const vec4_builder bld = vec4_builder(this).at_end()
+         .annotate(current_annotation, base_ir);
+
+      src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+                                              1 /* dims */, 4 /* size */,
+                                              BRW_PREDICATE_NONE);
+      dst_reg dest = get_nir_dest(instr->dest);
+      read_result.type = dest.type;
+      read_result.swizzle = brw_swizzle_for_size(instr->num_components);
+      emit(MOV(dest, read_result));
+
       break;
    }
 
+   case nir_intrinsic_ssbo_atomic_add:
+      nir_emit_ssbo_atomic(BRW_AOP_ADD, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_min:
+      if (dest.type == BRW_REGISTER_TYPE_D)
+         nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr);
+      else
+         nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_max:
+      if (dest.type == BRW_REGISTER_TYPE_D)
+         nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr);
+      else
+         nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_and:
+      nir_emit_ssbo_atomic(BRW_AOP_AND, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_or:
+      nir_emit_ssbo_atomic(BRW_AOP_OR, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_xor:
+      nir_emit_ssbo_atomic(BRW_AOP_XOR, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_exchange:
+      nir_emit_ssbo_atomic(BRW_AOP_MOV, instr);
+      break;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+      nir_emit_ssbo_atomic(BRW_AOP_CMPWR, instr);
+      break;
+
+   case nir_intrinsic_load_vertex_id:
+      unreachable("should be lowered by lower_vertex_id()");
+
+   case nir_intrinsic_load_vertex_id_zero_base:
+   case nir_intrinsic_load_base_vertex:
    case nir_intrinsic_load_instance_id: {
-      src_reg instance_id =
-         src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]);
-      assert(instance_id.file != BAD_FILE);
-      dest = get_nir_dest(instr->dest, instance_id.type);
-      emit(MOV(dest, instance_id));
+      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+      src_reg val = src_reg(nir_system_values[sv]);
+      assert(val.file != BAD_FILE);
+      dest = get_nir_dest(instr->dest, val.type);
+      emit(MOV(dest, val));
       break;
    }
 
@@ -640,7 +891,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
           */
          brw_mark_surface_used(&prog_data->base,
                                prog_data->base.binding_table.ubo_start +
-                               shader_prog->NumUniformBlocks - 1);
+                               shader_prog->NumBufferInterfaceBlocks - 1);
       }
 
       unsigned const_offset = instr->const_index[1];
@@ -677,6 +928,53 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    }
 }
 
+void
+vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
+{
+   dst_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+      dest = get_nir_dest(instr->dest);
+
+   src_reg surface;
+   nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+   if (const_surface) {
+      unsigned surf_index = prog_data->base.binding_table.ubo_start +
+                            const_surface->u[0];
+      surface = src_reg(surf_index);
+      brw_mark_surface_used(&prog_data->base, surf_index);
+   } else {
+      surface = src_reg(this, glsl_type::uint_type);
+      emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]),
+               src_reg(prog_data->base.binding_table.ubo_start)));
+
+      /* Assume this may touch any UBO. This is the same as we do for other
+       * UBO/SSBO accesses with a non-constant surface index.
+       */
+      brw_mark_surface_used(&prog_data->base,
+                            prog_data->base.binding_table.ubo_start +
+                            shader_prog->NumBufferInterfaceBlocks - 1);
+   }
+
+   src_reg offset = get_nir_src(instr->src[1], 1);
+   src_reg data1 = get_nir_src(instr->src[2], 1);
+   src_reg data2;
+   if (op == BRW_AOP_CMPWR)
+      data2 = get_nir_src(instr->src[3], 1);
+
+   /* Emit the actual atomic operation */
+   const vec4_builder bld =
+      vec4_builder(this).at_end().annotate(current_annotation, base_ir);
+
+   src_reg atomic_result =
+      surface_access::emit_untyped_atomic(bld, surface, offset,
+                                          data1, data2,
+                                          1 /* dims */, 1 /* rsize */,
+                                          op,
+                                          BRW_PREDICATE_NONE);
+   dest.type = atomic_result.type;
+   bld.MOV(dest, atomic_result);
+}
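A small standalone example (illustrative only, not part of the patch) of why the min/max intrinsics above must pick between BRW_AOP_IMIN/IMAX and BRW_AOP_UMIN/UMAX based on the register type: the same 32-bit pattern orders differently as signed and unsigned data:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
       const uint32_t bits = 0xffffffffu;   /* -1 as signed, UINT32_MAX as unsigned */
       const int32_t  s    = (int32_t) bits;
       printf("signed   min(%d, 5) = %d\n", s, s < 5 ? s : 5);            /* -1 */
       printf("unsigned min(%u, 5) = %u\n", bits, bits < 5u ? bits : 5u); /*  5 */
       return 0;
    }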
+
 static unsigned
 brw_swizzle_for_nir_swizzle(uint8_t swizzle[4])
 {
@@ -1263,21 +1561,26 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       inst->predicate = BRW_PREDICATE_NORMAL;
       break;
 
-   case nir_op_fdot2:
+   case nir_op_fdot_replicated2:
       inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
       inst->saturate = instr->dest.saturate;
       break;
 
-   case nir_op_fdot3:
+   case nir_op_fdot_replicated3:
       inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
       inst->saturate = instr->dest.saturate;
       break;
 
-   case nir_op_fdot4:
+   case nir_op_fdot_replicated4:
       inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
       inst->saturate = instr->dest.saturate;
       break;
 
+   case nir_op_fdph_replicated:
+      inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
    case nir_op_bany2:
    case nir_op_bany3:
    case nir_op_bany4: {
@@ -1355,6 +1658,7 @@ ir_texture_opcode_for_nir_texop(nir_texop texop)
    switch (texop) {
    case nir_texop_lod: op = ir_lod; break;
    case nir_texop_query_levels: op = ir_query_levels; break;
+   case nir_texop_texture_samples: op = ir_texture_samples; break;
    case nir_texop_tex: op = ir_tex; break;
    case nir_texop_tg4: op = ir_tg4; break;
    case nir_texop_txb: op = ir_txb; break;
@@ -1411,7 +1715,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
     * emitting anything other than setting up the constant result.
     */
    if (instr->op == nir_texop_tg4) {
-      int swiz = GET_SWZ(key->tex.swizzles[sampler], instr->component);
+      int swiz = GET_SWZ(key_tex->swizzles[sampler], instr->component);
       if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
          emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
          return;
@@ -1473,7 +1777,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
          sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
          assert(coord_type != NULL);
          if (devinfo->gen >= 7 &&
-             key->tex.compressed_multisample_layout_mask & (1<<sampler)) {
+             key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
             mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
          } else {
             mcs = src_reg(0u);
@@ -1546,4 +1850,10 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
                 mcs, is_cube_array, sampler, sampler_reg);
 }
 
+void
+vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
+{
+   nir_ssa_values[instr->def.index] = dst_reg(GRF, alloc.allocate(1));
+}
+
 }
index 62ed708..a49eca5 100644 (file)
@@ -267,6 +267,97 @@ vec4_visitor::reg_allocate()
    return true;
 }
 
+/**
+ * When we decide to spill a register, instead of blindly unspilling before
+ * every use, we avoid the unspill when the spilled register is read by
+ * consecutive instructions. This can save a good number of unspills that
+ * would have very little impact on register allocation anyway.
+ *
+ * Notice that we need to account for this behavior when spilling a register
+ * and when evaluating spilling costs. This function is designed so it can
+ * be called from both places and avoid repeating the logic.
+ *
+ *  - When we call this function from spill_reg(), we pass in scratch_reg the
+ *    actual unspill/spill register that we want to reuse in the current
+ *    instruction.
+ *
+ *  - When we call this from evaluate_spill_costs(), we pass the register for
+ *    which we are evaluating spilling costs.
+ *
+ * In either case, we check whether the previous instructions read scratch_reg,
+ * walking back until we find one that writes to it with a compatible mask or
+ * one that does not read or write scratch_reg at all.
+ */
+static bool
+can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
+                           unsigned scratch_reg)
+{
+   assert(inst->src[i].file == GRF);
+   bool prev_inst_read_scratch_reg = false;
+
+   /* See if any previous source in the same instruction reads scratch_reg */
+   for (unsigned n = 0; n < i; n++) {
+      if (inst->src[n].file == GRF && inst->src[n].reg == scratch_reg)
+         prev_inst_read_scratch_reg = true;
+   }
+
+   /* Now check if previous instructions read/write scratch_reg */
+   for (vec4_instruction *prev_inst = (vec4_instruction *) inst->prev;
+        !prev_inst->is_head_sentinel();
+        prev_inst = (vec4_instruction *) prev_inst->prev) {
+
+      /* If the previous instruction writes to scratch_reg then we can reuse
+       * it if the write is not conditional and the channels it writes are
+       * compatible with our read mask.
+       */
+      if (prev_inst->dst.file == GRF && prev_inst->dst.reg == scratch_reg) {
+         return (!prev_inst->predicate || prev_inst->opcode == BRW_OPCODE_SEL) &&
+                (brw_mask_for_swizzle(inst->src[i].swizzle) &
+                 ~prev_inst->dst.writemask) == 0;
+      }
+
+      /* Skip scratch read/writes so that instructions generated by spilling
+       * other registers (that won't read/write scratch_reg) do not stop us from
+       * reusing scratch_reg for this instruction.
+       */
+      if (prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE ||
+          prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_READ)
+         continue;
+
+      /* If the previous instruction does not write to scratch_reg, then check
+       * if it reads it.
+       */
+      int n;
+      for (n = 0; n < 3; n++) {
+         if (prev_inst->src[n].file == GRF &&
+             prev_inst->src[n].reg == scratch_reg) {
+            prev_inst_read_scratch_reg = true;
+            break;
+         }
+      }
+      if (n == 3) {
+         /* The previous instruction does not read scratch_reg. If no earlier
+          * instruction has read it either, we will need to unspill it here
+          * and cannot reuse it, so we return false. Otherwise, we found at
+          * least one consecutive instruction that reads scratch_reg, which
+          * means we got here from evaluate_spill_costs (for the spill_reg
+          * path, any block of consecutive instructions using scratch_reg
+          * must start with a write to that register, so we would have exited
+          * the loop in the write check at the top of this loop). In that
+          * case we have found the point at which scratch_reg would be
+          * unspilled. Since we always unspill a full vec4, all the channels
+          * are available and we can return true to signal that the register
+          * can be reused in the current instruction too.
+          */
+         return prev_inst_read_scratch_reg;
+      }
+   }
+
+   return prev_inst_read_scratch_reg;
+}
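A simplified standalone model of the backward scan above (illustrative only, not part of the patch; it folds the predicate and writemask tests into a single full_write flag and omits the scratch-message skipping and the same-instruction source check):

    #include <cstdio>
    #include <vector>

    struct toy_inst {
       int dst;                  /* register written */
       bool full_write;          /* unpredicated write of all channels */
       std::vector<int> srcs;    /* registers read */
    };

    /* Walk backwards from prog[pos]: reuse is possible after a compatible
     * write; a run of consecutive readers keeps the cached copy alive;
     * anything else ends the run.
     */
    static bool
    can_reuse(const std::vector<toy_inst> &prog, size_t pos, int reg)
    {
       bool seen_read = false;
       for (size_t n = pos; n-- > 0; ) {
          const toy_inst &prev = prog[n];
          if (prev.dst == reg)
             return prev.full_write;
          bool reads = false;
          for (int s : prev.srcs)
             reads |= (s == reg);
          if (!reads)
             return seen_read;
          seen_read = true;
       }
       return seen_read;
    }

    int main()
    {
       const std::vector<toy_inst> prog = {
          { 2, true,  {} },      /* 0: unspill-style full write of r2 */
          { 3, true,  {2} },     /* 1: reads r2                       */
          { 4, true,  {2, 3} },  /* 2: reads r2                       */
          { 5, true,  {3} },     /* 3: unrelated, ends the run        */
          { 6, true,  {2} },     /* 4: reads r2 again                 */
       };
       printf("inst 1: %s\n", can_reuse(prog, 1, 2) ? "reuse" : "unspill");
       printf("inst 2: %s\n", can_reuse(prog, 2, 2) ? "reuse" : "unspill");
       printf("inst 4: %s\n", can_reuse(prog, 4, 2) ? "reuse" : "unspill");
       return 0;
    }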
+
 void
 vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
 {
@@ -284,9 +375,15 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (unsigned int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF) {
-            spill_costs[inst->src[i].reg] += loop_scale;
-            if (inst->src[i].reladdr)
-               no_spill[inst->src[i].reg] = true;
+            /* We will only unspill src[i] if it wasn't unspilled for the
+             * previous instruction, in which case we'll just reuse the scratch
+             * reg for this instruction.
+             */
+            if (!can_use_scratch_for_source(inst, i, inst->src[i].reg)) {
+               spill_costs[inst->src[i].reg] += loop_scale;
+               if (inst->src[i].reladdr)
+                  no_spill[inst->src[i].reg] = true;
+            }
          }
       }
 
@@ -345,19 +442,32 @@ vec4_visitor::spill_reg(int spill_reg_nr)
    unsigned int spill_offset = last_scratch++;
 
    /* Generate spill/unspill instructions for the objects being spilled. */
+   int scratch_reg = -1;
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (unsigned int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
-            src_reg spill_reg = inst->src[i];
-            inst->src[i].reg = alloc.allocate(1);
-            dst_reg temp = dst_reg(inst->src[i]);
-
-            emit_scratch_read(block, inst, temp, spill_reg, spill_offset);
+            if (scratch_reg == -1 ||
+                !can_use_scratch_for_source(inst, i, scratch_reg)) {
+               /* We need to unspill anyway so make sure we read the full vec4
+                * in any case. This way, the cached register can be reused
+                * for consecutive instructions that read different channels of
+                * the same vec4.
+                */
+               scratch_reg = alloc.allocate(1);
+               src_reg temp = inst->src[i];
+               temp.reg = scratch_reg;
+               temp.swizzle = BRW_SWIZZLE_XYZW;
+               emit_scratch_read(block, inst,
+                                 dst_reg(temp), inst->src[i], spill_offset);
+            }
+            assert(scratch_reg != -1);
+            inst->src[i].reg = scratch_reg;
          }
       }
 
       if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) {
          emit_scratch_write(block, inst, spill_offset);
+         scratch_reg = inst->dst.reg;
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
new file mode 100644 (file)
index 0000000..a7c286d
--- /dev/null
@@ -0,0 +1,332 @@
+/*
+ * Copyright © 2013-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4_surface_builder.h"
+
+using namespace brw;
+
+namespace {
+   namespace array_utils {
+      /**
+       * Copy one every \p src_stride logical components of the argument into
+       * one every \p dst_stride logical components of the result.
+       */
+      src_reg
+      emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
+                  unsigned dst_stride, unsigned src_stride)
+      {
+         if (src_stride == 1 && dst_stride == 1) {
+            return src;
+         } else {
+            const dst_reg dst = bld.vgrf(src.type,
+                                         DIV_ROUND_UP(size * dst_stride, 4));
+
+            for (unsigned i = 0; i < size; ++i)
+               bld.MOV(writemask(offset(dst, i * dst_stride / 4),
+                                 1 << (i * dst_stride % 4)),
+                       swizzle(offset(src, i * src_stride / 4),
+                               brw_swizzle_for_mask(1 << (i * src_stride % 4))));
+
+            return src_reg(dst);
+         }
+      }
+
+      /**
+       * Convert a VEC4 into an array of registers with the layout expected by
+       * the recipient shared unit.  If \p has_simd4x2 is true the argument is
+       * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
+       * a SIMD8 vector.
+       */
+      src_reg
+      emit_insert(const vec4_builder &bld, const src_reg &src,
+                  unsigned n, bool has_simd4x2)
+      {
+         if (src.file == BAD_FILE || n == 0) {
+            return src_reg();
+
+         } else {
+            /* Pad unused components with zeroes. */
+            const unsigned mask = (1 << n) - 1;
+            const dst_reg tmp = bld.vgrf(src.type);
+
+            bld.MOV(writemask(tmp, mask), src);
+            if (n < 4)
+               bld.MOV(writemask(tmp, ~mask), 0);
+
+            return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
+         }
+      }
+
+      /**
+       * Convert an array of registers back into a VEC4 according to the
+       * layout expected from some shared unit.  If \p has_simd4x2 is true the
+       * argument is left unmodified in SIMD4x2 form, otherwise it will be
+       * rearranged from SIMD8 form.
+       */
+      src_reg
+      emit_extract(const vec4_builder &bld, const src_reg src,
+                   unsigned n, bool has_simd4x2)
+      {
+         if (src.file == BAD_FILE || n == 0) {
+            return src_reg();
+
+         } else {
+            return emit_stride(bld, src, n, 1, has_simd4x2 ? 1 : 4);
+         }
+      }
+   }
+}
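A standalone sketch of the (register, channel) placement emit_stride() computes above (illustrative only, not part of the patch): with a stride of 4 each component lands in channel x of its own register, which is the SIMD8 layout the untyped messages expect on Ivybridge; with a stride of 1 the vec4 stays packed in SIMD4x2 form:

    #include <cstdio>

    int main()
    {
       const unsigned strides[] = { 1, 4 };   /* SIMD4x2 vs. SIMD8 placement */
       for (unsigned s : strides) {
          printf("stride %u:\n", s);
          for (unsigned i = 0; i < 4; i++)
             printf("   component %u -> reg +%u, channel %u\n",
                    i, i * s / 4, i * s % 4);
       }
       return 0;
    }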
+
+namespace brw {
+   namespace surface_access {
+      namespace {
+         using namespace array_utils;
+
+         /**
+          * Generate a send opcode for a surface message and return the
+          * result.
+          */
+         src_reg
+         emit_send(const vec4_builder &bld, enum opcode op,
+                   const src_reg &header,
+                   const src_reg &addr, unsigned addr_sz,
+                   const src_reg &src, unsigned src_sz,
+                   const src_reg &surface,
+                   unsigned arg, unsigned ret_sz,
+                   brw_predicate pred = BRW_PREDICATE_NONE)
+         {
+            /* Calculate the total number of components of the payload. */
+            const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
+            const unsigned sz = header_sz + addr_sz + src_sz;
+
+            /* Construct the payload. */
+            const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
+            unsigned n = 0;
+
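+            /* The header (if any) goes first and is copied with exec_all()
+             * so its contents don't depend on the current channel enables.
+             */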
+            if (header_sz)
+               bld.exec_all().MOV(offset(payload, n++),
+                                  retype(header, BRW_REGISTER_TYPE_UD));
+
+            for (unsigned i = 0; i < addr_sz; i++)
+               bld.MOV(offset(payload, n++),
+                       offset(retype(addr, BRW_REGISTER_TYPE_UD), i));
+
+            for (unsigned i = 0; i < src_sz; i++)
+               bld.MOV(offset(payload, n++),
+                       offset(retype(src, BRW_REGISTER_TYPE_UD), i));
+
+            /* Reduce the dynamically uniform surface index to a single
+             * scalar.
+             */
+            const src_reg usurface = bld.emit_uniformize(surface);
+
+            /* Emit the message send instruction. */
+            const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
+            vec4_instruction *inst =
+               bld.emit(op, dst, src_reg(payload), usurface, arg);
+            inst->mlen = sz;
+            inst->regs_written = ret_sz;
+            inst->header_size = header_sz;
+            inst->predicate = pred;
+
+            return src_reg(dst);
+         }
+      }
+
+      /**
+       * Emit an untyped surface read opcode.  \p dims determines the number
+       * of components of the address and \p size the number of components of
+       * the returned value.
+       */
+      src_reg
+      emit_untyped_read(const vec4_builder &bld,
+                        const src_reg &surface, const src_reg &addr,
+                        unsigned dims, unsigned size,
+                        brw_predicate pred)
+      {
+         return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
+                          emit_insert(bld, addr, dims, true), 1,
+                          src_reg(), 0,
+                          surface, size, 1, pred);
+      }
+
+      /**
+       * Emit an untyped surface write opcode.  \p dims determines the number
+       * of components of the address and \p size the number of components of
+       * the argument.
+       */
+      void
+      emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
+                         const src_reg &addr, const src_reg &src,
+                         unsigned dims, unsigned size,
+                         brw_predicate pred)
+      {
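+         /* Ivybridge only has a SIMD8 variant of this message, so the
+          * payload must be spread out to SIMD8 form there; Haswell and
+          * later can take it in SIMD4x2 form directly.
+          */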
+         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+                                   bld.shader->devinfo->is_haswell);
+         emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
+                   emit_insert(bld, addr, dims, has_simd4x2),
+                   has_simd4x2 ? 1 : dims,
+                   emit_insert(bld, src, size, has_simd4x2),
+                   has_simd4x2 ? 1 : size,
+                   surface, size, 0, pred);
+      }
+
+      /**
+       * Emit an untyped surface atomic opcode.  \p dims determines the number
+       * of components of the address and \p rsize the number of components of
+       * the returned value (either zero or one).
+       */
+      src_reg
+      emit_untyped_atomic(const vec4_builder &bld,
+                          const src_reg &surface, const src_reg &addr,
+                          const src_reg &src0, const src_reg &src1,
+                          unsigned dims, unsigned rsize, unsigned op,
+                          brw_predicate pred)
+      {
+         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+                                   bld.shader->devinfo->is_haswell);
+
+         /* Zip the components of both sources; they are represented as the
+          * X and Y components of the same vector.
+          */
+         const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+         const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+         if (size >= 1)
+            bld.MOV(writemask(srcs, WRITEMASK_X), src0);
+         if (size >= 2)
+            bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
+
+         return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(),
+                          emit_insert(bld, addr, dims, has_simd4x2),
+                          has_simd4x2 ? 1 : dims,
+                          emit_insert(bld, src_reg(srcs), size, has_simd4x2),
+                          has_simd4x2 ? 1 : size,
+                          surface, op, rsize, pred);
+      }
+
+      namespace {
+         /**
+          * Initialize the header present in typed surface messages.
+          */
+         src_reg
+         emit_typed_message_header(const vec4_builder &bld)
+         {
+            const vec4_builder ubld = bld.exec_all();
+            const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
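+            /* Start from an all-zero header; only the sample mask in the
+             * W channel may need a non-default value below.
+             */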
+            ubld.MOV(dst, src_reg(0));
+
+            if (bld.shader->devinfo->gen == 7 &&
+                !bld.shader->devinfo->is_haswell) {
+               /* The sample mask is used on IVB for the SIMD8 messages that
+                * have no SIMD4x2 variant.  We only use the two X channels
+                * in that case, so mask everything else out.
+                */
+               ubld.MOV(writemask(dst, WRITEMASK_W), src_reg(0x11));
+            }
+
+            return src_reg(dst);
+         }
+      }
+
+      /**
+       * Emit a typed surface read opcode.  \p dims determines the number of
+       * components of the address and \p size the number of components of the
+       * returned value.
+       */
+      src_reg
+      emit_typed_read(const vec4_builder &bld, const src_reg &surface,
+                      const src_reg &addr, unsigned dims, unsigned size)
+      {
+         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+                                   bld.shader->devinfo->is_haswell);
+         const src_reg tmp =
+            emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ,
+                      emit_typed_message_header(bld),
+                      emit_insert(bld, addr, dims, has_simd4x2),
+                      has_simd4x2 ? 1 : dims,
+                      src_reg(), 0,
+                      surface, size,
+                      has_simd4x2 ? 1 : size);
+
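+         /* Without SIMD4x2 support the return payload comes back in SIMD8
+          * form, one component per register; fold it back into a single
+          * VEC4.
+          */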
+         return emit_extract(bld, tmp, size, has_simd4x2);
+      }
+
+      /**
+       * Emit a typed surface write opcode.  \p dims determines the number of
+       * components of the address and \p size the number of components of the
+       * argument.
+       */
+      void
+      emit_typed_write(const vec4_builder &bld, const src_reg &surface,
+                       const src_reg &addr, const src_reg &src,
+                       unsigned dims, unsigned size)
+      {
+         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+                                   bld.shader->devinfo->is_haswell);
+         emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE,
+                   emit_typed_message_header(bld),
+                   emit_insert(bld, addr, dims, has_simd4x2),
+                   has_simd4x2 ? 1 : dims,
+                   emit_insert(bld, src, size, has_simd4x2),
+                   has_simd4x2 ? 1 : size,
+                   surface, size, 0);
+      }
+
+      /**
+       * Emit a typed surface atomic opcode.  \p dims determines the number of
+       * components of the address and \p rsize the number of components of
+       * the returned value (either zero or one).
+       */
+      src_reg
+      emit_typed_atomic(const vec4_builder &bld,
+                        const src_reg &surface, const src_reg &addr,
+                        const src_reg &src0, const src_reg &src1,
+                        unsigned dims, unsigned rsize, unsigned op,
+                        brw_predicate pred)
+      {
+         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+                                   bld.shader->devinfo->is_haswell);
+
+         /* Zip the components of both sources; they are represented as the
+          * X and Y components of the same vector.
+          */
+         const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+         const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+         if (size >= 1)
+            bld.MOV(writemask(srcs, WRITEMASK_X), src0);
+         if (size >= 2)
+            bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
+
+         return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC,
+                          emit_typed_message_header(bld),
+                          emit_insert(bld, addr, dims, has_simd4x2),
+                          has_simd4x2 ? 1 : dims,
+                          emit_insert(bld, src_reg(srcs), size, has_simd4x2),
+                          has_simd4x2 ? 1 : size,
+                          surface, op, rsize, pred);
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.h
new file mode 100644
index 0000000..6e61c0f
--- /dev/null
@@ -0,0 +1,69 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2013-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_SURFACE_BUILDER_H
+#define BRW_VEC4_SURFACE_BUILDER_H
+
+#include "brw_vec4_builder.h"
+
+namespace brw {
+   namespace surface_access {
+      src_reg
+      emit_untyped_read(const vec4_builder &bld,
+                        const src_reg &surface, const src_reg &addr,
+                        unsigned dims, unsigned size,
+                        brw_predicate pred = BRW_PREDICATE_NONE);
+
+      void
+      emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
+                         const src_reg &addr, const src_reg &src,
+                         unsigned dims, unsigned size,
+                         brw_predicate pred = BRW_PREDICATE_NONE);
+
+      src_reg
+      emit_untyped_atomic(const vec4_builder &bld,
+                          const src_reg &surface, const src_reg &addr,
+                          const src_reg &src0, const src_reg &src1,
+                          unsigned dims, unsigned rsize, unsigned op,
+                          brw_predicate pred = BRW_PREDICATE_NONE);
+
+      src_reg
+      emit_typed_read(const vec4_builder &bld, const src_reg &surface,
+                      const src_reg &addr, unsigned dims, unsigned size);
+
+      void
+      emit_typed_write(const vec4_builder &bld, const src_reg &surface,
+                       const src_reg &addr, const src_reg &src,
+                       unsigned dims, unsigned size);
+
+      src_reg
+      emit_typed_atomic(const vec4_builder &bld, const src_reg &surface,
+                        const src_reg &addr,
+                        const src_reg &src0, const src_reg &src1,
+                        unsigned dims, unsigned rsize, unsigned op,
+                        brw_predicate pred = BRW_PREDICATE_NONE);
+   }
+}
+
+#endif
index 499f628..6d61112 100644
@@ -26,6 +26,8 @@
 #include "glsl/ir_uniform.h"
 #include "program/sampler.h"
 
+#define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
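+/* FIRST_SPILL_MRF is the lowest MRF used for scratch (spill) message
+ * payloads; scratch reads use FIRST_SPILL_MRF + 1 (see SCRATCH_READ and
+ * SCRATCH_WRITE below).
+ */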
+
 namespace brw {
 
 vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
@@ -256,7 +258,7 @@ vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
 
    inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
                                        dst, index);
-   inst->base_mrf = 14;
+   inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
    inst->mlen = 2;
 
    return inst;
@@ -270,7 +272,7 @@ vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
 
    inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
                                        dst, src, index);
-   inst->base_mrf = 13;
+   inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen);
    inst->mlen = 3;
 
    return inst;
@@ -756,22 +758,6 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
    }
 }
 
-void
-vec4_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
-{
-   for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
-      assert(this->uniforms < uniform_array_size);
-      this->uniform_vector_size[this->uniforms] = 4;
-      this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
-      this->userplane[i].type = BRW_REGISTER_TYPE_F;
-      for (int j = 0; j < 4; ++j) {
-         stage_prog_data->param[this->uniforms * 4 + j] =
-            (gl_constant_value *) &clip_planes[i][j];
-      }
-      ++this->uniforms;
-   }
-}
-
 /* Our support for builtin uniforms is even scarier than non-builtin.
  * It sits on top of the PROG_STATE_VAR parameters that are
  * automatically updated from GL context state.
@@ -1089,11 +1075,12 @@ vec4_visitor::visit(ir_variable *ir)
       break;
 
    case ir_var_uniform:
+   case ir_var_shader_storage:
       reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
 
       /* Thanks to the lower_ubo_reference pass, we will see only
-       * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
-       * variables, so no need for them to be in variable_ht.
+       * ir_binop_{ubo,ssbo}_load expressions and not ir_dereference_variable
+       * for UBO/SSBO variables, so no need for them to be in variable_ht.
        *
        * Some uniforms, such as samplers and atomic counters, have no actual
        * storage, so we should ignore them.
@@ -1401,7 +1388,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
                                            dst,
                                            surf_index,
                                            offset_reg);
-      pull->base_mrf = 14;
+      pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
       pull->mlen = 1;
    }
 
@@ -1599,6 +1586,10 @@ vec4_visitor::visit(ir_expression *ir)
       emit(MOV(result_dst, op[0]));
       break;
 
+   case ir_unop_ssbo_unsized_array_length:
+      unreachable("not reached: should be handled by lower_ubo_reference");
+      break;
+
    case ir_binop_add:
       emit(ADD(result_dst, op[0], op[1]));
       break;
@@ -1805,6 +1796,10 @@ vec4_visitor::visit(ir_expression *ir)
       emit(RNDE(result_dst, op[0]));
       break;
 
+   case ir_unop_get_buffer_size:
+      unreachable("not reached: not implemented");
+      break;
+
    case ir_binop_min:
       emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
       break;
@@ -1878,7 +1873,7 @@ vec4_visitor::visit(ir_expression *ir)
           */
          brw_mark_surface_used(&prog_data->base,
                                prog_data->base.binding_table.ubo_start +
-                               shader_prog->NumUniformBlocks - 1);
+                               shader_prog->NumBufferInterfaceBlocks - 1);
       }
 
       if (const_offset_ir) {
@@ -2567,6 +2562,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    case ir_tg4: opcode = offset_value.file != BAD_FILE
                          ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
    case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
+   case ir_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO; break;
    case ir_txb:
       unreachable("TXB is not valid for vertex shaders.");
    case ir_lod:
@@ -2586,13 +2582,15 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
     * - Texel offsets
     * - Gather channel selection
     * - Sampler indices too large to fit in a 4-bit value.
+    * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
     */
    inst->header_size =
       (devinfo->gen < 5 || devinfo->gen >= 9 ||
        inst->offset != 0 || op == ir_tg4 ||
+       op == ir_texture_samples ||
        is_high_sampler(sampler_reg)) ? 1 : 0;
    inst->base_mrf = 2;
-   inst->mlen = inst->header_size + 1; /* always at least one */
+   inst->mlen = inst->header_size;
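+   /* mlen is incremented below as each parameter is loaded into the MRFs;
+    * for the sampleinfo message no parameters follow, so the mandatory
+    * header keeps mlen non-zero.
+    */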
    inst->dst.writemask = WRITEMASK_XYZW;
    inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
 
@@ -2604,6 +2602,9 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    if (op == ir_txs || op == ir_query_levels) {
       int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
       emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
+      inst->mlen++;
+   } else if (op == ir_texture_samples) {
+      inst->dst.writemask = WRITEMASK_X;
    } else {
       /* Load the coordinate */
       /* FINISHME: gl_clamp_mask and saturate */
@@ -2612,6 +2613,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
 
       emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
                coordinate));
+      inst->mlen++;
 
       if (zero_mask != 0) {
          emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
@@ -2641,7 +2643,6 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
            mrf = param_base;
            writemask = WRITEMASK_W;
         }
-         lod.swizzle = BRW_SWIZZLE_XXXX;
         emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
       } else if (op == ir_txf) {
          emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
@@ -2710,7 +2711,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    }
 
    if (devinfo->gen == 6 && op == ir_tg4) {
-      emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
+      emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
    }
 
    swizzle_result(op, dest,
@@ -2762,7 +2763,7 @@ vec4_visitor::visit(ir_texture *ir)
     */
    if (ir->op == ir_tg4) {
       ir_constant *chan = ir->lod_info.component->as_constant();
-      int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
+      int swiz = GET_SWZ(key_tex->swizzles[sampler], chan->value.i[0]);
       if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
          dst_reg result(this, ir->type);
          this->result = src_reg(result);
@@ -2820,7 +2821,7 @@ vec4_visitor::visit(ir_texture *ir)
       ir->lod_info.sample_index->accept(this);
       sample_index = this->result;
 
-      if (devinfo->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
+      if (devinfo->gen >= 7 && key_tex->compressed_multisample_layout_mask & (1 << sampler))
          mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg);
       else
          mcs = src_reg(0u);
@@ -2835,6 +2836,7 @@ vec4_visitor::visit(ir_texture *ir)
    case ir_txb:
    case ir_lod:
    case ir_tg4:
+   case ir_texture_samples:
       break;
    }
 
@@ -2898,14 +2900,14 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
 uint32_t
 vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
 {
-   int swiz = GET_SWZ(key->tex.swizzles[sampler], gather_component);
+   int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
    switch (swiz) {
       case SWIZZLE_X: return 0;
       case SWIZZLE_Y:
          /* gather4 sampler is broken for green channel on RG32F --
           * we must ask for blue instead.
           */
-         if (key->tex.gather_channel_quirk_mask & (1<<sampler))
+         if (key_tex->gather_channel_quirk_mask & (1 << sampler))
             return 2;
          return 1;
       case SWIZZLE_Z: return 2;
@@ -2920,7 +2922,7 @@ vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
                              src_reg orig_val, uint32_t sampler,
                              const glsl_type *dest_type)
 {
-   int s = key->tex.swizzles[sampler];
+   int s = key_tex->swizzles[sampler];
 
    dst_reg swizzled_result = dest;
 
@@ -3122,7 +3124,8 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
 {
    if (devinfo->gen < 6 &&
        ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
-        key->userclip_active || devinfo->has_negative_rhw_bug)) {
+        output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
+        devinfo->has_negative_rhw_bug)) {
       dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
       dst_reg header1_w = header1;
       header1_w.writemask = WRITEMASK_W;
@@ -3137,7 +3140,7 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
         emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
       }
 
-      if (key->userclip_active) {
+      if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
          current_annotation = "Clipping flags";
          dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
          dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
@@ -3203,35 +3206,6 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
    }
 }
 
-void
-vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
-{
-   /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
-    *
-    *     "If a linked set of shaders forming the vertex stage contains no
-    *     static write to gl_ClipVertex or gl_ClipDistance, but the
-    *     application has requested clipping against user clip planes through
-    *     the API, then the coordinate written to gl_Position is used for
-    *     comparison against the user clip planes."
-    *
-    * This function is only called if the shader didn't write to
-    * gl_ClipDistance.  Accordingly, we use gl_ClipVertex to perform clipping
-    * if the user wrote to it; otherwise we use gl_Position.
-    */
-   gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
-   if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
-      clip_vertex = VARYING_SLOT_POS;
-   }
-
-   for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
-        ++i) {
-      reg.writemask = 1 << i;
-      emit(DP4(reg,
-               src_reg(output_reg[clip_vertex]),
-               src_reg(this->userplane[i + offset])));
-   }
-}
-
 vec4_instruction *
 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
 {
@@ -3278,21 +3252,6 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
    case BRW_VARYING_SLOT_PAD:
       /* No need to write to this slot */
       break;
-   case VARYING_SLOT_COL0:
-   case VARYING_SLOT_COL1:
-   case VARYING_SLOT_BFC0:
-   case VARYING_SLOT_BFC1: {
-      /* These built-in varyings are only supported in compatibility mode,
-       * and we only support GS in core profile.  So, this must be a vertex
-       * shader.
-       */
-      assert(stage == MESA_SHADER_VERTEX);
-      vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
-      if (((struct brw_vs_prog_key *) key)->clamp_vertex_color)
-         inst->saturate = true;
-      break;
-   }
-
    default:
       emit_generic_urb_slot(reg, varying);
       break;
@@ -3337,7 +3296,7 @@ vec4_visitor::emit_vertex()
     * may need to unspill a register or load from an array.  Those
     * reads would use MRFs 14-15.
     */
-   int max_usable_mrf = 13;
+   int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
 
    /* The following assertion verifies that max_usable_mrf causes an
     * even-numbered amount of URB write data, which will meet gen6's
@@ -3354,17 +3313,6 @@ vec4_visitor::emit_vertex()
       emit_ndc_computation();
    }
 
-   /* Lower legacy ff and ClipVertex clipping to clip distances */
-   if (key->userclip_active && !prog->UsesClipDistanceOut) {
-      current_annotation = "user clip distances";
-
-      output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
-      output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
-
-      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
-      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
-   }
-
    /* We may need to split this up into several URB writes, so do them in a
     * loop.
     */
@@ -3382,9 +3330,10 @@ vec4_visitor::emit_vertex()
                        prog_data->vue_map.slot_to_varying[slot]);
 
          /* If this was max_usable_mrf, we can't fit anything more into this
-          * URB WRITE.
+          * URB WRITE.  The same applies if we have reached the maximum
+          * message length available.
           */
-         if (mrf > max_usable_mrf) {
+         if (mrf > max_usable_mrf ||
+             align_interleaved_urb_mlen(devinfo, mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
             slot++;
             break;
          }
@@ -3763,7 +3712,7 @@ vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg)
 vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
                            void *log_data,
                            struct gl_program *prog,
-                           const struct brw_vue_prog_key *key,
+                           const struct brw_sampler_prog_key_data *key_tex,
                            struct brw_vue_prog_data *prog_data,
                           struct gl_shader_program *shader_prog,
                            gl_shader_stage stage,
@@ -3772,7 +3721,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
                            int shader_time_index)
    : backend_shader(compiler, log_data, mem_ctx,
                     shader_prog, prog, &prog_data->base, stage),
-     key(key),
+     key_tex(key_tex),
      prog_data(prog_data),
      sanity_param_count(0),
      fail_msg(NULL),
index 620f652..f4b50ba 100644
@@ -202,8 +202,94 @@ vec4_vs_visitor::emit_urb_write_opcode(bool complete)
 
 
 void
+vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying)
+{
+   reg.type = BRW_REGISTER_TYPE_F;
+   output_reg[varying].type = reg.type;
+
+   switch (varying) {
+   case VARYING_SLOT_COL0:
+   case VARYING_SLOT_COL1:
+   case VARYING_SLOT_BFC0:
+   case VARYING_SLOT_BFC1: {
+      /* These built-in varyings are only supported in compatibility mode,
+       * and we only support GS in core profile.  So, this must be a vertex
+       * shader.
+       */
+      vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
+      if (key->clamp_vertex_color)
+         inst->saturate = true;
+      break;
+   }
+   default:
+      return vec4_visitor::emit_urb_slot(reg, varying);
+   }
+}
+
+
+void
+vec4_vs_visitor::emit_clip_distances(dst_reg reg, int offset)
+{
+   /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
+    *
+    *     "If a linked set of shaders forming the vertex stage contains no
+    *     static write to gl_ClipVertex or gl_ClipDistance, but the
+    *     application has requested clipping against user clip planes through
+    *     the API, then the coordinate written to gl_Position is used for
+    *     comparison against the user clip planes."
+    *
+    * This function is only called if the shader didn't write to
+    * gl_ClipDistance.  Accordingly, we use gl_ClipVertex to perform clipping
+    * if the user wrote to it; otherwise we use gl_Position.
+    */
+   gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
+   if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
+      clip_vertex = VARYING_SLOT_POS;
+   }
+
+   for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
+        ++i) {
+      reg.writemask = 1 << i;
+      emit(DP4(reg,
+               src_reg(output_reg[clip_vertex]),
+               src_reg(this->userplane[i + offset])));
+   }
+}
+
+
+void
+vec4_vs_visitor::setup_uniform_clipplane_values()
+{
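+   /* Upload the enabled user clip planes as vec4 push constants and record
+    * their UNIFORM locations in userplane[] for emit_clip_distances().
+    */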
+   for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
+      assert(this->uniforms < uniform_array_size);
+      this->uniform_vector_size[this->uniforms] = 4;
+      this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
+      this->userplane[i].type = BRW_REGISTER_TYPE_F;
+      for (int j = 0; j < 4; ++j) {
+         stage_prog_data->param[this->uniforms * 4 + j] =
+            (gl_constant_value *) &clip_planes[i][j];
+      }
+      ++this->uniforms;
+   }
+}
+
+
+void
 vec4_vs_visitor::emit_thread_end()
 {
+   setup_uniform_clipplane_values();
+
+   /* Lower legacy ff and ClipVertex clipping to clip distances */
+   if (key->nr_userclip_plane_consts > 0) {
+      current_annotation = "user clip distances";
+
+      output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
+      output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
+
+      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
+      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
+   }
+
    /* For VS, we always end the thread by emitting a single vertex.
     * emit_urb_write_opcode() will take care of setting the eot flag on the
     * SEND instruction.
@@ -218,17 +304,19 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
                                  struct brw_vs_prog_data *vs_prog_data,
                                  struct gl_vertex_program *vp,
                                  struct gl_shader_program *prog,
+                                 gl_clip_plane *clip_planes,
                                  void *mem_ctx,
                                  int shader_time_index,
                                  bool use_legacy_snorm_formula)
    : vec4_visitor(compiler, log_data,
-                  &vp->Base, &key->base, &vs_prog_data->base, prog,
+                  &vp->Base, &key->tex, &vs_prog_data->base, prog,
                   MESA_SHADER_VERTEX,
                   mem_ctx, false /* no_spills */,
                   shader_time_index),
      key(key),
      vs_prog_data(vs_prog_data),
      vp(vp),
+     clip_planes(clip_planes),
      use_legacy_snorm_formula(use_legacy_snorm_formula)
 {
 }
index c53cb49..0c60bde 100644
@@ -98,10 +98,12 @@ brw_codegen_vs_prog(struct brw_context *brw,
    struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
    void *mem_ctx;
    int i;
-   struct gl_shader *vs = NULL;
+   struct brw_shader *vs = NULL;
+   bool start_busy = false;
+   double start_time = 0;
 
    if (prog)
-      vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
+      vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
 
    memset(&prog_data, 0, sizeof(prog_data));
 
@@ -121,16 +123,16 @@ brw_codegen_vs_prog(struct brw_context *brw,
        * case being a float value that gets blown up to a vec4, so be
        * conservative here.
        */
-      param_count = vs->num_uniform_components * 4 +
-                    vs->NumImages * BRW_IMAGE_PARAM_SIZE;
-      stage_prog_data->nr_image_params = vs->NumImages;
+      param_count = vs->base.num_uniform_components * 4 +
+                    vs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+      stage_prog_data->nr_image_params = vs->base.NumImages;
    } else {
       param_count = vp->program.Base.Parameters->NumParameters * 4;
    }
    /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
     * planes as uniforms.
     */
-   param_count += key->base.nr_userclip_plane_consts * 4;
+   param_count += key->nr_userclip_plane_consts * 4;
 
    stage_prog_data->param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
@@ -172,19 +174,26 @@ brw_codegen_vs_prog(struct brw_context *brw,
     * distance varying slots whenever clipping is enabled, even if the vertex
     * shader doesn't write to gl_ClipDistance.
     */
-   if (key->base.userclip_active) {
+   if (key->nr_userclip_plane_consts > 0) {
       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
    }
 
    brw_compute_vue_map(brw->intelScreen->devinfo,
-                       &prog_data.base.vue_map, outputs_written);
+                       &prog_data.base.vue_map, outputs_written,
+                       prog ? prog->SeparateShader : false);
 
    if (0) {
       _mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG,
                               true);
    }
 
+   if (unlikely(brw->perf_debug)) {
+      start_busy = (brw->batch.last_bo &&
+                    drm_intel_bo_busy(brw->batch.last_bo));
+      start_time = get_time();
+   }
+
    /* Emit GEN4 code.
     */
    program = brw_vs_emit(brw, mem_ctx, key, &prog_data,
@@ -194,6 +203,17 @@ brw_codegen_vs_prog(struct brw_context *brw,
       return false;
    }
 
+   if (unlikely(brw->perf_debug) && vs) {
+      if (vs->compiled_once) {
+         brw_vs_debug_recompile(brw, prog, key);
+      }
+      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+         perf_debug("VS compile took %.03f ms and stalled the GPU\n",
+                    (get_time() - start_time) * 1000);
+      }
+      vs->compiled_once = true;
+   }
+
    /* Scratch space is used for register spilling */
    if (prog_data.base.base.total_scratch) {
       brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo,
@@ -237,7 +257,7 @@ brw_vs_debug_recompile(struct brw_context *brw,
          if (c->cache_id == BRW_CACHE_VS_PROG) {
             old_key = c->key;
 
-            if (old_key->base.program_string_id == key->base.program_string_id)
+            if (old_key->program_string_id == key->program_string_id)
                break;
          }
       }
@@ -257,12 +277,9 @@ brw_vs_debug_recompile(struct brw_context *brw,
                          key->gl_attrib_wa_flags[i]);
    }
 
-   found |= key_debug(brw, "user clip flags",
-                      old_key->base.userclip_active, key->base.userclip_active);
-
-   found |= key_debug(brw, "user clipping planes as push constants",
-                      old_key->base.nr_userclip_plane_consts,
-                      key->base.nr_userclip_plane_consts);
+   found |= key_debug(brw, "legacy user clipping",
+                      old_key->nr_userclip_plane_consts,
+                      key->nr_userclip_plane_consts);
 
    found |= key_debug(brw, "copy edgeflag",
                       old_key->copy_edgeflag, key->copy_edgeflag);
@@ -271,29 +288,13 @@ brw_vs_debug_recompile(struct brw_context *brw,
    found |= key_debug(brw, "vertex color clamping",
                       old_key->clamp_vertex_color, key->clamp_vertex_color);
 
-   found |= brw_debug_recompile_sampler_key(brw, &old_key->base.tex,
-                                            &key->base.tex);
+   found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
 
    if (!found) {
       perf_debug("  Something else\n");
    }
 }
 
-
-void
-brw_setup_vue_key_clip_info(struct brw_context *brw,
-                            struct brw_vue_prog_key *key,
-                            bool program_uses_clip_distance)
-{
-   struct gl_context *ctx = &brw->ctx;
-
-   key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
-   if (key->userclip_active && !program_uses_clip_distance) {
-      key->nr_userclip_plane_consts
-         = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
-   }
-}
-
 static bool
 brw_vs_state_dirty(struct brw_context *brw)
 {
@@ -324,9 +325,14 @@ brw_vs_populate_key(struct brw_context *brw,
    /* Just upload the program verbatim for now.  Always send it all
     * the inputs it asks for, whether they are varying or not.
     */
-   key->base.program_string_id = vp->id;
-   brw_setup_vue_key_clip_info(brw, &key->base,
-                               vp->program.Base.UsesClipDistanceOut);
+   key->program_string_id = vp->id;
+
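+   /* Legacy user clipping is lowered to clip distance writes only for
+    * compatibility-profile programs that don't write gl_ClipDistance
+    * themselves.
+    */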
+   if (ctx->Transform.ClipPlanesEnabled != 0 &&
+       ctx->API == API_OPENGL_COMPAT &&
+       !vp->program.Base.UsesClipDistanceOut) {
+      key->nr_userclip_plane_consts =
+         _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
+   }
 
    /* _NEW_POLYGON */
    if (brw->gen < 6) {
@@ -350,7 +356,7 @@ brw_vs_populate_key(struct brw_context *brw,
 
    /* _NEW_TEXTURE */
    brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
-                                      &key->base.tex);
+                                      &key->tex);
 
    /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
    memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
@@ -381,19 +387,6 @@ brw_upload_vs_prog(struct brw_context *brw)
       assert(success);
    }
    brw->vs.base.prog_data = &brw->vs.prog_data->base.base;
-
-   if (memcmp(&brw->vs.prog_data->base.vue_map, &brw->vue_map_geom_out,
-              sizeof(brw->vue_map_geom_out)) != 0) {
-      brw->vue_map_vs = brw->vs.prog_data->base.vue_map;
-      brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_VS;
-      if (brw->gen < 6) {
-         /* No geometry shader support, so the VS VUE map is the VUE map for
-          * the output of the "geometry" portion of the pipeline.
-          */
-         brw->vue_map_geom_out = brw->vue_map_vs;
-         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-      }
-   }
 }
 
 bool
@@ -412,7 +405,8 @@ brw_vs_precompile(struct gl_context *ctx,
 
    memset(&key, 0, sizeof(key));
 
-   brw_vue_setup_prog_key_for_precompile(ctx, &key.base, bvp->id, &vp->Base);
+   brw_setup_tex_for_precompile(brw, &key.tex, prog);
+   key.program_string_id = bvp->id;
    key.clamp_vertex_color =
       (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
                                VARYING_BIT_BFC0 | VARYING_BIT_BFC1));
index 1d9bee1..3a847fc 100644
@@ -90,6 +90,7 @@ public:
                    struct brw_vs_prog_data *vs_prog_data,
                    struct gl_vertex_program *vp,
                    struct gl_shader_program *prog,
+                   gl_clip_plane *clip_planes,
                    void *mem_ctx,
                    int shader_time_index,
                    bool use_legacy_snorm_formula);
@@ -102,11 +103,14 @@ protected:
    virtual void emit_program_code();
    virtual void emit_thread_end();
    virtual void emit_urb_write_header(int mrf);
+   virtual void emit_urb_slot(dst_reg reg, int varying);
    virtual vec4_instruction *emit_urb_write_opcode(bool complete);
 
 private:
    int setup_attributes(int payload_reg);
    void setup_vp_regs();
+   void setup_uniform_clipplane_values();
+   void emit_clip_distances(dst_reg reg, int offset);
    dst_reg get_vp_dst_reg(const prog_dst_register &dst);
    src_reg get_vp_src_reg(const prog_src_register &src);
 
@@ -116,6 +120,8 @@ private:
    src_reg *vp_temp_regs;
    src_reg vp_addr_reg;
 
+   gl_clip_plane *clip_planes;
+
    bool use_legacy_snorm_formula;
 };
 
index fd7e56e..9bb48eb 100644
@@ -50,7 +50,7 @@
  */
 void
 brw_upload_pull_constants(struct brw_context *brw,
-                          GLbitfield brw_new_constbuf,
+                          GLbitfield64 brw_new_constbuf,
                           const struct gl_program *prog,
                           struct brw_stage_state *stage_state,
                           const struct brw_stage_prog_data *prog_data,
index 7687578..45662bd 100644
 #include "brw_context.h"
 
 static inline void
-assign_vue_slot(struct brw_vue_map *vue_map, int varying)
+assign_vue_slot(struct brw_vue_map *vue_map, int varying, int slot)
 {
    /* Make sure this varying hasn't been assigned a slot already */
    assert (vue_map->varying_to_slot[varying] == -1);
 
-   vue_map->varying_to_slot[varying] = vue_map->num_slots;
-   vue_map->slot_to_varying[vue_map->num_slots++] = varying;
+   vue_map->varying_to_slot[varying] = slot;
+   vue_map->slot_to_varying[slot] = varying;
 }
 
 /**
@@ -59,10 +59,18 @@ assign_vue_slot(struct brw_vue_map *vue_map, int varying)
 void
 brw_compute_vue_map(const struct brw_device_info *devinfo,
                     struct brw_vue_map *vue_map,
-                    GLbitfield64 slots_valid)
+                    GLbitfield64 slots_valid,
+                    bool separate)
 {
+   /* Keep using the packed/contiguous layout on old hardware; we only need
+    * the SSO layout when using geometry/tessellation shaders or 32 FS input
+    * varyings, which only exist on Gen >= 6.  The packed layout is also a
+    * bit more efficient.
+    */
+   if (devinfo->gen < 6)
+      separate = false;
+
    vue_map->slots_valid = slots_valid;
-   int i;
+   vue_map->separate = separate;
 
    /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they
     * are stored in the first VUE slot (VARYING_SLOT_PSIZ).
@@ -77,12 +85,13 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
     */
    STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
 
-   vue_map->num_slots = 0;
-   for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
+   for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
       vue_map->varying_to_slot[i] = -1;
-      vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT;
+      vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_PAD;
    }
 
+   int slot = 0;
+
    /* VUE header: format depends on chip generation and whether clipping is
     * enabled.
     *
@@ -98,9 +107,9 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
        * On Ironlake the VUE header is nominally 20 dwords, but the hardware
        * will accept the same header layout as Gen4 [and should be a bit faster]
        */
-      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
-      assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
-      assign_vue_slot(vue_map, VARYING_SLOT_POS);
+      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
+      assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC, slot++);
+      assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++);
    } else {
       /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
        * dword 0-3 of the header is indices, point width, clip flags.
@@ -109,40 +118,63 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
        * enabled.
        * dword 8-11 or 16-19 is the first vertex element data we fill.
        */
-      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
-      assign_vue_slot(vue_map, VARYING_SLOT_POS);
+      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
+      assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++);
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
-         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
+         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0, slot++);
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
-         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
+         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1, slot++);
 
       /* front and back colors need to be consecutive so that we can use
        * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
        * two-sided color.
        */
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0))
-         assign_vue_slot(vue_map, VARYING_SLOT_COL0);
+         assign_vue_slot(vue_map, VARYING_SLOT_COL0, slot++);
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0))
-         assign_vue_slot(vue_map, VARYING_SLOT_BFC0);
+         assign_vue_slot(vue_map, VARYING_SLOT_BFC0, slot++);
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1))
-         assign_vue_slot(vue_map, VARYING_SLOT_COL1);
+         assign_vue_slot(vue_map, VARYING_SLOT_COL1, slot++);
       if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1))
-         assign_vue_slot(vue_map, VARYING_SLOT_BFC1);
+         assign_vue_slot(vue_map, VARYING_SLOT_BFC1, slot++);
    }
 
-   /* The hardware doesn't care about the rest of the vertex outputs, so just
-    * assign them contiguously.  Don't reassign outputs that already have a
-    * slot.
+   /* The hardware doesn't care about the rest of the vertex outputs, so we
+    * can assign them however we like.  For normal programs, we simply assign
+    * them contiguously.
+    *
+    * For separate shader pipelines, we first assign built-in varyings
+    * contiguous slots.  This works because ARB_separate_shader_objects
+    * requires that all shaders have matching built-in varying interface
+    * blocks.  Next, we assign generic varyings based on their location
+    * (either explicit or linker assigned).  This guarantees a fixed layout.
     *
     * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX,
     * since it's encoded as the clip distances by emit_clip_distances().
     * However, it may be output by transform feedback, and we'd rather not
     * recompute state when TF changes, so we just always include it.
     */
-   for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
-      if ((slots_valid & BITFIELD64_BIT(i)) &&
-          vue_map->varying_to_slot[i] == -1) {
-         assign_vue_slot(vue_map, i);
+   GLbitfield64 builtins = slots_valid & BITFIELD64_MASK(VARYING_SLOT_VAR0);
+   while (builtins != 0) {
+      const int varying = ffsll(builtins) - 1;
+      if (vue_map->varying_to_slot[varying] == -1) {
+         assign_vue_slot(vue_map, varying, slot++);
       }
+      builtins &= ~BITFIELD64_BIT(varying);
    }
+
+   const int first_generic_slot = slot;
+   GLbitfield64 generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
+   while (generics != 0) {
+      const int varying = ffsll(generics) - 1;
+      if (separate) {
+         slot = first_generic_slot + varying - VARYING_SLOT_VAR0;
+         assign_vue_slot(vue_map, varying, slot);
+      } else {
+         assign_vue_slot(vue_map, varying, slot++);
+      }
+      generics &= ~BITFIELD64_BIT(varying);
+   }
+
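+   /* In separate mode the generic loop above assigns slots without
+    * post-incrementing, so "slot" holds the index of the last slot used and
+    * the count is slot + 1; in contiguous mode "slot" is already the count.
+    */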
+   vue_map->num_slots = separate ? slot + 1 : slot;
 }
index 41266f5..1faf2ea 100644
@@ -26,6 +26,7 @@
 #include "brw_context.h"
 #include "brw_wm.h"
 #include "brw_state.h"
+#include "brw_shader.h"
 #include "main/enums.h"
 #include "main/formats.h"
 #include "main/fbobject.h"
@@ -164,11 +165,13 @@ brw_codegen_wm_prog(struct brw_context *brw,
    void *mem_ctx = ralloc_context(NULL);
    struct brw_wm_prog_data prog_data;
    const GLuint *program;
-   struct gl_shader *fs = NULL;
+   struct brw_shader *fs = NULL;
    GLuint program_size;
+   bool start_busy = false;
+   double start_time = 0;
 
    if (prog)
-      fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+      fs = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
 
    memset(&prog_data, 0, sizeof(prog_data));
    /* key->alpha_test_func means simulating alpha testing via discards,
@@ -179,7 +182,7 @@ brw_codegen_wm_prog(struct brw_context *brw,
       fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
    prog_data.computed_depth_mode = computed_depth_mode(&fp->program);
 
-   prog_data.early_fragment_tests = fs && fs->EarlyFragmentTests;
+   prog_data.early_fragment_tests = fs && fs->base.EarlyFragmentTests;
 
    /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
    if (!prog)
@@ -191,9 +194,9 @@ brw_codegen_wm_prog(struct brw_context *brw,
     */
    int param_count;
    if (fs) {
-      param_count = fs->num_uniform_components +
-                    fs->NumImages * BRW_IMAGE_PARAM_SIZE;
-      prog_data.base.nr_image_params = fs->NumImages;
+      param_count = fs->base.num_uniform_components +
+                    fs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+      prog_data.base.nr_image_params = fs->base.NumImages;
    } else {
       param_count = fp->program.Base.Parameters->NumParameters * 4;
    }
@@ -213,6 +216,12 @@ brw_codegen_wm_prog(struct brw_context *brw,
                                            key->persample_shading,
                                            &fp->program);
 
+   if (unlikely(brw->perf_debug)) {
+      start_busy = (brw->batch.last_bo &&
+                    drm_intel_bo_busy(brw->batch.last_bo));
+      start_time = get_time();
+   }
+
    program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data,
                             &fp->program, prog, &program_size);
    if (program == NULL) {
@@ -220,6 +229,17 @@ brw_codegen_wm_prog(struct brw_context *brw,
       return false;
    }
 
+   if (unlikely(brw->perf_debug) && fs) {
+      if (fs->compiled_once)
+         brw_wm_debug_recompile(brw, prog, key);
+      fs->compiled_once = true;
+
+      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+         perf_debug("FS compile took %.03f ms and stalled the GPU\n",
+                    (get_time() - start_time) * 1000);
+      }
+   }
+
    if (prog_data.base.total_scratch) {
       brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo,
                         prog_data.base.total_scratch * brw->max_wm_threads);
@@ -642,3 +662,61 @@ brw_upload_wm_prog(struct brw_context *brw)
    }
    brw->wm.base.prog_data = &brw->wm.prog_data->base;
 }
+
+bool
+brw_fs_precompile(struct gl_context *ctx,
+                  struct gl_shader_program *shader_prog,
+                  struct gl_program *prog)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_wm_prog_key key;
+
+   struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+   struct brw_fragment_program *bfp = brw_fragment_program(fp);
+   bool program_uses_dfdy = fp->UsesDFdy;
+
+   memset(&key, 0, sizeof(key));
+
+   if (brw->gen < 6) {
+      if (fp->UsesKill)
+         key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+      if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+         key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+      /* Just assume depth testing. */
+      key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+      key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+   }
+
+   if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
+                                         BRW_FS_VARYING_INPUT_MASK) > 16)
+      key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+
+   brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
+
+   if (fp->Base.InputsRead & VARYING_BIT_POS) {
+      key.drawable_height = ctx->DrawBuffer->Height;
+   }
+
+   key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
+         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
+         BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
+
+   if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
+      key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
+                          key.nr_color_regions > 1;
+   }
+
+   key.program_string_id = bfp->id;
+
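+   /* brw_codegen_wm_prog() clobbers the current WM program state; save it
+    * here and restore it below so precompiling doesn't disturb the bound
+    * program.
+    */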
+   uint32_t old_prog_offset = brw->wm.base.prog_offset;
+   struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+
+   bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
+
+   brw->wm.base.prog_offset = old_prog_offset;
+   brw->wm.prog_data = old_prog_data;
+
+   return success;
+}
index 8213f4e..c931696 100644
@@ -411,6 +411,29 @@ brw_create_constant_surface(struct brw_context *brw,
 }
 
 /**
+ * Create the buffer surface.  Shader buffer variables will be read from
+ * and written to this buffer with Data Port Read/Write
+ * instructions/messages.
+ */
+void
+brw_create_buffer_surface(struct brw_context *brw,
+                          drm_intel_bo *bo,
+                          uint32_t offset,
+                          uint32_t size,
+                          uint32_t *out_offset,
+                          bool dword_pitch)
+{
+   /* Use a raw surface so we can reuse existing untyped read/write/atomic
+    * messages.  We need these specifically for the fragment shader since
+    * they include a pixel mask header that we need in order to ensure
+    * correct behavior with helper invocations, which must not write to the
+    * buffer.
+    */
+   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
+                                       BRW_SURFACEFORMAT_RAW,
+                                       size, 1, true);
+}
+
+/**
  * Set up a binding table entry for use by stream output logic (transform
  * feedback).
  *
@@ -848,10 +871,14 @@ brw_update_texture_surfaces(struct brw_context *brw)
    /* BRW_NEW_FRAGMENT_PROGRAM */
    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 
+   /* BRW_NEW_COMPUTE_PROGRAM */
+   struct gl_program *cs = (struct gl_program *) brw->compute_program;
+
    /* _NEW_TEXTURE */
    update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
    update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
    update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
+   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
 
    /* emit alternate set of surface state for gather. this
     * allows the surface format to be overridden for only the
     * gather4 messages. */
    if (brw->gen >= 7) {
      if (vs && vs->UsesGather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
      if (gs && gs->UsesGather)
@@ -863,6 +890,8 @@ brw_update_texture_surfaces(struct brw_context *brw)
          update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
       if (fs && fs->UsesGather)
          update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
+      if (cs && cs->UsesGather)
+         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
    }
 
    brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
@@ -872,6 +901,7 @@ const struct brw_tracked_state brw_texture_surfaces = {
    .dirty = {
       .mesa = _NEW_TEXTURE,
       .brw = BRW_NEW_BATCH |
+             BRW_NEW_COMPUTE_PROGRAM |
              BRW_NEW_FRAGMENT_PROGRAM |
              BRW_NEW_FS_PROG_DATA |
              BRW_NEW_GEOMETRY_PROGRAM |
@@ -898,25 +928,48 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
    uint32_t *surf_offsets =
       &stage_state->surf_offset[prog_data->binding_table.ubo_start];
 
-   for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
-      struct gl_uniform_buffer_binding *binding;
+   for (int i = 0; i < shader->NumUniformBlocks; i++) {
       struct intel_buffer_object *intel_bo;
 
-      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
-      intel_bo = intel_buffer_object(binding->BufferObject);
-      drm_intel_bo *bo =
-         intel_bufferobj_buffer(brw, intel_bo,
-                                binding->Offset,
-                                binding->BufferObject->Size - binding->Offset);
-
       /* Because behavior for referencing outside of the binding's size in the
        * glBindBufferRange case is undefined, we can just bind the whole buffer
        * glBindBufferBase wants and be a correct implementation.
        */
-      brw_create_constant_surface(brw, bo, binding->Offset,
-                                  bo->size - binding->Offset,
-                                  &surf_offsets[i],
-                                  dword_pitch);
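+      /* UBO blocks get a constant surface, SSBO blocks a raw buffer
+       * surface, and blocks left on the default (null) buffer object get a
+       * null surface.
+       */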
+      if (!shader->UniformBlocks[i].IsShaderStorage) {
+         struct gl_uniform_buffer_binding *binding;
+         binding =
+            &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
+         if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+            brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]);
+         } else {
+            intel_bo = intel_buffer_object(binding->BufferObject);
+            drm_intel_bo *bo =
+               intel_bufferobj_buffer(brw, intel_bo,
+                                      binding->Offset,
+                                      binding->BufferObject->Size - binding->Offset);
+            brw_create_constant_surface(brw, bo, binding->Offset,
+                                        binding->BufferObject->Size - binding->Offset,
+                                        &surf_offsets[i],
+                                        dword_pitch);
+         }
+      } else {
+         struct gl_shader_storage_buffer_binding *binding;
+         binding =
+            &ctx->ShaderStorageBufferBindings[shader->UniformBlocks[i].Binding];
+         if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+            brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]);
+         } else {
+            intel_bo = intel_buffer_object(binding->BufferObject);
+            drm_intel_bo *bo =
+               intel_bufferobj_buffer(brw, intel_bo,
+                                      binding->Offset,
+                                      binding->BufferObject->Size - binding->Offset);
+            brw_create_buffer_surface(brw, bo, binding->Offset,
+                                      binding->BufferObject->Size - binding->Offset,
+                                      &surf_offsets[i],
+                                      dword_pitch);
+         }
+      }
    }
 
    if (shader->NumUniformBlocks)
@@ -1283,3 +1336,46 @@ gen4_init_vtable_surface_functions(struct brw_context *brw)
    brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
    brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
 }
+
+static void
+brw_upload_cs_work_groups_surface(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   /* _NEW_PROGRAM */
+   struct gl_shader_program *prog =
+      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+   if (prog && brw->cs.prog_data->uses_num_work_groups) {
+      const unsigned surf_idx =
+         brw->cs.prog_data->binding_table.work_groups_start;
+      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
+      drm_intel_bo *bo;
+      uint32_t bo_offset;
+
+      if (brw->compute.num_work_groups_bo == NULL) {
+         bo = NULL;
+         intel_upload_data(brw,
+                           (void *)brw->compute.num_work_groups,
+                           3 * sizeof(GLuint),
+                           sizeof(GLuint),
+                           &bo,
+                           &bo_offset);
+      } else {
+         bo = brw->compute.num_work_groups_bo;
+         bo_offset = brw->compute.num_work_groups_offset;
+      }
+
+      brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
+                                          bo, bo_offset,
+                                          BRW_SURFACEFORMAT_RAW,
+                                          3 * sizeof(GLuint), 1, true);
+      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+   }
+}
+
+const struct brw_tracked_state brw_cs_work_groups_surface = {
+   .dirty = {
+      .brw = BRW_NEW_CS_WORK_GROUPS
+   },
+   .emit = brw_upload_cs_work_groups_surface,
+};
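
The surface above feeds gl_NumWorkGroups: a direct dispatch leaves num_work_groups_bo NULL, so the three counts are uploaded on the fly, while an indirect dispatch reuses the application's buffer. A hedged sketch of the two GL entry points (indirect_bo is a hypothetical buffer name):

   /* Direct: the counts live in brw->compute.num_work_groups and get
    * uploaded via intel_upload_data(). */
   glDispatchCompute(8, 4, 2);

   /* Indirect: the counts already live in a buffer object, which the
    * driver can surface directly as num_work_groups_bo. */
   glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, indirect_bo);
   glDispatchComputeIndirect(0);
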
index 54c4a6d..cba5c2f 100644 (file)
@@ -486,7 +486,6 @@ gen6_blorp_emit_sampler_state(struct brw_context *brw,
                           0, /* min LOD */
                           max_lod,
                           0, /* LOD bias */
-                          0, /* base miplevel */
                           0, /* shadow function */
                           non_normalized_coords,
                           0); /* border color offset - unused */
index 68e443d..4c9c960 100644 (file)
@@ -149,81 +149,85 @@ gen6_gs_visitor::emit_prolog()
 void
 gen6_gs_visitor::visit(ir_emit_vertex *ir)
 {
+   /* To ensure that we don't output more vertices than the shader specified
+    * using max_vertices, do the logic inside a conditional of the form "if
+    * (vertex_count < MAX)"
+    */
+   unsigned num_output_vertices = c->gp->program.VerticesOut;
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+
    gs_emit_vertex(ir->stream_id());
+
+   this->current_annotation = "emit vertex: increment vertex count";
+   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+            src_reg(1u)));
+
+   emit(BRW_OPCODE_ENDIF);
 }
+
 void
 gen6_gs_visitor::gs_emit_vertex(int stream_id)
 {
    this->current_annotation = "gen6 emit vertex";
-   /* Honor max_vertex layout indication in geometry shader by ignoring any
-    * vertices coming after c->gp->program.VerticesOut.
-    */
-   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices),
-            BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
-   {
-      /* Buffer all output slots for this vertex in vertex_output */
-      for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
-         int varying = prog_data->vue_map.slot_to_varying[slot];
-         if (varying != VARYING_SLOT_PSIZ) {
-            dst_reg dst(this->vertex_output);
-            dst.reladdr = ralloc(mem_ctx, src_reg);
-            memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-            emit_urb_slot(dst, varying);
-         } else {
-            /* The PSIZ slot can pack multiple varyings in different channels
-             * and emit_urb_slot() will produce a MOV instruction for each of
-             * them. Since we are writing to an array, that will translate to
-             * possibly multiple MOV instructions with an array destination and
-             * each will generate a scratch write with the same offset into
-             * scratch space (thus, each one overwriting the previous). This is
-             * not what we want. What we will do instead is emit PSIZ to a
-             * a regular temporary register, then move that resgister into the
-             * array. This way we only have one instruction with an array
-             * destination and we only produce a single scratch write.
-             */
-            dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
-            emit_urb_slot(tmp, varying);
-            dst_reg dst(this->vertex_output);
-            dst.reladdr = ralloc(mem_ctx, src_reg);
-            memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-            vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
-            inst->force_writemask_all = true;
-         }
 
-         emit(ADD(dst_reg(this->vertex_output_offset),
-                  this->vertex_output_offset, 1u));
-      }
-
-      /* Now buffer flags for this vertex */
-      dst_reg dst(this->vertex_output);
-      dst.reladdr = ralloc(mem_ctx, src_reg);
-      memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-      if (c->gp->program.OutputType == GL_POINTS) {
-         /* If we are outputting points, then every vertex has PrimStart and
-          * PrimEnd set.
-          */
-         emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
-                  URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
-         emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+   /* Buffer all output slots for this vertex in vertex_output */
+   for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
+      int varying = prog_data->vue_map.slot_to_varying[slot];
+      if (varying != VARYING_SLOT_PSIZ) {
+         dst_reg dst(this->vertex_output);
+         dst.reladdr = ralloc(mem_ctx, src_reg);
+         memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+         emit_urb_slot(dst, varying);
       } else {
-         /* Otherwise, we can only set the PrimStart flag, which we have stored
-          * in the first_vertex register. We will have to wait until we execute
-          * EndPrimitive() or we end the thread to set the PrimEnd flag on a
-          * vertex.
+         /* The PSIZ slot can pack multiple varyings in different channels
+          * and emit_urb_slot() will produce a MOV instruction for each of
+          * them. Since we are writing to an array, that will translate to
+          * possibly multiple MOV instructions with an array destination and
+          * each will generate a scratch write with the same offset into
+          * scratch space (thus, each one overwriting the previous). This is
+          * not what we want. What we will do instead is emit PSIZ to a
+          * regular temporary register, then move that register into the
+          * array. This way we only have one instruction with an array
+          * destination and we only produce a single scratch write.
           */
-         emit(OR(dst, this->first_vertex,
-                 (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
-         emit(MOV(dst_reg(this->first_vertex), 0u));
+         dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
+         emit_urb_slot(tmp, varying);
+         dst_reg dst(this->vertex_output);
+         dst.reladdr = ralloc(mem_ctx, src_reg);
+         memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+         vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
+         inst->force_writemask_all = true;
       }
+
       emit(ADD(dst_reg(this->vertex_output_offset),
                this->vertex_output_offset, 1u));
+   }
 
-      /* Update vertex count */
-      emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u));
+   /* Now buffer flags for this vertex */
+   dst_reg dst(this->vertex_output);
+   dst.reladdr = ralloc(mem_ctx, src_reg);
+   memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+   if (c->gp->program.OutputType == GL_POINTS) {
+      /* If we are outputting points, then every vertex has PrimStart and
+       * PrimEnd set.
+       */
+      emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
+               URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
+      emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+   } else {
+      /* Otherwise, we can only set the PrimStart flag, which we have stored
+       * in the first_vertex register. We will have to wait until we execute
+       * EndPrimitive() or we end the thread to set the PrimEnd flag on a
+       * vertex.
+       */
+      emit(OR(dst, this->first_vertex,
+              (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
+      emit(MOV(dst_reg(this->first_vertex), 0u));
    }
-   emit(BRW_OPCODE_ENDIF);
+   emit(ADD(dst_reg(this->vertex_output_offset),
+            this->vertex_output_offset, 1u));
 }
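
The net effect of this refactor is that the max_vertices clamp and the vertex_count increment now live in visit(ir_emit_vertex), leaving gs_emit_vertex() unconditional so it can be reused by callers that manage the guard themselves. Schematically, the control flow generated per EmitVertex() call looks like this (a sketch of the emitted structure, not literal IR):

   if (vertex_count < max_vertices) {   /* CMP + IF */
      /* ... buffer all VUE slots and the per-vertex flags ... */
      vertex_count = vertex_count + 1;  /* ADD */
   }                                    /* ENDIF */
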
 
 void
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c
new file mode 100644 (file)
index 0000000..0b88b2c
--- /dev/null
@@ -0,0 +1,350 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/ralloc.h"
+#include "brw_context.h"
+#include "brw_cs.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "brw_shader.h"
+#include "intel_mipmap_tree.h"
+#include "intel_batchbuffer.h"
+#include "brw_state.h"
+
+static unsigned
+get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data)
+{
+   const unsigned simd_size = cs_prog_data->simd_size;
+   unsigned group_size = cs_prog_data->local_size[0] *
+      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
+
+   return (group_size + simd_size - 1) / simd_size;
+}
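
A worked instance of the ceiling division above, with illustrative sizes: a local_size of 8x8x1 is a 64-invocation group, so a SIMD16 kernel needs 4 hardware threads while SIMD8 needs 8, and a 60-invocation group at SIMD16 still rounds up to 4.

   unsigned group_size = 8 * 8 * 1;                   /* 64 invocations */
   unsigned simd16_threads = (group_size + 15) / 16;  /* -> 4 */
   unsigned simd8_threads  = (group_size + 7) / 8;    /* -> 8 */
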
+
+
+static void
+brw_upload_cs_state(struct brw_context *brw)
+{
+   if (!brw->cs.prog_data)
+      return;
+
+   uint32_t offset;
+   uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+                                                8 * 4, 64, &offset);
+   struct gl_program *prog = (struct gl_program *) brw->compute_program;
+   struct brw_stage_state *stage_state = &brw->cs.base;
+   struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
+   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
+
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+      brw->vtbl.emit_buffer_surface_state(
+         brw, &stage_state->surf_offset[
+                 prog_data->binding_table.shader_time_start],
+         brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
+         brw->shader_time.bo->size, 1, true);
+   }
+
+   uint32_t *bind = (uint32_t*) brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+                                            prog_data->binding_table.size_bytes,
+                                            32, &stage_state->bind_bo_offset);
+
+   unsigned local_id_dwords = 0;
+
+   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+      local_id_dwords =
+         brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
+   }
+
+   unsigned push_constant_data_size =
+      (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
+   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
+   unsigned push_constant_regs = reg_aligned_constant_size / 32;
+   unsigned threads = get_cs_thread_count(cs_prog_data);
+
+   uint32_t dwords = brw->gen < 8 ? 8 : 9;
+   BEGIN_BATCH(dwords);
+   OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
+
+   if (prog_data->total_scratch) {
+      if (brw->gen >= 8)
+         OUT_RELOC64(stage_state->scratch_bo,
+                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                     ffs(prog_data->total_scratch) - 11);
+      else
+         OUT_RELOC(stage_state->scratch_bo,
+                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                   ffs(prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+      if (brw->gen >= 8)
+         OUT_BATCH(0);
+   }
+
+   const uint32_t vfe_num_urb_entries = brw->gen >= 8 ? 2 : 0;
+   const uint32_t vfe_gpgpu_mode =
+      brw->gen == 7 ? SET_FIELD(1, GEN7_MEDIA_VFE_STATE_GPGPU_MODE) : 0;
+   OUT_BATCH(SET_FIELD(brw->max_cs_threads - 1, MEDIA_VFE_STATE_MAX_THREADS) |
+             SET_FIELD(vfe_num_urb_entries, MEDIA_VFE_STATE_URB_ENTRIES) |
+             SET_FIELD(1, MEDIA_VFE_STATE_RESET_GTW_TIMER) |
+             SET_FIELD(1, MEDIA_VFE_STATE_BYPASS_GTW) |
+             vfe_gpgpu_mode);
+
+   OUT_BATCH(0);
+   const uint32_t vfe_urb_allocation = brw->gen >= 8 ? 2 : 0;
+
+   /* We are uploading duplicated copies of the push constant uniforms for
+    * each thread. Although the local id data needs to vary per thread, the
+    * other uniform data does not change across threads. Unfortunately this
+    * duplication is required for gen7. As of Haswell, the duplication can be
+    * avoided, but this older mechanism with duplicated data continues to work.
+    *
+    * FINISHME: As of Haswell, we could make use of the
+    * INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length" field
+    * to only store one copy of uniform data.
+    *
+    * FINISHME: Broadwell adds a new alternative "Indirect Payload Storage"
+    * which is described in the GPGPU_WALKER command and in the Broadwell PRM
+    * Volume 7: 3D Media GPGPU, under Media GPGPU Pipeline => Mode of
+    * Operations => GPGPU Mode => Indirect Payload Storage.
+    *
+    * Note: The constant data is built in brw_upload_cs_push_constants below.
+    */
+   const uint32_t vfe_curbe_allocation = push_constant_regs * threads;
+   OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC) |
+             SET_FIELD(vfe_curbe_allocation, MEDIA_VFE_STATE_CURBE_ALLOC));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   if (reg_aligned_constant_size > 0) {
+      BEGIN_BATCH(4);
+      OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(reg_aligned_constant_size * threads);
+      OUT_BATCH(stage_state->push_const_offset);
+      ADVANCE_BATCH();
+   }
+
+   /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
+   memcpy(bind, stage_state->surf_offset,
+          prog_data->binding_table.size_bytes);
+
+   memset(desc, 0, 8 * 4);
+
+   int dw = 0;
+   desc[dw++] = brw->cs.base.prog_offset;
+   if (brw->gen >= 8)
+      desc[dw++] = 0; /* Kernel Start Pointer High */
+   desc[dw++] = 0;
+   desc[dw++] = stage_state->sampler_offset |
+      ((stage_state->sampler_count + 3) / 4);
+   desc[dw++] = stage_state->bind_bo_offset;
+   desc[dw++] = SET_FIELD(push_constant_regs, MEDIA_CURBE_READ_LENGTH);
+   const uint32_t media_threads =
+      brw->gen >= 8 ?
+      SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
+      SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
+   assert(threads <= brw->max_cs_threads);
+   desc[dw++] =
+      SET_FIELD(cs_prog_data->uses_barrier, MEDIA_BARRIER_ENABLE) |
+      media_threads;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(8 * 4);
+   OUT_BATCH(offset);
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_cs_state = {
+   .dirty = {
+      .mesa = _NEW_PROGRAM_CONSTANTS,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_CS_PROG_DATA |
+             BRW_NEW_PUSH_CONSTANT_ALLOCATION |
+             BRW_NEW_SURFACES,
+   },
+   .emit = brw_upload_cs_state
+};
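
To make the CURBE sizing concrete, here is the arithmetic above with illustrative numbers (nr_params, local_id_dwords and threads are assumptions for this sketch; a gl_constant_value is 4 bytes and a GRF is 32):

   unsigned nr_params = 24, local_id_dwords = 24, threads = 8;
   unsigned bytes = (nr_params + local_id_dwords) * 4;   /* 192 bytes */
   unsigned reg_aligned = (bytes + 31) & ~31u;           /* ALIGN(, 32) = 192 */
   unsigned push_constant_regs = reg_aligned / 32;       /* 6 GRFs */
   unsigned vfe_curbe_allocation =
      push_constant_regs * threads;                      /* 48: one copy per thread */
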
+
+
+/**
+ * We are building the local ID push constant data using the simplest possible
+ * method. We simply push the local IDs directly as they should appear in the
+ * registers for the uvec3 gl_LocalInvocationID variable.
+ *
+ * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
+ * registers worth of push constant space.
+ *
+ * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
+ * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
+ * to be coordinated.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ *    no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ *    easily use 16-bit words rather than 32-bit dwords in the push constant
+ *    data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ *    conveying the data, and thereby reduce push constant usage.
+ *
+ */
+unsigned
+brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
+                                    unsigned dispatch_width)
+{
+   return 3 * dispatch_width;
+}
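
The factor of 3 is one dword per component of gl_LocalInvocationID, replicated across the SIMD width; since a 32-byte GRF holds 8 dwords, this reproduces the register counts given in the comment above:

   /* SIMD8:  3 * 8  = 24 dwords = 3 GRFs (8 dwords per 32-byte register).
    * SIMD16: 3 * 16 = 48 dwords = 6 GRFs.
    */
   unsigned simd8_dwords  = brw_cs_prog_local_id_payload_dwords(prog, 8);  /* 24 */
   unsigned simd16_dwords = brw_cs_prog_local_id_payload_dwords(prog, 16); /* 48 */
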
+
+
+static void
+fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
+                      void *buffer, unsigned *x, unsigned *y, unsigned *z)
+{
+   uint32_t *param = (uint32_t *)buffer;
+   for (unsigned i = 0; i < cs_prog_data->simd_size; i++) {
+      param[0 * cs_prog_data->simd_size + i] = *x;
+      param[1 * cs_prog_data->simd_size + i] = *y;
+      param[2 * cs_prog_data->simd_size + i] = *z;
+
+      (*x)++;
+      if (*x == cs_prog_data->local_size[0]) {
+         *x = 0;
+         (*y)++;
+         if (*y == cs_prog_data->local_size[1]) {
+            *y = 0;
+            (*z)++;
+            if (*z == cs_prog_data->local_size[2])
+               *z = 0;
+         }
+      }
+   }
+}
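
A self-contained model of the walk above can be run on the host to see the layout; this is a hypothetical demo, not driver code, using a SIMD8 kernel and a 4x2x1 workgroup:

#include <stdio.h>
#include <stdint.h>

/* Each channel i of the payload gets the x/y/z of the i-th invocation,
 * stored component-planar (all x, then all y, then all z). */
int main(void)
{
   const unsigned simd_size = 8;
   const unsigned local_size[3] = { 4, 2, 1 };
   uint32_t param[3 * 8];
   unsigned x = 0, y = 0, z = 0;

   for (unsigned i = 0; i < simd_size; i++) {
      param[0 * simd_size + i] = x;
      param[1 * simd_size + i] = y;
      param[2 * simd_size + i] = z;
      if (++x == local_size[0]) {
         x = 0;
         if (++y == local_size[1]) {
            y = 0;
            if (++z == local_size[2])
               z = 0;
         }
      }
   }

   /* Prints:
    *   x: 0 1 2 3 0 1 2 3
    *   y: 0 0 0 0 1 1 1 1
    *   z: 0 0 0 0 0 0 0 0 */
   const char *name[3] = { "x", "y", "z" };
   for (unsigned c = 0; c < 3; c++) {
      printf("%s:", name[c]);
      for (unsigned i = 0; i < simd_size; i++)
         printf(" %u", param[c * simd_size + i]);
      printf("\n");
   }
   return 0;
}
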
+
+
+/**
+ * Creates a region containing the push constants for the CS on gen7+.
+ *
+ * Push constants are constant values (such as GLSL uniforms) that are
+ * pre-loaded into a shader stage's register space at thread spawn time.
+ *
+ * For other stages, see brw_curbe.c:brw_upload_constant_buffer for the
+ * equivalent gen4/5 code and gen6_vs_state.c:gen6_upload_push_constants for
+ * gen6+.
+ */
+static void
+brw_upload_cs_push_constants(struct brw_context *brw,
+                             const struct gl_program *prog,
+                             const struct brw_cs_prog_data *cs_prog_data,
+                             struct brw_stage_state *stage_state,
+                             enum aub_state_struct_type type)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const struct brw_stage_prog_data *prog_data =
+      (struct brw_stage_prog_data*) cs_prog_data;
+   unsigned local_id_dwords = 0;
+
+   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+      local_id_dwords =
+         brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
+   }
+
+   /* Updates the ParameterValues[i] pointers for all parameters of the
+    * basic type of PROGRAM_STATE_VAR.
+    */
+   /* XXX: Should this happen somewhere before to get our state flag set? */
+   _mesa_load_state_parameters(ctx, prog->Parameters);
+
+   if (prog_data->nr_params == 0 && local_id_dwords == 0) {
+      stage_state->push_const_size = 0;
+   } else {
+      gl_constant_value *param;
+      unsigned i, t;
+
+      const unsigned push_constant_data_size =
+         (local_id_dwords + prog_data->nr_params) * sizeof(gl_constant_value);
+      const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
+      const unsigned param_aligned_count =
+         reg_aligned_constant_size / sizeof(*param);
+
+      unsigned threads = get_cs_thread_count(cs_prog_data);
+
+      param = (gl_constant_value*)
+         brw_state_batch(brw, type,
+                         reg_aligned_constant_size * threads,
+                         32, &stage_state->push_const_offset);
+      assert(param);
+
+      STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
+
+      /* _NEW_PROGRAM_CONSTANTS */
+      unsigned x = 0, y = 0, z = 0;
+      for (t = 0; t < threads; t++) {
+         gl_constant_value *next_param = &param[t * param_aligned_count];
+         if (local_id_dwords > 0) {
+            fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, &z);
+            next_param += local_id_dwords;
+         }
+         for (i = 0; i < prog_data->nr_params; i++) {
+            next_param[i] = *prog_data->param[i];
+         }
+      }
+
+      stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
+   }
+}
+
+
+static void
+gen7_upload_cs_push_constants(struct brw_context *brw)
+{
+   struct brw_stage_state *stage_state = &brw->cs.base;
+
+   /* BRW_NEW_COMPUTE_PROGRAM */
+   const struct brw_compute_program *cp =
+      (struct brw_compute_program *) brw->compute_program;
+
+   if (cp) {
+      /* CACHE_NEW_CS_PROG */
+      struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
+
+      brw_upload_cs_push_constants(brw, &cp->program.Base, cs_prog_data,
+                                   stage_state, AUB_TRACE_WM_CONSTANTS);
+   }
+}
+
+const struct brw_tracked_state gen7_cs_push_constants = {
+   .dirty = {
+      .mesa = _NEW_PROGRAM_CONSTANTS,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_COMPUTE_PROGRAM |
+             BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+   },
+   .emit = gen7_upload_cs_push_constants,
+};
index 81bd3b2..4195f4c 100644 (file)
@@ -90,6 +90,12 @@ gen8_upload_gs_state(struct brw_context *brw)
       uint32_t dw8 = brw->gs.prog_data->control_data_format <<
                      HSW_GS_CONTROL_DATA_FORMAT_SHIFT;
 
+      if (brw->gs.prog_data->static_vertex_count != -1) {
+         dw8 |= GEN8_GS_STATIC_OUTPUT |
+                SET_FIELD(brw->gs.prog_data->static_vertex_count,
+                          GEN8_GS_STATIC_VERTEX_COUNT);
+      }
+
       if (brw->gen < 9)
          dw7 |= (brw->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT;
       else
index b20038e..a46b252 100644 (file)
@@ -29,7 +29,8 @@
 /**
  * Define the base addresses which some state is referenced from.
  */
-void gen8_upload_state_base_address(struct brw_context *brw)
+static void
+gen8_upload_state_base_address(struct brw_context *brw)
 {
    uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
    int pkt_len = brw->gen >= 9 ? 19 : 16;
index ae18f0f..a686fed 100644 (file)
@@ -52,8 +52,12 @@ gen8_upload_ps_extra(struct brw_context *brw,
        _mesa_get_min_invocations_per_fragment(ctx, fp, false) > 1)
       dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
 
-   if (fp->Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN)
-      dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
+   if (fp->Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) {
+      if (brw->gen >= 9)
+         dw1 |= BRW_PSICMS_INNER << GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
+      else
+         dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
+   }
 
    if (prog_data->uses_omask)
       dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET;
index d2f333f..c5f1bae 100644 (file)
@@ -276,8 +276,13 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
    }
 
    if (aux_mt) {
+      uint32_t tile_w, tile_h;
+      assert(aux_mt->tiling == I915_TILING_Y);
+      intel_get_tile_dims(aux_mt->tiling, aux_mt->tr_mode,
+                          aux_mt->cpp, &tile_w, &tile_h);
       surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
-                SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
+                SET_FIELD((aux_mt->pitch / tile_w) - 1,
+                          GEN8_SURFACE_AUX_PITCH) |
                 aux_mode;
    } else {
       surf[6] = 0;
@@ -501,8 +506,13 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
    }
 
    if (aux_mt) {
+      uint32_t tile_w, tile_h;
+      assert(aux_mt->tiling == I915_TILING_Y);
+      intel_get_tile_dims(aux_mt->tiling, aux_mt->tr_mode,
+                          aux_mt->cpp, &tile_w, &tile_h);
       surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
-                SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
+                SET_FIELD((aux_mt->pitch / tile_w) - 1,
+                          GEN8_SURFACE_AUX_PITCH) |
                 aux_mode;
    } else {
       surf[6] = 0;
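
Both hunks replace a hardcoded 128 with a width queried from intel_get_tile_dims(): 128 bytes is the width of a legacy Y tile only, so the old divisor silently assumed Y tiling (hence the new asserts), and Yf/Ys tiled (tr_mode) surfaces need the real footprint. A sketch of the legacy footprints, stated as an assumption for illustration:

/* Legacy tile footprints (Yf/Ys tiles chosen via tr_mode have
 * different, cpp-dependent dimensions):
 *   X tile: 512 bytes wide x 8 rows
 *   Y tile: 128 bytes wide x 32 rows
 * so pitch/128 was only valid for Y-tiled aux surfaces. */
static void
legacy_tile_dims(uint32_t tiling, uint32_t *w, uint32_t *h)
{
   if (tiling == I915_TILING_X) {
      *w = 512; *h = 8;
   } else if (tiling == I915_TILING_Y) {
      *w = 128; *h = 32;
   } else {
      *w = 1; *h = 1;   /* linear: no tile footprint */
   }
}
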
index 85f20a0..0363bd3 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "intel_batchbuffer.h"
 #include "intel_buffer_objects.h"
index 84add92..2b177d3 100644 (file)
@@ -162,6 +162,8 @@ intel_batchbuffer_advance(struct brw_context *brw)
       abort();
    }
    batch->total = 0;
+#else
+   (void) brw;
 #endif
 }
 
index 6d92580..46fccc8 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
+ */
 
 #include "main/mtypes.h"
 #include "main/blit.h"
@@ -330,10 +327,6 @@ intel_miptree_blit(struct brw_context *brw,
    if (dst_flip)
       dst_y = minify(dst_mt->physical_height0, dst_level - dst_mt->first_level) - dst_y - height;
 
-   int src_pitch = src_mt->pitch;
-   if (src_flip != dst_flip)
-      src_pitch = -src_pitch;
-
    uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
    intel_miptree_get_image_offset(src_mt, src_level, src_slice,
                                   &src_image_x, &src_image_y);
@@ -356,7 +349,7 @@ intel_miptree_blit(struct brw_context *brw,
 
    if (!intelEmitCopyBlit(brw,
                           src_mt->cpp,
-                          src_pitch,
+                          src_flip == dst_flip ? src_mt->pitch : -src_mt->pitch,
                           src_mt->bo, src_mt->offset,
                           src_mt->tiling,
                           src_mt->tr_mode,
@@ -427,6 +420,10 @@ can_fast_copy_blit(struct brw_context *brw,
        dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE)
       return false;
 
+   /* The start pixel for Fast Copy blit should be on an OWord boundary. */
+   if ((dst_x * cpp | src_x * cpp) & 15)
+      return false;
+
    /* For all surface types buffers must be cacheline-aligned. */
    if ((dst_offset | src_offset) & 63)
       return false;
@@ -446,14 +443,6 @@ can_fast_copy_blit(struct brw_context *brw,
        (dst_tiling_none && dst_pitch % 16 != 0))
       return false;
 
-   /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
-    * (X direction width of the Tile). This means the pitch value will
-    * always be Cache Line aligned (64byte multiple).
-    */
-   if ((!dst_tiling_none && dst_pitch % 64 != 0) ||
-       (!src_tiling_none && src_pitch % 64 != 0))
-      return false;
-
    return true;
 }
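
The added OWord test folds two alignment checks into one expression: OR-ing the byte offsets of both start pixels and masking with 15 is nonzero iff either one misses a 16-byte boundary. An equivalent standalone form, as a sketch:

#include <stdbool.h>

/* True when both start pixels sit on an OWord (16-byte) boundary;
 * any low bit set in either byte offset trips the mask. */
static bool
starts_oword_aligned(unsigned src_x, unsigned dst_x, unsigned cpp)
{
   return (((src_x * cpp) | (dst_x * cpp)) & 15) == 0;
}
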
 
@@ -529,6 +518,8 @@ intelEmitCopyBlit(struct brw_context *brw,
    bool dst_y_tiled = dst_tiling == I915_TILING_Y;
    bool src_y_tiled = src_tiling == I915_TILING_Y;
    bool use_fast_copy_blit = false;
+   uint32_t src_tile_w, src_tile_h;
+   uint32_t dst_tile_w, dst_tile_h;
 
    if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
       return false;
@@ -557,6 +548,16 @@ intelEmitCopyBlit(struct brw_context *brw,
        src_buffer, src_pitch, src_offset, src_x, src_y,
        dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
 
+   intel_get_tile_dims(src_tiling, src_tr_mode, cpp, &src_tile_w, &src_tile_h);
+   intel_get_tile_dims(dst_tiling, dst_tr_mode, cpp, &dst_tile_w, &dst_tile_h);
+
+   /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
+    * (X direction width of the Tile). This is ensured while allocating the
+    * buffer object.
+    */
+   assert(src_tiling == I915_TILING_NONE || (src_pitch % src_tile_w) == 0);
+   assert(dst_tiling == I915_TILING_NONE || (dst_pitch % dst_tile_w) == 0);
+
    use_fast_copy_blit = can_fast_copy_blit(brw,
                                            src_buffer,
                                            src_x, src_y,
@@ -594,19 +595,7 @@ intelEmitCopyBlit(struct brw_context *brw,
                         dst_tiling, dst_tr_mode,
                         cpp, use_fast_copy_blit);
 
-      /* For tiled source and destination, pitch value should be specified
-       * as a number of Dwords.
-       */
-      if (dst_tiling != I915_TILING_NONE)
-         dst_pitch /= 4;
-
-      if (src_tiling != I915_TILING_NONE)
-         src_pitch /= 4;
-
    } else {
-      assert(!dst_y_tiled || (dst_pitch % 128) == 0);
-      assert(!src_y_tiled || (src_pitch % 128) == 0);
-
       /* For big formats (such as floating point), do the copy using 16 or
        * 32bpp and multiply the coordinates.
        */
@@ -643,17 +632,19 @@ intelEmitCopyBlit(struct brw_context *brw,
       CMD = xy_blit_cmd(src_tiling, src_tr_mode,
                         dst_tiling, dst_tr_mode,
                         cpp, use_fast_copy_blit);
+   }
 
-      if (dst_tiling != I915_TILING_NONE)
-         dst_pitch /= 4;
+   /* For tiled source and destination, pitch value should be specified
+    * as a number of Dwords.
+    */
+   if (dst_tiling != I915_TILING_NONE)
+      dst_pitch /= 4;
 
-      if (src_tiling != I915_TILING_NONE)
-         src_pitch /= 4;
-   }
+   if (src_tiling != I915_TILING_NONE)
+      src_pitch /= 4;
 
-   if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
+   if (dst_y2 <= dst_y || dst_x2 <= dst_x)
       return true;
-   }
 
    assert(dst_x < dst_x2);
    assert(dst_y < dst_y2);
@@ -796,47 +787,43 @@ intel_emit_linear_blit(struct brw_context *brw,
    int16_t src_x, dst_x;
    bool ok;
 
-   /* The pitch given to the GPU must be DWORD aligned, and
-    * we want width to match pitch. Max width is (1 << 15 - 1),
-    * rounding that down to the nearest DWORD is 1 << 15 - 4
-    */
-   pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
-   height = (pitch == 0) ? 1 : size / pitch;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   ok = intelEmitCopyBlit(brw, 1,
-                         pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
-                          INTEL_MIPTREE_TRMODE_NONE,
-                         pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
-                          INTEL_MIPTREE_TRMODE_NONE,
-                         src_x, 0, /* src x/y */
-                         dst_x, 0, /* dst x/y */
-                         pitch, height, /* w, h */
-                         GL_COPY);
-   if (!ok)
-      _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);
-
-   src_offset += pitch * height;
-   dst_offset += pitch * height;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   size -= pitch * height;
-   assert (size < (1 << 15));
-   pitch = ALIGN(size, 4);
-
-   if (size != 0) {
+   do {
+      /* The pitch given to the GPU must be DWORD aligned, and we want the
+       * width to match the pitch. Max width is (1 << 15) - 1; we stay 64
+       * bytes below that so the start-pixel offset (up to 63) still fits
+       * within the 15-bit limit, then round down to a DWORD boundary.
+       */
+      pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 64), 4);
+      height = (size < pitch || pitch == 0) ? 1 : size / pitch;
+
+      src_x = src_offset % 64;
+      dst_x = dst_offset % 64;
+      pitch = ALIGN(MIN2(size, (1 << 15) - 64), 4);
+      assert(src_x + pitch < 1 << 15);
+      assert(dst_x + pitch < 1 << 15);
+
       ok = intelEmitCopyBlit(brw, 1,
-                            pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+                             pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
                              INTEL_MIPTREE_TRMODE_NONE,
-                            pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+                             pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
                              INTEL_MIPTREE_TRMODE_NONE,
-                            src_x, 0, /* src x/y */
-                            dst_x, 0, /* dst x/y */
-                            size, 1, /* w, h */
-                            GL_COPY);
-      if (!ok)
-         _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
-   }
+                             src_x, 0, /* src x/y */
+                             dst_x, 0, /* dst x/y */
+                             MIN2(size, pitch), height, /* w, h */
+                             GL_COPY);
+      if (!ok) {
+         _mesa_problem(ctx, "Failed to linear blit %dx%d\n",
+                       MIN2(size, pitch), height);
+         return;
+      }
+
+      pitch *= height;
+      if (size <= pitch)
+         return;
+
+      src_offset += pitch;
+      dst_offset += pitch;
+      size -= pitch;
+   } while (1);
 }
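
The rewrite above turns the old fixed two-pass copy into a loop that keeps splitting off maximal rectangles until the remainder fits in one row, so arbitrarily large linear copies stay within the blitter's 15-bit width field. A hypothetical host-side model of just the chunking arithmetic (no GPU calls; ROUND_DOWN_TO()/ALIGN() are open-coded as the masks):

/* Returns how many blits a copy of 'size' bytes takes under the
 * 15-bit width limit, mirroring the loop above. */
static unsigned
count_linear_blit_chunks(unsigned size)
{
   unsigned chunks = 0;
   do {
      unsigned cap = size < (1u << 15) - 64 ? size : (1u << 15) - 64;
      unsigned pitch_down = cap & ~3u;          /* ROUND_DOWN_TO(cap, 4) */
      unsigned height =
         (size < pitch_down || pitch_down == 0) ? 1 : size / pitch_down;
      unsigned advance = ((cap + 3) & ~3u) * height;   /* ALIGN(cap, 4) */

      chunks++;
      if (size <= advance)
         return chunks;
      size -= advance;
   } while (1);
}

For example, count_linear_blit_chunks(100000) is 2: one 32704-byte-wide, 3-row rectangle (98112 bytes) followed by a 1888-byte tail row.
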
 
 /**
index c3d19a5..f4ed919 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTEL_BLIT_H
 #define INTEL_BLIT_H
index ff05b5c..7a5b3fc 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 /**
  * @file intel_buffer_objects.c
@@ -108,6 +106,8 @@ alloc_buffer_object(struct brw_context *brw,
     */
    if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
       brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
+   if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
+      brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
    if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
       brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
    if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
index 5eaf9dc..b523edc 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2005 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTEL_BUFFEROBJ_H
 #define INTEL_BUFFEROBJ_H
index 6ad67f1..c98e193 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "brw_context.h"
 #include "intel_buffers.h"
index 9014029..85f54b2 100644 (file)
@@ -1,6 +1,4 @@
-
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -8,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTEL_BUFFERS_H
 #define INTEL_BUFFERS_H
index ac2738f..d57651c 100644 (file)
  *    Jason Ekstrand <jason.ekstrand@intel.com>
  */
 
+#include "intel_fbo.h"
 #include "intel_tex.h"
 #include "intel_blit.h"
 #include "intel_mipmap_tree.h"
 #include "main/formats.h"
+#include "main/teximage.h"
 #include "drivers/common/meta.h"
 
 static bool
@@ -196,54 +198,86 @@ copy_image_with_memcpy(struct brw_context *brw,
    }
 }
 
+
 static void
 intel_copy_image_sub_data(struct gl_context *ctx,
                           struct gl_texture_image *src_image,
+                          struct gl_renderbuffer *src_renderbuffer,
                           int src_x, int src_y, int src_z,
                           struct gl_texture_image *dst_image,
+                          struct gl_renderbuffer *dst_renderbuffer,
                           int dst_x, int dst_y, int dst_z,
                           int src_width, int src_height)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct intel_texture_image *intel_src_image = intel_texture_image(src_image);
-   struct intel_texture_image *intel_dst_image = intel_texture_image(dst_image);
+   struct intel_mipmap_tree *src_mt, *dst_mt;
+   unsigned src_level, dst_level;
 
    if (_mesa_meta_CopyImageSubData_uncompressed(ctx,
-                                                src_image, src_x, src_y, src_z,
-                                                dst_image, dst_x, dst_y, dst_z,
+                                                src_image, src_renderbuffer,
+                                                src_x, src_y, src_z,
+                                                dst_image, dst_renderbuffer,
+                                                dst_x, dst_y, dst_z,
                                                 src_width, src_height)) {
       return;
    }
 
-   if (intel_src_image->mt->num_samples > 0 ||
-       intel_dst_image->mt->num_samples > 0) {
+   if (src_image) {
+      src_mt = intel_texture_image(src_image)->mt;
+   } else {
+      assert(src_renderbuffer);
+      src_mt = intel_renderbuffer(src_renderbuffer)->mt;
+      src_image = src_renderbuffer->TexImage;
+   }
+
+   if (dst_image) {
+      dst_mt = intel_texture_image(dst_image)->mt;
+   } else {
+      assert(dst_renderbuffer);
+      dst_mt = intel_renderbuffer(dst_renderbuffer)->mt;
+      dst_image = dst_renderbuffer->TexImage;
+   }
+
+   if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) {
       _mesa_problem(ctx, "Failed to copy multisampled texture with meta path\n");
       return;
    }
 
-   /* Cube maps actually have different images per face */
-   if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
-      src_z = src_image->Face;
-   if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
-      dst_z = dst_image->Face;
+   if (src_image) {
+      src_level = src_image->Level + src_image->TexObject->MinLevel;
+
+      /* Cube maps actually have different images per face */
+      if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
+         src_z = src_image->Face;
+   } else {
+      src_level = 0;
+   }
+
+   if (dst_image) {
+      dst_level = dst_image->Level + dst_image->TexObject->MinLevel;
+
+      /* Cube maps actually have different images per face */
+      if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
+         dst_z = dst_image->Face;
+   } else {
+      dst_level = 0;
+   }
 
    /* We are now going to try and copy the texture using the blitter.  If
     * that fails, we will fall back mapping the texture and using memcpy.
     * In either case, we need to do a full resolve.
     */
-   intel_miptree_all_slices_resolve_hiz(brw, intel_src_image->mt);
-   intel_miptree_all_slices_resolve_depth(brw, intel_src_image->mt);
-   intel_miptree_resolve_color(brw, intel_src_image->mt);
+   intel_miptree_all_slices_resolve_hiz(brw, src_mt);
+   intel_miptree_all_slices_resolve_depth(brw, src_mt);
+   intel_miptree_resolve_color(brw, src_mt);
 
-   intel_miptree_all_slices_resolve_hiz(brw, intel_dst_image->mt);
-   intel_miptree_all_slices_resolve_depth(brw, intel_dst_image->mt);
-   intel_miptree_resolve_color(brw, intel_dst_image->mt);
+   intel_miptree_all_slices_resolve_hiz(brw, dst_mt);
+   intel_miptree_all_slices_resolve_depth(brw, dst_mt);
+   intel_miptree_resolve_color(brw, dst_mt);
 
-   unsigned src_level = src_image->Level + src_image->TexObject->MinLevel;
-   unsigned dst_level = dst_image->Level + dst_image->TexObject->MinLevel;
-   if (copy_image_with_blitter(brw, intel_src_image->mt, src_level,
+   if (copy_image_with_blitter(brw, src_mt, src_level,
                                src_x, src_y, src_z,
-                               intel_dst_image->mt, dst_level,
+                               dst_mt, dst_level,
                                dst_x, dst_y, dst_z,
                                src_width, src_height))
       return;
@@ -251,9 +285,9 @@ intel_copy_image_sub_data(struct gl_context *ctx,
    /* This is a worst-case scenario software fallback that maps the two
     * textures and does a memcpy between them.
     */
-   copy_image_with_memcpy(brw, intel_src_image->mt, src_level,
+   copy_image_with_memcpy(brw, src_mt, src_level,
                           src_x, src_y, src_z,
-                          intel_dst_image->mt, dst_level,
+                          dst_mt, dst_level,
                           dst_x, dst_y, dst_z,
                           src_width, src_height);
 }
index b3b3c21..5a9c953 100644 (file)
@@ -69,7 +69,8 @@ static const struct dri_debug_control debug_control[] = {
    { "ann",         DEBUG_ANNOTATION },
    { "no8",         DEBUG_NO8 },
    { "vec4",        DEBUG_VEC4VS },
-   { "spill",       DEBUG_SPILL },
+   { "spill_fs",    DEBUG_SPILL_FS },
+   { "spill_vec4",  DEBUG_SPILL_VEC4 },
    { "cs",          DEBUG_CS },
    { NULL,    0 }
 };
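
Splitting DEBUG_SPILL gives the scalar (FS) and vec4 backends independent knobs. Presumably these are selected through the INTEL_DEBUG environment variable using the names in this table (e.g. INTEL_DEBUG=spill_fs,cs) and consumed as plain bit tests against the flags defined below:

   /* Sketch: Mesa uses these flags to exercise the spilling paths. */
   if (INTEL_DEBUG & DEBUG_SPILL_FS)
      fprintf(stderr, "spill_fs debug flag set\n");
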
index 4689492..b7d0c82 100644 (file)
@@ -64,8 +64,9 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_ANNOTATION          (1ull << 28)
 #define DEBUG_NO8                 (1ull << 29)
 #define DEBUG_VEC4VS              (1ull << 30)
-#define DEBUG_SPILL               (1ull << 31)
-#define DEBUG_CS                  (1ull << 32)
+#define DEBUG_SPILL_FS            (1ull << 31)
+#define DEBUG_SPILL_VEC4          (1ull << 32)
+#define DEBUG_CS                  (1ull << 33)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
index 4365b71..3c77f47 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/version.h"
 
@@ -250,6 +248,7 @@ intelInitExtensions(struct gl_context *ctx)
    ctx->Extensions.MESA_pack_invert = true;
    ctx->Extensions.NV_conditional_render = true;
    ctx->Extensions.NV_primitive_restart = true;
+   ctx->Extensions.NV_texture_barrier = true;
    ctx->Extensions.NV_texture_env_combine4 = true;
    ctx->Extensions.NV_texture_rectangle = true;
    ctx->Extensions.TDFX_texture_compression_FXT1 = true;
@@ -327,8 +326,10 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.ARB_shader_atomic_counters = true;
       ctx->Extensions.ARB_shader_image_load_store = true;
       ctx->Extensions.ARB_shader_image_size = true;
+      ctx->Extensions.ARB_shader_texture_image_samples = true;
       ctx->Extensions.ARB_texture_compression_bptc = true;
       ctx->Extensions.ARB_texture_view = true;
+      ctx->Extensions.ARB_shader_storage_buffer_object = true;
 
       if (can_do_pipelined_register_writes(brw)) {
          ctx->Extensions.ARB_draw_indirect = true;
@@ -354,6 +355,11 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.ARB_stencil_texturing = true;
    }
 
+   if (brw->gen >= 9) {
+      ctx->Extensions.KHR_texture_compression_astc_ldr = true;
+      ctx->Extensions.KHR_texture_compression_astc_hdr = true;
+   }
+
    if (ctx->API == API_OPENGL_CORE)
       ctx->Extensions.ARB_base_instance = true;
    if (ctx->API != API_OPENGL_CORE)
index 64d57e8..6b2349e 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
+ */
 
 #include "main/enums.h"
 #include "main/imports.h"
@@ -415,6 +412,7 @@ static GLboolean
 intel_alloc_window_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
                            GLenum internalFormat, GLuint width, GLuint height)
 {
+   (void) ctx;
    assert(rb->Name == 0);
    rb->Width = width;
    rb->Height = height;
@@ -428,6 +426,10 @@ static GLboolean
 intel_nop_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
                         GLenum internalFormat, GLuint width, GLuint height)
 {
+   (void) rb;
+   (void) internalFormat;
+   (void) width;
+   (void) height;
    _mesa_problem(ctx, "intel_nop_alloc_storage should never be called.");
    return false;
 }
@@ -787,7 +789,7 @@ intel_blit_framebuffer_with_blitter(struct gl_context *ctx,
                                     GLint srcX1, GLint srcY1,
                                     GLint dstX0, GLint dstY0,
                                     GLint dstX1, GLint dstY1,
-                                    GLbitfield mask, GLenum filter)
+                                    GLbitfield mask)
 {
    struct brw_context *brw = brw_context(ctx);
 
@@ -907,7 +909,7 @@ intel_blit_framebuffer(struct gl_context *ctx,
    mask = intel_blit_framebuffer_with_blitter(ctx, readFb, drawFb,
                                               srcX0, srcY0, srcX1, srcY1,
                                               dstX0, dstY0, dstX1, dstY1,
-                                              mask, filter);
+                                              mask);
    if (mask == 0x0)
       return;
 
@@ -945,7 +947,7 @@ gen4_blit_framebuffer(struct gl_context *ctx,
    mask = intel_blit_framebuffer_with_blitter(ctx, readFb, drawFb,
                                               srcX0, srcY0, srcX1, srcY1,
                                               dstX0, dstY0, dstX1, dstY1,
-                                              mask, filter);
+                                              mask);
    if (mask == 0x0)
       return;
 
@@ -1025,7 +1027,7 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
    uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
                            MIPTREE_LAYOUT_TILING_ANY;
 
-   intel_miptree_get_dimensions_for_image(rb->TexImage, &width, &height, &depth);
+   intel_get_image_dims(rb->TexImage, &width, &height, &depth);
 
    new_mt = intel_miptree_create(brw, rb->TexImage->TexObject->Target,
                                  intel_image->base.Base.TexFormat,
index c7cc570..5ba4c8f 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTEL_FBO_H
 #define INTEL_FBO_H
index a82cf3b..9b3816e 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTEL_IMAGE_H
 #define INTEL_IMAGE_H
index 0bcbbbc..9c0304c 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include <GL/gl.h>
 #include <GL/internal/dri_interface.h>
 
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
+static void *intel_miptree_map_raw(struct brw_context *brw,
+                                   struct intel_mipmap_tree *mt);
+
+static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);
+
 static bool
 intel_miptree_alloc_mcs(struct brw_context *brw,
                         struct intel_mipmap_tree *mt,
@@ -59,7 +62,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
  * created, based on the chip generation and the surface type.
  */
 static enum intel_msaa_layout
-compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target,
+compute_msaa_layout(struct brw_context *brw, mesa_format format,
                     bool disable_aux_buffers)
 {
    /* Prior to Gen7, all MSAA surfaces used IMS layout. */
@@ -140,8 +143,7 @@ compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target,
  *   by half the block width, and Y coordinates by half the block height.
  */
 void
-intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
-                                 struct intel_mipmap_tree *mt,
+intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
                                  unsigned *width_px, unsigned *height)
 {
    switch (mt->tiling) {
@@ -322,7 +324,7 @@ intel_miptree_create_layout(struct brw_context *brw,
    if (num_samples > 1) {
       /* Adjust width/height/depth for MSAA */
       mt->msaa_layout = compute_msaa_layout(brw, format,
-                                            mt->target, mt->disable_aux_buffers);
+                                            mt->disable_aux_buffers);
       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
          /* From the Ivybridge PRM, Volume 1, Part 1, page 108:
           * "If the surface is multisampled and it is a depth or stencil
@@ -555,35 +557,15 @@ static unsigned long
 intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment,
                         unsigned long *pitch)
 {
-   const uint32_t bpp = mt->cpp * 8;
-   const uint32_t aspect_ratio = (bpp == 16 || bpp == 64) ? 2 : 1;
    uint32_t tile_width, tile_height;
    unsigned long stride, size, aligned_y;
 
    assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
-
-   switch (bpp) {
-   case 8:
-      tile_height = 64;
-      break;
-   case 16:
-   case 32:
-      tile_height = 32;
-      break;
-   case 64:
-   case 128:
-      tile_height = 16;
-      break;
-   default:
-      unreachable("not reached");
-   }
-
-   if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
-      tile_height *= 4;
+   intel_get_tile_dims(mt->tiling, mt->tr_mode, mt->cpp,
+                       &tile_width, &tile_height);
 
    aligned_y = ALIGN(mt->total_height, tile_height);
    stride = mt->total_width * mt->cpp;
-   tile_width = tile_height * mt->cpp * aspect_ratio;
    stride = ALIGN(stride, tile_width);
    size = stride * aligned_y;
 
@@ -939,12 +921,18 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
    *mt = NULL;
 }
 
+
 void
-intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
-                                       int *width, int *height, int *depth)
+intel_get_image_dims(struct gl_texture_image *image,
+                     int *width, int *height, int *depth)
 {
    switch (image->TexObject->Target) {
    case GL_TEXTURE_1D_ARRAY:
+      /* For a 1D array texture, the OpenGL API treats the image height as
+       * the number of array slices. Intel hardware instead treats a 1D
+       * array as a 2D array with a height of 1, so we swap the image
+       * height and depth here.
+       */
       *width = image->Width;
       *height = 1;
       *depth = image->Height;
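
For illustration (not part of the patch), the swap for a hypothetical 256-wide
GL_TEXTURE_1D_ARRAY with 16 layers, which GL reports as Width=256, Height=16:

    #include <stdio.h>

    /* Mirrors the GL_TEXTURE_1D_ARRAY case of intel_get_image_dims(). */
    int main(void)
    {
       int gl_width = 256, gl_height = 16;          /* GL's view          */
       int width = gl_width, height = 1, depth = gl_height;
       printf("hw view: %dx%dx%d\n", width, height, depth); /* 256x1x16  */
       return 0;
    }
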
@@ -988,7 +976,7 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt,
    if (image->TexFormat != mt_format)
       return false;
 
-   intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
+   intel_get_image_dims(image, &width, &height, &depth);
 
    if (mt->target == GL_TEXTURE_CUBE_MAP)
       depth = 6;
@@ -1073,37 +1061,82 @@ intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
    *y = mt->level[level].slice[slice].y_offset;
 }
 
+
+/**
+ * This function computes the tile_w (in bytes) and tile_h (in rows) for the
+ * different tiling patterns. If the BO is untiled, tile_w is set to cpp
+ * and tile_h is set to 1.
+ */
+void
+intel_get_tile_dims(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
+                    uint32_t *tile_w, uint32_t *tile_h)
+{
+   if (tr_mode == INTEL_MIPTREE_TRMODE_NONE) {
+      switch (tiling) {
+      case I915_TILING_X:
+         *tile_w = 512;
+         *tile_h = 8;
+         break;
+      case I915_TILING_Y:
+         *tile_w = 128;
+         *tile_h = 32;
+         break;
+      case I915_TILING_NONE:
+         *tile_w = cpp;
+         *tile_h = 1;
+         break;
+      default:
+         unreachable("not reached");
+      }
+   } else {
+      uint32_t aspect_ratio = 1;
+      assert(_mesa_is_pow_two(cpp));
+
+      switch (cpp) {
+      case 1:
+         *tile_h = 64;
+         break;
+      case 2:
+      case 4:
+         *tile_h = 32;
+         break;
+      case 8:
+      case 16:
+         *tile_h = 16;
+         break;
+      default:
+         unreachable("not reached");
+      }
+
+      if (cpp == 2 || cpp == 8)
+         aspect_ratio = 2;
+
+      if (tr_mode == INTEL_MIPTREE_TRMODE_YS)
+         *tile_h *= 4;
+
+      *tile_w = *tile_h * aspect_ratio * cpp;
+   }
+}
+
+
 /**
  * This function computes masks that may be used to select the bits of the X
  * and Y coordinates that indicate the offset within a tile.  If the BO is
  * untiled, the masks are set to 0.
  */
 void
-intel_miptree_get_tile_masks(const struct intel_mipmap_tree *mt,
-                             uint32_t *mask_x, uint32_t *mask_y,
-                             bool map_stencil_as_y_tiled)
+intel_get_tile_masks(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
+                     bool map_stencil_as_y_tiled,
+                     uint32_t *mask_x, uint32_t *mask_y)
 {
-   int cpp = mt->cpp;
-   uint32_t tiling = mt->tiling;
-
+   uint32_t tile_w_bytes, tile_h;
    if (map_stencil_as_y_tiled)
       tiling = I915_TILING_Y;
 
-   switch (tiling) {
-   default:
-      unreachable("not reached");
-   case I915_TILING_NONE:
-      *mask_x = *mask_y = 0;
-      break;
-   case I915_TILING_X:
-      *mask_x = 512 / cpp - 1;
-      *mask_y = 7;
-      break;
-   case I915_TILING_Y:
-      *mask_x = 128 / cpp - 1;
-      *mask_y = 31;
-      break;
-   }
+   intel_get_tile_dims(tiling, tr_mode, cpp, &tile_w_bytes, &tile_h);
+
+   *mask_x = tile_w_bytes / cpp - 1;
+   *mask_y = tile_h - 1;
 }
 
 /**
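
For illustration, a standalone sketch (not driver code) of the arithmetic the
new helpers share, assuming the classic layouts encoded above: an X tile spans
512 bytes by 8 rows, a Y tile 128 bytes by 32 rows, and the intra-tile masks
fall out of the dimensions as tile_w / cpp - 1 and tile_h - 1.

    #include <stdint.h>
    #include <stdio.h>

    enum tiling { TILING_NONE, TILING_X, TILING_Y };  /* cf. I915_TILING_* */

    static void tile_dims(enum tiling t, uint32_t cpp,
                          uint32_t *tile_w, uint32_t *tile_h)
    {
       switch (t) {
       case TILING_X:    *tile_w = 512; *tile_h = 8;  break; /* bytes x rows */
       case TILING_Y:    *tile_w = 128; *tile_h = 32; break;
       case TILING_NONE: *tile_w = cpp; *tile_h = 1;  break;
       }
    }

    int main(void)
    {
       uint32_t w, h;
       tile_dims(TILING_X, 4, &w, &h);
       /* X tiling at 4 bytes/pixel: mask_x = 512/4 - 1 = 127, mask_y = 7,
        * exactly what intel_get_tile_masks() now derives from the dims. */
       printf("mask_x=%u mask_y=%u\n", w / 4 - 1, h - 1);
       return 0;
    }
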
@@ -1168,7 +1201,7 @@ intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
    uint32_t x, y;
    uint32_t mask_x, mask_y;
 
-   intel_miptree_get_tile_masks(mt, &mask_x, &mask_y, false);
+   intel_get_tile_masks(mt->tiling, mt->tr_mode, mt->cpp, false, &mask_x, &mask_y);
    intel_miptree_get_image_offset(mt, level, slice, &x, &y);
 
    *tile_x = x & mask_x;
@@ -1401,7 +1434,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
     */
    void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
    memset(data, 0xff, mt->mcs_mt->total_height * mt->mcs_mt->pitch);
-   intel_miptree_unmap_raw(brw, mt->mcs_mt);
+   intel_miptree_unmap_raw(mt->mcs_mt);
    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
 
    return mt->mcs_mt;
@@ -1427,7 +1460,7 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
    const mesa_format format = MESA_FORMAT_R_UINT32;
    unsigned block_width_px;
    unsigned block_height;
-   intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height);
+   intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height);
    unsigned width_divisor = block_width_px * 4;
    unsigned height_divisor = block_height * 8;
    unsigned mcs_width =
@@ -1509,23 +1542,21 @@ intel_gen7_hiz_buf_create(struct brw_context *brw,
    /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
     * adjustments required for Z_Height and Z_Width based on multisampling.
     */
-   if (brw->gen < 9) {
-      switch (mt->num_samples) {
-      case 0:
-      case 1:
-         break;
-      case 2:
-      case 4:
-         z_width *= 2;
-         z_height *= 2;
-         break;
-      case 8:
-         z_width *= 4;
-         z_height *= 2;
-         break;
-      default:
-         unreachable("unsupported sample count");
-      }
+   switch (mt->num_samples) {
+   case 0:
+   case 1:
+      break;
+   case 2:
+   case 4:
+      z_width *= 2;
+      z_height *= 2;
+      break;
+   case 8:
+      z_width *= 4;
+      z_height *= 2;
+      break;
+   default:
+      unreachable("unsupported sample count");
    }
 
    const unsigned vertical_align = 8; /* 'j' in the docs */
@@ -1541,7 +1572,7 @@ intel_gen7_hiz_buf_create(struct brw_context *brw,
       unsigned H_i = H0;
       unsigned Z_i = Z0;
       hz_height = 0;
-      for (int level = mt->first_level; level <= mt->last_level; ++level) {
+      for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
          unsigned h_i = ALIGN(H_i, vertical_align);
          /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
          hz_height += h_i * Z_i;
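
To make the accumulation concrete, a standalone sketch for an illustrative
64x64 array texture with Z0 = 4 slices and a full mip chain (the real function
goes on to scale and align this sum; only the per-level loop is mirrored):

    #include <stdio.h>

    int main(void)
    {
       unsigned H_i = 64, Z_i = 4, hz_height = 0;
       for (unsigned level = 0; level <= 6; ++level) {
          unsigned h_i = (H_i + 7) & ~7u;      /* ALIGN(H_i, vertical_align) */
          hz_height += h_i * Z_i;              /* h_i * max(1, Z0 / 2^i)     */
          H_i = H_i > 1 ? H_i / 2 : 1;         /* minify for the next level  */
          Z_i = Z_i > 1 ? Z_i / 2 : 1;
       }
       printf("hz_height sum = %u rows\n", hz_height); /* 368 for this case */
       return 0;
    }
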
@@ -1605,21 +1636,23 @@ intel_gen8_hiz_buf_create(struct brw_context *brw,
    /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
     * adjustments required for Z_Height and Z_Width based on multisampling.
     */
-   switch (mt->num_samples) {
-   case 0:
-   case 1:
-      break;
-   case 2:
-   case 4:
-      z_width *= 2;
-      z_height *= 2;
-      break;
-   case 8:
-      z_width *= 4;
-      z_height *= 2;
-      break;
-   default:
-      unreachable("unsupported sample count");
+   if (brw->gen < 9) {
+      switch (mt->num_samples) {
+      case 0:
+      case 1:
+         break;
+      case 2:
+      case 4:
+         z_width *= 2;
+         z_height *= 2;
+         break;
+      case 8:
+         z_width *= 4;
+         z_height *= 2;
+         break;
+      default:
+         unreachable("unsupported sample count");
+      }
    }
 
    const unsigned vertical_align = 8; /* 'j' in the docs */
@@ -1635,7 +1668,7 @@ intel_gen8_hiz_buf_create(struct brw_context *brw,
    unsigned Z_i = Z0;
    unsigned sum_h_i = 0;
    unsigned hz_height_3d_sum = 0;
-   for (int level = mt->first_level; level <= mt->last_level; ++level) {
+   for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
       unsigned i = level - mt->first_level;
       unsigned h_i = ALIGN(H_i, vertical_align);
       /* sum(i=2 to m; h_i) */
@@ -1768,11 +1801,11 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
       return false;
 
    /* Mark that all slices need a HiZ resolve. */
-   for (int level = mt->first_level; level <= mt->last_level; ++level) {
+   for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
       if (!intel_miptree_level_enable_hiz(brw, mt, level))
          continue;
 
-      for (int layer = 0; layer < mt->level[level].depth; ++layer) {
+      for (unsigned layer = 0; layer < mt->level[level].depth; ++layer) {
          struct intel_resolve_map *m = malloc(sizeof(struct intel_resolve_map));
          exec_node_init(&m->link);
          m->level = level;
@@ -2073,8 +2106,7 @@ intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
 }
 
 void
-intel_miptree_unmap_raw(struct brw_context *brw,
-                        struct intel_mipmap_tree *mt)
+intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
 {
    drm_intel_bo_unmap(mt->bo);
 }
@@ -2125,13 +2157,9 @@ intel_miptree_map_gtt(struct brw_context *brw,
 }
 
 static void
-intel_miptree_unmap_gtt(struct brw_context *brw,
-                       struct intel_mipmap_tree *mt,
-                       struct intel_miptree_map *map,
-                       unsigned int level,
-                       unsigned int slice)
+intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
 {
-   intel_miptree_unmap_raw(brw, mt);
+   intel_miptree_unmap_raw(mt);
 }
 
 static void
@@ -2192,7 +2220,7 @@ intel_miptree_unmap_blit(struct brw_context *brw,
 {
    struct gl_context *ctx = &brw->ctx;
 
-   intel_miptree_unmap_raw(brw, map->mt);
+   intel_miptree_unmap_raw(map->mt);
 
    if (map->mode & GL_MAP_WRITE_BIT) {
       bool ok = intel_miptree_blit(brw,
@@ -2264,7 +2292,7 @@ intel_miptree_map_movntdqa(struct brw_context *brw,
       _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
    }
 
-   intel_miptree_unmap_raw(brw, mt);
+   intel_miptree_unmap_raw(mt);
 }
 
 static void
@@ -2313,7 +2341,7 @@ intel_miptree_map_s8(struct brw_context *brw,
         }
       }
 
-      intel_miptree_unmap_raw(brw, mt);
+      intel_miptree_unmap_raw(mt);
 
       DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
          map->x, map->y, map->w, map->h,
@@ -2349,7 +2377,7 @@ intel_miptree_unmap_s8(struct brw_context *brw,
         }
       }
 
-      intel_miptree_unmap_raw(brw, mt);
+      intel_miptree_unmap_raw(mt);
    }
 
    free(map->buffer);
@@ -2403,7 +2431,7 @@ intel_miptree_unmap_etc(struct brw_context *brw,
                                map->ptr, map->stride,
                                map->w, map->h, mt->etc_format);
 
-   intel_miptree_unmap_raw(brw, mt);
+   intel_miptree_unmap_raw(mt);
    free(map->buffer);
 }
 
@@ -2473,8 +2501,8 @@ intel_miptree_map_depthstencil(struct brw_context *brw,
         }
       }
 
-      intel_miptree_unmap_raw(brw, s_mt);
-      intel_miptree_unmap_raw(brw, z_mt);
+      intel_miptree_unmap_raw(s_mt);
+      intel_miptree_unmap_raw(z_mt);
 
       DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
          __func__,
@@ -2533,8 +2561,8 @@ intel_miptree_unmap_depthstencil(struct brw_context *brw,
         }
       }
 
-      intel_miptree_unmap_raw(brw, s_mt);
-      intel_miptree_unmap_raw(brw, z_mt);
+      intel_miptree_unmap_raw(s_mt);
+      intel_miptree_unmap_raw(z_mt);
 
       DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
          __func__,
@@ -2735,7 +2763,7 @@ intel_miptree_unmap(struct brw_context *brw,
       intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
 #endif
    } else {
-      intel_miptree_unmap_gtt(brw, mt, map, level, slice);
+      intel_miptree_unmap_gtt(mt);
    }
 
    intel_miptree_release_map(mt, level, slice);
index c28162a..7610d75 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 /** @file intel_mipmap_tree.h
  *
@@ -517,8 +515,7 @@ struct intel_mipmap_tree
 };
 
 void
-intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
-                                 struct intel_mipmap_tree *mt,
+intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
                                  unsigned *width_px, unsigned *height);
 bool
 intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling);
@@ -597,6 +594,10 @@ intel_miptree_check_level_layer(struct intel_mipmap_tree *mt,
                                 uint32_t level,
                                 uint32_t layer)
 {
+   (void) mt;
+   (void) level;
+   (void) layer;
+
    assert(level >= mt->first_level);
    assert(level <= mt->last_level);
    assert(layer < mt->level[level].depth);
@@ -618,13 +619,17 @@ intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
                               GLuint *x, GLuint *y);
 
 void
-intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
-                                       int *width, int *height, int *depth);
+intel_get_image_dims(struct gl_texture_image *image,
+                     int *width, int *height, int *depth);
+
+void
+intel_get_tile_masks(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
+                     bool map_stencil_as_y_tiled,
+                     uint32_t *mask_x, uint32_t *mask_y);
 
 void
-intel_miptree_get_tile_masks(const struct intel_mipmap_tree *mt,
-                             uint32_t *mask_x, uint32_t *mask_y,
-                             bool map_stencil_as_y_tiled);
+intel_get_tile_dims(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
+                    uint32_t *tile_w, uint32_t *tile_h);
 
 uint32_t
 intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
@@ -771,12 +776,6 @@ brw_miptree_layout(struct brw_context *brw,
                    struct intel_mipmap_tree *mt,
                    uint32_t layout_flags);
 
-void *intel_miptree_map_raw(struct brw_context *brw,
-                            struct intel_mipmap_tree *mt);
-
-void intel_miptree_unmap_raw(struct brw_context *brw,
-                             struct intel_mipmap_tree *mt);
-
 void
 intel_miptree_map(struct brw_context *brw,
                  struct intel_mipmap_tree *mt,
index 3454333..30d3a52 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/accum.h"
 #include "main/enums.h"
index bc184ef..f5b931f 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTEL_PIXEL_H
 #define INTEL_PIXEL_H
index 224dc65..3326ac4 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/blend.h"
 #include "main/glheader.h"
index ce053ed..066b6a2 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/glheader.h"
 #include "main/image.h"
index 6c6bd86..0c8a918 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2006 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/glheader.h"
 #include "main/enums.h"
index 3fe506e..9bcbbd1 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/glheader.h"
 #include "main/enums.h"
@@ -109,6 +107,10 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
        pack->Invert)
       return false;
 
+   /* Only a simple blit, no scale, bias or other mapping. */
+   if (ctx->_ImageTransferState)
+      return false;
+
    /* This renderbuffer can come from a texture.  In this case, we impose
     * some of the same restrictions we have for textures and adjust for
     * miplevels.
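
An application-side fragment (assuming a current context; w, h and pixels are
placeholder names) showing how the new check behaves: any classic
pixel-transfer state forces glReadPixels off the tiled-memcpy path, since a
plain blit cannot apply scale, bias or pixel maps.

    glPixelTransferf(GL_RED_SCALE, 2.0f); /* _ImageTransferState != 0: slow path */
    glReadPixels(0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, pixels);
    glPixelTransferf(GL_RED_SCALE, 1.0f); /* transfer ops off: fast path again   */
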
index b4283da..a261c2b 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #define CMD_MI                         (0x0 << 29)
 #define CMD_2D                         (0x2 << 29)
 #define GEN7_3DPRIM_START_INSTANCE      0x243C
 #define GEN7_3DPRIM_BASE_VERTEX         0x2440
 
+/* Auto-Compute / Indirect Registers */
+#define GEN7_GPGPU_DISPATCHDIMX         0x2500
+#define GEN7_GPGPU_DISPATCHDIMY         0x2504
+#define GEN7_GPGPU_DISPATCHDIMZ         0x2508
+
 #define GEN7_CACHE_MODE_1               0x7004
 # define GEN8_HIZ_NP_PMA_FIX_ENABLE        (1 << 11)
 # define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13)
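
A hypothetical sketch of why the three registers sit at consecutive dword
offsets: an indirect dispatch can source num_groups_x/y/z from the bound
buffer in one loop. emit_load_register_mem(), indirect_bo and indirect_offset
are assumed names for illustration, not Mesa API.

    static const uint32_t dispatch_regs[3] = {
       GEN7_GPGPU_DISPATCHDIMX,   /* 0x2500 */
       GEN7_GPGPU_DISPATCHDIMY,   /* 0x2504 */
       GEN7_GPGPU_DISPATCHDIMZ,   /* 0x2508 */
    };

    for (int i = 0; i < 3; i++)
       emit_load_register_mem(dispatch_regs[i],
                              indirect_bo, indirect_offset + 4 * i);
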
index 85863a0..1783835 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include <errno.h>
 #include <time.h>
index fd5143e..96bb995 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef _INTEL_INIT_H_
 #define _INTEL_INIT_H_
index d734670..498cab4 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
+ */
 
 #include "main/glheader.h"
 #include "main/context.h"
index e16b0de..cac33ac 100644 (file)
@@ -357,6 +357,14 @@ intel_set_texture_storage_for_buffer_object(struct gl_context *ctx,
    return true;
 }
 
+static void
+intel_texture_barrier(struct gl_context *ctx)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   brw_emit_mi_flush(brw);
+}
+
 void
 intelInitTextureFuncs(struct dd_function_table *functions)
 {
@@ -372,4 +380,5 @@ intelInitTextureFuncs(struct dd_function_table *functions)
    functions->TextureView = intel_texture_view;
    functions->SetTextureStorageForBufferObject =
       intel_set_texture_storage_for_buffer_object;
+   functions->TextureBarrier = intel_texture_barrier;
 }
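
Application-side, this hook backs ARB_texture_barrier. A fragment of the
intended pattern (assuming a current context; the draw helpers are
placeholders), with the MI flush above serializing the write and the read:

    draw_pass_writing_texture();   /* renders into tex                  */
    glTextureBarrier();            /* lands in intel_texture_barrier()  */
    draw_pass_sampling_texture();  /* now safely samples tex            */
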
index 402a389..dc83d08 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef INTELTEX_INC
 #define INTELTEX_INC
index 4d8c82e..9c255ae 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/mtypes.h"
 #include "main/enums.h"
@@ -55,6 +53,10 @@ intel_copy_texsubimage(struct brw_context *brw,
    const GLenum internalFormat = intelImage->base.Base.InternalFormat;
    bool ret;
 
+   /* No pixel transfer operations (zoom, bias, mapping), just a blit */
+   if (brw->ctx._ImageTransferState)
+      return false;
+
    intel_prepare_render(brw);
 
    /* glCopyTexSubImage() can be called on a multisampled renderbuffer (if
index 93a8cde..34b91e8 100644 (file)
@@ -42,8 +42,7 @@ intel_miptree_create_for_teximage(struct brw_context *brw,
    int width, height, depth;
    GLuint i;
 
-   intel_miptree_get_dimensions_for_image(&intelImage->base.Base,
-                                          &width, &height, &depth);
+   intel_get_image_dims(&intelImage->base.Base, &width, &height, &depth);
 
    DBG("%s\n", __func__);
 
index e078e0a..750e4c3 100644 (file)
@@ -1,5 +1,4 @@
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -7,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #ifndef _INTEL_TEX_OBJ_H
 #define _INTEL_TEX_OBJ_H
index 31e511f..970ded1 100644 (file)
@@ -1,6 +1,4 @@
-
-/**************************************************************************
- *
+/*
  * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  *
@@ -8,7 +6,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
 
 #include "main/bufferobj.h"
 #include "main/image.h"
@@ -118,6 +115,10 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
        packing->Invert)
       return false;
 
+   /* Only a simple blit, no scale, bias or other mapping. */
+   if (ctx->_ImageTransferState)
+      return false;
+
    if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp,
                          INTEL_UPLOAD))
       return false;
index d3fb252..2cf9c13 100644 (file)
@@ -128,8 +128,7 @@ intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit)
    /* May need to create a new tree:
     */
    if (!intelObj->mt) {
-      intel_miptree_get_dimensions_for_image(&firstImage->base.Base,
-                                            &width, &height, &depth);
+      intel_get_image_dims(&firstImage->base.Base, &width, &height, &depth);
 
       perf_debug("Creating new %s %dx%dx%d %d-level miptree to handle "
                  "finalized texture miptree.\n",
index dcf0462..2383401 100644 (file)
@@ -8,7 +8,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -18,7 +18,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
index 9dc1088..01543bf 100644 (file)
@@ -8,7 +8,7 @@
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
@@ -18,7 +18,7 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
index c39b71d..bb9be21 100644 (file)
@@ -350,7 +350,6 @@ static void r200ResetLineStipple( struct gl_context *ctx );
 #define HAVE_LINE_STRIPS 1
 #define HAVE_TRIANGLES   1
 #define HAVE_TRI_STRIPS  1
-#define HAVE_TRI_STRIP_1 0
 #define HAVE_TRI_FANS    1
 #define HAVE_QUADS       0
 #define HAVE_QUAD_STRIPS 0
index 3eccaa7..7472753 100644 (file)
@@ -61,7 +61,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define HAVE_LINE_STRIPS 1
 #define HAVE_TRIANGLES   1
 #define HAVE_TRI_STRIPS  1
-#define HAVE_TRI_STRIP_1 0
 #define HAVE_TRI_FANS    1
 #define HAVE_QUADS       1
 #define HAVE_QUAD_STRIPS 1
index 2fbd353..b671a3b 100644 (file)
@@ -353,10 +353,7 @@ void r100_swtcl_flush(struct gl_context *ctx, uint32_t current_offset)
 #define HAVE_LINE_STRIPS 1
 #define HAVE_TRIANGLES   1
 #define HAVE_TRI_STRIPS  1
-#define HAVE_TRI_STRIP_1 0
 #define HAVE_TRI_FANS    1
-#define HAVE_QUADS       0
-#define HAVE_QUAD_STRIPS 0
 #define HAVE_POLYGONS    0
 /* \todo: is it possible to make "ELTS" work with t_vertex code ? */
 #define HAVE_ELTS        0
@@ -446,7 +443,7 @@ static GLboolean radeon_run_render( struct gl_context *ctx,
                 start, start+length);
 
       if (length)
-        tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, prim );
+         tab[prim & PRIM_MODE_MASK](ctx, start, length, prim);
    }
 
    tnl->Driver.Render.Finish( ctx );
index 1d07459..3e2f426 100644 (file)
@@ -65,7 +65,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define HAVE_LINE_STRIPS 1
 #define HAVE_TRIANGLES   1
 #define HAVE_TRI_STRIPS  1
-#define HAVE_TRI_STRIP_1 0
 #define HAVE_TRI_FANS    1
 #define HAVE_QUADS       0
 #define HAVE_QUAD_STRIPS 0
index 53c8fb8..a46c194 100644 (file)
@@ -882,3 +882,111 @@ _mesa_validate_MultiDrawElementsIndirect(struct gl_context *ctx,
 
    return GL_TRUE;
 }
+
+static bool
+check_valid_to_compute(struct gl_context *ctx, const char *function)
+{
+   struct gl_shader_program *prog;
+
+   if (!_mesa_has_compute_shaders(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "unsupported function (%s) called",
+                  function);
+      return false;
+   }
+
+   prog = ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE];
+   if (prog == NULL || prog->_LinkedShaders[MESA_SHADER_COMPUTE] == NULL) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(no active compute shader)",
+                  function);
+      return false;
+   }
+
+   return true;
+}
+
+GLboolean
+_mesa_validate_DispatchCompute(struct gl_context *ctx,
+                               const GLuint *num_groups)
+{
+   int i;
+   FLUSH_CURRENT(ctx, 0);
+
+   if (!check_valid_to_compute(ctx, "glDispatchCompute"))
+      return GL_FALSE;
+
+   for (i = 0; i < 3; i++) {
+      if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) {
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                     "glDispatchCompute(num_groups_%c)", 'x' + i);
+         return GL_FALSE;
+      }
+   }
+
+   return GL_TRUE;
+}
+
+static GLboolean
+valid_dispatch_indirect(struct gl_context *ctx,
+                        GLintptr indirect,
+                        GLsizei size, const char *name)
+{
+   GLintptr end = (GLintptr)indirect + size;
+
+   if (!check_valid_to_compute(ctx, name))
+      return GL_FALSE;
+
+   /* From the ARB_compute_shader specification:
+    *
+    * "An INVALID_OPERATION error is generated [...] if <indirect> is less
+    *  than zero or not a multiple of the size, in basic machine units, of
+    *  uint."
+    */
+   if ((GLintptr)indirect & (sizeof(GLuint) - 1)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(indirect is not aligned)", name);
+      return GL_FALSE;
+   }
+
+   if ((GLintptr)indirect < 0) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(indirect is less than zero)", name);
+      return GL_FALSE;
+   }
+
+   if (!_mesa_is_bufferobj(ctx->DispatchIndirectBuffer)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(no buffer bound to DISPATCH_INDIRECT_BUFFER)", name);
+      return GL_FALSE;
+   }
+
+   if (_mesa_check_disallowed_mapping(ctx->DispatchIndirectBuffer)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(DISPATCH_INDIRECT_BUFFER is mapped)", name);
+      return GL_FALSE;
+   }
+
+   /* From the ARB_compute_shader specification:
+    *
+    * "An INVALID_OPERATION error is generated if this command sources data
+    *  beyond the end of the buffer object [...]"
+    */
+   if (ctx->DispatchIndirectBuffer->Size < end) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(DISPATCH_INDIRECT_BUFFER too small)", name);
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+GLboolean
+_mesa_validate_DispatchComputeIndirect(struct gl_context *ctx,
+                                       GLintptr indirect)
+{
+   FLUSH_CURRENT(ctx, 0);
+
+   return valid_dispatch_indirect(ctx, indirect, 3 * sizeof(GLuint),
+                                  "glDispatchComputeIndirect");
+}
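
For reference, an application-side fragment (assuming a current context with a
linked compute shader bound) that satisfies every check above: the offset is
uint-aligned, non-negative, and leaves three GLuints inside the bound
DISPATCH_INDIRECT_BUFFER.

    GLuint buf;
    const GLuint groups[3] = { 8, 8, 1 };        /* num_groups_x/y/z */

    glGenBuffers(1, &buf);
    glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buf);
    glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(groups), groups,
                 GL_STATIC_DRAW);
    glDispatchComputeIndirect(0);                /* passes validation */
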
index 0ce7b69..5d030a7 100644 (file)
@@ -105,5 +105,13 @@ _mesa_validate_MultiDrawElementsIndirect(struct gl_context *ctx,
                                          GLsizei primcount,
                                          GLsizei stride);
 
+extern GLboolean
+_mesa_validate_DispatchCompute(struct gl_context *ctx,
+                               const GLuint *num_groups);
+
+extern GLboolean
+_mesa_validate_DispatchComputeIndirect(struct gl_context *ctx,
+                                       GLintptr indirect);
+
 
 #endif
index 7c40040..2885143 100644 (file)
@@ -200,10 +200,6 @@ _mesa_reference_vao_(struct gl_context *ctx,
       mtx_lock(&oldObj->Mutex);
       assert(oldObj->RefCount > 0);
       oldObj->RefCount--;
-#if 0
-      printf("ArrayObj %p %d DECR to %d\n",
-             (void *) oldObj, oldObj->Name, oldObj->RefCount);
-#endif
       deleteFlag = (oldObj->RefCount == 0);
       mtx_unlock(&oldObj->Mutex);
 
@@ -227,10 +223,6 @@ _mesa_reference_vao_(struct gl_context *ctx,
       }
       else {
          vao->RefCount++;
-#if 0
-         printf("ArrayObj %p %d INCR to %d\n",
-                (void *) vao, vao->Name, vao->RefCount);
-#endif
          *ptr = vao;
       }
       mtx_unlock(&vao->Mutex);
index e17b41c..f985982 100644 (file)
 #include "enums.h"
 #include "hash.h"
 #include "imports.h"
-#include "image.h"
 #include "context.h"
 #include "bufferobj.h"
-#include "fbobject.h"
 #include "mtypes.h"
-#include "texobj.h"
 #include "teximage.h"
 #include "glformats.h"
 #include "texstore.h"
 #include "transformfeedback.h"
-#include "dispatch.h"
 
 
 /* Debug flags */
@@ -97,6 +93,11 @@ get_buffer_target(struct gl_context *ctx, GLenum target)
          return &ctx->DrawIndirectBuffer;
       }
       break;
+   case GL_DISPATCH_INDIRECT_BUFFER:
+      if (_mesa_has_compute_shaders(ctx)) {
+         return &ctx->DispatchIndirectBuffer;
+      }
+      break;
    case GL_TRANSFORM_FEEDBACK_BUFFER:
       if (ctx->Extensions.EXT_transform_feedback) {
          return &ctx->TransformFeedback.CurrentBuffer;
@@ -250,7 +251,7 @@ bufferobj_range_mapped(const struct gl_buffer_object *obj,
  */
 static bool
 buffer_object_subdata_range_good(struct gl_context *ctx,
-                                 struct gl_buffer_object *bufObj,
+                                 const struct gl_buffer_object *bufObj,
                                  GLintptr offset, GLsizeiptr size,
                                  bool mappedRange, const char *caller)
 {
@@ -391,7 +392,7 @@ convert_clear_buffer_data(struct gl_context *ctx,
 
 /**
  * Allocate and initialize a new buffer object.
- * 
+ *
  * Default callback for the \c dd_function_table::NewBufferObject() hook.
  */
 static struct gl_buffer_object *
@@ -409,7 +410,7 @@ _mesa_new_buffer_object(struct gl_context *ctx, GLuint name)
 
 /**
  * Delete a buffer object.
- * 
+ *
  * Default callback for the \c dd_function_table::DeleteBuffer() hook.
  */
 static void
@@ -449,23 +450,10 @@ _mesa_reference_buffer_object_(struct gl_context *ctx,
       mtx_lock(&oldObj->Mutex);
       assert(oldObj->RefCount > 0);
       oldObj->RefCount--;
-#if 0
-      printf("BufferObj %p %d DECR to %d\n",
-             (void *) oldObj, oldObj->Name, oldObj->RefCount);
-#endif
       deleteFlag = (oldObj->RefCount == 0);
       mtx_unlock(&oldObj->Mutex);
 
       if (deleteFlag) {
-
-         /* some sanity checking: don't delete a buffer still in use */
-#if 0
-         /* unfortunately, these tests are invalid during context tear-down */
-        assert(ctx->Array.ArrayBufferObj != bufObj);
-        assert(ctx->Array.VAO->IndexBufferObj != bufObj);
-        assert(ctx->Array.VAO->Vertex.BufferObj != bufObj);
-#endif
-
         assert(ctx->Driver.DeleteBuffer);
          ctx->Driver.DeleteBuffer(ctx, oldObj);
       }
@@ -485,10 +473,6 @@ _mesa_reference_buffer_object_(struct gl_context *ctx,
       }
       else {
          bufObj->RefCount++;
-#if 0
-         printf("BufferObj %p %d INCR to %d\n",
-                (void *) bufObj, bufObj->Name, bufObj->RefCount);
-#endif
          *ptr = bufObj;
       }
       mtx_unlock(&bufObj->Mutex);
@@ -523,6 +507,7 @@ count_buffer_size(GLuint key, void *data, void *userData)
       (const struct gl_buffer_object *) data;
    GLuint *total = (GLuint *) userData;
 
+   (void) key;
    *total = *total + bufObj->Size;
 }
 
@@ -742,6 +727,7 @@ flush_mapped_buffer_range_fallback(struct gl_context *ctx,
    (void) offset;
    (void) length;
    (void) obj;
+   (void) index;
    /* no-op */
 }
 
@@ -846,6 +832,9 @@ _mesa_init_buffer_objects( struct gl_context *ctx )
    _mesa_reference_buffer_object(ctx, &ctx->DrawIndirectBuffer,
                                 ctx->Shared->NullBufferObj);
 
+   _mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer,
+                                ctx->Shared->NullBufferObj);
+
    for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
       _mesa_reference_buffer_object(ctx,
                                    &ctx->UniformBufferBindings[i].BufferObject,
@@ -890,6 +879,8 @@ _mesa_free_buffer_objects( struct gl_context *ctx )
 
    _mesa_reference_buffer_object(ctx, &ctx->DrawIndirectBuffer, NULL);
 
+   _mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer, NULL);
+
    for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
       _mesa_reference_buffer_object(ctx,
                                    &ctx->UniformBufferBindings[i].BufferObject,
@@ -912,14 +903,13 @@ _mesa_free_buffer_objects( struct gl_context *ctx )
 
 bool
 _mesa_handle_bind_buffer_gen(struct gl_context *ctx,
-                             GLenum target,
                              GLuint buffer,
                              struct gl_buffer_object **buf_handle,
                              const char *caller)
 {
    struct gl_buffer_object *buf = *buf_handle;
 
-   if (!buf && ctx->API == API_OPENGL_CORE) {
+   if (!buf && (ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx))) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "%s(non-gen name)", caller);
       return false;
    }
@@ -975,11 +965,11 @@ bind_buffer_object(struct gl_context *ctx, GLenum target, GLuint buffer)
    else {
       /* non-default buffer object */
       newBufObj = _mesa_lookup_bufferobj(ctx, buffer);
-      if (!_mesa_handle_bind_buffer_gen(ctx, target, buffer,
+      if (!_mesa_handle_bind_buffer_gen(ctx, buffer,
                                         &newBufObj, "glBindBuffer"))
          return;
    }
-   
+
    /* bind new buffer */
    _mesa_reference_buffer_object(ctx, bindTarget, newBufObj);
 }
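
Editor's note: the hunk above extends the non-generated-name check to OpenGL ES 3.1. A minimal client-side sketch of the behavior being enforced, assuming a current core-profile or ES 3.1 context and GL 1.5+ prototypes:

#define GL_GLEXT_PROTOTYPES
#include <GL/glcorearb.h>
#include <assert.h>

static void
check_non_gen_bind(void)
{
   GLuint bogus = 42;   /* a name never returned by glGenBuffers */

   /* Compatibility profiles create the buffer implicitly; core profile
    * and now ES 3.1 must reject it with GL_INVALID_OPERATION. */
   glBindBuffer(GL_ARRAY_BUFFER, bogus);
   assert(glGetError() == GL_INVALID_OPERATION);
}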
@@ -987,7 +977,7 @@ bind_buffer_object(struct gl_context *ctx, GLenum target, GLuint buffer)
 
 /**
  * Update the default buffer objects in the given context to reference those
- * specified in the shared state and release those referencing the old 
+ * specified in the shared state and release those referencing the old
  * shared state.
  */
 void
@@ -1191,7 +1181,7 @@ _mesa_BindBuffer(GLenum target, GLuint buffer)
 
 /**
  * Delete a set of buffer objects.
- * 
+ *
  * \param n      Number of buffer objects to delete.
  * \param ids    Array of \c n buffer object IDs.
  */
@@ -1236,6 +1226,11 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
             _mesa_BindBuffer( GL_DRAW_INDIRECT_BUFFER, 0 );
          }
 
+         /* unbind ARB_compute_shader binding point */
+         if (ctx->DispatchIndirectBuffer == bufObj) {
+            _mesa_BindBuffer(GL_DISPATCH_INDIRECT_BUFFER, 0);
+         }
+
          /* unbind ARB_copy_buffer binding points */
          if (ctx->CopyReadBuffer == bufObj) {
             _mesa_BindBuffer( GL_COPY_READ_BUFFER, 0 );
@@ -1409,9 +1404,9 @@ _mesa_CreateBuffers(GLsizei n, GLuint *buffers)
 
 /**
  * Determine if ID is the name of a buffer object.
- * 
+ *
  * \param id  ID of the potential buffer object.
- * \return  \c GL_TRUE if \c id is the name of a buffer object, 
+ * \return  \c GL_TRUE if \c id is the name of a buffer object,
  *          \c GL_FALSE otherwise.
  */
 GLboolean GLAPIENTRY
@@ -2633,380 +2628,6 @@ _mesa_FlushMappedNamedBufferRange(GLuint buffer, GLintptr offset,
 }
 
 
-static GLenum
-buffer_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
-   struct gl_buffer_object *bufObj;
-   GLenum retval;
-
-   bufObj = _mesa_lookup_bufferobj(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectPurgeable(name = 0x%x)", name);
-      return 0;
-   }
-   if (!_mesa_is_bufferobj(bufObj)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glObjectPurgeable(buffer 0)" );
-      return 0;
-   }
-
-   if (bufObj->Purgeable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
-      return GL_VOLATILE_APPLE;
-   }
-
-   bufObj->Purgeable = GL_TRUE;
-
-   retval = GL_VOLATILE_APPLE;
-   if (ctx->Driver.BufferObjectPurgeable)
-      retval = ctx->Driver.BufferObjectPurgeable(ctx, bufObj, option);
-
-   return retval;
-}
-
-
-static GLenum
-renderbuffer_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
-   struct gl_renderbuffer *bufObj;
-   GLenum retval;
-
-   bufObj = _mesa_lookup_renderbuffer(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   if (bufObj->Purgeable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
-      return GL_VOLATILE_APPLE;
-   }
-
-   bufObj->Purgeable = GL_TRUE;
-
-   retval = GL_VOLATILE_APPLE;
-   if (ctx->Driver.RenderObjectPurgeable)
-      retval = ctx->Driver.RenderObjectPurgeable(ctx, bufObj, option);
-
-   return retval;
-}
-
-
-static GLenum
-texture_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
-   struct gl_texture_object *bufObj;
-   GLenum retval;
-
-   bufObj = _mesa_lookup_texture(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectPurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   if (bufObj->Purgeable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
-      return GL_VOLATILE_APPLE;
-   }
-
-   bufObj->Purgeable = GL_TRUE;
-
-   retval = GL_VOLATILE_APPLE;
-   if (ctx->Driver.TextureObjectPurgeable)
-      retval = ctx->Driver.TextureObjectPurgeable(ctx, bufObj, option);
-
-   return retval;
-}
-
-
-GLenum GLAPIENTRY
-_mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option)
-{
-   GLenum retval;
-
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
-
-   if (name == 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectPurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   switch (option) {
-   case GL_VOLATILE_APPLE:
-   case GL_RELEASED_APPLE:
-      /* legal */
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glObjectPurgeable(name = 0x%x) invalid option: %d",
-                  name, option);
-      return 0;
-   }
-
-   switch (objectType) {
-   case GL_TEXTURE:
-      retval = texture_object_purgeable(ctx, name, option);
-      break;
-   case GL_RENDERBUFFER_EXT:
-      retval = renderbuffer_purgeable(ctx, name, option);
-      break;
-   case GL_BUFFER_OBJECT_APPLE:
-      retval = buffer_object_purgeable(ctx, name, option);
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glObjectPurgeable(name = 0x%x) invalid type: %d",
-                  name, objectType);
-      return 0;
-   }
-
-   /* In strict conformance to the spec, we must only return VOLATILE when
-    * when passed the VOLATILE option. Madness.
-    *
-    * XXX First fix the spec, then fix me.
-    */
-   return option == GL_VOLATILE_APPLE ? GL_VOLATILE_APPLE : retval;
-}
-
-
-static GLenum
-buffer_object_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
-   struct gl_buffer_object *bufObj;
-   GLenum retval;
-
-   bufObj = _mesa_lookup_bufferobj(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   if (! bufObj->Purgeable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glObjectUnpurgeable(name = 0x%x) object is "
-                  " already \"unpurged\"", name);
-      return 0;
-   }
-
-   bufObj->Purgeable = GL_FALSE;
-
-   retval = option;
-   if (ctx->Driver.BufferObjectUnpurgeable)
-      retval = ctx->Driver.BufferObjectUnpurgeable(ctx, bufObj, option);
-
-   return retval;
-}
-
-
-static GLenum
-renderbuffer_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
-   struct gl_renderbuffer *bufObj;
-   GLenum retval;
-
-   bufObj = _mesa_lookup_renderbuffer(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   if (! bufObj->Purgeable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glObjectUnpurgeable(name = 0x%x) object is "
-                  " already \"unpurged\"", name);
-      return 0;
-   }
-
-   bufObj->Purgeable = GL_FALSE;
-
-   retval = option;
-   if (ctx->Driver.RenderObjectUnpurgeable)
-      retval = ctx->Driver.RenderObjectUnpurgeable(ctx, bufObj, option);
-
-   return retval;
-}
-
-
-static GLenum
-texture_object_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
-   struct gl_texture_object *bufObj;
-   GLenum retval;
-
-   bufObj = _mesa_lookup_texture(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   if (! bufObj->Purgeable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glObjectUnpurgeable(name = 0x%x) object is"
-                  " already \"unpurged\"", name);
-      return 0;
-   }
-
-   bufObj->Purgeable = GL_FALSE;
-
-   retval = option;
-   if (ctx->Driver.TextureObjectUnpurgeable)
-      retval = ctx->Driver.TextureObjectUnpurgeable(ctx, bufObj, option);
-
-   return retval;
-}
-
-
-GLenum GLAPIENTRY
-_mesa_ObjectUnpurgeableAPPLE(GLenum objectType, GLuint name, GLenum option)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
-
-   if (name == 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return 0;
-   }
-
-   switch (option) {
-   case GL_RETAINED_APPLE:
-   case GL_UNDEFINED_APPLE:
-      /* legal */
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glObjectUnpurgeable(name = 0x%x) invalid option: %d",
-                  name, option);
-      return 0;
-   }
-
-   switch (objectType) {
-   case GL_BUFFER_OBJECT_APPLE:
-      return buffer_object_unpurgeable(ctx, name, option);
-   case GL_TEXTURE:
-      return texture_object_unpurgeable(ctx, name, option);
-   case GL_RENDERBUFFER_EXT:
-      return renderbuffer_unpurgeable(ctx, name, option);
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glObjectUnpurgeable(name = 0x%x) invalid type: %d",
-                  name, objectType);
-      return 0;
-   }
-}
-
-
-static void
-get_buffer_object_parameteriv(struct gl_context *ctx, GLuint name,
-                              GLenum pname, GLint *params)
-{
-   struct gl_buffer_object *bufObj = _mesa_lookup_bufferobj(ctx, name);
-   if (!bufObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glGetObjectParameteriv(name = 0x%x) invalid object", name);
-      return;
-   }
-
-   switch (pname) {
-   case GL_PURGEABLE_APPLE:
-      *params = bufObj->Purgeable;
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
-                  name, pname);
-      break;
-   }
-}
-
-
-static void
-get_renderbuffer_parameteriv(struct gl_context *ctx, GLuint name,
-                             GLenum pname, GLint *params)
-{
-   struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name);
-   if (!rb) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return;
-   }
-
-   switch (pname) {
-   case GL_PURGEABLE_APPLE:
-      *params = rb->Purgeable;
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
-                  name, pname);
-      break;
-   }
-}
-
-
-static void
-get_texture_object_parameteriv(struct gl_context *ctx, GLuint name,
-                               GLenum pname, GLint *params)
-{
-   struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
-   if (!texObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glObjectUnpurgeable(name = 0x%x)", name);
-      return;
-   }
-
-   switch (pname) {
-   case GL_PURGEABLE_APPLE:
-      *params = texObj->Purgeable;
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
-                  name, pname);
-      break;
-   }
-}
-
-
-void GLAPIENTRY
-_mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name, GLenum pname,
-                                GLint *params)
-{
-   GET_CURRENT_CONTEXT(ctx);
-
-   if (name == 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glGetObjectParameteriv(name = 0x%x)", name);
-      return;
-   }
-
-   switch (objectType) {
-   case GL_TEXTURE:
-      get_texture_object_parameteriv(ctx, name, pname, params);
-      break;
-   case GL_BUFFER_OBJECT_APPLE:
-      get_buffer_object_parameteriv(ctx, name, pname, params);
-      break;
-   case GL_RENDERBUFFER_EXT:
-      get_renderbuffer_parameteriv(ctx, name, pname, params);
-      break;
-   default:
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glGetObjectParameteriv(name = 0x%x) invalid type: %d",
-                  name, objectType);
-   }
-}
-
 /**
  * Binds a buffer object to a uniform buffer binding point.
  *
@@ -4234,7 +3855,7 @@ _mesa_BindBufferRange(GLenum target, GLuint index,
    } else {
       bufObj = _mesa_lookup_bufferobj(ctx, buffer);
    }
-   if (!_mesa_handle_bind_buffer_gen(ctx, target, buffer,
+   if (!_mesa_handle_bind_buffer_gen(ctx, buffer,
                                      &bufObj, "glBindBufferRange"))
       return;
 
@@ -4286,7 +3907,7 @@ _mesa_BindBufferBase(GLenum target, GLuint index, GLuint buffer)
    } else {
       bufObj = _mesa_lookup_bufferobj(ctx, buffer);
    }
-   if (!_mesa_handle_bind_buffer_gen(ctx, target, buffer,
+   if (!_mesa_handle_bind_buffer_gen(ctx, buffer,
                                      &bufObj, "glBindBufferBase"))
       return;
 
index b5d73ae..3eac96d 100644
@@ -74,7 +74,6 @@ _mesa_free_buffer_objects(struct gl_context *ctx);
 
 extern bool
 _mesa_handle_bind_buffer_gen(struct gl_context *ctx,
-                             GLenum target,
                              GLuint buffer,
                              struct gl_buffer_object **buf_handle,
                              const char *caller);
@@ -318,16 +317,6 @@ void GLAPIENTRY
 _mesa_FlushMappedNamedBufferRange(GLuint buffer, GLintptr offset,
                                   GLsizeiptr length);
 
-GLenum GLAPIENTRY
-_mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option);
-
-GLenum GLAPIENTRY
-_mesa_ObjectUnpurgeableAPPLE(GLenum objectType, GLuint name, GLenum option);
-
-void GLAPIENTRY
-_mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name,
-                                GLenum pname, GLint* params);
-
 void GLAPIENTRY
 _mesa_BindBufferRange(GLenum target, GLuint index,
                       GLuint buffer, GLintptr offset, GLsizeiptr size);
index 37a4ba7..8bc3bcd 100644
@@ -24,6 +24,7 @@
 #include "glheader.h"
 #include "compute.h"
 #include "context.h"
+#include "api_validate.h"
 
 void GLAPIENTRY
 _mesa_DispatchCompute(GLuint num_groups_x,
@@ -31,31 +32,16 @@ _mesa_DispatchCompute(GLuint num_groups_x,
                       GLuint num_groups_z)
 {
    GET_CURRENT_CONTEXT(ctx);
-   int i;
-   struct gl_shader_program *prog;
    const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z };
 
-   if (ctx->Extensions.ARB_compute_shader) {
-      for (i = 0; i < 3; i++) {
-         if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) {
-            _mesa_error(ctx, GL_INVALID_VALUE,
-                        "glDispatchCompute(num_groups_%c)", 'x' + i);
-            return;
-         }
-      }
-      if (!_mesa_valid_to_render(ctx, "glDispatchCompute"))
-         return;
-      prog = ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE];
-      if (prog == NULL || prog->_LinkedShaders[MESA_SHADER_COMPUTE] == NULL) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glDispatchCompute(no active compute shader)");
-         return;
-      }
-      ctx->Driver.DispatchCompute(ctx, num_groups);
-   } else {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "unsupported function (glDispatchCompute) called");
-   }
+   if (MESA_VERBOSE & VERBOSE_API)
+      _mesa_debug(ctx, "glDispatchCompute(%d, %d, %d)\n",
+                  num_groups_x, num_groups_y, num_groups_z);
+
+   if (!_mesa_validate_DispatchCompute(ctx, num_groups))
+      return;
+
+   ctx->Driver.DispatchCompute(ctx, num_groups);
 }
 
 extern void GLAPIENTRY
@@ -63,10 +49,11 @@ _mesa_DispatchComputeIndirect(GLintptr indirect)
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   if (ctx->Extensions.ARB_compute_shader) {
-      assert(!"TODO");
-   } else {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "unsupported function (glDispatchComputeIndirect) called");
-   }
+   if (MESA_VERBOSE & VERBOSE_API)
+      _mesa_debug(ctx, "glDispatchComputeIndirect(%d)\n", indirect);
+
+   if (!_mesa_validate_DispatchComputeIndirect(ctx, indirect))
+      return;
+
+   ctx->Driver.DispatchComputeIndirect(ctx, indirect);
 }
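
Editor's note: with validation moved into api_validate.c, the indirect path now reaches a real DispatchComputeIndirect driver hook. A hedged usage sketch from the application side, assuming GL 4.3 headers and a linked compute program bound:

#define GL_GLEXT_PROTOTYPES
#include <GL/glcorearb.h>

static void
dispatch_indirect_example(void)
{
   static const GLuint groups[3] = { 8, 8, 1 };   /* x, y, z group counts */
   GLuint buf;

   glGenBuffers(1, &buf);
   glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buf);
   glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(groups), groups,
                GL_STATIC_DRAW);

   /* the argument is a byte offset into the bound indirect buffer */
   glDispatchComputeIndirect(0);
}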
index b35031d..f29de5f 100644
 #define MAX_PROGRAM_LOCAL_PARAMS       4096
 #define MAX_UNIFORMS                   4096
 #define MAX_UNIFORM_BUFFERS            15 /* + 1 default uniform buffer */
-#define MAX_SHADER_STORAGE_BUFFERS     7  /* + 1 default shader storage buffer */
+#define MAX_SHADER_STORAGE_BUFFERS     16
 /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
 #define MAX_COMBINED_UNIFORM_BUFFERS   (MAX_UNIFORM_BUFFERS * 6)
 #define MAX_COMBINED_SHADER_STORAGE_BUFFERS   (MAX_SHADER_STORAGE_BUFFERS * 6)
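
Editor's note: with MAX_SHADER_STORAGE_BUFFERS raised from 7 to 16, MAX_COMBINED_SHADER_STORAGE_BUFFERS above works out to 16 * 6 = 96 binding points across the six shader stages.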
index 05bc50d..f02e842 100644
@@ -41,22 +41,27 @@ enum mesa_block_class {
 };
 
 /**
- * Prepare the source or destination resource, including:
- * - Error checking
- * - Creating texture wrappers for renderbuffers
+ * Prepare the source or destination resource.  This involves error
+ * checking and returning the relevant gl_texture_image or gl_renderbuffer.
+ * Note that one of the resulting tex_image or renderbuffer pointers will be
+ * NULL and the other will be non-null.
+ *
  * \param name  the texture or renderbuffer name
- * \param target  GL_TEXTURE target or GL_RENDERBUFFER.  For the later, will
- *                be changed to a compatible GL_TEXTURE target.
+ * \param target  a GL_TEXTURE_x target or GL_RENDERBUFFER
  * \param level  mipmap level
- * \param tex_obj  returns a pointer to a texture object
+ * \param z  src or dest Z
+ * \param depth  number of slices/faces/layers to copy
  * \param tex_image  returns a pointer to a texture image
- * \param tmp_tex  returns temporary texture object name
+ * \param renderbuffer  returns a pointer to a renderbuffer
  * \return true if success, false if error
  */
 static bool
-prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
-               struct gl_texture_object **tex_obj,
-               struct gl_texture_image **tex_image, GLuint *tmp_tex,
+prepare_target(struct gl_context *ctx, GLuint name, GLenum target,
+               int level, int z, int depth,
+               struct gl_texture_image **tex_image,
+               struct gl_renderbuffer **renderbuffer,
+               mesa_format *format,
+               GLenum *internalFormat,
                const char *dbg_prefix)
 {
    if (name == 0) {
@@ -72,7 +77,7 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
     *   - is TEXTURE_BUFFER, or
     *   - is one of the cubemap face selectors described in table 3.17,
     */
-   switch (*target) {
+   switch (target) {
    case GL_RENDERBUFFER:
       /* Not a texture target, but valid */
    case GL_TEXTURE_1D:
@@ -93,12 +98,13 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
    default:
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glCopyImageSubData(%sTarget = %s)", dbg_prefix,
-                  _mesa_enum_to_string(*target));
+                  _mesa_enum_to_string(target));
       return false;
    }
 
-   if (*target == GL_RENDERBUFFER) {
+   if (target == GL_RENDERBUFFER) {
       struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name);
+
       if (!rb) {
          _mesa_error(ctx, GL_INVALID_VALUE,
                      "glCopyImageSubData(%sName = %u)", dbg_prefix, name);
@@ -117,49 +123,38 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
          return false;
       }
 
-      if (rb->NumSamples > 1)
-         *target = GL_TEXTURE_2D_MULTISAMPLE;
-      else
-         *target = GL_TEXTURE_2D;
-
-      *tmp_tex = 0;
-      _mesa_GenTextures(1, tmp_tex);
-      if (*tmp_tex == 0)
-         return false; /* Error already set by GenTextures */
-
-      _mesa_BindTexture(*target, *tmp_tex);
-      *tex_obj = _mesa_lookup_texture(ctx, *tmp_tex);
-      *tex_image = _mesa_get_tex_image(ctx, *tex_obj, *target, 0);
-
-      if (!ctx->Driver.BindRenderbufferTexImage(ctx, rb, *tex_image)) {
-         _mesa_problem(ctx, "Failed to create texture from renderbuffer");
-         return false;
-      }
-
-      if (ctx->Driver.FinishRenderTexture && !rb->NeedsFinishRenderTexture) {
-         rb->NeedsFinishRenderTexture = true;
-         ctx->Driver.FinishRenderTexture(ctx, rb);
-      }
+      *renderbuffer = rb;
+      *format = rb->Format;
+      *internalFormat = rb->InternalFormat;
+      *tex_image = NULL;
    } else {
-      *tex_obj = _mesa_lookup_texture(ctx, name);
-      if (!*tex_obj) {
+      struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
+
+      if (!texObj) {
          _mesa_error(ctx, GL_INVALID_VALUE,
                      "glCopyImageSubData(%sName = %u)", dbg_prefix, name);
          return false;
       }
 
-      _mesa_test_texobj_completeness(ctx, *tex_obj);
-      if (!(*tex_obj)->_BaseComplete ||
-          (level != 0 && !(*tex_obj)->_MipmapComplete)) {
+      _mesa_test_texobj_completeness(ctx, texObj);
+      if (!texObj->_BaseComplete ||
+          (level != 0 && !texObj->_MipmapComplete)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "glCopyImageSubData(%sName incomplete)", dbg_prefix);
          return false;
       }
 
-      if ((*tex_obj)->Target != *target) {
-         _mesa_error(ctx, GL_INVALID_ENUM,
+      /* Note that target will not be a cube face name */
+      if (texObj->Target != target) {
+         /*
+          * From GL_ARB_copy_image specification:
+          * "INVALID_VALUE is generated if either <srcName> or <dstName> does
+          * not correspond to a valid renderbuffer or texture object according
+          * to the corresponding target parameter."
+          */
+         _mesa_error(ctx, GL_INVALID_VALUE,
                      "glCopyImageSubData(%sTarget = %s)", dbg_prefix,
-                     _mesa_enum_to_string(*target));
+                     _mesa_enum_to_string(target));
          return false;
       }
 
@@ -169,12 +164,36 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
          return false;
       }
 
-      *tex_image = _mesa_select_tex_image(*tex_obj, *target, level);
+      if (target == GL_TEXTURE_CUBE_MAP) {
+         int i;
+
+         assert(z < MAX_FACES);  /* should have been caught earlier */
+
+         /* make sure all the cube faces are present */
+         for (i = 0; i < depth; i++) {
+            if (!texObj->Image[z+i][level]) {
+               /* missing cube face */
+               _mesa_error(ctx, GL_INVALID_OPERATION,
+                           "glCopyImageSubData(missing cube face)");
+               return false;
+            }
+         }
+
+         *tex_image = texObj->Image[z][level];
+      }
+      else {
+         *tex_image = _mesa_select_tex_image(texObj, target, level);
+      }
+
       if (!*tex_image) {
          _mesa_error(ctx, GL_INVALID_VALUE,
                      "glCopyImageSubData(%sLevel = %u)", dbg_prefix, level);
          return false;
       }
+
+      *renderbuffer = NULL;
+      *format = (*tex_image)->TexFormat;
+      *internalFormat = (*tex_image)->InternalFormat;
    }
 
    return true;
@@ -188,10 +207,14 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
  */
 static bool
 check_region_bounds(struct gl_context *ctx,
+                    GLenum target,
                     const struct gl_texture_image *tex_image,
+                    const struct gl_renderbuffer *renderbuffer,
                     int x, int y, int z, int width, int height, int depth,
                     const char *dbg_prefix)
 {
+   int surfWidth, surfHeight, surfDepth;
+
    if (width < 0 || height < 0 || depth < 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCopyImageSubData(%sWidth, %sHeight, or %sDepth is negative)",
@@ -207,7 +230,14 @@ check_region_bounds(struct gl_context *ctx,
    }
 
    /* Check X direction */
-   if (x + width > tex_image->Width) {
+   if (target == GL_RENDERBUFFER) {
+      surfWidth = renderbuffer->Width;
+   }
+   else {
+      surfWidth = tex_image->Width;
+   }
+
+   if (x + width > surfWidth) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCopyImageSubData(%sX or %sWidth exceeds image bounds)",
                   dbg_prefix, dbg_prefix);
@@ -215,66 +245,49 @@ check_region_bounds(struct gl_context *ctx,
    }
 
    /* Check Y direction */
-   switch (tex_image->TexObject->Target) {
+   switch (target) {
+   case GL_RENDERBUFFER:
+      surfHeight = renderbuffer->Height;
+      break;
    case GL_TEXTURE_1D:
    case GL_TEXTURE_1D_ARRAY:
-      if (y != 0 || height != 1) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glCopyImageSubData(%sY or %sHeight exceeds image bounds)",
-                     dbg_prefix, dbg_prefix);
-         return false;
-      }
+      surfHeight = 1;
       break;
    default:
-      if (y + height > tex_image->Height) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glCopyImageSubData(%sY or %sHeight exceeds image bounds)",
-                     dbg_prefix, dbg_prefix);
-         return false;
-      }
-      break;
+      surfHeight = tex_image->Height;
+   }
+
+   if (y + height > surfHeight) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glCopyImageSubData(%sY or %sHeight exceeds image bounds)",
+                  dbg_prefix, dbg_prefix);
+      return false;
    }
 
    /* Check Z direction */
-   switch (tex_image->TexObject->Target) {
+   switch (target) {
+   case GL_RENDERBUFFER:
    case GL_TEXTURE_1D:
    case GL_TEXTURE_2D:
    case GL_TEXTURE_2D_MULTISAMPLE:
    case GL_TEXTURE_RECTANGLE:
-      if (z != 0 || depth != 1) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
-                     dbg_prefix, dbg_prefix);
-         return false;
-      }
+      surfDepth = 1;
       break;
    case GL_TEXTURE_CUBE_MAP:
-      if (z < 0 || z + depth > 6) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
-                     dbg_prefix, dbg_prefix);
-         return false;
-      }
+      surfDepth = 6;
       break;
    case GL_TEXTURE_1D_ARRAY:
-      if (z < 0 || z + depth > tex_image->Height) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
-                     dbg_prefix, dbg_prefix);
-         return false;
-      }
-      break;
-   case GL_TEXTURE_CUBE_MAP_ARRAY:
-   case GL_TEXTURE_2D_ARRAY:
-   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
-   case GL_TEXTURE_3D:
-      if (z < 0 || z + depth > tex_image->Depth) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
-                     dbg_prefix, dbg_prefix);
-         return false;
-      }
+      surfDepth = tex_image->Height;
       break;
+   default:
+      surfDepth = tex_image->Depth;
+   }
+
+   if (z < 0 || z + depth > surfDepth) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
+                  dbg_prefix, dbg_prefix);
+      return false;
    }
 
    return true;
@@ -406,10 +419,12 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel,
                        GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth)
 {
    GET_CURRENT_CONTEXT(ctx);
-   GLuint tmpTexNames[2] = { 0, 0 };
-   struct gl_texture_object *srcTexObj, *dstTexObj;
    struct gl_texture_image *srcTexImage, *dstTexImage;
+   struct gl_renderbuffer *srcRenderbuffer, *dstRenderbuffer;
+   mesa_format srcFormat, dstFormat;
+   GLenum srcIntFormat, dstIntFormat;
    GLuint src_bw, src_bh, dst_bw, dst_bh;
+   int dstWidth, dstHeight, dstDepth;
    int i;
 
    if (MESA_VERBOSE & VERBOSE_API)
@@ -420,7 +435,7 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel,
                   srcX, srcY, srcZ,
                   dstName, _mesa_enum_to_string(dstTarget), dstLevel,
                   dstX, dstY, dstZ,
-                  srcWidth, srcHeight, srcWidth);
+                  srcWidth, srcHeight, srcDepth);
 
    if (!ctx->Extensions.ARB_copy_image) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
@@ -428,67 +443,93 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel,
       return;
    }
 
-   if (!prepare_target(ctx, srcName, &srcTarget, srcLevel,
-                       &srcTexObj, &srcTexImage, &tmpTexNames[0], "src"))
-      goto cleanup;
+   if (!prepare_target(ctx, srcName, srcTarget, srcLevel, srcZ, srcDepth,
+                       &srcTexImage, &srcRenderbuffer, &srcFormat,
+                       &srcIntFormat, "src"))
+      return;
 
-   if (!prepare_target(ctx, dstName, &dstTarget, dstLevel,
-                       &dstTexObj, &dstTexImage, &tmpTexNames[1], "dst"))
-      goto cleanup;
+   if (!prepare_target(ctx, dstName, dstTarget, dstLevel, dstZ, srcDepth,
+                       &dstTexImage, &dstRenderbuffer, &dstFormat,
+                       &dstIntFormat, "dst"))
+      return;
 
-   _mesa_get_format_block_size(srcTexImage->TexFormat, &src_bw, &src_bh);
+   _mesa_get_format_block_size(srcFormat, &src_bw, &src_bh);
    if ((srcX % src_bw != 0) || (srcY % src_bh != 0) ||
        (srcWidth % src_bw != 0) || (srcHeight % src_bh != 0)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCopyImageSubData(unaligned src rectangle)");
-      goto cleanup;
+      return;
    }
 
-   _mesa_get_format_block_size(dstTexImage->TexFormat, &dst_bw, &dst_bh);
+   _mesa_get_format_block_size(dstFormat, &dst_bw, &dst_bh);
    if ((dstX % dst_bw != 0) || (dstY % dst_bh != 0)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCopyImageSubData(unaligned dst rectangle)");
-      goto cleanup;
+      return;
    }
 
-   if (!check_region_bounds(ctx, srcTexImage, srcX, srcY, srcZ,
-                            srcWidth, srcHeight, srcDepth, "src"))
-      goto cleanup;
+   /* From the GL_ARB_copy_image spec:
+    *
+    * "The dimensions are always specified in texels, even for compressed
+    * texture formats. But it should be noted that if only one of the
+    * source and destination textures is compressed then the number of
+    * texels touched in the compressed image will be a factor of the
+    * block size larger than in the uncompressed image."
+    *
+    * So, if copying from compressed to uncompressed, the dest region is
+    * shrunk by the src block size factor.  If copying from uncompressed
+    * to compressed, the dest region is grown by the dest block size factor.
+    * Note that we're passed the _source_ width, height, depth and those
+    * dimensions are never changed.
+    */
+   dstWidth = srcWidth * dst_bw / src_bw;
+   dstHeight = srcHeight * dst_bh / src_bh;
+   dstDepth = srcDepth;
+
+   if (!check_region_bounds(ctx, srcTarget, srcTexImage, srcRenderbuffer,
+                            srcX, srcY, srcZ, srcWidth, srcHeight, srcDepth,
+                            "src"))
+      return;
 
-   if (!check_region_bounds(ctx, dstTexImage, dstX, dstY, dstZ,
-                            (srcWidth / src_bw) * dst_bw,
-                            (srcHeight / src_bh) * dst_bh, srcDepth, "dst"))
-      goto cleanup;
+   if (!check_region_bounds(ctx, dstTarget, dstTexImage, dstRenderbuffer,
+                            dstX, dstY, dstZ, dstWidth, dstHeight, dstDepth,
+                            "dst"))
+      return;
 
-   if (!copy_format_compatible(ctx, srcTexImage->InternalFormat,
-                               dstTexImage->InternalFormat)) {
+   if (!copy_format_compatible(ctx, srcIntFormat, dstIntFormat)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glCopyImageSubData(internalFormat mismatch)");
-      goto cleanup;
+      return;
    }
 
+   /* loop over 2D slices/faces/layers */
    for (i = 0; i < srcDepth; ++i) {
-      int srcNewZ, dstNewZ;
-
-      if (srcTexObj->Target == GL_TEXTURE_CUBE_MAP) {
-         srcTexImage = srcTexObj->Image[i + srcZ][srcLevel];
-         srcNewZ = 0;
-      } else {
-         srcNewZ = srcZ + i;
+      int newSrcZ = srcZ + i;
+      int newDstZ = dstZ + i;
+
+      if (srcTexImage &&
+          srcTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) {
+         /* need to update srcTexImage pointer for the cube face */
+         assert(srcZ + i < MAX_FACES);
+         srcTexImage = srcTexImage->TexObject->Image[srcZ + i][srcLevel];
+         assert(srcTexImage);
+         newSrcZ = 0;
       }
 
-      if (dstTexObj->Target == GL_TEXTURE_CUBE_MAP) {
-         dstTexImage = dstTexObj->Image[i + dstZ][dstLevel];
-         dstNewZ = 0;
-      } else {
-         dstNewZ = dstZ + i;
+      if (dstTexImage &&
+          dstTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) {
+         /* need to update dstTexImage pointer for the cube face */
+         assert(dstZ + i < MAX_FACES);
+         dstTexImage = dstTexImage->TexObject->Image[dstZ + i][dstLevel];
+         assert(dstTexImage);
+         newDstZ = 0;
       }
 
-      ctx->Driver.CopyImageSubData(ctx, srcTexImage, srcX, srcY, srcNewZ,
-                                   dstTexImage, dstX, dstY, dstNewZ,
+      ctx->Driver.CopyImageSubData(ctx,
+                                   srcTexImage, srcRenderbuffer,
+                                   srcX, srcY, newSrcZ,
+                                   dstTexImage, dstRenderbuffer,
+                                   dstX, dstY, newDstZ,
                                    srcWidth, srcHeight);
    }
-
-cleanup:
-   _mesa_DeleteTextures(2, tmpTexNames);
 }
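
Editor's note: a worked example of the block-size scaling above. Copying a 64x64-texel region from a DXT1 source (4x4 blocks, so src_bw = src_bh = 4) into an uncompressed RGBA8 destination (dst_bw = dst_bh = 1) gives dstWidth = 64 * 1 / 4 = 16 and dstHeight = 16, with dstDepth equal to srcDepth; copying the other way, uncompressed into DXT1, the destination region grows by the same factor of 4 in each dimension.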
index 87eb63e..88f3727 100644
@@ -269,20 +269,25 @@ struct dd_function_table {
                            struct gl_renderbuffer *rb,
                            GLint x, GLint y,
                            GLsizei width, GLsizei height);
-
    /**
     * Called by glCopyImageSubData().
     *
-    * This function should copy one 2-D slice from srcTexImage to
-    * dstTexImage.  If one of the textures is 3-D or is a 1-D or 2-D array
+    * This function should copy one 2-D slice from src_teximage or
+    * src_renderbuffer to dst_teximage or dst_renderbuffer.  Either the
+    * teximage or renderbuffer pointer will be non-null to indicate which
+    * is the real src/dst.
+    *
+    * If one of the textures is 3-D or is a 1-D or 2-D array
     * texture, this function will be called multiple times: once for each
     * slice.  If one of the textures is a cube map, this function will be
     * called once for each face to be copied.
     */
    void (*CopyImageSubData)(struct gl_context *ctx,
-                            struct gl_texture_image *src_image,
+                            struct gl_texture_image *src_teximage,
+                            struct gl_renderbuffer *src_renderbuffer,
                             int src_x, int src_y, int src_z,
-                            struct gl_texture_image *dstTexImage,
+                            struct gl_texture_image *dst_teximage,
+                            struct gl_renderbuffer *dst_renderbuffer,
                             int dst_x, int dst_y, int dst_z,
                             int src_width, int src_height);
 
@@ -1016,6 +1021,7 @@ struct dd_function_table {
     */
    /*@{*/
    void (*DispatchCompute)(struct gl_context *ctx, const GLuint *num_groups);
+   void (*DispatchComputeIndirect)(struct gl_context *ctx, GLintptr indirect);
    /*@}*/
 };
 
index a57d5ba..b2c88c3 100644
@@ -159,6 +159,7 @@ static const struct extension extension_table[] = {
    { "GL_ARB_shader_stencil_export",               o(ARB_shader_stencil_export),               GL,             2009 },
    { "GL_ARB_shader_storage_buffer_object",        o(ARB_shader_storage_buffer_object),        GL,             2012 },
    { "GL_ARB_shader_subroutine",                   o(ARB_shader_subroutine),                   GLC,            2010 },
+   { "GL_ARB_shader_texture_image_samples",        o(ARB_shader_texture_image_samples),        GL,             2014 },
    { "GL_ARB_shader_texture_lod",                  o(ARB_shader_texture_lod),                  GL,             2009 },
    { "GL_ARB_shading_language_100",                o(dummy_true),                              GLL,            2003 },
    { "GL_ARB_shading_language_packing",            o(ARB_shading_language_packing),            GL,             2011 },
@@ -262,7 +263,7 @@ static const struct extension extension_table[] = {
    { "GL_EXT_texture_compression_dxt1",            o(ANGLE_texture_compression_dxt),           GL | ES1 | ES2, 2004 },
    { "GL_ANGLE_texture_compression_dxt3",          o(ANGLE_texture_compression_dxt),           GL | ES1 | ES2, 2011 },
    { "GL_ANGLE_texture_compression_dxt5",          o(ANGLE_texture_compression_dxt),           GL | ES1 | ES2, 2011 },
-   { "GL_EXT_texture_compression_latc",            o(EXT_texture_compression_latc),            GL            2006 },
+   { "GL_EXT_texture_compression_latc",            o(EXT_texture_compression_latc),            GLL,            2006 },
    { "GL_EXT_texture_compression_rgtc",            o(ARB_texture_compression_rgtc),            GL,             2004 },
    { "GL_EXT_texture_compression_s3tc",            o(EXT_texture_compression_s3tc),            GL,             2000 },
    { "GL_EXT_texture_cube_map",                    o(ARB_texture_cube_map),                    GLL,            2001 },
@@ -365,7 +366,7 @@ static const struct extension extension_table[] = {
    { "GL_ATI_draw_buffers",                        o(dummy_true),                              GLL,            2002 },
    { "GL_ATI_fragment_shader",                     o(ATI_fragment_shader),                     GLL,            2001 },
    { "GL_ATI_separate_stencil",                    o(ATI_separate_stencil),                    GLL,            2006 },
-   { "GL_ATI_texture_compression_3dc",             o(ATI_texture_compression_3dc),             GL            2004 },
+   { "GL_ATI_texture_compression_3dc",             o(ATI_texture_compression_3dc),             GLL,            2004 },
    { "GL_ATI_texture_env_combine3",                o(ATI_texture_env_combine3),                GLL,            2002 },
    { "GL_ATI_texture_float",                       o(ARB_texture_float),                       GL,             2002 },
    { "GL_ATI_texture_mirror_once",                 o(ATI_texture_mirror_once),                 GL,             2006 },
index 07db195..fe6bdc2 100644
@@ -1389,8 +1389,16 @@ framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb,
          fb->DefaultGeometry.Height = param;
       break;
    case GL_FRAMEBUFFER_DEFAULT_LAYERS:
+      /*
+      * According to the OpenGL ES 3.1 specification section 9.2.1, the
+      * GL_FRAMEBUFFER_DEFAULT_LAYERS parameter name is not supported.
+      */
+      if (_mesa_is_gles31(ctx)) {
+         _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname);
+         break;
+      }
       if (param < 0 || param > ctx->Const.MaxFramebufferLayers)
-        _mesa_error(ctx, GL_INVALID_VALUE, "%s", func);
+         _mesa_error(ctx, GL_INVALID_VALUE, "%s", func);
       else
          fb->DefaultGeometry.Layers = param;
       break;
@@ -1451,6 +1459,14 @@ get_framebuffer_parameteriv(struct gl_context *ctx, struct gl_framebuffer *fb,
       *params = fb->DefaultGeometry.Height;
       break;
    case GL_FRAMEBUFFER_DEFAULT_LAYERS:
+      /*
+       * According to the OpenGL ES 3.1 specification section 9.2.3, the
+       * GL_FRAMEBUFFER_DEFAULT_LAYERS parameter name is not supported.
+       */
+      if (_mesa_is_gles31(ctx)) {
+         _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname);
+         break;
+      }
       *params = fb->DefaultGeometry.Layers;
       break;
    case GL_FRAMEBUFFER_DEFAULT_SAMPLES:
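
Editor's note: the observable effect of the two ES 3.1 checks, as a hedged client-side sketch. Assumes a current ES 3.1 context with a framebuffer object bound; GL_FRAMEBUFFER_DEFAULT_LAYERS is absent from the ES 3.1 headers, so its registry value is defined by hand:

#include <GLES3/gl31.h>
#include <assert.h>

#ifndef GL_FRAMEBUFFER_DEFAULT_LAYERS
#define GL_FRAMEBUFFER_DEFAULT_LAYERS 0x9312   /* from the desktop GL registry */
#endif

static void
check_default_layers_es31(void)
{
   GLint layers = -1;

   glGetFramebufferParameteriv(GL_DRAW_FRAMEBUFFER,
                               GL_FRAMEBUFFER_DEFAULT_LAYERS, &layers);

   /* ES 3.1 rejects the pname; the output value is left untouched */
   assert(glGetError() == GL_INVALID_ENUM);
   assert(layers == -1);
}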
@@ -3595,7 +3611,16 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
 
    switch (pname) {
    case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_EXT:
-      *params = _mesa_is_winsys_fbo(buffer)
+      /* From the OpenGL spec, 9.2. Binding and Managing Framebuffer Objects:
+       *
+       * "If the value of FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is NONE, then
+       *  either no framebuffer is bound to target; or the default framebuffer
+       *  is bound, attachment is DEPTH or STENCIL, and the number of depth or
+       *  stencil bits, respectively, is zero."
+       */
+      *params = (_mesa_is_winsys_fbo(buffer) &&
+                 ((attachment != GL_DEPTH && attachment != GL_STENCIL) ||
+                  (att->Type != GL_NONE)))
          ? GL_FRAMEBUFFER_DEFAULT : att->Type;
       return;
    case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_EXT:
index 22eb5a7..50626a8 100644
@@ -111,7 +111,7 @@ def get_channel_bits(fmat, chan_name):
             return 1 if fmat.has_channel('a') else 0
          else:
             return 0
-      elif fmat.layout == 'rgtc':
+      elif fmat.layout in ('rgtc', 'latc'):
          return 8 if fmat.has_channel(chan_name) else 0
       elif fmat.layout in ('etc1', 'etc2'):
          if fmat.name.endswith('_ALPHA1') and chan_name == 'a':
index 587221c..9f92007 100644
@@ -188,17 +188,6 @@ _mesa_get_format_max_bits(mesa_format format)
 
 /**
  * Return the layout type of the given format.
- * The return value will be one of:
- *    MESA_FORMAT_LAYOUT_ARRAY
- *    MESA_FORMAT_LAYOUT_PACKED
- *    MESA_FORMAT_LAYOUT_S3TC
- *    MESA_FORMAT_LAYOUT_RGTC
- *    MESA_FORMAT_LAYOUT_FXT1
- *    MESA_FORMAT_LAYOUT_ETC1
- *    MESA_FORMAT_LAYOUT_ETC2
- *    MESA_FORMAT_LAYOUT_BPTC
- *    MESA_FORMAT_LAYOUT_ASTC
- *    MESA_FORMAT_LAYOUT_OTHER
  */
 extern enum mesa_format_layout
 _mesa_get_format_layout(mesa_format format)
@@ -926,6 +915,10 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
    case MESA_FORMAT_R8G8B8X8_UNORM:
    case MESA_FORMAT_B8G8R8X8_UNORM:
    case MESA_FORMAT_X8R8G8B8_UNORM:
+   case MESA_FORMAT_A8B8G8R8_UINT:
+   case MESA_FORMAT_R8G8B8A8_UINT:
+   case MESA_FORMAT_B8G8R8A8_UINT:
+   case MESA_FORMAT_A8R8G8B8_UINT:
       *datatype = GL_UNSIGNED_BYTE;
       *comps = 4;
       return;
@@ -936,6 +929,8 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
       return;
    case MESA_FORMAT_B5G6R5_UNORM:
    case MESA_FORMAT_R5G6B5_UNORM:
+   case MESA_FORMAT_B5G6R5_UINT:
+   case MESA_FORMAT_R5G6B5_UINT:
       *datatype = GL_UNSIGNED_SHORT_5_6_5;
       *comps = 3;
       return;
@@ -943,6 +938,8 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
    case MESA_FORMAT_B4G4R4A4_UNORM:
    case MESA_FORMAT_A4R4G4B4_UNORM:
    case MESA_FORMAT_B4G4R4X4_UNORM:
+   case MESA_FORMAT_B4G4R4A4_UINT:
+   case MESA_FORMAT_A4R4G4B4_UINT:
       *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
       *comps = 4;
       return;
@@ -950,6 +947,8 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
    case MESA_FORMAT_B5G5R5A1_UNORM:
    case MESA_FORMAT_A1R5G5B5_UNORM:
    case MESA_FORMAT_B5G5R5X1_UNORM:
+   case MESA_FORMAT_B5G5R5A1_UINT:
+   case MESA_FORMAT_A1R5G5B5_UINT:
       *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
       *comps = 4;
       return;
@@ -960,6 +959,7 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
       return;
 
    case MESA_FORMAT_A1B5G5R5_UNORM:
+   case MESA_FORMAT_A1B5G5R5_UINT:
       *datatype = GL_UNSIGNED_SHORT_5_5_5_1;
       *comps = 4;
       return;
@@ -994,19 +994,23 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
       return;
 
    case MESA_FORMAT_R3G3B2_UNORM:
+   case MESA_FORMAT_R3G3B2_UINT:
       *datatype = GL_UNSIGNED_BYTE_2_3_3_REV;
       *comps = 3;
       return;
    case MESA_FORMAT_A4B4G4R4_UNORM:
+   case MESA_FORMAT_A4B4G4R4_UINT:
       *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
       *comps = 4;
       return;
 
    case MESA_FORMAT_R4G4B4A4_UNORM:
+   case MESA_FORMAT_R4G4B4A4_UINT:
       *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
       *comps = 4;
       return;
    case MESA_FORMAT_R5G5B5A1_UNORM:
+   case MESA_FORMAT_R5G5B5A1_UINT:
       *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
       *comps = 4;
       return;
@@ -1022,6 +1026,7 @@ _mesa_uncompressed_format_to_type_and_comps(mesa_format format,
       return;
 
    case MESA_FORMAT_B2G3R3_UNORM:
+   case MESA_FORMAT_B2G3R3_UINT:
       *datatype = GL_UNSIGNED_BYTE_3_3_2;
       *comps = 3;
       return;
@@ -1972,6 +1977,96 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format,
               type == GL_UNSIGNED_INT_2_10_10_10_REV &&
               !swapBytes);
 
+   case MESA_FORMAT_B5G6R5_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_SHORT_5_6_5;
+
+   case MESA_FORMAT_R5G6B5_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_SHORT_5_6_5_REV;
+
+   case MESA_FORMAT_B2G3R3_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_BYTE_3_3_2;
+
+   case MESA_FORMAT_R3G3B2_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_BYTE_2_3_3_REV;
+
+   case MESA_FORMAT_A4B4G4R4_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4 && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && swapBytes)
+         return GL_TRUE;
+      return GL_FALSE;
+
+   case MESA_FORMAT_R4G4B4A4_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_B4G4R4A4_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV &&
+         !swapBytes;
+
+   case MESA_FORMAT_A4R4G4B4_UINT:
+      return GL_FALSE;
+
+   case MESA_FORMAT_A1B5G5R5_UINT:
+      return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+         !swapBytes;
+
+   case MESA_FORMAT_B5G5R5A1_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV &&
+         !swapBytes;
+
+   case MESA_FORMAT_A1R5G5B5_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+         !swapBytes;
+
+   case MESA_FORMAT_R5G5B5A1_UINT:
+      return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV;
+
+   case MESA_FORMAT_A8B8G8R8_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV && swapBytes)
+         return GL_TRUE;
+      return GL_FALSE;
+
+   case MESA_FORMAT_A8R8G8B8_UINT:
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_R8G8B8A8_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_B8G8R8A8_UINT:
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
    case MESA_FORMAT_R9G9B9E5_FLOAT:
       return format == GL_RGB && type == GL_UNSIGNED_INT_5_9_9_9_REV &&
          !swapBytes;
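
Editor's note: the packed-UINT cases above all follow one pattern: a packed format matches its natural GL type directly, and the byte-reversed type only when byte swapping is requested. A hedged standalone restatement of that rule for MESA_FORMAT_R8G8B8A8_UINT:

#define GL_GLEXT_PROTOTYPES
#include <GL/glcorearb.h>

static GLboolean
matches_r8g8b8a8_uint(GLenum format, GLenum type, GLboolean swapBytes)
{
   if (format != GL_RGBA_INTEGER)
      return GL_FALSE;

   /* native byte order: no swap requested */
   if (type == GL_UNSIGNED_INT_8_8_8_8_REV && !swapBytes)
      return GL_TRUE;

   /* reversed packing only lines up when bytes are swapped */
   if (type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
      return GL_TRUE;

   return GL_FALSE;
}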
index 80729d9..529de31 100644
@@ -186,10 +186,26 @@ MESA_FORMAT_RGBX_FLOAT32                  , array , 1, 1, f32 , f32 , f32 , x32
 MESA_FORMAT_Z_FLOAT32                     , array , 1, 1, f32 ,     ,     ,     , x___, zs
 
 # Packed signed/unsigned non-normalized integer formats
+MESA_FORMAT_A8B8G8R8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , wzyx, rgb
+MESA_FORMAT_A8R8G8B8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , yzwx, rgb
+MESA_FORMAT_R8G8B8A8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , xyzw, rgb
+MESA_FORMAT_B8G8R8A8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , zyxw, rgb
 MESA_FORMAT_B10G10R10A2_UINT              , packed, 1, 1, u10 , u10 , u10 , u2  , zyxw, rgb
 MESA_FORMAT_R10G10B10A2_UINT              , packed, 1, 1, u10 , u10 , u10 , u2  , xyzw, rgb
 MESA_FORMAT_A2B10G10R10_UINT              , packed, 1, 1, u2  , u10 , u10 , u10 , wzyx, rgb
 MESA_FORMAT_A2R10G10B10_UINT              , packed, 1, 1, u2  , u10 , u10 , u10 , yzwx, rgb
+MESA_FORMAT_B5G6R5_UINT                   , packed, 1, 1, u5  , u6  , u5  ,     , zyx1, rgb
+MESA_FORMAT_R5G6B5_UINT                   , packed, 1, 1, u5  , u6  , u5  ,     , xyz1, rgb
+MESA_FORMAT_B2G3R3_UINT                   , packed, 1, 1, u2  , u3  , u3  ,     , zyx1, rgb
+MESA_FORMAT_R3G3B2_UINT                   , packed, 1, 1, u3  , u3  , u2  ,     , xyz1, rgb
+MESA_FORMAT_A4B4G4R4_UINT                 , packed, 1, 1, u4  , u4  , u4  , u4  , wzyx, rgb
+MESA_FORMAT_R4G4B4A4_UINT                 , packed, 1, 1, u4  , u4  , u4  , u4  , xyzw, rgb
+MESA_FORMAT_B4G4R4A4_UINT                 , packed, 1, 1, u4  , u4  , u4  , u4  , zyxw, rgb
+MESA_FORMAT_A4R4G4B4_UINT                 , packed, 1, 1, u4  , u4  , u4  , u4  , yzwx, rgb
+MESA_FORMAT_A1B5G5R5_UINT                 , packed, 1, 1, u1  , u5  , u5  , u5  , wzyx, rgb
+MESA_FORMAT_B5G5R5A1_UINT                 , packed, 1, 1, u5  , u5  , u5  , u1  , zyxw, rgb
+MESA_FORMAT_A1R5G5B5_UINT                 , packed, 1, 1, u1  , u5  , u5  , u5  , yzwx, rgb
+MESA_FORMAT_R5G5B5A1_UINT                 , packed, 1, 1, u5  , u5  , u5  , u1  , xyzw, rgb
 
 # Array signed/unsigned non-normalized integer formats
 MESA_FORMAT_A_UINT8                       , array , 1, 1, u8  ,     ,     ,     , 000x, rgb
@@ -278,10 +294,10 @@ MESA_FORMAT_RG_RGTC2_UNORM                , rgtc  , 4, 4, x128,     ,     ,
 MESA_FORMAT_RG_RGTC2_SNORM                , rgtc  , 4, 4, x128,     ,     ,     , xy01, rgb
 
 # LATC1/2 compressed formats
-MESA_FORMAT_L_LATC1_UNORM                 , rgtc  , 4, 4, x64 ,     ,     ,     , xxx1, rgb
-MESA_FORMAT_L_LATC1_SNORM                 , rgtc  , 4, 4, x64 ,     ,     ,     , xxx1, rgb
-MESA_FORMAT_LA_LATC2_UNORM                , rgtc  , 4, 4, x128,     ,     ,     , xxxy, rgb
-MESA_FORMAT_LA_LATC2_SNORM                , rgtc  , 4, 4, x128,     ,     ,     , xxxy, rgb
+MESA_FORMAT_L_LATC1_UNORM                 , latc  , 4, 4, x64 ,     ,     ,     , xxx1, rgb
+MESA_FORMAT_L_LATC1_SNORM                 , latc  , 4, 4, x64 ,     ,     ,     , xxx1, rgb
+MESA_FORMAT_LA_LATC2_UNORM                , latc  , 4, 4, x128,     ,     ,     , xxxy, rgb
+MESA_FORMAT_LA_LATC2_SNORM                , latc  , 4, 4, x128,     ,     ,     , xxxy, rgb
 
 # ETC1/2 compressed formats
 MESA_FORMAT_ETC1_RGB8                     , etc1  , 4, 4, x64 ,     ,     ,     , xyz1, rgb
index ccb09b2..794d599 100644
@@ -66,6 +66,7 @@ enum mesa_format_layout {
    MESA_FORMAT_LAYOUT_PACKED,
    MESA_FORMAT_LAYOUT_S3TC,
    MESA_FORMAT_LAYOUT_RGTC,
+   MESA_FORMAT_LAYOUT_LATC,
    MESA_FORMAT_LAYOUT_FXT1,
    MESA_FORMAT_LAYOUT_ETC1,
    MESA_FORMAT_LAYOUT_ETC2,
@@ -471,10 +472,27 @@ typedef enum
    MESA_FORMAT_Z_FLOAT32,
 
    /* Packed signed/unsigned non-normalized integer formats */
+
+   MESA_FORMAT_A8B8G8R8_UINT,    /* RRRR RRRR GGGG GGGG BBBB BBBB AAAA AAAA */
+   MESA_FORMAT_A8R8G8B8_UINT,    /* BBBB BBBB GGGG GGGG RRRR RRRR AAAA AAAA */
+   MESA_FORMAT_R8G8B8A8_UINT,    /* AAAA AAAA BBBB BBBB GGGG GGGG RRRR RRRR */
+   MESA_FORMAT_B8G8R8A8_UINT,    /* AAAA AAAA RRRR RRRR GGGG GGGG BBBB BBBB */
    MESA_FORMAT_B10G10R10A2_UINT, /* AARR RRRR RRRR GGGG GGGG GGBB BBBB BBBB */
    MESA_FORMAT_R10G10B10A2_UINT, /* AABB BBBB BBBB GGGG GGGG GGRR RRRR RRRR */
    MESA_FORMAT_A2B10G10R10_UINT, /* RRRR RRRR RRGG GGGG GGGG BBBB BBBB BBAA */
    MESA_FORMAT_A2R10G10B10_UINT, /* BBBB BBBB BBGG GGGG GGGG RRRR RRRR RRAA */
+   MESA_FORMAT_B5G6R5_UINT,                          /* RRRR RGGG GGGB BBBB */
+   MESA_FORMAT_R5G6B5_UINT,                          /* BBBB BGGG GGGR RRRR */
+   MESA_FORMAT_B2G3R3_UINT,                                    /* RRRG GGBB */
+   MESA_FORMAT_R3G3B2_UINT,                                    /* BBGG GRRR */
+   MESA_FORMAT_A4B4G4R4_UINT,                        /* RRRR GGGG BBBB AAAA */
+   MESA_FORMAT_R4G4B4A4_UINT,                        /* AAAA BBBB GGGG RRRR */
+   MESA_FORMAT_B4G4R4A4_UINT,                        /* AAAA RRRR GGGG BBBB */
+   MESA_FORMAT_A4R4G4B4_UINT,                        /* BBBB GGGG RRRR AAAA */
+   MESA_FORMAT_A1B5G5R5_UINT,                        /* RRRR RGGG GGBB BBBA */
+   MESA_FORMAT_B5G5R5A1_UINT,                        /* ARRR RRGG GGGB BBBB */
+   MESA_FORMAT_A1R5G5B5_UINT,                        /* BBBB BGGG GGRR RRRA */
+   MESA_FORMAT_R5G5B5A1_UINT,                        /* ABBB BBGG GGGR RRRR */
 
    /* Array signed/unsigned non-normalized integer formats */
    MESA_FORMAT_A_UINT8,
index 4855187..539c411 100644
@@ -369,6 +369,12 @@ static const int extra_ARB_compute_shader_es31[] = {
    EXTRA_END
 };
 
+static const int extra_ARB_shader_storage_buffer_object_es31[] = {
+   EXT(ARB_shader_storage_buffer_object),
+   EXTRA_API_ES31,
+   EXTRA_END
+};
+
 EXTRA_EXT(ARB_texture_cube_map);
 EXTRA_EXT(EXT_texture_array);
 EXTRA_EXT(NV_fog_distance);
@@ -417,6 +423,7 @@ EXTRA_EXT(EXT_polygon_offset_clamp);
 EXTRA_EXT(ARB_framebuffer_no_attachments);
 EXTRA_EXT(ARB_tessellation_shader);
 EXTRA_EXT(ARB_shader_subroutine);
+EXTRA_EXT(ARB_shader_storage_buffer_object);
 
 static const int
 extra_ARB_color_buffer_float_or_glcore[] = {
@@ -1001,6 +1008,10 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
    case GL_UNIFORM_BUFFER_BINDING:
       v->value_int = ctx->UniformBuffer->Name;
       break;
+   /* GL_ARB_shader_storage_buffer_object */
+   case GL_SHADER_STORAGE_BUFFER_BINDING:
+      v->value_int = ctx->ShaderStorageBuffer->Name;
+      break;
    /* GL_ARB_timer_query */
    case GL_TIMESTAMP:
       if (ctx->Driver.GetTimestamp) {
@@ -1036,6 +1047,10 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
          v->value_int = 0;
       }
       break;
+   /* GL_ARB_compute_shader */
+   case GL_DISPATCH_INDIRECT_BUFFER_BINDING:
+      v->value_int = ctx->DispatchIndirectBuffer->Name;
+      break;
    }
 }
 
@@ -1924,7 +1939,8 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
         goto invalid_value;
       if (!ctx->Extensions.ARB_uniform_buffer_object)
         goto invalid_enum;
-      v->value_int = ctx->UniformBufferBindings[index].Offset;
+      v->value_int = ctx->UniformBufferBindings[index].Offset < 0 ? 0 :
+                     ctx->UniformBufferBindings[index].Offset;
       return TYPE_INT;
 
    case GL_UNIFORM_BUFFER_SIZE:
@@ -1932,7 +1948,35 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
         goto invalid_value;
       if (!ctx->Extensions.ARB_uniform_buffer_object)
         goto invalid_enum;
-      v->value_int = ctx->UniformBufferBindings[index].Size;
+      v->value_int = ctx->UniformBufferBindings[index].Size < 0 ? 0 :
+                     ctx->UniformBufferBindings[index].Size;
+      return TYPE_INT;
+
+   /* ARB_shader_storage_buffer_object */
+   case GL_SHADER_STORAGE_BUFFER_BINDING:
+      if (!ctx->Extensions.ARB_shader_storage_buffer_object)
+         goto invalid_enum;
+      if (index >= ctx->Const.MaxShaderStorageBufferBindings)
+         goto invalid_value;
+      v->value_int = ctx->ShaderStorageBufferBindings[index].BufferObject->Name;
+      return TYPE_INT;
+
+   case GL_SHADER_STORAGE_BUFFER_START:
+      if (!ctx->Extensions.ARB_shader_storage_buffer_object)
+         goto invalid_enum;
+      if (index >= ctx->Const.MaxShaderStorageBufferBindings)
+         goto invalid_value;
+      v->value_int = ctx->ShaderStorageBufferBindings[index].Offset < 0 ? 0 :
+                     ctx->ShaderStorageBufferBindings[index].Offset;
+      return TYPE_INT;
+
+   case GL_SHADER_STORAGE_BUFFER_SIZE:
+      if (!ctx->Extensions.ARB_shader_storage_buffer_object)
+         goto invalid_enum;
+      if (index >= ctx->Const.MaxShaderStorageBufferBindings)
+         goto invalid_value;
+      v->value_int = ctx->ShaderStorageBufferBindings[index].Size < 0 ? 0 :
+                     ctx->ShaderStorageBufferBindings[index].Size;
       return TYPE_INT;
 
    /* ARB_texture_multisample / GL3.2 */
@@ -1969,7 +2013,8 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
       return TYPE_INT64;
 
    case GL_VERTEX_BINDING_DIVISOR:
-      if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_instanced_arrays)
+      if ((!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_instanced_arrays) &&
+          !_mesa_is_gles31(ctx))
           goto invalid_enum;
       if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
           goto invalid_value;
@@ -1977,7 +2022,7 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
       return TYPE_INT;
 
    case GL_VERTEX_BINDING_OFFSET:
-      if (!_mesa_is_desktop_gl(ctx))
+      if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles31(ctx))
           goto invalid_enum;
       if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
           goto invalid_value;
@@ -1985,13 +2030,21 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
       return TYPE_INT;
 
    case GL_VERTEX_BINDING_STRIDE:
-      if (!_mesa_is_desktop_gl(ctx))
+      if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles31(ctx))
           goto invalid_enum;
       if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
           goto invalid_value;
       v->value_int = ctx->Array.VAO->VertexBinding[VERT_ATTRIB_GENERIC(index)].Stride;
       return TYPE_INT;
 
+   case GL_VERTEX_BINDING_BUFFER:
+      if (ctx->API == API_OPENGLES2 && ctx->Version < 31)
+         goto invalid_enum;
+      if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
+         goto invalid_value;
+      v->value_int = ctx->Array.VAO->VertexBinding[VERT_ATTRIB_GENERIC(index)].BufferObj->Name;
+      return TYPE_INT;
+
    /* ARB_shader_image_load_store */
    case GL_IMAGE_BINDING_NAME: {
       struct gl_texture_object *t;
index b3c337e..38b08b0 100644 (file)
@@ -234,10 +234,10 @@ descriptor=[
 
 { "apis": ["GLES"], "params": [
 # OES_point_size_array
-  [ "POINT_SIZE_ARRAY_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled, TYPE_BOOLEAN)" ],
-  [ "POINT_SIZE_ARRAY_TYPE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Type, TYPE_ENUM)" ],
-  [ "POINT_SIZE_ARRAY_STRIDE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Stride, TYPE_INT)" ],
-  [ "POINT_SIZE_ARRAY_BUFFER_BINDING_OES", "LOC_CUSTOM, TYPE_INT, 0" ],
+  [ "POINT_SIZE_ARRAY_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled, TYPE_BOOLEAN), NO_EXTRA" ],
+  [ "POINT_SIZE_ARRAY_TYPE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Type, TYPE_ENUM), NO_EXTRA" ],
+  [ "POINT_SIZE_ARRAY_STRIDE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Stride, TYPE_INT), NO_EXTRA" ],
+  [ "POINT_SIZE_ARRAY_BUFFER_BINDING_OES", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
 ]},
 
 { "apis": ["GL", "GL_CORE", "GLES2"], "params": [
@@ -450,9 +450,36 @@ descriptor=[
   [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader_es31" ],
   [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ],
   [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ],
+  [ "DISPATCH_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_compute_shader_es31" ],
+
+# GL_ARB_framebuffer_no_attachments / GLES 3.1
+  ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"],
+  ["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"],
+  ["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"],
 
 # GL_ARB_explicit_uniform_location / GLES 3.1
   [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ],
+
+# GL_ARB_separate_shader_objects / GLES 3.1
+  [ "PROGRAM_PIPELINE_BINDING", "LOC_CUSTOM, TYPE_INT, GL_PROGRAM_PIPELINE_BINDING, NO_EXTRA" ],
+
+# GL_ARB_vertex_attrib_binding / GLES 3.1
+  [ "MAX_VERTEX_ATTRIB_RELATIVE_OFFSET", "CONTEXT_ENUM(Const.MaxVertexAttribRelativeOffset), NO_EXTRA" ],
+  [ "MAX_VERTEX_ATTRIB_BINDINGS", "CONTEXT_ENUM(Const.MaxVertexAttribBindings), NO_EXTRA" ],
+
+# GL 4.4 / GLES 3.1
+  [ "MAX_VERTEX_ATTRIB_STRIDE", "CONTEXT_ENUM(Const.MaxVertexAttribStride), NO_EXTRA" ],
+
+# GL_ARB_shader_storage_buffer_object / GLES 3.1
+  [ "MAX_VERTEX_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_FRAGMENT_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_COMPUTE_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_COMBINED_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.MaxCombinedShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_SHADER_STORAGE_BLOCK_SIZE", "CONTEXT_INT(Const.MaxShaderStorageBlockSize), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_SHADER_STORAGE_BUFFER_BINDINGS", "CONTEXT_INT(Const.MaxShaderStorageBufferBindings), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "MAX_COMBINED_SHADER_OUTPUT_RESOURCES", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT", "CONTEXT_INT(Const.ShaderStorageBufferOffsetAlignment), extra_ARB_shader_storage_buffer_object_es31" ],
+  [ "SHADER_STORAGE_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_shader_storage_buffer_object_es31" ],
 ]},
 
 # Enums in OpenGL Core profile and ES 3.1
@@ -761,9 +788,6 @@ descriptor=[
   [ "MAX_GEOMETRY_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents), extra_version_32" ],
   [ "MAX_GEOMETRY_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents), extra_version_32" ],
 
-# GL 4.4
-  [ "MAX_VERTEX_ATTRIB_STRIDE", "CONTEXT_ENUM(Const.MaxVertexAttribStride), NO_EXTRA" ],
-
 # GL_ARB_robustness
   [ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ],
 
@@ -794,30 +818,25 @@ descriptor=[
 # GL_ARB_texture_gather
   [ "MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB", "CONTEXT_INT(Const.MaxProgramTextureGatherComponents), extra_ARB_texture_gather"],
 
-# GL_ARB_separate_shader_objects
-  [ "PROGRAM_PIPELINE_BINDING", "LOC_CUSTOM, TYPE_INT, GL_PROGRAM_PIPELINE_BINDING, NO_EXTRA" ],
-
 # GL_ARB_shader_atomic_counters
   [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
   [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
 
-# GL_ARB_vertex_attrib_binding
-  [ "MAX_VERTEX_ATTRIB_RELATIVE_OFFSET", "CONTEXT_ENUM(Const.MaxVertexAttribRelativeOffset), NO_EXTRA" ],
-  [ "MAX_VERTEX_ATTRIB_BINDINGS", "CONTEXT_ENUM(Const.MaxVertexAttribBindings), NO_EXTRA" ],
-
 # GL_ARB_shader_image_load_store
   [ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_image_load_store" ],
   [ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ],
   [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader"],
 
 # GL_ARB_framebuffer_no_attachments
-  ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"],
-  ["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"],
   ["MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments"],
-  ["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"],
 
 # GL_EXT_polygon_offset_clamp
   [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
+
+# GL_ARB_shader_storage_buffer_object
+  [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
+  [ "MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
+  [ "MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
 ]},
 
 # Enums restricted to OpenGL Core profile
index ce66699..7dab33c 100644 (file)
@@ -28,6 +28,7 @@
 #include "context.h"
 #include "glformats.h"
 #include "formats.h"
+#include "texcompress.h"
 #include "enums.h"
 
 enum {
@@ -494,7 +495,8 @@ _mesa_bytes_per_pixel(GLenum format, GLenum type)
       else
          return -1;
    case GL_UNSIGNED_INT_24_8_EXT:
-      if (format == GL_DEPTH_STENCIL_EXT)
+      if (format == GL_DEPTH_COMPONENT ||
+          format == GL_DEPTH_STENCIL_EXT)
          return sizeof(GLuint);
       else
          return -1;
@@ -1044,6 +1046,34 @@ _mesa_is_color_format(GLenum format)
       case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
       case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
       case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+      case GL_COMPRESSED_RGBA_ASTC_4x4_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_5x4_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_5x5_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_6x5_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_6x6_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_8x5_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_8x6_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_8x8_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_10x5_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_10x6_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_10x8_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_10x10_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_12x10_KHR:
+      case GL_COMPRESSED_RGBA_ASTC_12x12_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR:
       /* generic integer formats */
       case GL_RED_INTEGER_EXT:
       case GL_GREEN_INTEGER_EXT:
@@ -1243,95 +1273,22 @@ _mesa_is_depth_or_stencil_format(GLenum format)
 GLboolean
 _mesa_is_compressed_format(const struct gl_context *ctx, GLenum format)
 {
+   mesa_format m_format = _mesa_glenum_to_compressed_format(format);
+
+   /* Some formats in this switch share a mesa_format_layout with the
+    * compressed formats handled in the layout switch below, so they
+    * must be checked first.
+    */
    switch (format) {
-   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-      /* Assume that the ANGLE flag will always be set if the EXT flag is set.
-       */
-      return ctx->Extensions.ANGLE_texture_compression_dxt;
    case GL_RGB_S3TC:
    case GL_RGB4_S3TC:
    case GL_RGBA_S3TC:
    case GL_RGBA4_S3TC:
       return _mesa_is_desktop_gl(ctx) &&
          ctx->Extensions.ANGLE_texture_compression_dxt;
-   case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
-   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
-   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
-   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
-      return _mesa_is_desktop_gl(ctx)
-         && ctx->Extensions.EXT_texture_sRGB
-         && ctx->Extensions.EXT_texture_compression_s3tc;
-   case GL_COMPRESSED_RGB_FXT1_3DFX:
-   case GL_COMPRESSED_RGBA_FXT1_3DFX:
-      return _mesa_is_desktop_gl(ctx)
-         && ctx->Extensions.TDFX_texture_compression_FXT1;
-   case GL_COMPRESSED_RED_RGTC1:
-   case GL_COMPRESSED_SIGNED_RED_RGTC1:
-   case GL_COMPRESSED_RG_RGTC2:
-   case GL_COMPRESSED_SIGNED_RG_RGTC2:
-      return _mesa_is_desktop_gl(ctx)
-         && ctx->Extensions.ARB_texture_compression_rgtc;
-   case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
-   case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
-   case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
-   case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
-      return ctx->API == API_OPENGL_COMPAT
-         && ctx->Extensions.EXT_texture_compression_latc;
    case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
       return ctx->API == API_OPENGL_COMPAT
          && ctx->Extensions.ATI_texture_compression_3dc;
-   case GL_ETC1_RGB8_OES:
-      return _mesa_is_gles(ctx)
-         && ctx->Extensions.OES_compressed_ETC1_RGB8_texture;
-   case GL_COMPRESSED_RGB8_ETC2:
-   case GL_COMPRESSED_SRGB8_ETC2:
-   case GL_COMPRESSED_RGBA8_ETC2_EAC:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
-   case GL_COMPRESSED_R11_EAC:
-   case GL_COMPRESSED_RG11_EAC:
-   case GL_COMPRESSED_SIGNED_R11_EAC:
-   case GL_COMPRESSED_SIGNED_RG11_EAC:
-   case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-   case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-      return _mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility;
-   case GL_COMPRESSED_RGBA_BPTC_UNORM:
-   case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
-   case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
-   case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
-      return _mesa_is_desktop_gl(ctx) &&
-         ctx->Extensions.ARB_texture_compression_bptc;
-   case GL_COMPRESSED_RGBA_ASTC_4x4_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_5x4_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_5x5_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_6x5_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_6x6_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_8x5_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_8x6_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_8x8_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_10x5_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_10x6_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_10x8_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_10x10_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_12x10_KHR:
-   case GL_COMPRESSED_RGBA_ASTC_12x12_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR:
-   case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR:
-      return ctx->Extensions.KHR_texture_compression_astc_ldr;
    case GL_PALETTE4_RGB8_OES:
    case GL_PALETTE4_RGBA8_OES:
    case GL_PALETTE4_R5_G6_B5_OES:
@@ -1343,6 +1300,39 @@ _mesa_is_compressed_format(const struct gl_context *ctx, GLenum format)
    case GL_PALETTE8_RGBA4_OES:
    case GL_PALETTE8_RGB5_A1_OES:
       return ctx->API == API_OPENGLES;
+   }
+
+   switch (_mesa_get_format_layout(m_format)) {
+   case MESA_FORMAT_LAYOUT_S3TC:
+      if (_mesa_get_format_color_encoding(m_format) == GL_LINEAR) {
+         /* Assume that the ANGLE flag will always be set if the
+          * EXT flag is set.
+          */
+         return ctx->Extensions.ANGLE_texture_compression_dxt;
+      } else {
+         return _mesa_is_desktop_gl(ctx)
+            && ctx->Extensions.EXT_texture_sRGB
+            && ctx->Extensions.EXT_texture_compression_s3tc;
+      }
+   case MESA_FORMAT_LAYOUT_FXT1:
+      return _mesa_is_desktop_gl(ctx)
+         && ctx->Extensions.TDFX_texture_compression_FXT1;
+   case MESA_FORMAT_LAYOUT_RGTC:
+      return _mesa_is_desktop_gl(ctx)
+         && ctx->Extensions.ARB_texture_compression_rgtc;
+   case MESA_FORMAT_LAYOUT_LATC:
+      return ctx->API == API_OPENGL_COMPAT
+         && ctx->Extensions.EXT_texture_compression_latc;
+   case MESA_FORMAT_LAYOUT_ETC1:
+      return _mesa_is_gles(ctx)
+         && ctx->Extensions.OES_compressed_ETC1_RGB8_texture;
+   case MESA_FORMAT_LAYOUT_ETC2:
+      return _mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility;
+   case MESA_FORMAT_LAYOUT_BPTC:
+      return _mesa_is_desktop_gl(ctx) &&
+         ctx->Extensions.ARB_texture_compression_bptc;
+   case MESA_FORMAT_LAYOUT_ASTC:
+      return ctx->Extensions.KHR_texture_compression_astc_ldr;
    default:
       return GL_FALSE;
    }
@@ -1761,6 +1751,10 @@ _mesa_error_check_format_and_type(const struct gl_context *ctx,
       return GL_INVALID_OPERATION;
 
    case GL_UNSIGNED_INT_24_8:
+      /* Depth buffer OK to read in OpenGL ES (NV_read_depth). */
+      if (ctx->API == API_OPENGLES2 && format == GL_DEPTH_COMPONENT)
+         return GL_NO_ERROR;
+
       if (format != GL_DEPTH_STENCIL) {
          return GL_INVALID_OPERATION;
       }
@@ -2145,6 +2139,632 @@ _mesa_es_error_check_format_and_type(GLenum format, GLenum type,
    return type_valid ? GL_NO_ERROR : GL_INVALID_OPERATION;
 }
 
+/**
+ * Return the simple base format for a given internal texture format.
+ * For example, given GL_LUMINANCE12_ALPHA4, return GL_LUMINANCE_ALPHA.
+ *
+ * \param ctx GL context.
+ * \param internalFormat the internal texture format token or 1, 2, 3, or 4.
+ *
+ * \return the corresponding base internal format (GL_ALPHA, GL_LUMINANCE,
+ * GL_LUMINANCE_ALPHA, GL_INTENSITY, GL_RGB, or GL_RGBA), or -1 if invalid.
+ *
+ * This is the format which is used during texture application (i.e. the
+ * texture format and env mode determine the arithmetic used).
+ */
+GLint
+_mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
+{
+   switch (internalFormat) {
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_ALPHA : -1;
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE : -1;
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE_ALPHA : -1;
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_INTENSITY : -1;
+   case 3:
+      return (ctx->API != API_OPENGL_CORE) ? GL_RGB : -1;
+   case GL_RGB:
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return GL_RGB;
+   case 4:
+      return (ctx->API != API_OPENGL_CORE) ? GL_RGBA : -1;
+   case GL_RGBA:
+   case GL_RGBA2:
+   case GL_RGBA4:
+   case GL_RGB5_A1:
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return GL_RGBA;
+   default:
+      ; /* fallthrough */
+   }
+
+   /* GL_BGRA can be an internal format *only* in OpenGL ES (1.x or 2.0).
+    */
+   if (_mesa_is_gles(ctx)) {
+      switch (internalFormat) {
+      case GL_BGRA:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_ES2_compatibility) {
+      switch (internalFormat) {
+      case GL_RGB565:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_depth_texture) {
+      switch (internalFormat) {
+      case GL_DEPTH_COMPONENT:
+      case GL_DEPTH_COMPONENT16:
+      case GL_DEPTH_COMPONENT24:
+      case GL_DEPTH_COMPONENT32:
+         return GL_DEPTH_COMPONENT;
+      case GL_DEPTH_STENCIL:
+      case GL_DEPTH24_STENCIL8:
+         return GL_DEPTH_STENCIL;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_texture_stencil8) {
+      switch (internalFormat) {
+      case GL_STENCIL_INDEX:
+      case GL_STENCIL_INDEX1:
+      case GL_STENCIL_INDEX4:
+      case GL_STENCIL_INDEX8:
+      case GL_STENCIL_INDEX16:
+         return GL_STENCIL_INDEX;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   switch (internalFormat) {
+   case GL_COMPRESSED_ALPHA:
+      return GL_ALPHA;
+   case GL_COMPRESSED_LUMINANCE:
+      return GL_LUMINANCE;
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return GL_LUMINANCE_ALPHA;
+   case GL_COMPRESSED_INTENSITY:
+      return GL_INTENSITY;
+   case GL_COMPRESSED_RGB:
+      return GL_RGB;
+   case GL_COMPRESSED_RGBA:
+      return GL_RGBA;
+   default:
+      ; /* fallthrough */
+   }
+
+   if (ctx->Extensions.TDFX_texture_compression_FXT1) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGB_FXT1_3DFX:
+         return GL_RGB;
+      case GL_COMPRESSED_RGBA_FXT1_3DFX:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   /* Assume that the ANGLE flag will always be set if the EXT flag is set.
+    */
+   if (ctx->Extensions.ANGLE_texture_compression_dxt) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+         return GL_RGB;
+      case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+      case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (_mesa_is_desktop_gl(ctx)
+       && ctx->Extensions.ANGLE_texture_compression_dxt) {
+      switch (internalFormat) {
+      case GL_RGB_S3TC:
+      case GL_RGB4_S3TC:
+         return GL_RGB;
+      case GL_RGBA_S3TC:
+      case GL_RGBA4_S3TC:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.MESA_ycbcr_texture) {
+      if (internalFormat == GL_YCBCR_MESA)
+         return GL_YCBCR_MESA;
+   }
+
+   if (ctx->Extensions.ARB_texture_float) {
+      switch (internalFormat) {
+      case GL_ALPHA16F_ARB:
+      case GL_ALPHA32F_ARB:
+         return GL_ALPHA;
+      case GL_RGBA16F_ARB:
+      case GL_RGBA32F_ARB:
+         return GL_RGBA;
+      case GL_RGB16F_ARB:
+      case GL_RGB32F_ARB:
+         return GL_RGB;
+      case GL_INTENSITY16F_ARB:
+      case GL_INTENSITY32F_ARB:
+         return GL_INTENSITY;
+      case GL_LUMINANCE16F_ARB:
+      case GL_LUMINANCE32F_ARB:
+         return GL_LUMINANCE;
+      case GL_LUMINANCE_ALPHA16F_ARB:
+      case GL_LUMINANCE_ALPHA32F_ARB:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_snorm) {
+      switch (internalFormat) {
+      case GL_RED_SNORM:
+      case GL_R8_SNORM:
+      case GL_R16_SNORM:
+         return GL_RED;
+      case GL_RG_SNORM:
+      case GL_RG8_SNORM:
+      case GL_RG16_SNORM:
+         return GL_RG;
+      case GL_RGB_SNORM:
+      case GL_RGB8_SNORM:
+      case GL_RGB16_SNORM:
+         return GL_RGB;
+      case GL_RGBA_SNORM:
+      case GL_RGBA8_SNORM:
+      case GL_RGBA16_SNORM:
+         return GL_RGBA;
+      case GL_ALPHA_SNORM:
+      case GL_ALPHA8_SNORM:
+      case GL_ALPHA16_SNORM:
+         return GL_ALPHA;
+      case GL_LUMINANCE_SNORM:
+      case GL_LUMINANCE8_SNORM:
+      case GL_LUMINANCE16_SNORM:
+         return GL_LUMINANCE;
+      case GL_LUMINANCE_ALPHA_SNORM:
+      case GL_LUMINANCE8_ALPHA8_SNORM:
+      case GL_LUMINANCE16_ALPHA16_SNORM:
+         return GL_LUMINANCE_ALPHA;
+      case GL_INTENSITY_SNORM:
+      case GL_INTENSITY8_SNORM:
+      case GL_INTENSITY16_SNORM:
+         return GL_INTENSITY;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_sRGB) {
+      switch (internalFormat) {
+      case GL_SRGB_EXT:
+      case GL_SRGB8_EXT:
+      case GL_COMPRESSED_SRGB_EXT:
+         return GL_RGB;
+      case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+         return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGB : -1;
+      case GL_SRGB_ALPHA_EXT:
+      case GL_SRGB8_ALPHA8_EXT:
+      case GL_COMPRESSED_SRGB_ALPHA_EXT:
+         return GL_RGBA;
+      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+         return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGBA : -1;
+      case GL_SLUMINANCE_ALPHA_EXT:
+      case GL_SLUMINANCE8_ALPHA8_EXT:
+      case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
+         return GL_LUMINANCE_ALPHA;
+      case GL_SLUMINANCE_EXT:
+      case GL_SLUMINANCE8_EXT:
+      case GL_COMPRESSED_SLUMINANCE_EXT:
+         return GL_LUMINANCE;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Version >= 30 ||
+       ctx->Extensions.EXT_texture_integer) {
+      switch (internalFormat) {
+      case GL_RGBA8UI_EXT:
+      case GL_RGBA16UI_EXT:
+      case GL_RGBA32UI_EXT:
+      case GL_RGBA8I_EXT:
+      case GL_RGBA16I_EXT:
+      case GL_RGBA32I_EXT:
+      case GL_RGB10_A2UI:
+         return GL_RGBA;
+      case GL_RGB8UI_EXT:
+      case GL_RGB16UI_EXT:
+      case GL_RGB32UI_EXT:
+      case GL_RGB8I_EXT:
+      case GL_RGB16I_EXT:
+      case GL_RGB32I_EXT:
+         return GL_RGB;
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_integer) {
+      switch (internalFormat) {
+      case GL_ALPHA8UI_EXT:
+      case GL_ALPHA16UI_EXT:
+      case GL_ALPHA32UI_EXT:
+      case GL_ALPHA8I_EXT:
+      case GL_ALPHA16I_EXT:
+      case GL_ALPHA32I_EXT:
+         return GL_ALPHA;
+      case GL_INTENSITY8UI_EXT:
+      case GL_INTENSITY16UI_EXT:
+      case GL_INTENSITY32UI_EXT:
+      case GL_INTENSITY8I_EXT:
+      case GL_INTENSITY16I_EXT:
+      case GL_INTENSITY32I_EXT:
+         return GL_INTENSITY;
+      case GL_LUMINANCE8UI_EXT:
+      case GL_LUMINANCE16UI_EXT:
+      case GL_LUMINANCE32UI_EXT:
+      case GL_LUMINANCE8I_EXT:
+      case GL_LUMINANCE16I_EXT:
+      case GL_LUMINANCE32I_EXT:
+         return GL_LUMINANCE;
+      case GL_LUMINANCE_ALPHA8UI_EXT:
+      case GL_LUMINANCE_ALPHA16UI_EXT:
+      case GL_LUMINANCE_ALPHA32UI_EXT:
+      case GL_LUMINANCE_ALPHA8I_EXT:
+      case GL_LUMINANCE_ALPHA16I_EXT:
+      case GL_LUMINANCE_ALPHA32I_EXT:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_texture_rg) {
+      switch (internalFormat) {
+      case GL_R16F:
+      case GL_R32F:
+        if (!ctx->Extensions.ARB_texture_float)
+           break;
+         return GL_RED;
+      case GL_R8I:
+      case GL_R8UI:
+      case GL_R16I:
+      case GL_R16UI:
+      case GL_R32I:
+      case GL_R32UI:
+        if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
+           break;
+        /* FALLTHROUGH */
+      case GL_R8:
+      case GL_R16:
+      case GL_RED:
+      case GL_COMPRESSED_RED:
+         return GL_RED;
+
+      case GL_RG16F:
+      case GL_RG32F:
+        if (!ctx->Extensions.ARB_texture_float)
+           break;
+         return GL_RG;
+      case GL_RG8I:
+      case GL_RG8UI:
+      case GL_RG16I:
+      case GL_RG16UI:
+      case GL_RG32I:
+      case GL_RG32UI:
+        if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
+           break;
+        /* FALLTHROUGH */
+      case GL_RG:
+      case GL_RG8:
+      case GL_RG16:
+      case GL_COMPRESSED_RG:
+         return GL_RG;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_shared_exponent) {
+      switch (internalFormat) {
+      case GL_RGB9_E5_EXT:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_packed_float) {
+      switch (internalFormat) {
+      case GL_R11F_G11F_B10F_EXT:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_depth_buffer_float) {
+      switch (internalFormat) {
+      case GL_DEPTH_COMPONENT32F:
+         return GL_DEPTH_COMPONENT;
+      case GL_DEPTH32F_STENCIL8:
+         return GL_DEPTH_STENCIL;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_texture_compression_rgtc) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RED_RGTC1:
+      case GL_COMPRESSED_SIGNED_RED_RGTC1:
+         return GL_RED;
+      case GL_COMPRESSED_RG_RGTC2:
+      case GL_COMPRESSED_SIGNED_RG_RGTC2:
+         return GL_RG;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_compression_latc) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
+      case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
+         return GL_LUMINANCE;
+      case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
+      case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ATI_texture_compression_3dc) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
+      switch (internalFormat) {
+      case GL_ETC1_RGB8_OES:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGB8_ETC2:
+      case GL_COMPRESSED_SRGB8_ETC2:
+         return GL_RGB;
+      case GL_COMPRESSED_RGBA8_ETC2_EAC:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
+      case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+      case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+         return GL_RGBA;
+      case GL_COMPRESSED_R11_EAC:
+      case GL_COMPRESSED_SIGNED_R11_EAC:
+         return GL_RED;
+      case GL_COMPRESSED_RG11_EAC:
+      case GL_COMPRESSED_SIGNED_RG11_EAC:
+         return GL_RG;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (_mesa_is_desktop_gl(ctx) &&
+       ctx->Extensions.ARB_texture_compression_bptc) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGBA_BPTC_UNORM:
+      case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
+         return GL_RGBA;
+      case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
+      case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->API == API_OPENGLES) {
+      switch (internalFormat) {
+      case GL_PALETTE4_RGB8_OES:
+      case GL_PALETTE4_R5_G6_B5_OES:
+      case GL_PALETTE8_RGB8_OES:
+      case GL_PALETTE8_R5_G6_B5_OES:
+        return GL_RGB;
+      case GL_PALETTE4_RGBA8_OES:
+      case GL_PALETTE8_RGB5_A1_OES:
+      case GL_PALETTE4_RGBA4_OES:
+      case GL_PALETTE4_RGB5_A1_OES:
+      case GL_PALETTE8_RGBA8_OES:
+      case GL_PALETTE8_RGBA4_OES:
+        return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   return -1; /* error */
+}
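
A brief usage sketch for the helper above; ctx stands in for an assumed valid, current gl_context pointer:

/* Mapping a sized internal format back to its base format, as TexImage
 * validation code does.  ctx is an assumed, already-initialized context. */
GLint base = _mesa_base_tex_format(ctx, GL_LUMINANCE12_ALPHA4);
/* base == GL_LUMINANCE_ALPHA on compatibility and ES contexts; -1 on a
 * core profile, where the switch above rejects luminance formats. */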
+
+/**
+ * Returns the effective internal format from a texture format and type.
+ * This is used by texture image operations internally for validation, when
+ * the specified internal format is a base (unsized) format.
+ *
+ * This method will only return a valid effective internal format if the
+ * combination of format, type and internal format in base form is acceptable.
+ *
+ * If a single sized internal format is defined in the spec (OpenGL-ES 3.0.4)
+ * or in extensions to unambiguously correspond to the given base format, then
+ * that internal format is returned as the effective one. Otherwise, if the
+ * combination is accepted but no single effective format is defined, the
+ * passed base format is returned instead.
+ *
+ * \param format the texture format
+ * \param type the texture type
+ */
+static GLenum
+_mesa_es3_effective_internal_format_for_format_and_type(GLenum format,
+                                                        GLenum type)
+{
+   switch (type) {
+   case GL_UNSIGNED_BYTE:
+      switch (format) {
+      case GL_RGBA:
+         return GL_RGBA8;
+      case GL_RGB:
+         return GL_RGB8;
+      /* Although LUMINANCE_ALPHA, LUMINANCE and ALPHA appear in table 3.12
+       * (section 3.8 Texturing, page 128 of the OpenGL-ES 3.0.4) as effective
+       * internal formats, they do not correspond to GL constants, so the base
+       * format is returned instead.
+       */
+      case GL_LUMINANCE_ALPHA:
+      case GL_LUMINANCE:
+      case GL_ALPHA:
+         return format;
+      }
+      break;
+
+   case GL_UNSIGNED_SHORT_4_4_4_4:
+      if (format == GL_RGBA)
+         return GL_RGBA4;
+      break;
+
+   case GL_UNSIGNED_SHORT_5_5_5_1:
+      if (format == GL_RGBA)
+         return GL_RGB5_A1;
+      break;
+
+   case GL_UNSIGNED_SHORT_5_6_5:
+      if (format == GL_RGB)
+         return GL_RGB565;
+      break;
+
+   /* OES_packed_depth_stencil */
+   case GL_UNSIGNED_INT_24_8:
+      if (format == GL_DEPTH_STENCIL)
+         return GL_DEPTH24_STENCIL8;
+      break;
+
+   case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
+      if (format == GL_DEPTH_STENCIL)
+         return GL_DEPTH32F_STENCIL8;
+      break;
+
+   case GL_UNSIGNED_SHORT:
+      if (format == GL_DEPTH_COMPONENT)
+         return GL_DEPTH_COMPONENT16;
+      break;
+
+   case GL_UNSIGNED_INT:
+      /* It can be DEPTH_COMPONENT16 or DEPTH_COMPONENT24, so just return
+       * the format.
+       */
+      if (format == GL_DEPTH_COMPONENT)
+         return format;
+      break;
+
+   /* OES_texture_float and OES_texture_half_float */
+   case GL_FLOAT:
+      if (format == GL_DEPTH_COMPONENT)
+         return GL_DEPTH_COMPONENT32F;
+      /* fall through */
+   case GL_HALF_FLOAT_OES:
+      switch (format) {
+      case GL_RGBA:
+      case GL_RGB:
+      case GL_LUMINANCE_ALPHA:
+      case GL_LUMINANCE:
+      case GL_ALPHA:
+      case GL_RED:
+      case GL_RG:
+         return format;
+      }
+      break;
+   case GL_HALF_FLOAT:
+      switch (format) {
+      case GL_RG:
+      case GL_RED:
+         return format;
+      }
+      break;
+
+   /* GL_EXT_texture_type_2_10_10_10_REV */
+   case GL_UNSIGNED_INT_2_10_10_10_REV:
+      switch (format) {
+      case GL_RGBA:
+      case GL_RGB:
+         return format;
+      }
+      break;
+
+   default:
+      /* fall through and return NONE */
+      break;
+   }
+
+   return GL_NONE;
+}
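
A few worked expectations for this mapping, written as asserts; they assume same-file test code (the helper is static) and <assert.h>:

assert(_mesa_es3_effective_internal_format_for_format_and_type(
          GL_RGBA, GL_UNSIGNED_BYTE) == GL_RGBA8);
assert(_mesa_es3_effective_internal_format_for_format_and_type(
          GL_RGB, GL_UNSIGNED_SHORT_5_6_5) == GL_RGB565);
assert(_mesa_es3_effective_internal_format_for_format_and_type(
          GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8) == GL_DEPTH24_STENCIL8);
/* Combinations with no row in table 3.12 yield GL_NONE: */
assert(_mesa_es3_effective_internal_format_for_format_and_type(
          GL_RGBA, GL_UNSIGNED_SHORT_5_6_5) == GL_NONE);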
 
 /**
  * Do error checking of format/type combinations for OpenGL ES 3
@@ -2156,6 +2776,36 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
                                       GLenum format, GLenum type,
                                       GLenum internalFormat)
 {
+   /* If internalFormat is an unsized format, then the effective internal
+    * format derived from format and type should be used instead. Page 127,
+    * section "3.8 Texturing" of the GLES 3.0.4 spec states:
+    *
+    *    "if internalformat is a base internal format, the effective
+    *     internal format is a sized internal format that is derived
+    *     from the format and type for internal use by the GL.
+    *     Table 3.12 specifies the mapping of format and type to effective
+    *     internal formats. The effective internal format is used by the GL
+    *     for purposes such as texture completeness or type checks for
+    *     CopyTex* commands. In these cases, the GL is required to operate
+    *     as if the effective internal format was used as the internalformat
+    *     when specifying the texture data."
+    */
+   if (_mesa_is_enum_format_unsized(internalFormat)) {
+      GLenum effectiveInternalFormat =
+         _mesa_es3_effective_internal_format_for_format_and_type(format, type);
+
+      if (effectiveInternalFormat == GL_NONE)
+         return GL_INVALID_OPERATION;
+
+      GLenum baseInternalFormat =
+         _mesa_base_tex_format(ctx, effectiveInternalFormat);
+
+      if (internalFormat != baseInternalFormat)
+         return GL_INVALID_OPERATION;
+
+      internalFormat = effectiveInternalFormat;
+   }
+
    switch (format) {
    case GL_RGBA:
       switch (type) {
@@ -2788,12 +3438,16 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
          return MESA_FORMAT_B5G6R5_UNORM;
       else if (format == GL_BGR)
          return MESA_FORMAT_R5G6B5_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_B5G6R5_UINT;
       break;
    case GL_UNSIGNED_SHORT_5_6_5_REV:
       if (format == GL_RGB)
          return MESA_FORMAT_R5G6B5_UNORM;
       else if (format == GL_BGR)
          return MESA_FORMAT_B5G6R5_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_R5G6B5_UINT;
       break;
    case GL_UNSIGNED_SHORT_4_4_4_4:
       if (format == GL_RGBA)
@@ -2802,6 +3456,10 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
          return MESA_FORMAT_A4R4G4B4_UNORM;
       else if (format == GL_ABGR_EXT)
          return MESA_FORMAT_R4G4B4A4_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A4B4G4R4_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A4R4G4B4_UINT;
       break;
    case GL_UNSIGNED_SHORT_4_4_4_4_REV:
       if (format == GL_RGBA)
@@ -2810,26 +3468,42 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
          return MESA_FORMAT_B4G4R4A4_UNORM;
       else if (format == GL_ABGR_EXT)
          return MESA_FORMAT_A4B4G4R4_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R4G4B4A4_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B4G4R4A4_UINT;
       break;
    case GL_UNSIGNED_SHORT_5_5_5_1:
       if (format == GL_RGBA)
          return MESA_FORMAT_A1B5G5R5_UNORM;
       else if (format == GL_BGRA)
          return MESA_FORMAT_A1R5G5B5_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A1B5G5R5_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A1R5G5B5_UINT;
       break;
    case GL_UNSIGNED_SHORT_1_5_5_5_REV:
       if (format == GL_RGBA)
          return MESA_FORMAT_R5G5B5A1_UNORM;
       else if (format == GL_BGRA)
          return MESA_FORMAT_B5G5R5A1_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R5G5B5A1_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B5G5R5A1_UINT;
       break;
    case GL_UNSIGNED_BYTE_3_3_2:
       if (format == GL_RGB)
          return MESA_FORMAT_B2G3R3_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_B2G3R3_UINT;
       break;
    case GL_UNSIGNED_BYTE_2_3_3_REV:
       if (format == GL_RGB)
          return MESA_FORMAT_R3G3B2_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_R3G3B2_UINT;
       break;
    case GL_UNSIGNED_INT_5_9_9_9_REV:
       if (format == GL_RGB)
@@ -2864,6 +3538,10 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
          return MESA_FORMAT_A8R8G8B8_UNORM;
       else if (format == GL_ABGR_EXT)
          return MESA_FORMAT_R8G8B8A8_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A8B8G8R8_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A8R8G8B8_UINT;
       break;
    case GL_UNSIGNED_INT_8_8_8_8_REV:
       if (format == GL_RGBA)
@@ -2872,6 +3550,10 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
          return MESA_FORMAT_B8G8R8A8_UNORM;
       else if (format == GL_ABGR_EXT)
          return MESA_FORMAT_A8B8G8R8_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R8G8B8A8_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B8G8R8A8_UINT;
       break;
    case GL_UNSIGNED_SHORT_8_8_MESA:
       if (format == GL_YCBCR_MESA)
index aec905d..92f4bc6 100644 (file)
@@ -134,6 +134,8 @@ extern GLenum
 _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
                                       GLenum format, GLenum type,
                                       GLenum internalFormat);
+extern GLint
+_mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat);
 
 extern uint32_t
 _mesa_format_from_format_and_type(GLenum format, GLenum type);
index 711a190..e79e3e6 100644 (file)
@@ -49,8 +49,8 @@
  * \param src the array with the source data we want to byte-swap.
  * \param n number of words.
  */
-void
-_mesa_swap2_copy( GLushort *dst, GLushort *src, GLuint n )
+static void
+swap2_copy( GLushort *dst, GLushort *src, GLuint n )
 {
    GLuint i;
    for (i = 0; i < n; i++) {
@@ -58,7 +58,11 @@ _mesa_swap2_copy( GLushort *dst, GLushort *src, GLuint n )
    }
 }
 
-
+void
+_mesa_swap2(GLushort *p, GLuint n)
+{
+   swap2_copy(p, p, n);
+}
 
 /*
  * Flip the order of the 4 bytes in each word in the given array (src) and
@@ -69,8 +73,8 @@ _mesa_swap2_copy( GLushort *dst, GLushort *src, GLuint n )
  * \param src the array with the source data we want to byte-swap.
  * \param n number of words.
  */
-void
-_mesa_swap4_copy( GLuint *dst, GLuint *src, GLuint n )
+static void
+swap4_copy( GLuint *dst, GLuint *src, GLuint n )
 {
    GLuint i, a, b;
    for (i = 0; i < n; i++) {
@@ -83,6 +87,11 @@ _mesa_swap4_copy( GLuint *dst, GLuint *src, GLuint n )
    }
 }
 
+void
+_mesa_swap4(GLuint *p, GLuint n)
+{
+   swap4_copy(p, p, n);
+}
 
 /**
  * Return the byte offset of a specific pixel in an image (1D, 2D or 3D).
@@ -958,3 +967,42 @@ _mesa_clip_blit(struct gl_context *ctx,
 
    return GL_TRUE;
 }
+
+/**
+ * Swap the bytes in a 2D image.
+ *
+ * Using the packing information, this swaps the bytes
+ * according to the format and type of the input data.
+ * It takes into account packing parameters such as
+ * Alignment and RowLength.
+ */
+void
+_mesa_swap_bytes_2d_image(GLenum format, GLenum type,
+                          const struct gl_pixelstore_attrib *packing,
+                          GLsizei width, GLsizei height,
+                          GLvoid *dst, const GLvoid *src)
+{
+   GLint swapSize = _mesa_sizeof_packed_type(type);
+
+   assert(packing->SwapBytes);
+
+   if (swapSize == 2 || swapSize == 4) {
+      int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
+      int stride = _mesa_image_row_stride(packing, width, format, type);
+      int row;
+      uint8_t *dstrow;
+      const uint8_t *srcrow;
+      assert(swapsPerPixel > 0);
+      assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
+      dstrow = dst;
+      srcrow = src;
+      for (row = 0; row < height; row++) {
+         if (swapSize == 2)
+            swap2_copy((GLushort *)dstrow, (GLushort *)srcrow, width * swapsPerPixel);
+         else if (swapSize == 4)
+            swap4_copy((GLuint *)dstrow, (GLuint *)srcrow, width * swapsPerPixel);
+         dstrow += stride;
+         srcrow += stride;
+      }
+   }
+}
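A hedged usage sketch for the new helper; unpack, width, height, dst, and src stand in for hypothetical caller state, and dst is assumed to be at least as large as the packed source image:

/* GL_UNSIGNED_SHORT_4_4_4_4 packs one pixel per 16-bit word, so
 * swapSize is 2 and swap2_copy() runs once per pixel per row. */
if (unpack->SwapBytes)
   _mesa_swap_bytes_2d_image(GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4,
                             unpack, width, height, dst, src);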
index 501586b..b5075be 100644 (file)
@@ -35,22 +35,11 @@ struct gl_pixelstore_attrib;
 struct gl_framebuffer;
 
 extern void
-_mesa_swap2_copy(GLushort *dst, GLushort *src, GLuint n);
+_mesa_swap2(GLushort *p, GLuint n);
 
 extern void
-_mesa_swap4_copy(GLuint *dst, GLuint *src, GLuint n);
+_mesa_swap4(GLuint *p, GLuint n);
 
-static inline void
-_mesa_swap2(GLushort *p, GLuint n)
-{
-   _mesa_swap2_copy(p, p, n);
-}
-
-static inline void
-_mesa_swap4(GLuint *p, GLuint n)
-{
-   _mesa_swap4_copy(p, p, n);
-}
 
 extern GLintptr
 _mesa_image_offset( GLuint dimensions,
@@ -146,5 +135,10 @@ _mesa_clip_blit(struct gl_context *ctx,
                 GLint *srcX0, GLint *srcY0, GLint *srcX1, GLint *srcY1,
                 GLint *dstX0, GLint *dstY0, GLint *dstX1, GLint *dstY1);
 
+void
+_mesa_swap_bytes_2d_image(GLenum format, GLenum type,
+                          const struct gl_pixelstore_attrib *packing,
+                          GLsizei width, GLsizei height,
+                          GLvoid *dst, const GLvoid *src);
 
 #endif
index 2bf5902..ab16c28 100644 (file)
@@ -1922,11 +1922,8 @@ generate_mipmap_uncompressed(struct gl_context *ctx, GLenum target,
       }
 
       /* get dest gl_texture_image */
-      dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1);
-      if (!dstImage) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
-         return;
-      }
+      dstImage = _mesa_select_tex_image(texObj, target, level + 1);
+      assert(dstImage);
 
       if (target == GL_TEXTURE_1D_ARRAY) {
         srcDepth = srcHeight;
@@ -2110,7 +2107,19 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
                                          srcWidth, srcHeight, srcDepth,
                                          &dstWidth, &dstHeight, &dstDepth);
       if (!nextLevel)
-        break;
+        goto end;
+
+      if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1,
+                                      dstWidth, dstHeight, dstDepth,
+                                      border, srcImage->InternalFormat,
+                                      srcImage->TexFormat)) {
+         /* all done */
+         goto end;
+      }
+
+      /* get dest gl_texture_image */
+      dstImage = _mesa_select_tex_image(texObj, target, level + 1);
+      assert(dstImage);
 
       /* Compute dst image strides and alloc memory on first iteration */
       temp_dst_row_stride = _mesa_format_row_stride(temp_format, dstWidth);
@@ -2124,13 +2133,6 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
         }
       }
 
-      /* get dest gl_texture_image */
-      dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1);
-      if (!dstImage) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
-         goto end;
-      }
-
       /* for 2D arrays, setup array[depth] of slice pointers */
       for (i = 0; i < srcDepth; i++) {
          temp_src_slices[i] = temp_src + temp_src_img_stride * i;
@@ -2149,14 +2151,6 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
                                   dstWidth, dstHeight, dstDepth,
                                   temp_dst_slices, temp_dst_row_stride);
 
-      if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1,
-                                      dstWidth, dstHeight, dstDepth,
-                                      border, srcImage->InternalFormat,
-                                      srcImage->TexFormat)) {
-         /* all done */
-         goto end;
-      }
-
       /* The image space was allocated above so use glTexSubImage now */
       ctx->Driver.TexSubImage(ctx, 2, dstImage,
                               0, 0, 0, dstWidth, dstHeight, dstDepth,
index 4e88494..cbfb155 100644 (file)
@@ -944,6 +944,7 @@ typedef enum
  */
 struct gl_sampler_object
 {
+   mtx_t Mutex;
    GLuint Name;
    GLint RefCount;
    GLchar *Label;               /**< GL_KHR_debug */
@@ -1887,6 +1888,7 @@ enum gl_frag_depth_layout
  */
 struct gl_program
 {
+   mtx_t Mutex;
    GLuint Id;
    GLint RefCount;
    GLubyte *String;  /**< Null-terminated program text */
@@ -2292,6 +2294,7 @@ struct gl_shader
    struct gl_uniform_block *UniformBlocks;
 
    struct exec_list *ir;
+   struct exec_list *packed_varyings;
    struct glsl_symbol_table *symbols;
 
    bool uses_builtin_functions;
@@ -2453,7 +2456,8 @@ enum gl_uniform_block_packing
 {
    ubo_packing_std140,
    ubo_packing_shared,
-   ubo_packing_packed
+   ubo_packing_packed,
+   ubo_packing_std430
 };
 
 
@@ -2690,7 +2694,7 @@ struct gl_shader_program
     */
    unsigned LastClipDistanceArraySize;
 
-   unsigned NumUniformBlocks;
+   unsigned NumBufferInterfaceBlocks;
    struct gl_uniform_block *UniformBlocks;
 
    /**
@@ -3663,6 +3667,7 @@ struct gl_extensions
    GLboolean ARB_shader_stencil_export;
    GLboolean ARB_shader_storage_buffer_object;
    GLboolean ARB_shader_subroutine;
+   GLboolean ARB_shader_texture_image_samples;
    GLboolean ARB_shader_texture_lod;
    GLboolean ARB_shading_language_packing;
    GLboolean ARB_shading_language_420pack;
@@ -4292,6 +4297,7 @@ struct gl_context
    struct gl_perf_monitor_state PerfMonitor;
 
    struct gl_buffer_object *DrawIndirectBuffer; /** < GL_ARB_draw_indirect */
+   struct gl_buffer_object *DispatchIndirectBuffer; /** < GL_ARB_compute_shader */
 
    struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */
    struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */
diff --git a/src/mesa/main/objectpurge.c b/src/mesa/main/objectpurge.c
new file mode 100644 (file)
index 0000000..d730f46
--- /dev/null
@@ -0,0 +1,416 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Code related to the GL_APPLE_object_purgeable extension.
+ */
+
+
+#include "glheader.h"
+#include "enums.h"
+#include "hash.h"
+#include "imports.h"
+#include "context.h"
+#include "bufferobj.h"
+#include "fbobject.h"
+#include "mtypes.h"
+#include "objectpurge.h"
+#include "texobj.h"
+#include "teximage.h"
+
+
+static GLenum
+buffer_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_buffer_object *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_bufferobj(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectPurgeable(name = 0x%x)", name);
+      return 0;
+   }
+   if (!_mesa_is_bufferobj(bufObj)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glObjectPurgeable(buffer 0)" );
+      return 0;
+   }
+
+   if (bufObj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
+      return GL_VOLATILE_APPLE;
+   }
+
+   bufObj->Purgeable = GL_TRUE;
+
+   retval = GL_VOLATILE_APPLE;
+   if (ctx->Driver.BufferObjectPurgeable)
+      retval = ctx->Driver.BufferObjectPurgeable(ctx, bufObj, option);
+
+   return retval;
+}
+
+
+static GLenum
+renderbuffer_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_renderbuffer *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_renderbuffer(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   if (bufObj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
+      return GL_VOLATILE_APPLE;
+   }
+
+   bufObj->Purgeable = GL_TRUE;
+
+   retval = GL_VOLATILE_APPLE;
+   if (ctx->Driver.RenderObjectPurgeable)
+      retval = ctx->Driver.RenderObjectPurgeable(ctx, bufObj, option);
+
+   return retval;
+}
+
+
+static GLenum
+texture_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_texture_object *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_texture(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectPurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   if (bufObj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
+      return GL_VOLATILE_APPLE;
+   }
+
+   bufObj->Purgeable = GL_TRUE;
+
+   retval = GL_VOLATILE_APPLE;
+   if (ctx->Driver.TextureObjectPurgeable)
+      retval = ctx->Driver.TextureObjectPurgeable(ctx, bufObj, option);
+
+   return retval;
+}
+
+
+GLenum GLAPIENTRY
+_mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option)
+{
+   GLenum retval;
+
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
+
+   if (name == 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectPurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   switch (option) {
+   case GL_VOLATILE_APPLE:
+   case GL_RELEASED_APPLE:
+      /* legal */
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glObjectPurgeable(name = 0x%x) invalid option: %d",
+                  name, option);
+      return 0;
+   }
+
+   switch (objectType) {
+   case GL_TEXTURE:
+      retval = texture_object_purgeable(ctx, name, option);
+      break;
+   case GL_RENDERBUFFER_EXT:
+      retval = renderbuffer_purgeable(ctx, name, option);
+      break;
+   case GL_BUFFER_OBJECT_APPLE:
+      retval = buffer_object_purgeable(ctx, name, option);
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glObjectPurgeable(name = 0x%x) invalid type: %d",
+                  name, objectType);
+      return 0;
+   }
+
+   /* In strict conformance to the spec, we must only return VOLATILE
+    * when passed the VOLATILE option. Madness.
+    *
+    * XXX First fix the spec, then fix me.
+    */
+   return option == GL_VOLATILE_APPLE ? GL_VOLATILE_APPLE : retval;
+}
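
For context, a hedged client-side sketch of the GL_APPLE_object_purgeable entry points this file implements, assuming the extension is advertised and its prototypes are resolved (GL_GLEXT_PROTOTYPES); the buffer contents are placeholders:

#include <GL/gl.h>
#include <GL/glext.h>

static GLfloat data[256];                      /* placeholder payload */
static const GLsizeiptr size = sizeof(data);

void demo_purgeable(void)
{
   GLuint buf;
   glGenBuffers(1, &buf);
   glBindBuffer(GL_ARRAY_BUFFER, buf);
   glBufferData(GL_ARRAY_BUFFER, size, data, GL_STATIC_DRAW);

   /* Allow the GL to discard the storage under memory pressure. */
   glObjectPurgeableAPPLE(GL_BUFFER_OBJECT_APPLE, buf, GL_VOLATILE_APPLE);

   /* Reclaim before reuse; GL_UNDEFINED_APPLE means the old contents
    * are gone and must be re-specified. */
   if (glObjectUnpurgeableAPPLE(GL_BUFFER_OBJECT_APPLE, buf,
                                GL_RETAINED_APPLE) == GL_UNDEFINED_APPLE)
      glBufferData(GL_ARRAY_BUFFER, size, data, GL_STATIC_DRAW);
}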
+
+
+static GLenum
+buffer_object_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_buffer_object *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_bufferobj(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   if (! bufObj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectUnpurgeable(name = 0x%x) object is "
+                  " already \"unpurged\"", name);
+      return 0;
+   }
+
+   bufObj->Purgeable = GL_FALSE;
+
+   retval = option;
+   if (ctx->Driver.BufferObjectUnpurgeable)
+      retval = ctx->Driver.BufferObjectUnpurgeable(ctx, bufObj, option);
+
+   return retval;
+}
+
+
+static GLenum
+renderbuffer_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_renderbuffer *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_renderbuffer(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   if (! bufObj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectUnpurgeable(name = 0x%x) object is "
+                  " already \"unpurged\"", name);
+      return 0;
+   }
+
+   bufObj->Purgeable = GL_FALSE;
+
+   retval = option;
+   if (ctx->Driver.RenderObjectUnpurgeable)
+      retval = ctx->Driver.RenderObjectUnpurgeable(ctx, bufObj, option);
+
+   return retval;
+}
+
+
+static GLenum
+texture_object_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_texture_object *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_texture(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   if (!bufObj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectUnpurgeable(name = 0x%x) object is"
+                  " already \"unpurged\"", name);
+      return 0;
+   }
+
+   bufObj->Purgeable = GL_FALSE;
+
+   retval = option;
+   if (ctx->Driver.TextureObjectUnpurgeable)
+      retval = ctx->Driver.TextureObjectUnpurgeable(ctx, bufObj, option);
+
+   return retval;
+}
+
+
+GLenum GLAPIENTRY
+_mesa_ObjectUnpurgeableAPPLE(GLenum objectType, GLuint name, GLenum option)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
+
+   if (name == 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   switch (option) {
+   case GL_RETAINED_APPLE:
+   case GL_UNDEFINED_APPLE:
+      /* legal */
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glObjectUnpurgeable(name = 0x%x) invalid option: %d",
+                  name, option);
+      return 0;
+   }
+
+   switch (objectType) {
+   case GL_BUFFER_OBJECT_APPLE:
+      return buffer_object_unpurgeable(ctx, name, option);
+   case GL_TEXTURE:
+      return texture_object_unpurgeable(ctx, name, option);
+   case GL_RENDERBUFFER_EXT:
+      return renderbuffer_unpurgeable(ctx, name, option);
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glObjectUnpurgeable(name = 0x%x) invalid type: %d",
+                  name, objectType);
+      return 0;
+   }
+}
+
+
+static void
+get_buffer_object_parameteriv(struct gl_context *ctx, GLuint name,
+                              GLenum pname, GLint *params)
+{
+   struct gl_buffer_object *bufObj = _mesa_lookup_bufferobj(ctx, name);
+   if (!bufObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetObjectParameteriv(name = 0x%x) invalid object", name);
+      return;
+   }
+
+   switch (pname) {
+   case GL_PURGEABLE_APPLE:
+      *params = bufObj->Purgeable;
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
+                  name, pname);
+      break;
+   }
+}
+
+
+static void
+get_renderbuffer_parameteriv(struct gl_context *ctx, GLuint name,
+                             GLenum pname, GLint *params)
+{
+   struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name);
+   if (!rb) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetObjectParameteriv(name = 0x%x) invalid object", name);
+      return;
+   }
+
+   switch (pname) {
+   case GL_PURGEABLE_APPLE:
+      *params = rb->Purgeable;
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
+                  name, pname);
+      break;
+   }
+}
+
+
+static void
+get_texture_object_parameteriv(struct gl_context *ctx, GLuint name,
+                               GLenum pname, GLint *params)
+{
+   struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
+   if (!texObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetObjectParameteriv(name = 0x%x) invalid object", name);
+      return;
+   }
+
+   switch (pname) {
+   case GL_PURGEABLE_APPLE:
+      *params = texObj->Purgeable;
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
+                  name, pname);
+      break;
+   }
+}
+
+
+void GLAPIENTRY
+_mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name, GLenum pname,
+                                GLint *params)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (name == 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetObjectParameteriv(name = 0x%x)", name);
+      return;
+   }
+
+   switch (objectType) {
+   case GL_TEXTURE:
+      get_texture_object_parameteriv(ctx, name, pname, params);
+      break;
+   case GL_BUFFER_OBJECT_APPLE:
+      get_buffer_object_parameteriv(ctx, name, pname, params);
+      break;
+   case GL_RENDERBUFFER_EXT:
+      get_renderbuffer_parameteriv(ctx, name, pname, params);
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetObjectParameteriv(name = 0x%x) invalid type: %d",
+                  name, objectType);
+   }
+}
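
The round trip these three entry points implement, seen from the client side, is roughly the following — a minimal sketch, assuming a current context, a loader that exposes GL_APPLE_object_purgeable, and a hypothetical buffer name buf:

   #include <GL/gl.h>
   #include <GL/glext.h>

   static GLboolean
   recycle_buffer(GLuint buf)
   {
      GLint purgeable;
      GLenum state;

      /* Mark the storage purgeable; the GL may now discard it. */
      glObjectPurgeableAPPLE(GL_BUFFER_OBJECT_APPLE, buf, GL_VOLATILE_APPLE);

      /* ... memory pressure may reclaim the storage here ... */

      /* Pin it again; GL_UNDEFINED_APPLE means the contents are gone. */
      state = glObjectUnpurgeableAPPLE(GL_BUFFER_OBJECT_APPLE, buf,
                                       GL_RETAINED_APPLE);
      glGetObjectParameterivAPPLE(GL_BUFFER_OBJECT_APPLE, buf,
                                  GL_PURGEABLE_APPLE, &purgeable);
      return state == GL_RETAINED_APPLE && !purgeable;
   }
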
diff --git a/src/mesa/main/objectpurge.h b/src/mesa/main/objectpurge.h
new file mode 100644 (file)
index 0000000..f049012
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef OBJECTPURGE_H
+#define OBJECTPURGE_H
+
+
+GLenum GLAPIENTRY
+_mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option);
+
+GLenum GLAPIENTRY
+_mesa_ObjectUnpurgeableAPPLE(GLenum objectType, GLuint name, GLenum option);
+
+void GLAPIENTRY
+_mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name,
+                                GLenum pname, GLint* params);
+
+
+#endif /* OBJECTPURGE_H */
index 0c16025..7762324 100644 (file)
@@ -103,6 +103,12 @@ _mesa_validate_pbo_access(GLuint dimensions,
       /* no buffer! */
       return GL_FALSE;
 
+   /* If the size of the image is zero then no pixels are accessed, so we
+    * don't need to check anything else.
+    */
+   if (width == 0 || height == 0 || depth == 0)
+      return GL_TRUE;
+
    /* get the offset to the first pixel we'll read/write */
    start = _mesa_image_offset(dimensions, pack, width, height,
                               format, type, 0, 0, 0);
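
One consequence of the early return above: a zero-sized transfer now validates no matter where the PBO offset points, since no bytes are ever addressed. An illustrative call (the buffer name and offset are hypothetical):

   glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
   /* width == 0, so validation passes even for an out-of-range offset */
   glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 1,
                   GL_RGBA, GL_UNSIGNED_BYTE, (const void *) 4096);
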
index 07acbf1..c2e1d29 100644 (file)
@@ -614,7 +614,8 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params)
       *params = pipe->InfoLog ? strlen(pipe->InfoLog) + 1 : 0;
       return;
    case GL_VALIDATE_STATUS:
-      *params = pipe->Validated;
+      /* If pipeline is not bound, return initial value 0. */
+      *params = (ctx->_Shader->Name != pipe->Name) ? 0 : pipe->Validated;
       return;
    case GL_VERTEX_SHADER:
       *params = pipe->CurrentProgram[MESA_SHADER_VERTEX]
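
A client-side sketch of the behavior the new check enforces (the pipeline name pipe is illustrative):

   GLint status;
   glBindProgramPipeline(pipe);
   glValidateProgramPipeline(pipe);
   glBindProgramPipeline(0);
   glGetProgramPipelineiv(pipe, GL_VALIDATE_STATUS, &status);
   /* status == 0: with pipe no longer bound, the initial value is
    * returned rather than the cached result of the validation above. */
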
index 23d2b4d..eb71fdd 100644 (file)
@@ -41,6 +41,8 @@ supported_interface_enum(struct gl_context *ctx, GLenum iface)
    case GL_PROGRAM_OUTPUT:
    case GL_TRANSFORM_FEEDBACK_VARYING:
    case GL_ATOMIC_COUNTER_BUFFER:
+   case GL_BUFFER_VARIABLE:
+   case GL_SHADER_STORAGE_BLOCK:
       return true;
    case GL_VERTEX_SUBROUTINE:
    case GL_FRAGMENT_SUBROUTINE:
@@ -58,8 +60,6 @@ supported_interface_enum(struct gl_context *ctx, GLenum iface)
    case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
    case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
       return _mesa_has_tessellation(ctx) && _mesa_has_shader_subroutine(ctx);
-   case GL_BUFFER_VARIABLE:
-   case GL_SHADER_STORAGE_BLOCK:
    default:
       return false;
    }
@@ -111,16 +111,15 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
       for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
          if (shProg->ProgramResourceList[i].Type != programInterface)
             continue;
-         const char *name =
-            _mesa_program_resource_name(&shProg->ProgramResourceList[i]);
-         unsigned array_size =
-            _mesa_program_resource_array_size(&shProg->ProgramResourceList[i]);
-         *params = MAX2(*params, strlen(name) + (array_size ? 3 : 0) + 1);
+         unsigned len =
+            _mesa_program_resource_name_len(&shProg->ProgramResourceList[i]);
+         *params = MAX2(*params, len + 1);
       }
       break;
    case GL_MAX_NUM_ACTIVE_VARIABLES:
       switch (programInterface) {
       case GL_UNIFORM_BLOCK:
+      case GL_SHADER_STORAGE_BLOCK:
          for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
             if (shProg->ProgramResourceList[i].Type == programInterface) {
                struct gl_uniform_block *block =
@@ -247,8 +246,10 @@ _mesa_GetProgramResourceIndex(GLuint program, GLenum programInterface,
    case GL_PROGRAM_INPUT:
    case GL_PROGRAM_OUTPUT:
    case GL_UNIFORM:
+   case GL_BUFFER_VARIABLE:
    case GL_TRANSFORM_FEEDBACK_VARYING:
    case GL_UNIFORM_BLOCK:
+   case GL_SHADER_STORAGE_BLOCK:
       res = _mesa_program_resource_find_name(shProg, programInterface, name,
                                              &array_index);
       if (!res || array_index > 0)
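
With GL_BUFFER_VARIABLE and GL_SHADER_STORAGE_BLOCK added to the accepted interfaces, shader-storage lookups go through the same name-resolution path as uniforms; an illustrative pair of queries (program and block names hypothetical):

   GLuint blk = glGetProgramResourceIndex(prog, GL_SHADER_STORAGE_BLOCK,
                                          "Particles");
   GLuint var = glGetProgramResourceIndex(prog, GL_BUFFER_VARIABLE,
                                          "Particles.position");
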
index 1277944..81bb912 100644 (file)
@@ -523,7 +523,8 @@ read_rgba_pixels( struct gl_context *ctx,
        * convert to, then we can convert directly into the dst buffer and avoid
        * the final conversion/copy from the rgba buffer to the dst buffer.
        */
-      if (dst_format == rgba_format) {
+      if (dst_format == rgba_format &&
+          dst_stride == rgba_stride) {
          need_convert = false;
          rgba = dst;
       } else {
@@ -613,15 +614,8 @@ read_rgba_pixels( struct gl_context *ctx,
 done_swap:
    /* Handle byte swapping if required */
    if (packing->SwapBytes) {
-      GLint swapSize = _mesa_sizeof_packed_type(type);
-      if (swapSize == 2 || swapSize == 4) {
-         int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
-         assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
-         if (swapSize == 2)
-            _mesa_swap2((GLushort *) dst, width * height * swapsPerPixel);
-         else if (swapSize == 4)
-            _mesa_swap4((GLuint *) dst, width * height * swapsPerPixel);
-      }
+      _mesa_swap_bytes_2d_image(format, type, packing,
+                                width, height, dst, dst);
    }
 
 done_unmap:
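
The inlined swap logic removed here (and again in texgetimage.c further down) now lives in the shared _mesa_swap_bytes_2d_image() helper. For GL_UNSIGNED_SHORT data its effect matches this simplified sketch of a 16-bit in-place swap (the real helper derives the swap width from the packed type):

   #include <stddef.h>

   /* swap the two bytes of each of n 16-bit words, in place */
   static void
   swap2(unsigned short *p, size_t n)
   {
      size_t i;
      for (i = 0; i < n; i++)
         p[i] = (unsigned short)((p[i] >> 8) | (p[i] << 8));
   }
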
@@ -969,6 +963,7 @@ read_pixels_es3_error_check(GLenum format, GLenum type,
             return GL_NO_ERROR;
          break;
       case GL_UNSIGNED_SHORT:
+      case GL_UNSIGNED_INT:
       case GL_UNSIGNED_INT_24_8:
          if (!is_float_depth)
             return GL_NO_ERROR;
index e9d129a..b0d4c8c 100644 (file)
@@ -174,7 +174,6 @@ _mesa_reference_renderbuffer_(struct gl_renderbuffer **ptr,
       mtx_lock(&oldRb->Mutex);
       assert(oldRb->RefCount > 0);
       oldRb->RefCount--;
-      /*printf("RB DECR %p (%d) to %d\n", (void*) oldRb, oldRb->Name, oldRb->RefCount);*/
       deleteFlag = (oldRb->RefCount == 0);
       mtx_unlock(&oldRb->Mutex);
 
@@ -191,7 +190,6 @@ _mesa_reference_renderbuffer_(struct gl_renderbuffer **ptr,
       /* reference new renderbuffer */
       mtx_lock(&rb->Mutex);
       rb->RefCount++;
-      /*printf("RB INCR %p (%d) to %d\n", (void*) rb, rb->Name, rb->RefCount);*/
       mtx_unlock(&rb->Mutex);
       *ptr = rb;
    }
index 32180fb..c7b9666 100644 (file)
@@ -88,15 +88,11 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
       GLboolean deleteFlag = GL_FALSE;
       struct gl_sampler_object *oldSamp = *ptr;
 
-      /*mtx_lock(&oldSamp->Mutex);*/
+      mtx_lock(&oldSamp->Mutex);
       assert(oldSamp->RefCount > 0);
       oldSamp->RefCount--;
-#if 0
-      printf("SamplerObj %p %d DECR to %d\n",
-             (void *) oldSamp, oldSamp->Name, oldSamp->RefCount);
-#endif
       deleteFlag = (oldSamp->RefCount == 0);
-      /*mtx_unlock(&oldSamp->Mutex);*/
+      mtx_unlock(&oldSamp->Mutex);
 
       if (deleteFlag) {
         assert(ctx->Driver.DeleteSamplerObject);
@@ -109,7 +105,7 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
 
    if (samp) {
       /* reference new sampler */
-      /*mtx_lock(&samp->Mutex);*/
+      mtx_lock(&samp->Mutex);
       if (samp->RefCount == 0) {
          /* this sampler's being deleted (look just above) */
          /* Not sure this can ever really happen.  Warn if it does. */
@@ -118,13 +114,9 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
       }
       else {
          samp->RefCount++;
-#if 0
-         printf("SamplerObj %p %d INCR to %d\n",
-                (void *) samp, samp->Name, samp->RefCount);
-#endif
          *ptr = samp;
       }
-      /*mtx_unlock(&samp->Mutex);*/
+      mtx_unlock(&samp->Mutex);
    }
 }
 
@@ -135,6 +127,7 @@ _mesa_reference_sampler_object_(struct gl_context *ctx,
 static void
 _mesa_init_sampler_object(struct gl_sampler_object *sampObj, GLuint name)
 {
+   mtx_init(&sampObj->Mutex, mtx_plain);
    sampObj->Name = name;
    sampObj->RefCount = 1;
    sampObj->WrapS = GL_REPEAT;
@@ -177,6 +170,7 @@ static void
 _mesa_delete_sampler_object(struct gl_context *ctx,
                             struct gl_sampler_object *sampObj)
 {
+   mtx_destroy(&sampObj->Mutex);
    free(sampObj->Label);
    free(sampObj);
 }
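
The net effect of these hunks is that the previously commented-out locking is now live, with the mutex created in _mesa_init_sampler_object() and destroyed with the object. The refcounting pattern it protects, in miniature (the struct and names are illustrative, assuming C11-style threads as in Mesa's c11/threads.h):

   #include <stdbool.h>
   #include "c11/threads.h"

   struct obj { mtx_t Mutex; int RefCount; };

   static bool
   obj_unref(struct obj *o)
   {
      bool delete_flag;
      mtx_lock(&o->Mutex);
      delete_flag = (--o->RefCount == 0);
      mtx_unlock(&o->Mutex);
      return delete_flag;   /* caller destroys the object outside the lock */
   }
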
index ee73202..73dee85 100644 (file)
 
 #include "main/context.h"
 #include "main/core.h"
-#include "glsl_symbol_table.h"
-#include "ir.h"
-#include "shaderobj.h"
-#include "program/hash_table.h"
-#include "../glsl/program.h"
-#include "uniforms.h"
 #include "main/enums.h"
+#include "main/shaderapi.h"
+#include "main/shaderobj.h"
+#include "main/uniforms.h"
+#include "glsl/glsl_symbol_table.h"
+#include "glsl/ir.h"
+#include "glsl/program.h"
+#include "program/hash_table.h"
+#include "util/strndup.h"
 
-extern "C" {
-#include "shaderapi.h"
-}
 
 static GLint
 program_resource_location(struct gl_shader_program *shProg,
@@ -431,6 +430,7 @@ _mesa_program_resource_name(struct gl_program_resource *res)
    const ir_variable *var;
    switch (res->Type) {
    case GL_UNIFORM_BLOCK:
+   case GL_SHADER_STORAGE_BLOCK:
       return RESOURCE_UBO(res)->Name;
    case GL_TRANSFORM_FEEDBACK_VARYING:
       return RESOURCE_XFB(res)->Name;
@@ -445,6 +445,7 @@ _mesa_program_resource_name(struct gl_program_resource *res)
    case GL_PROGRAM_OUTPUT:
       return RESOURCE_VAR(res)->name;
    case GL_UNIFORM:
+   case GL_BUFFER_VARIABLE:
       return RESOURCE_UNI(res)->name;
    case GL_VERTEX_SUBROUTINE_UNIFORM:
    case GL_GEOMETRY_SUBROUTINE_UNIFORM:
@@ -476,7 +477,7 @@ _mesa_program_resource_array_size(struct gl_program_resource *res)
              RESOURCE_XFB(res)->Size : 0;
    case GL_PROGRAM_INPUT:
    case GL_PROGRAM_OUTPUT:
-      return RESOURCE_VAR(res)->data.max_array_access;
+      return RESOURCE_VAR(res)->type->length;
    case GL_UNIFORM:
    case GL_VERTEX_SUBROUTINE_UNIFORM:
    case GL_GEOMETRY_SUBROUTINE_UNIFORM:
@@ -484,6 +485,7 @@ _mesa_program_resource_array_size(struct gl_program_resource *res)
    case GL_COMPUTE_SUBROUTINE_UNIFORM:
    case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
    case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+   case GL_BUFFER_VARIABLE:
       return RESOURCE_UNI(res)->array_elements;
    case GL_VERTEX_SUBROUTINE:
    case GL_GEOMETRY_SUBROUTINE:
@@ -493,6 +495,7 @@ _mesa_program_resource_array_size(struct gl_program_resource *res)
    case GL_TESS_EVALUATION_SUBROUTINE:
    case GL_ATOMIC_COUNTER_BUFFER:
    case GL_UNIFORM_BLOCK:
+   case GL_SHADER_STORAGE_BLOCK:
       return 0;
    default:
       assert(!"support for resource type not implemented");
@@ -538,6 +541,7 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg,
       if (strncmp(rname, name, baselen) == 0) {
          switch (programInterface) {
          case GL_UNIFORM_BLOCK:
+         case GL_SHADER_STORAGE_BLOCK:
             /* Basename match, check if array or struct. */
             if (name[baselen] == '\0' ||
                 name[baselen] == '[' ||
@@ -546,6 +550,7 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg,
             }
             break;
          case GL_TRANSFORM_FEEDBACK_VARYING:
+         case GL_BUFFER_VARIABLE:
          case GL_UNIFORM:
          case GL_VERTEX_SUBROUTINE_UNIFORM:
          case GL_GEOMETRY_SUBROUTINE_UNIFORM:
@@ -607,6 +612,7 @@ _mesa_program_resource_index(struct gl_shader_program *shProg,
 
    switch (res->Type) {
    case GL_UNIFORM_BLOCK:
+   case GL_SHADER_STORAGE_BLOCK:
       return RESOURCE_UBO(res) - shProg->UniformBlocks;
    case GL_ATOMIC_COUNTER_BUFFER:
       return RESOURCE_ATC(res) - shProg->AtomicBuffers;
@@ -632,6 +638,7 @@ _mesa_program_resource_find_index(struct gl_shader_program *shProg,
       switch (res->Type) {
       case GL_UNIFORM_BLOCK:
       case GL_ATOMIC_COUNTER_BUFFER:
+      case GL_SHADER_STORAGE_BLOCK:
          if (_mesa_program_resource_index(shProg, res) == index)
             return res;
          break;
@@ -651,6 +658,7 @@ _mesa_program_resource_find_index(struct gl_shader_program *shProg,
       case GL_COMPUTE_SUBROUTINE:
       case GL_TESS_CONTROL_SUBROUTINE:
       case GL_TESS_EVALUATION_SUBROUTINE:
+      case GL_BUFFER_VARIABLE:
          if (++idx == (int) index)
             return res;
          break;
@@ -661,6 +669,57 @@ _mesa_program_resource_find_index(struct gl_shader_program *shProg,
    return NULL;
 }
 
+/* Returns whether the resource name is expected to have an index
+ * appended to it.
+ *
+ * Page 61 (page 73 of the PDF) in section 2.11 of the OpenGL ES 3.0
+ * spec says:
+ *
+ *     "If the active uniform is an array, the uniform name returned in
+ *     name will always be the name of the uniform array appended with
+ *     "[0]"."
+ *
+ * The same text also appears in the OpenGL 4.2 spec.  It does not,
+ * however, appear in any previous spec.  Previous specifications are
+ * ambiguous in this regard.  However, either name can later be passed
+ * to glGetUniformLocation (and related APIs), so there shouldn't be any
+ * harm in always appending "[0]" to uniform array names.
+ *
+ * Geometry shader stage has different naming convention where the 'normal'
+ * condition is an array, therefore for variables referenced in geometry
+ * stage we do not add '[0]'.
+ *
+ * Note that TCS outputs and TES inputs should not have an index appended
+ * either.
+ */
+static bool
+add_index_to_name(struct gl_program_resource *res)
+{
+   bool add_index = !(((res->Type == GL_PROGRAM_INPUT) &&
+                       res->StageReferences & (1 << MESA_SHADER_GEOMETRY)));
+
+   /* Transform feedback varyings have array index already appended
+    * in their names.
+    */
+   if (res->Type == GL_TRANSFORM_FEEDBACK_VARYING)
+      add_index = false;
+
+   return add_index;
+}
+
+/* Get the name length of a program resource: the base name length,
+ * plus 3 for '[0]' if the resource is an array.
+ */
+extern unsigned
+_mesa_program_resource_name_len(struct gl_program_resource *res)
+{
+   unsigned length = strlen(_mesa_program_resource_name(res));
+   if (_mesa_program_resource_array_size(res) && add_index_to_name(res))
+      length += 3;
+   return length;
+}
+
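
A worked example of the length rule (the declaration is hypothetical): for a uniform vec4 colors[8]; the resource name is "colors", so _mesa_program_resource_name_len() returns 6 + 3 = 9 for the implied "[0]" suffix, and the GL_NAME_LENGTH query built on it later in this patch reports 10, counting the terminator:

   GLint len;
   const GLenum prop = GL_NAME_LENGTH;
   glGetProgramResourceiv(prog, GL_UNIFORM, index, 1, &prop, 1, NULL, &len);
   /* len == 10: strlen("colors[0]") + 1 */
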
 /* Get full name of a program resource.
  */
 bool
@@ -696,36 +755,7 @@ _mesa_get_program_resource_name(struct gl_shader_program *shProg,
 
    _mesa_copy_string(name, bufSize, length, _mesa_program_resource_name(res));
 
-   /* Page 61 (page 73 of the PDF) in section 2.11 of the OpenGL ES 3.0
-    * spec says:
-    *
-    *     "If the active uniform is an array, the uniform name returned in
-    *     name will always be the name of the uniform array appended with
-    *     "[0]"."
-    *
-    * The same text also appears in the OpenGL 4.2 spec.  It does not,
-    * however, appear in any previous spec.  Previous specifications are
-    * ambiguous in this regard.  However, either name can later be passed
-    * to glGetUniformLocation (and related APIs), so there shouldn't be any
-    * harm in always appending "[0]" to uniform array names.
-    *
-    * Geometry shader stage has different naming convention where the 'normal'
-    * condition is an array, therefore for variables referenced in geometry
-    * stage we do not add '[0]'.
-    *
-    * Note, that TCS outputs and TES inputs should not have index appended
-    * either.
-    */
-   bool add_index = !(((programInterface == GL_PROGRAM_INPUT) &&
-                       res->StageReferences & (1 << MESA_SHADER_GEOMETRY)));
-
-   /* Transform feedback varyings have array index already appended
-    * in their names.
-    */
-   if (programInterface == GL_TRANSFORM_FEEDBACK_VARYING)
-      add_index = false;
-
-   if (add_index && _mesa_program_resource_array_size(res)) {
+   if (_mesa_program_resource_array_size(res) && add_index_to_name(res)) {
       int i;
 
       /* The comparison is strange because *length does *NOT* include the
@@ -804,6 +834,193 @@ program_resource_location(struct gl_shader_program *shProg,
    }
 }
 
+static char*
+get_top_level_name(const char *name)
+{
+   const char *first_dot = strchr(name, '.');
+   const char *first_square_bracket = strchr(name, '[');
+   int name_size = 0;
+   /* From ARB_program_interface_query spec:
+    *
+    * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer identifying the
+    *  number of active array elements of the top-level shader storage block
+    *  member containing the active variable is written to <params>.  If the
+    *  top-level block member is not declared as an array, the value one is
+    *  written to <params>.  If the top-level block member is an array with no
+    *  declared size, the value zero is written to <params>."
+    */
+
+   /* The buffer variable is at the top level. */
+   if (!first_square_bracket && !first_dot)
+      name_size = strlen(name);
+   else if (!first_square_bracket ||
+            (first_dot && first_dot < first_square_bracket))
+      name_size = first_dot - name;
+   else
+      name_size = first_square_bracket - name;
+
+   return strndup(name, name_size);
+}
+
+static char*
+get_var_name(const char *name)
+{
+   const char *first_dot = strchr(name, '.');
+
+   if (!first_dot)
+      return strdup(name);
+
+   return strndup(first_dot + 1, strlen(first_dot) - 1);
+}
+
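
Worked examples of the two helpers above (inputs hypothetical): get_top_level_name("block.member[2]") returns "block", get_top_level_name("verts[3].pos") returns "verts", and get_var_name("block.member[2]") returns "member[2]". The callers below re-split with get_var_name() when the top-level name turns out to equal the interface block name, which is the instanced-array-of-SSBOs case.
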
+static GLint
+program_resource_top_level_array_size(struct gl_shader_program *shProg,
+                                      struct gl_program_resource *res,
+                                      const char *name)
+{
+   int block_index = RESOURCE_UNI(res)->block_index;
+   int array_size = -1;
+   char *var_name = get_top_level_name(name);
+   char *interface_name =
+      get_top_level_name(shProg->UniformBlocks[block_index].Name);
+
+   if (strcmp(var_name, interface_name) == 0) {
+      /* Deal with instanced array of SSBOs */
+      char *temp_name = get_var_name(name);
+      free(var_name);
+      var_name = get_top_level_name(temp_name);
+      free(temp_name);
+   }
+
+   for (unsigned i = 0; i < shProg->NumShaders; i++) {
+      if (shProg->Shaders[i] == NULL)
+         continue;
+
+      const gl_shader *stage = shProg->Shaders[i];
+      foreach_in_list(ir_instruction, node, stage->ir) {
+         ir_variable *var = node->as_variable();
+         if (!var || !var->get_interface_type() ||
+             var->data.mode != ir_var_shader_storage)
+            continue;
+
+         const glsl_type *interface = var->get_interface_type();
+
+         if (strcmp(interface_name, interface->name) != 0)
+            continue;
+
+         for (unsigned i = 0; i < interface->length; i++) {
+            const glsl_struct_field *field = &interface->fields.structure[i];
+            if (strcmp(field->name, var_name) != 0)
+               continue;
+            /* From GL_ARB_program_interface_query spec:
+             *
+             * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer
+             * identifying the number of active array elements of the top-level
+             * shader storage block member containing to the active variable is
+             * written to <params>.  If the top-level block member is not
+             * declared as an array, the value one is written to <params>.  If
+             * the top-level block member is an array with no declared size,
+             * the value zero is written to <params>.
+             */
+            if (field->type->is_unsized_array())
+               array_size = 0;
+            else if (field->type->is_array())
+               array_size = field->type->length;
+            else
+               array_size = 1;
+            goto found_top_level_array_size;
+         }
+      }
+   }
+found_top_level_array_size:
+   free(interface_name);
+   free(var_name);
+   return array_size;
+}
+
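
A sketch of the query this feeds, under an assumed declaration buffer Blob { vec4 items[16]; } blob; (all names hypothetical):

   const GLenum prop = GL_TOP_LEVEL_ARRAY_SIZE;
   GLint size;
   GLuint idx = glGetProgramResourceIndex(prog, GL_BUFFER_VARIABLE,
                                          "Blob.items");
   glGetProgramResourceiv(prog, GL_BUFFER_VARIABLE, idx, 1, &prop,
                          1, NULL, &size);
   /* size == 16; an unsized array would report 0, a non-array member 1 */
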
+static GLint
+program_resource_top_level_array_stride(struct gl_shader_program *shProg,
+                                        struct gl_program_resource *res,
+                                        const char *name)
+{
+   int block_index = RESOURCE_UNI(res)->block_index;
+   int array_stride = -1;
+   char *var_name = get_top_level_name(name);
+   char *interface_name =
+      get_top_level_name(shProg->UniformBlocks[block_index].Name);
+
+   if (strcmp(var_name, interface_name) == 0) {
+      /* Deal with instanced array of SSBOs */
+      char *temp_name = get_var_name(name);
+      free(var_name);
+      var_name = get_top_level_name(temp_name);
+      free(temp_name);
+   }
+
+   for (unsigned i = 0; i < shProg->NumShaders; i++) {
+      if (shProg->Shaders[i] == NULL)
+         continue;
+
+      const gl_shader *stage = shProg->Shaders[i];
+      foreach_in_list(ir_instruction, node, stage->ir) {
+         ir_variable *var = node->as_variable();
+         if (!var || !var->get_interface_type() ||
+             var->data.mode != ir_var_shader_storage)
+            continue;
+
+         const glsl_type *interface = var->get_interface_type();
+
+         if (strcmp(interface_name, interface->name) != 0) {
+            continue;
+         }
+
+         for (unsigned i = 0; i < interface->length; i++) {
+            const glsl_struct_field *field = &interface->fields.structure[i];
+            if (strcmp(field->name, var_name) != 0)
+               continue;
+            /* From GL_ARB_program_interface_query:
+             *
+             * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer
+             *  identifying the stride between array elements of the top-level
+             *  shader storage block member containing the active variable is
+             *  written to <params>.  For top-level block members declared as
+             *  arrays, the value written is the difference, in basic machine
+             *  units, between the offsets of the active variable for
+             *  consecutive elements in the top-level array.  For top-level
+             *  block members not declared as an array, zero is written to
+             *  <params>."
+             */
+            if (field->type->is_array()) {
+               const enum glsl_matrix_layout matrix_layout =
+                  glsl_matrix_layout(field->matrix_layout);
+               bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+               const glsl_type *array_type = field->type->fields.array;
+
+               if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) {
+                  if (array_type->is_record()) {
+                     array_stride = array_type->std140_size(row_major);
+                     array_stride = glsl_align(array_stride, 16);
+                  } else {
+                     unsigned element_base_align = 0;
+                     element_base_align = array_type->std140_base_alignment(row_major);
+                     array_stride = MAX2(element_base_align, 16);
+                  }
+               } else {
+                  array_stride = array_type->std430_array_stride(row_major);
+               }
+            } else {
+               array_stride = 0;
+            }
+            goto found_top_level_array_stride;
+         }
+      }
+   }
+found_top_level_array_stride:
+   free(interface_name);
+   free(var_name);
+   return array_stride;
+}
+
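
To make the std140/std430 branch concrete: for a top-level member declared float weights[4], the element base alignment is 4, which std140 rounds up via MAX2(4, 16), so TOP_LEVEL_ARRAY_STRIDE is 16 (one float per 16-byte slot); under std430 the tight stride of 4 is reported instead. A struct element takes its std140 size aligned up to 16.
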
 /**
  * Function implements following location queries:
  *    glGetUniformLocation
@@ -880,7 +1097,7 @@ is_resource_referenced(struct gl_shader_program *shProg,
    if (res->Type == GL_ATOMIC_COUNTER_BUFFER)
       return RESOURCE_ATC(res)->StageReferences[stage];
 
-   if (res->Type == GL_UNIFORM_BLOCK)
+   if (res->Type == GL_UNIFORM_BLOCK || res->Type == GL_SHADER_STORAGE_BLOCK)
       return shProg->UniformBlockStageIndex[stage][index] != -1;
 
    return res->StageReferences & (1 << stage);
@@ -893,7 +1110,8 @@ get_buffer_property(struct gl_shader_program *shProg,
 {
    GET_CURRENT_CONTEXT(ctx);
    if (res->Type != GL_UNIFORM_BLOCK &&
-       res->Type != GL_ATOMIC_COUNTER_BUFFER)
+       res->Type != GL_ATOMIC_COUNTER_BUFFER &&
+       res->Type != GL_SHADER_STORAGE_BLOCK)
       goto invalid_operation;
 
    if (res->Type == GL_UNIFORM_BLOCK) {
@@ -929,6 +1147,39 @@ get_buffer_property(struct gl_shader_program *shProg,
          }
          return RESOURCE_UBO(res)->NumUniforms;
       }
+   } else if (res->Type == GL_SHADER_STORAGE_BLOCK) {
+      switch (prop) {
+      case GL_BUFFER_BINDING:
+         *val = RESOURCE_UBO(res)->Binding;
+         return 1;
+      case GL_BUFFER_DATA_SIZE:
+         *val = RESOURCE_UBO(res)->UniformBufferSize;
+         return 1;
+      case GL_NUM_ACTIVE_VARIABLES:
+         *val = 0;
+         for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
+            const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
+            struct gl_program_resource *uni =
+               _mesa_program_resource_find_name(shProg, GL_BUFFER_VARIABLE,
+                                                iname, NULL);
+            if (!uni)
+               continue;
+            (*val)++;
+         }
+         return 1;
+      case GL_ACTIVE_VARIABLES:
+         for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
+            const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
+            struct gl_program_resource *uni =
+               _mesa_program_resource_find_name(shProg, GL_BUFFER_VARIABLE,
+                                                iname, NULL);
+            if (!uni)
+               continue;
+            *val++ = _mesa_program_resource_index(shProg, uni);
+         }
+         return RESOURCE_UBO(res)->NumUniforms;
+      }
    } else if (res->Type == GL_ATOMIC_COUNTER_BUFFER) {
       switch (prop) {
       case GL_BUFFER_BINDING:
@@ -967,23 +1218,24 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
    if (res->Type != type)\
       goto invalid_operation;
 
+#define VALIDATE_TYPE_2(type1, type2)\
+   if (res->Type != type1 && res->Type != type2)\
+      goto invalid_operation;
+
    switch(prop) {
    case GL_NAME_LENGTH:
       switch (res->Type) {
       case GL_ATOMIC_COUNTER_BUFFER:
          goto invalid_operation;
-      case GL_TRANSFORM_FEEDBACK_VARYING:
-         *val = strlen(_mesa_program_resource_name(res)) + 1;
-         break;
       default:
-         /* Base name +3 if array '[0]' + terminator. */
-         *val = strlen(_mesa_program_resource_name(res)) +
-            (_mesa_program_resource_array_size(res) > 0 ? 3 : 0) + 1;
+         /* Resource name length + terminator. */
+         *val = _mesa_program_resource_name_len(res) + 1;
       }
       return 1;
    case GL_TYPE:
       switch (res->Type) {
       case GL_UNIFORM:
+      case GL_BUFFER_VARIABLE:
          *val = RESOURCE_UNI(res)->type->gl_type;
          return 1;
       case GL_PROGRAM_INPUT:
@@ -999,11 +1251,12 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
    case GL_ARRAY_SIZE:
       switch (res->Type) {
       case GL_UNIFORM:
+      case GL_BUFFER_VARIABLE:
             *val = MAX2(RESOURCE_UNI(res)->array_elements, 1);
             return 1;
       case GL_PROGRAM_INPUT:
       case GL_PROGRAM_OUTPUT:
-         *val = MAX2(RESOURCE_VAR(res)->type->length, 1);
+         *val = MAX2(_mesa_program_resource_array_size(res), 1);
          return 1;
       case GL_TRANSFORM_FEEDBACK_VARYING:
          *val = MAX2(RESOURCE_XFB(res)->Size, 1);
@@ -1012,23 +1265,23 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
          goto invalid_operation;
       }
    case GL_OFFSET:
-      VALIDATE_TYPE(GL_UNIFORM);
+      VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
       *val = RESOURCE_UNI(res)->offset;
       return 1;
    case GL_BLOCK_INDEX:
-      VALIDATE_TYPE(GL_UNIFORM);
+      VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
       *val = RESOURCE_UNI(res)->block_index;
       return 1;
    case GL_ARRAY_STRIDE:
-      VALIDATE_TYPE(GL_UNIFORM);
+      VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
       *val = RESOURCE_UNI(res)->array_stride;
       return 1;
    case GL_MATRIX_STRIDE:
-      VALIDATE_TYPE(GL_UNIFORM);
+      VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
       *val = RESOURCE_UNI(res)->matrix_stride;
       return 1;
    case GL_IS_ROW_MAJOR:
-      VALIDATE_TYPE(GL_UNIFORM);
+      VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
       *val = RESOURCE_UNI(res)->row_major;
       return 1;
    case GL_ATOMIC_COUNTER_BUFFER_INDEX:
@@ -1054,6 +1307,8 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
       case GL_PROGRAM_INPUT:
       case GL_PROGRAM_OUTPUT:
       case GL_UNIFORM_BLOCK:
+      case GL_BUFFER_VARIABLE:
+      case GL_SHADER_STORAGE_BLOCK:
       case GL_ATOMIC_COUNTER_BUFFER:
          *val = is_resource_referenced(shProg, res, index,
                                        stage_from_enum(prop));
@@ -1117,6 +1372,19 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
       }
       return count;
    }
+
+   case GL_TOP_LEVEL_ARRAY_SIZE:
+      VALIDATE_TYPE(GL_BUFFER_VARIABLE);
+      *val = program_resource_top_level_array_size(shProg, res,
+                                                   _mesa_program_resource_name(res));
+      return 1;
+
+   case GL_TOP_LEVEL_ARRAY_STRIDE:
+      VALIDATE_TYPE(GL_BUFFER_VARIABLE);
+      *val = program_resource_top_level_array_stride(shProg, res,
+                                                     _mesa_program_resource_name(res));
+      return 1;
+
    /* GL_ARB_tessellation_shader */
    case GL_IS_PER_PATCH:
       switch (res->Type) {
@@ -1132,6 +1400,7 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
    }
 
 #undef VALIDATE_TYPE
+#undef VALIDATE_TYPE_2
 
 invalid_enum:
    _mesa_error(ctx, GL_INVALID_ENUM, "%s(%s prop %s)", caller,
index 0e0e0d6..82a1ec3 100644 (file)
@@ -37,6 +37,7 @@
  */
 
 
+#include <stdbool.h>
 #include "main/glheader.h"
 #include "main/context.h"
 #include "main/dispatch.h"
 #include "main/shaderobj.h"
 #include "main/transformfeedback.h"
 #include "main/uniforms.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/ir.h"
+#include "glsl/ir_uniform.h"
+#include "glsl/program.h"
 #include "program/program.h"
 #include "program/prog_print.h"
 #include "program/prog_parameter.h"
 #include "util/ralloc.h"
 #include "util/hash_table.h"
-#include <stdbool.h>
-#include "../glsl/glsl_parser_extras.h"
-#include "../glsl/ir.h"
-#include "../glsl/ir_uniform.h"
-#include "../glsl/program.h"
-
-/** Define this to enable shader substitution (see below) */
-#define SHADER_SUBST 0
+#include "util/mesa-sha1.h"
 
 
 /**
@@ -715,7 +713,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
       if (!has_ubo)
          break;
 
-      for (i = 0; i < shProg->NumUniformBlocks; i++) {
+      for (i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
         /* Add one for the terminating NUL character.
          */
         const GLint len = strlen(shProg->UniformBlocks[i].Name) + 1;
@@ -731,7 +729,11 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
       if (!has_ubo)
          break;
 
-      *params = shProg->NumUniformBlocks;
+      *params = 0;
+      for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
+         if (!shProg->UniformBlocks[i].IsShaderStorage)
+            (*params)++;
+      }
       return;
    case GL_PROGRAM_BINARY_RETRIEVABLE_HINT:
       /* This enum isn't part of the OES extension for OpenGL ES 2.0.  It is
@@ -773,7 +775,8 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
       return;
    }
    case GL_PROGRAM_SEPARABLE:
-      *params = shProg->SeparateShader;
+      /* If the program has not been linked, return initial value 0. */
+      *params = (shProg->LinkStatus == GL_FALSE) ? 0 : shProg->SeparateShader;
       return;
 
    /* ARB_tessellation_shader */
@@ -932,13 +935,9 @@ get_shader_source(struct gl_context *ctx, GLuint shader, GLsizei maxLength,
  * glShaderSource[ARB].
  */
 static void
-shader_source(struct gl_context *ctx, GLuint shader, const GLchar *source)
+shader_source(struct gl_shader *sh, const GLchar *source)
 {
-   struct gl_shader *sh;
-
-   sh = _mesa_lookup_shader_err(ctx, shader, "glShaderSource");
-   if (!sh)
-      return;
+   assert(sh);
 
    /* free old shader source string and install new one */
    free((void *)sh->Source);
@@ -1512,24 +1511,100 @@ _mesa_LinkProgram(GLhandleARB programObj)
    link_program(ctx, programObj);
 }
 
+#if defined(HAVE_SHA1)
+/**
+ * Generate a SHA-1 hash value string for given source string.
+ */
+static void
+generate_sha1(const char *source, char sha_str[64])
+{
+   unsigned char sha[20];
+   _mesa_sha1_compute(source, strlen(source), sha);
+   _mesa_sha1_format(sha_str, sha);
+}
+
+/**
+ * Construct a full path for shader replacement functionality using
+ * following format:
+ *
+ * <path>/<stage prefix>_<CHECKSUM>.glsl
+ */
+static void
+construct_name(const gl_shader_stage stage, const char *source,
+               const char *path, char *name, unsigned length)
+{
+   char sha[64];
+   static const char *types[] = {
+      "VS", "TC", "TE", "GS", "FS", "CS",
+   };
+
+   generate_sha1(source, sha);
+   _mesa_snprintf(name, length, "%s/%s_%s.glsl", path, types[stage],
+                  sha);
+}
+
+/**
+ * Write the given shader source to a file in MESA_SHADER_DUMP_PATH.
+ */
+static void
+dump_shader(const gl_shader_stage stage, const char *source)
+{
+   char name[PATH_MAX];
+   static bool path_exists = true;
+   char *dump_path;
+   FILE *f;
+
+   if (!path_exists)
+      return;
+
+   dump_path = getenv("MESA_SHADER_DUMP_PATH");
+   if (!dump_path) {
+      path_exists = false;
+      return;
+   }
 
+   construct_name(stage, source, dump_path, name, PATH_MAX);
+
+   f = fopen(name, "w");
+   if (f) {
+      fputs(source, f);
+      fclose(f);
+   } else {
+      GET_CURRENT_CONTEXT(ctx);
+      _mesa_warning(ctx, "could not open %s for dumping shader (%s)", name,
+                    strerror(errno));
+   }
+}
 
 /**
  * Read shader source code from a file.
  * Useful for debugging to override an app's shader.
  */
 static GLcharARB *
-read_shader(const char *fname)
+read_shader(const gl_shader_stage stage, const char *source)
 {
-   int shader_size = 0;
-   FILE *f = fopen(fname, "r");
-   GLcharARB *buffer, *shader;
-   int len;
+   char name[PATH_MAX];
+   char *read_path;
+   static bool path_exists = true;
+   int len, shader_size = 0;
+   GLcharARB *buffer;
+   FILE *f;
+
+   if (!path_exists)
+      return NULL;
 
-   if (!f) {
+   read_path = getenv("MESA_SHADER_READ_PATH");
+   if (!read_path) {
+      path_exists = false;
       return NULL;
    }
 
+   construct_name(stage, source, read_path, name, PATH_MAX);
+
+   f = fopen(name, "r");
+   if (!f)
+      return NULL;
+
    /* allocate enough room for the entire shader */
    fseek(f, 0, SEEK_END);
    shader_size = ftell(f);
@@ -1547,12 +1622,9 @@ read_shader(const char *fname)
 
    fclose(f);
 
-   shader = strdup(buffer);
-   free(buffer);
-
-   return shader;
+   return buffer;
 }
-
+#endif /* HAVE_SHA1 */
 
 /**
  * Called via glShaderSource() and glShaderSourceARB() API functions.
@@ -1567,9 +1639,17 @@ _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei count,
    GLint *offsets;
    GLsizei i, totalLength;
    GLcharARB *source;
-   GLuint checksum;
+   struct gl_shader *sh;
+
+#if defined(HAVE_SHA1)
+   GLcharARB *replacement;
+#endif /* HAVE_SHA1 */
+
+   sh = _mesa_lookup_shader_err(ctx, shaderObj, "glShaderSourceARB");
+   if (!sh)
+      return;
 
-   if (!shaderObj || string == NULL) {
+   if (string == NULL) {
       _mesa_error(ctx, GL_INVALID_VALUE, "glShaderSourceARB");
       return;
    }
@@ -1620,34 +1700,20 @@ _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei count,
    source[totalLength - 1] = '\0';
    source[totalLength - 2] = '\0';
 
-   if (SHADER_SUBST) {
-      /* Compute the shader's source code checksum then try to open a file
-       * named newshader_<CHECKSUM>.  If it exists, use it in place of the
-       * original shader source code.  For debugging.
-       */
-      char filename[100];
-      GLcharARB *newSource;
-
-      checksum = _mesa_str_checksum(source);
-
-      _mesa_snprintf(filename, sizeof(filename), "newshader_%d", checksum);
+#if defined(HAVE_SHA1)
+   /* Dump the original shader source to MESA_SHADER_DUMP_PATH and replace
+    * it if a corresponding entry is found in MESA_SHADER_READ_PATH.
+    */
+   dump_shader(sh->Stage, source);
 
-      newSource = read_shader(filename);
-      if (newSource) {
-         fprintf(stderr, "Mesa: Replacing shader %u chksum=%d with %s\n",
-                       shaderObj, checksum, filename);
-         free(source);
-         source = newSource;
-      }
+   replacement = read_shader(sh->Stage, source);
+   if (replacement) {
+      free(source);
+      source = replacement;
    }
+#endif /* HAVE_SHA1 */
 
-   shader_source(ctx, shaderObj, source);
-
-   if (SHADER_SUBST) {
-      struct gl_shader *sh = _mesa_lookup_shader(ctx, shaderObj);
-      if (sh)
-         sh->SourceChecksum = checksum; /* save original checksum */
-   }
+   shader_source(sh, source);
 
    free(offsets);
 }
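
The debugging workflow this enables (paths illustrative): run the application once with MESA_SHADER_DUMP_PATH=/tmp/shaders to capture each source as, say, /tmp/shaders/FS_<sha1>.glsl; edit the dumped file in place; then rerun with MESA_SHADER_READ_PATH=/tmp/shaders. Both sides hash the original source, so the edited file is found under the same name and silently substituted at glShaderSource() time.
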
index 0a10191..fba767b 100644 (file)
@@ -245,6 +245,9 @@ _mesa_get_program_resource_name(struct gl_shader_program *shProg,
                                 GLsizei bufSize, GLsizei *length,
                                 GLchar *name, const char *caller);
 
+extern unsigned
+_mesa_program_resource_name_len(struct gl_program_resource *res);
+
 extern GLint
 _mesa_program_resource_location(struct gl_shader_program *shProg,
                                 GLenum programInterface, const char *name);
index 110a18e..5cd37d7 100644 (file)
@@ -69,8 +69,6 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
 
       assert(old->RefCount > 0);
       old->RefCount--;
-      /*printf("SHADER DECR %p (%d) to %d\n",
-        (void*) old, old->Name, old->RefCount);*/
       deleteFlag = (old->RefCount == 0);
 
       if (deleteFlag) {
@@ -86,8 +84,6 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
    if (sh) {
       /* reference new */
       sh->RefCount++;
-      /*printf("SHADER INCR %p (%d) to %d\n",
-        (void*) sh, sh->Name, sh->RefCount);*/
       *ptr = sh;
    }
 }
@@ -209,10 +205,6 @@ _mesa_reference_shader_program_(struct gl_context *ctx,
 
       assert(old->RefCount > 0);
       old->RefCount--;
-#if 0
-      printf("ShaderProgram %p ID=%u  RefCount-- to %d\n",
-             (void *) old, old->Name, old->RefCount);
-#endif
       deleteFlag = (old->RefCount == 0);
 
       if (deleteFlag) {
@@ -227,10 +219,6 @@ _mesa_reference_shader_program_(struct gl_context *ctx,
 
    if (shProg) {
       shProg->RefCount++;
-#if 0
-      printf("ShaderProgram %p ID=%u  RefCount++ to %d\n",
-             (void *) shProg, shProg->Name, shProg->RefCount);
-#endif
       *ptr = shProg;
    }
 }
@@ -306,7 +294,7 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
 
    ralloc_free(shProg->UniformBlocks);
    shProg->UniformBlocks = NULL;
-   shProg->NumUniformBlocks = 0;
+   shProg->NumBufferInterfaceBlocks = 0;
    for (i = 0; i < MESA_SHADER_STAGES; i++) {
       ralloc_free(shProg->UniformBlockStageIndex[i]);
       shProg->UniformBlockStageIndex[i] = NULL;
index 9467f3b..bd7ab73 100644 (file)
@@ -1,5 +1,4 @@
 AM_CFLAGS = \
-       $(X11_CFLAGS) \
        $(PTHREAD_CFLAGS)
 AM_CPPFLAGS = \
        -I$(top_srcdir)/src/gtest/include \
index b941f3e..b19c6d7 100644 (file)
@@ -844,7 +844,7 @@ const struct function common_desktop_functions_possible[] = {
    { "glGetProgramResourceiv", 43, -1 },
    { "glGetProgramResourceLocation", 43, -1 },
    { "glGetProgramResourceLocationIndex", 43, -1 },
-// { "glShaderStorageBlockBinding", 43, -1 },           // XXX: Add to xml
+   { "glShaderStorageBlockBinding", 43, -1 },
 // { "glTextureBufferRangeEXT", 43, -1 },               // XXX: Add to xml
    { "glTexStorage2DMultisample", 43, -1 },
    { "glTexStorage3DMultisample", 43, -1 },
@@ -2405,10 +2405,8 @@ const struct function gles31_functions_possible[] = {
    { "glDrawArraysIndirect", 31, -1 },
    { "glDrawElementsIndirect", 31, -1 },
 
-   // FINISHME: These two functions have not been implemented yet.  They come
-   // FINISHME: from the ARB_framebuffer_no_attachments extension.
-   // { "glFramebufferParameteri", 31, -1 },
-   // { "glGetFramebufferParameteriv", 31, -1 },
+   { "glFramebufferParameteri", 31, -1 },
+   { "glGetFramebufferParameteriv", 31, -1 },
 
    { "glGetProgramInterfaceiv", 31, -1 },
    { "glGetProgramResourceIndex", 31, -1 },
index 8218cc9..96b2246 100644 (file)
@@ -1780,6 +1780,7 @@ const struct enum_info everything[] = {
    { 0x8E5F, "GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET" },
    { 0x8F36, "GL_COPY_READ_BUFFER" },
    { 0x8F37, "GL_COPY_WRITE_BUFFER" },
+   { 0x8F39, "GL_MAX_COMBINED_SHADER_OUTPUT_RESOURCES" },
    { 0x8F90, "GL_RED_SNORM" },
    { 0x8F91, "GL_RG_SNORM" },
    { 0x8F92, "GL_RGB_SNORM" },
@@ -1797,6 +1798,20 @@ const struct enum_info everything[] = {
    { 0x8F9E, "GL_PRIMITIVE_RESTART_INDEX" },
    { 0x8F9F, "GL_MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB" },
    { 0x906F, "GL_RGB10_A2UI" },
+   { 0x90D2, "GL_SHADER_STORAGE_BUFFER" },
+   { 0x90D3, "GL_SHADER_STORAGE_BUFFER_BINDING" },
+   { 0x90D4, "GL_SHADER_STORAGE_BUFFER_START" },
+   { 0x90D5, "GL_SHADER_STORAGE_BUFFER_SIZE" },
+   { 0x90D6, "GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS" },
+   { 0x90D7, "GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS" },
+   { 0x90D8, "GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS" },
+   { 0x90D9, "GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS" },
+   { 0x90DA, "GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS" },
+   { 0x90DB, "GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS" },
+   { 0x90DC, "GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS" },
+   { 0x90DD, "GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS" },
+   { 0x90DE, "GL_MAX_SHADER_STORAGE_BLOCK_SIZE" },
+   { 0x90DF, "GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT" },
    { 0x9100, "GL_TEXTURE_2D_MULTISAMPLE" },
    { 0x9101, "GL_PROXY_TEXTURE_2D_MULTISAMPLE" },
    { 0x9102, "GL_TEXTURE_2D_MULTISAMPLE_ARRAY" },
index 394c8ba..84973d3 100644 (file)
@@ -100,6 +100,12 @@ _mesa_gl_compressed_format_base_format(GLenum format)
    case GL_ETC1_RGB8_OES:
    case GL_COMPRESSED_RGB8_ETC2:
    case GL_COMPRESSED_SRGB8_ETC2:
+   case GL_RGB_S3TC:
+   case GL_RGB4_S3TC:
+   case GL_PALETTE4_RGB8_OES:
+   case GL_PALETTE4_R5_G6_B5_OES:
+   case GL_PALETTE8_RGB8_OES:
+   case GL_PALETTE8_R5_G6_B5_OES:
       return GL_RGB;
 
    case GL_COMPRESSED_RGBA:
@@ -117,6 +123,14 @@ _mesa_gl_compressed_format_base_format(GLenum format)
    case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
    case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
    case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+   case GL_RGBA_S3TC:
+   case GL_RGBA4_S3TC:
+   case GL_PALETTE4_RGBA8_OES:
+   case GL_PALETTE8_RGB5_A1_OES:
+   case GL_PALETTE4_RGBA4_OES:
+   case GL_PALETTE4_RGB5_A1_OES:
+   case GL_PALETTE8_RGBA8_OES:
+   case GL_PALETTE8_RGBA4_OES:
       return GL_RGBA;
 
    case GL_COMPRESSED_ALPHA:
@@ -728,6 +742,7 @@ _mesa_get_compressed_fetch_func(mesa_format format)
    case MESA_FORMAT_LAYOUT_FXT1:
       return _mesa_get_fxt_fetch_func(format);
    case MESA_FORMAT_LAYOUT_RGTC:
+   case MESA_FORMAT_LAYOUT_LATC:
       return _mesa_get_compressed_rgtc_func(format);
    case MESA_FORMAT_LAYOUT_ETC1:
       return _mesa_get_etc_fetch_func(format);
index a600180..f0f6553 100644 (file)
@@ -1291,7 +1291,8 @@ _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
       tempImageSlices[0] = (GLubyte *) tempImage;
       _mesa_texstore(ctx, dims,
                      baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                      rgbaRowStride, tempImageSlices,
                      srcWidth, srcHeight, srcDepth,
                      srcFormat, srcType, srcAddr,
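
Why the endian selection is needed (the same pattern recurs in the FXT1, RGTC and DXT stores below): Mesa's packed formats are defined on a 32-bit value, with the first-named channel in the least-significant bits, while the GL_RGBA/GL_UNSIGNED_BYTE temp image is byte-ordered R,G,B,A in memory. A minimal sketch of the mismatch:

   #include <string.h>

   static unsigned
   pack_rgba_bytes(void)
   {
      const unsigned char px[4] = { 0x11, 0x22, 0x33, 0x44 };   /* R,G,B,A */
      unsigned v;
      memcpy(&v, px, sizeof v);
      /* little endian: v == 0x44332211, R in bits 7:0  -> R8G8B8A8;
       * big endian:    v == 0x11223344, R in bits 31:24 -> A8B8G8R8 */
      return v;
   }
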
index f06f048..ae339e1 100644 (file)
@@ -65,7 +65,7 @@ _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
    if (srcFormat != GL_RGB ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGB/GLubyte */
       GLubyte *tempImageSlices[1];
@@ -130,7 +130,8 @@ _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
       tempImageSlices[0] = (GLubyte *) tempImage;
       _mesa_texstore(ctx, dims,
                      baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                      rgbaRowStride, tempImageSlices,
                      srcWidth, srcHeight, srcDepth,
                      srcFormat, srcType, srcAddr,
index 66de1f1..8cab7a5 100644 (file)
@@ -196,9 +196,11 @@ _mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS)
           dstFormat == MESA_FORMAT_LA_LATC2_UNORM);
 
    if (baseInternalFormat == GL_RG)
-      tempFormat = MESA_FORMAT_R8G8_UNORM;
+      tempFormat = _mesa_little_endian() ? MESA_FORMAT_R8G8_UNORM
+                                         : MESA_FORMAT_G8R8_UNORM;
    else
-      tempFormat = MESA_FORMAT_L8A8_UNORM;
+      tempFormat = _mesa_little_endian() ? MESA_FORMAT_L8A8_UNORM
+                                         : MESA_FORMAT_A8L8_UNORM;
 
    rgRowStride = 2 * srcWidth * sizeof(GLubyte);
    tempImage = malloc(srcWidth * srcHeight * 2 * sizeof(GLubyte));
index 7ce3cb8..7ddb0ed 100644 (file)
@@ -130,7 +130,7 @@ _mesa_texstore_rgb_dxt1(TEXSTORE_PARAMS)
    if (srcFormat != GL_RGB ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGB/GLubyte */
       GLubyte *tempImageSlices[1];
@@ -187,7 +187,7 @@ _mesa_texstore_rgba_dxt1(TEXSTORE_PARAMS)
    if (srcFormat != GL_RGBA ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
       GLubyte *tempImageSlices[1];
@@ -198,7 +198,8 @@ _mesa_texstore_rgba_dxt1(TEXSTORE_PARAMS)
       tempImageSlices[0] = (GLubyte *) tempImage;
       _mesa_texstore(ctx, dims,
                      baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                      rgbaRowStride, tempImageSlices,
                      srcWidth, srcHeight, srcDepth,
                      srcFormat, srcType, srcAddr,
@@ -244,7 +245,7 @@ _mesa_texstore_rgba_dxt3(TEXSTORE_PARAMS)
    if (srcFormat != GL_RGBA ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
       GLubyte *tempImageSlices[1];
@@ -255,7 +256,8 @@ _mesa_texstore_rgba_dxt3(TEXSTORE_PARAMS)
       tempImageSlices[0] = (GLubyte *) tempImage;
       _mesa_texstore(ctx, dims,
                      baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                      rgbaRowStride, tempImageSlices,
                      srcWidth, srcHeight, srcDepth,
                      srcFormat, srcType, srcAddr,
@@ -300,7 +302,7 @@ _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS)
    if (srcFormat != GL_RGBA ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
       GLubyte *tempImageSlices[1];
@@ -311,7 +313,8 @@ _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS)
       tempImageSlices[0] = (GLubyte *) tempImage;
       _mesa_texstore(ctx, dims,
                      baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                      rgbaRowStride, tempImageSlices,
                      srcWidth, srcHeight, srcDepth,
                      srcFormat, srcType, srcAddr,
index f62553d..682b727 100644 (file)
@@ -361,6 +361,13 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions,
                            tempSlice, RGBA32_FLOAT, srcStride,
                            width, height,
                            needsRebase ? rebaseSwizzle : NULL);
+
+      /* Handle byte swapping if required */
+      if (ctx->Pack.SwapBytes) {
+         _mesa_swap_bytes_2d_image(format, type, &ctx->Pack,
+                                   width, height, dest, dest);
+      }
+
       tempSlice += 4 * width * height;
    }
 
@@ -557,17 +564,9 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
 
    do_swap:
       /* Handle byte swapping if required */
-      if (ctx->Pack.SwapBytes) {
-         GLint swapSize = _mesa_sizeof_packed_type(type);
-         if (swapSize == 2 || swapSize == 4) {
-            int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
-            assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
-            if (swapSize == 2)
-               _mesa_swap2((GLushort *) dest, width * height * swapsPerPixel);
-            else if (swapSize == 4)
-               _mesa_swap4((GLuint *) dest, width * height * swapsPerPixel);
-         }
-      }
+      if (ctx->Pack.SwapBytes)
+         _mesa_swap_bytes_2d_image(format, type, &ctx->Pack,
+                                   width, height, dest, dest);
 
       /* Unmap the src texture buffer */
       ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
index bfb0858..d9453e3 100644 (file)
@@ -117,517 +117,6 @@ adjust_for_oes_float_texture(GLenum format, GLenum type)
 }
 
 /**
- * Return the simple base format for a given internal texture format.
- * For example, given GL_LUMINANCE12_ALPHA4, return GL_LUMINANCE_ALPHA.
- *
- * \param ctx GL context.
- * \param internalFormat the internal texture format token or 1, 2, 3, or 4.
- *
- * \return the corresponding \u base internal format (GL_ALPHA, GL_LUMINANCE,
- * GL_LUMANCE_ALPHA, GL_INTENSITY, GL_RGB, or GL_RGBA), or -1 if invalid enum.
- *
- * This is the format which is used during texture application (i.e. the
- * texture format and env mode determine the arithmetic used.
- */
-GLint
-_mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
-{
-   switch (internalFormat) {
-   case GL_ALPHA:
-   case GL_ALPHA4:
-   case GL_ALPHA8:
-   case GL_ALPHA12:
-   case GL_ALPHA16:
-      return (ctx->API != API_OPENGL_CORE) ? GL_ALPHA : -1;
-   case 1:
-   case GL_LUMINANCE:
-   case GL_LUMINANCE4:
-   case GL_LUMINANCE8:
-   case GL_LUMINANCE12:
-   case GL_LUMINANCE16:
-      return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE : -1;
-   case 2:
-   case GL_LUMINANCE_ALPHA:
-   case GL_LUMINANCE4_ALPHA4:
-   case GL_LUMINANCE6_ALPHA2:
-   case GL_LUMINANCE8_ALPHA8:
-   case GL_LUMINANCE12_ALPHA4:
-   case GL_LUMINANCE12_ALPHA12:
-   case GL_LUMINANCE16_ALPHA16:
-      return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE_ALPHA : -1;
-   case GL_INTENSITY:
-   case GL_INTENSITY4:
-   case GL_INTENSITY8:
-   case GL_INTENSITY12:
-   case GL_INTENSITY16:
-      return (ctx->API != API_OPENGL_CORE) ? GL_INTENSITY : -1;
-   case 3:
-      return (ctx->API != API_OPENGL_CORE) ? GL_RGB : -1;
-   case GL_RGB:
-   case GL_R3_G3_B2:
-   case GL_RGB4:
-   case GL_RGB5:
-   case GL_RGB8:
-   case GL_RGB10:
-   case GL_RGB12:
-   case GL_RGB16:
-      return GL_RGB;
-   case 4:
-      return (ctx->API != API_OPENGL_CORE) ? GL_RGBA : -1;
-   case GL_RGBA:
-   case GL_RGBA2:
-   case GL_RGBA4:
-   case GL_RGB5_A1:
-   case GL_RGBA8:
-   case GL_RGB10_A2:
-   case GL_RGBA12:
-   case GL_RGBA16:
-      return GL_RGBA;
-   default:
-      ; /* fallthrough */
-   }
-
-   /* GL_BGRA can be an internal format *only* in OpenGL ES (1.x or 2.0).
-    */
-   if (_mesa_is_gles(ctx)) {
-      switch (internalFormat) {
-      case GL_BGRA:
-         return GL_RGBA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.ARB_ES2_compatibility) {
-      switch (internalFormat) {
-      case GL_RGB565:
-         return GL_RGB;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.ARB_depth_texture) {
-      switch (internalFormat) {
-      case GL_DEPTH_COMPONENT:
-      case GL_DEPTH_COMPONENT16:
-      case GL_DEPTH_COMPONENT24:
-      case GL_DEPTH_COMPONENT32:
-         return GL_DEPTH_COMPONENT;
-      case GL_DEPTH_STENCIL:
-      case GL_DEPTH24_STENCIL8:
-         return GL_DEPTH_STENCIL;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.ARB_texture_stencil8) {
-      switch (internalFormat) {
-      case GL_STENCIL_INDEX:
-      case GL_STENCIL_INDEX1:
-      case GL_STENCIL_INDEX4:
-      case GL_STENCIL_INDEX8:
-      case GL_STENCIL_INDEX16:
-         return GL_STENCIL_INDEX;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   switch (internalFormat) {
-   case GL_COMPRESSED_ALPHA:
-      return GL_ALPHA;
-   case GL_COMPRESSED_LUMINANCE:
-      return GL_LUMINANCE;
-   case GL_COMPRESSED_LUMINANCE_ALPHA:
-      return GL_LUMINANCE_ALPHA;
-   case GL_COMPRESSED_INTENSITY:
-      return GL_INTENSITY;
-   case GL_COMPRESSED_RGB:
-      return GL_RGB;
-   case GL_COMPRESSED_RGBA:
-      return GL_RGBA;
-   default:
-      ; /* fallthrough */
-   }
-
-   if (ctx->Extensions.TDFX_texture_compression_FXT1) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_RGB_FXT1_3DFX:
-         return GL_RGB;
-      case GL_COMPRESSED_RGBA_FXT1_3DFX:
-         return GL_RGBA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   /* Assume that the ANGLE flag will always be set if the EXT flag is set.
-    */
-   if (ctx->Extensions.ANGLE_texture_compression_dxt) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-         return GL_RGB;
-      case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-      case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-      case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-         return GL_RGBA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (_mesa_is_desktop_gl(ctx)
-       && ctx->Extensions.ANGLE_texture_compression_dxt) {
-      switch (internalFormat) {
-      case GL_RGB_S3TC:
-      case GL_RGB4_S3TC:
-         return GL_RGB;
-      case GL_RGBA_S3TC:
-      case GL_RGBA4_S3TC:
-         return GL_RGBA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.MESA_ycbcr_texture) {
-      if (internalFormat == GL_YCBCR_MESA)
-         return GL_YCBCR_MESA;
-   }
-
-   if (ctx->Extensions.ARB_texture_float) {
-      switch (internalFormat) {
-      case GL_ALPHA16F_ARB:
-      case GL_ALPHA32F_ARB:
-         return GL_ALPHA;
-      case GL_RGBA16F_ARB:
-      case GL_RGBA32F_ARB:
-         return GL_RGBA;
-      case GL_RGB16F_ARB:
-      case GL_RGB32F_ARB:
-         return GL_RGB;
-      case GL_INTENSITY16F_ARB:
-      case GL_INTENSITY32F_ARB:
-         return GL_INTENSITY;
-      case GL_LUMINANCE16F_ARB:
-      case GL_LUMINANCE32F_ARB:
-         return GL_LUMINANCE;
-      case GL_LUMINANCE_ALPHA16F_ARB:
-      case GL_LUMINANCE_ALPHA32F_ARB:
-         return GL_LUMINANCE_ALPHA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.EXT_texture_snorm) {
-      switch (internalFormat) {
-      case GL_RED_SNORM:
-      case GL_R8_SNORM:
-      case GL_R16_SNORM:
-         return GL_RED;
-      case GL_RG_SNORM:
-      case GL_RG8_SNORM:
-      case GL_RG16_SNORM:
-         return GL_RG;
-      case GL_RGB_SNORM:
-      case GL_RGB8_SNORM:
-      case GL_RGB16_SNORM:
-         return GL_RGB;
-      case GL_RGBA_SNORM:
-      case GL_RGBA8_SNORM:
-      case GL_RGBA16_SNORM:
-         return GL_RGBA;
-      case GL_ALPHA_SNORM:
-      case GL_ALPHA8_SNORM:
-      case GL_ALPHA16_SNORM:
-         return GL_ALPHA;
-      case GL_LUMINANCE_SNORM:
-      case GL_LUMINANCE8_SNORM:
-      case GL_LUMINANCE16_SNORM:
-         return GL_LUMINANCE;
-      case GL_LUMINANCE_ALPHA_SNORM:
-      case GL_LUMINANCE8_ALPHA8_SNORM:
-      case GL_LUMINANCE16_ALPHA16_SNORM:
-         return GL_LUMINANCE_ALPHA;
-      case GL_INTENSITY_SNORM:
-      case GL_INTENSITY8_SNORM:
-      case GL_INTENSITY16_SNORM:
-         return GL_INTENSITY;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.EXT_texture_sRGB) {
-      switch (internalFormat) {
-      case GL_SRGB_EXT:
-      case GL_SRGB8_EXT:
-      case GL_COMPRESSED_SRGB_EXT:
-         return GL_RGB;
-      case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
-         return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGB : -1;
-      case GL_SRGB_ALPHA_EXT:
-      case GL_SRGB8_ALPHA8_EXT:
-      case GL_COMPRESSED_SRGB_ALPHA_EXT:
-         return GL_RGBA;
-      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
-      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
-      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
-         return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGBA : -1;
-      case GL_SLUMINANCE_ALPHA_EXT:
-      case GL_SLUMINANCE8_ALPHA8_EXT:
-      case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
-         return GL_LUMINANCE_ALPHA;
-      case GL_SLUMINANCE_EXT:
-      case GL_SLUMINANCE8_EXT:
-      case GL_COMPRESSED_SLUMINANCE_EXT:
-         return GL_LUMINANCE;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Version >= 30 ||
-       ctx->Extensions.EXT_texture_integer) {
-      switch (internalFormat) {
-      case GL_RGBA8UI_EXT:
-      case GL_RGBA16UI_EXT:
-      case GL_RGBA32UI_EXT:
-      case GL_RGBA8I_EXT:
-      case GL_RGBA16I_EXT:
-      case GL_RGBA32I_EXT:
-      case GL_RGB10_A2UI:
-         return GL_RGBA;
-      case GL_RGB8UI_EXT:
-      case GL_RGB16UI_EXT:
-      case GL_RGB32UI_EXT:
-      case GL_RGB8I_EXT:
-      case GL_RGB16I_EXT:
-      case GL_RGB32I_EXT:
-         return GL_RGB;
-      }
-   }
-
-   if (ctx->Extensions.EXT_texture_integer) {
-      switch (internalFormat) {
-      case GL_ALPHA8UI_EXT:
-      case GL_ALPHA16UI_EXT:
-      case GL_ALPHA32UI_EXT:
-      case GL_ALPHA8I_EXT:
-      case GL_ALPHA16I_EXT:
-      case GL_ALPHA32I_EXT:
-         return GL_ALPHA;
-      case GL_INTENSITY8UI_EXT:
-      case GL_INTENSITY16UI_EXT:
-      case GL_INTENSITY32UI_EXT:
-      case GL_INTENSITY8I_EXT:
-      case GL_INTENSITY16I_EXT:
-      case GL_INTENSITY32I_EXT:
-         return GL_INTENSITY;
-      case GL_LUMINANCE8UI_EXT:
-      case GL_LUMINANCE16UI_EXT:
-      case GL_LUMINANCE32UI_EXT:
-      case GL_LUMINANCE8I_EXT:
-      case GL_LUMINANCE16I_EXT:
-      case GL_LUMINANCE32I_EXT:
-         return GL_LUMINANCE;
-      case GL_LUMINANCE_ALPHA8UI_EXT:
-      case GL_LUMINANCE_ALPHA16UI_EXT:
-      case GL_LUMINANCE_ALPHA32UI_EXT:
-      case GL_LUMINANCE_ALPHA8I_EXT:
-      case GL_LUMINANCE_ALPHA16I_EXT:
-      case GL_LUMINANCE_ALPHA32I_EXT:
-         return GL_LUMINANCE_ALPHA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.ARB_texture_rg) {
-      switch (internalFormat) {
-      case GL_R16F:
-      case GL_R32F:
-        if (!ctx->Extensions.ARB_texture_float)
-           break;
-         return GL_RED;
-      case GL_R8I:
-      case GL_R8UI:
-      case GL_R16I:
-      case GL_R16UI:
-      case GL_R32I:
-      case GL_R32UI:
-        if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
-           break;
-        /* FALLTHROUGH */
-      case GL_R8:
-      case GL_R16:
-      case GL_RED:
-      case GL_COMPRESSED_RED:
-         return GL_RED;
-
-      case GL_RG16F:
-      case GL_RG32F:
-        if (!ctx->Extensions.ARB_texture_float)
-           break;
-         return GL_RG;
-      case GL_RG8I:
-      case GL_RG8UI:
-      case GL_RG16I:
-      case GL_RG16UI:
-      case GL_RG32I:
-      case GL_RG32UI:
-        if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
-           break;
-        /* FALLTHROUGH */
-      case GL_RG:
-      case GL_RG8:
-      case GL_RG16:
-      case GL_COMPRESSED_RG:
-         return GL_RG;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.EXT_texture_shared_exponent) {
-      switch (internalFormat) {
-      case GL_RGB9_E5_EXT:
-         return GL_RGB;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.EXT_packed_float) {
-      switch (internalFormat) {
-      case GL_R11F_G11F_B10F_EXT:
-         return GL_RGB;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.ARB_depth_buffer_float) {
-      switch (internalFormat) {
-      case GL_DEPTH_COMPONENT32F:
-         return GL_DEPTH_COMPONENT;
-      case GL_DEPTH32F_STENCIL8:
-         return GL_DEPTH_STENCIL;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.ARB_texture_compression_rgtc) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_RED_RGTC1:
-      case GL_COMPRESSED_SIGNED_RED_RGTC1:
-         return GL_RED;
-      case GL_COMPRESSED_RG_RGTC2:
-      case GL_COMPRESSED_SIGNED_RG_RGTC2:
-         return GL_RG;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.EXT_texture_compression_latc) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
-      case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
-         return GL_LUMINANCE;
-      case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
-      case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
-         return GL_LUMINANCE_ALPHA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.ATI_texture_compression_3dc) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
-         return GL_LUMINANCE_ALPHA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
-      switch (internalFormat) {
-      case GL_ETC1_RGB8_OES:
-         return GL_RGB;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->Extensions.KHR_texture_compression_astc_ldr &&
-       _mesa_is_astc_format(internalFormat))
-         return GL_RGBA;
-
-   if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_RGB8_ETC2:
-      case GL_COMPRESSED_SRGB8_ETC2:
-         return GL_RGB;
-      case GL_COMPRESSED_RGBA8_ETC2_EAC:
-      case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
-      case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-      case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-         return GL_RGBA;
-      case GL_COMPRESSED_R11_EAC:
-      case GL_COMPRESSED_SIGNED_R11_EAC:
-         return GL_RED;
-      case GL_COMPRESSED_RG11_EAC:
-      case GL_COMPRESSED_SIGNED_RG11_EAC:
-         return GL_RG;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (_mesa_is_desktop_gl(ctx) &&
-       ctx->Extensions.ARB_texture_compression_bptc) {
-      switch (internalFormat) {
-      case GL_COMPRESSED_RGBA_BPTC_UNORM:
-      case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
-         return GL_RGBA;
-      case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
-      case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
-         return GL_RGB;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   if (ctx->API == API_OPENGLES) {
-      switch (internalFormat) {
-      case GL_PALETTE4_RGB8_OES:
-      case GL_PALETTE4_R5_G6_B5_OES:
-      case GL_PALETTE8_RGB8_OES:
-      case GL_PALETTE8_R5_G6_B5_OES:
-        return GL_RGB;
-      case GL_PALETTE4_RGBA8_OES:
-      case GL_PALETTE8_RGB5_A1_OES:
-      case GL_PALETTE4_RGBA4_OES:
-      case GL_PALETTE4_RGB5_A1_OES:
-      case GL_PALETTE8_RGBA8_OES:
-      case GL_PALETTE8_RGBA4_OES:
-        return GL_RGBA;
-      default:
-         ; /* fallthrough */
-      }
-   }
-
-   return -1; /* error */
-}
-
-
-/**
  * For cube map faces, return a face index in [0,5].
  * For other targets, return 0.
  */
@@ -2301,15 +1790,23 @@ texture_error_check( struct gl_context *ctx,
       return GL_TRUE;
    }
 
-   /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
-    * combinations of format, internalFormat, and type that can be used.
-    * Formats and types that require additional extensions (e.g., GL_FLOAT
-    * requires GL_OES_texture_float) are filtered elsewhere.
-    */
-   if (_mesa_is_gles(ctx) &&
-       texture_format_error_check_gles(ctx, format, type, internalFormat,
-                                       dimensions, "glTexImage%dD")) {
-     return GL_TRUE;
+   /* Check incoming image format and type */
+   err = _mesa_error_check_format_and_type(ctx, format, type);
+   if (err != GL_NO_ERROR) {
+      /* Prior to OpenGL ES 2.0, INVALID_VALUE is expected instead of
+       * INVALID_ENUM. From page 73 of the OpenGL ES 1.1 spec:
+       *
+       *     "Specifying a value for internalformat that is not one of the
+       *      above (acceptable) values generates the error INVALID VALUE."
+       */
+      if (err == GL_INVALID_ENUM && _mesa_is_gles(ctx) && ctx->Version < 20)
+         err = GL_INVALID_VALUE;
+
+      _mesa_error(ctx, err,
+                  "glTexImage%dD(incompatible format = %s, type = %s)",
+                  dimensions, _mesa_enum_to_string(format),
+                  _mesa_enum_to_string(type));
+      return GL_TRUE;
    }
 
    /* Check internalFormat */
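
Concretely, the remap above changes only which error an ES 1.x context reports, not whether the call is rejected. A hypothetical client-side sketch (assuming an ES 1.1 context and a type token that reaches this check and would otherwise yield GL_INVALID_ENUM):

/* GL_DOUBLE is not a legal pixel type in ES 1.1. */
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 16, 16, 0,
             GL_RGBA, GL_DOUBLE, NULL);
assert(glGetError() == GL_INVALID_VALUE);  /* remapped from INVALID_ENUM */
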
@@ -2320,13 +1817,14 @@ texture_error_check( struct gl_context *ctx,
       return GL_TRUE;
    }
 
-   /* Check incoming image format and type */
-   err = _mesa_error_check_format_and_type(ctx, format, type);
-   if (err != GL_NO_ERROR) {
-      _mesa_error(ctx, err,
-                  "glTexImage%dD(incompatible format = %s, type = %s)",
-                  dimensions, _mesa_enum_to_string(format),
-                  _mesa_enum_to_string(type));
+   /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
+    * combinations of format, internalFormat, and type that can be used.
+    * Formats and types that require additional extensions (e.g., GL_FLOAT
+    * requires GL_OES_texture_float) are filtered elsewhere.
+    */
+   if (_mesa_is_gles(ctx) &&
+       texture_format_error_check_gles(ctx, format, type, internalFormat,
+                                       dimensions, "glTexImage%dD")) {
       return GL_TRUE;
    }
 
index a4736b5..5df36c5 100644
@@ -59,10 +59,6 @@ _mesa_is_zero_size_texture(const struct gl_texture_image *texImage)
 /** \name Internal functions */
 /*@{*/
 
-extern GLint
-_mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat );
-
-
 extern GLboolean
 _mesa_is_proxy_texture(GLenum target);
 
index c53bb29..9fd969f 100644
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-
 /**
  * \file texstorage.c
  * GL_ARB_texture_storage functions
  */
 
-
-
 #include "glheader.h"
 #include "context.h"
 #include "enums.h"
@@ -110,7 +107,7 @@ legal_texobj_target(struct gl_context *ctx, GLuint dims, GLenum target)
 
 /** Helper to get a particular texture image in a texture object */
 static struct gl_texture_image *
-get_tex_image(struct gl_context *ctx, 
+get_tex_image(struct gl_context *ctx,
               struct gl_texture_object *texObj,
               GLuint face, GLuint level)
 {
@@ -151,7 +148,8 @@ initialize_texture_fields(struct gl_context *ctx,
                                     0, internalFormat, texFormat);
       }
 
-      _mesa_next_mipmap_level_size(target, 0, levelWidth, levelHeight, levelDepth,
+      _mesa_next_mipmap_level_size(target, 0,
+                                   levelWidth, levelHeight, levelDepth,
                                    &levelWidth, &levelHeight, &levelDepth);
    }
    return GL_TRUE;
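
For reference, _mesa_next_mipmap_level_size halves each dimension with a floor of one. A stand-alone sketch of that computation for non-array targets (array textures keep their layer count, which the real helper also handles):

/* Classic GL mipmap halving: each dimension becomes max(1, d / 2),
 * e.g. 16x8x4 -> 8x4x2 -> 4x2x1 -> 2x1x1 -> 1x1x1. */
static void
next_mipmap_level_size_sketch(int width, int height, int depth,
                              int *nextW, int *nextH, int *nextD)
{
   *nextW = width  > 1 ? width  / 2 : 1;
   *nextH = height > 1 ? height / 2 : 1;
   *nextD = depth  > 1 ? depth  / 2 : 1;
}
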
@@ -204,7 +202,8 @@ update_fbo_texture(struct gl_context *ctx, struct gl_texture_object *texObj)
 
 
 GLboolean
-_mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat)
+_mesa_is_legal_tex_storage_format(const struct gl_context *ctx,
+                                  GLenum internalformat)
 {
    /* check internal format - note that only sized formats are allowed */
    switch (internalformat) {
@@ -246,6 +245,7 @@ _mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat)
    }
 }
 
+
 /**
  * Default ctx->Driver.AllocTextureStorage() handler.
  *
@@ -306,7 +306,7 @@ tex_storage_error_check(struct gl_context *ctx,
                   "glTex%sStorage%uD(width, height or depth < 1)",
                   suffix, dims);
       return GL_TRUE;
-   }  
+   }
 
    if (_mesa_is_compressed_format(ctx, internalformat)) {
       GLenum err;
@@ -323,7 +323,7 @@ tex_storage_error_check(struct gl_context *ctx,
       _mesa_error(ctx, GL_INVALID_VALUE, "glTex%sStorage%uD(levels < 1)",
                   suffix, dims);
       return GL_TRUE;
-   }  
+   }
 
    /* check levels against maximum (note different error than above) */
    if (levels > (GLint) _mesa_max_texture_levels(ctx, target)) {
@@ -390,7 +390,6 @@ _mesa_texture_storage(struct gl_context *ctx, GLuint dims,
       return; /* error was recorded */
    }
 
-
    texFormat = _mesa_choose_texture_format(ctx, texObj, target, 0,
                                            internalformat, GL_NONE, GL_NONE);
    assert(texFormat != MESA_FORMAT_NONE);
@@ -456,6 +455,7 @@ _mesa_texture_storage(struct gl_context *ctx, GLuint dims,
    }
 }
 
+
 /**
  * Helper used by _mesa_TexStorage1/2/3D().
  */
@@ -466,9 +466,9 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
    struct gl_texture_object *texObj;
    GET_CURRENT_CONTEXT(ctx);
 
-   /* target check */
-   /* This is done here so that _mesa_texture_storage can receive unsized
-    * formats. */
+   /* Check target.  This is done here so that _mesa_texture_storage
+    * can receive unsized formats.
+    */
    if (!legal_texobj_target(ctx, dims, target)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glTexStorage%uD(illegal target=%s)",
@@ -482,6 +482,7 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
                   _mesa_enum_to_string(target), levels,
                   _mesa_enum_to_string(internalformat),
                   width, height, depth);
+
    /* Check the format to make sure it is sized. */
    if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
@@ -498,6 +499,7 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
                          internalformat, width, height, depth, false);
 }
 
+
 /**
  * Helper used by _mesa_TextureStorage1/2/3D().
  */
@@ -531,9 +533,9 @@ texturestorage(GLuint dims, GLuint texture, GLsizei levels,
       return;
    }
 
-   /* target check */
-   /* This is done here so that _mesa_texture_storage can receive unsized
-    * formats. */
+   /* Check target.  This is done here so that _mesa_texture_storage
+    * can receive unsized formats.
+    */
    if (!legal_texobj_target(ctx, dims, texObj->Target)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glTextureStorage%uD(illegal target=%s)",
@@ -545,6 +547,7 @@ texturestorage(GLuint dims, GLuint texture, GLsizei levels,
                          levels, internalformat, width, height, depth, true);
 }
 
+
 void GLAPIENTRY
 _mesa_TexStorage1D(GLenum target, GLsizei levels, GLenum internalformat,
                    GLsizei width)
@@ -568,6 +571,7 @@ _mesa_TexStorage3D(GLenum target, GLsizei levels, GLenum internalformat,
    texstorage(3, target, levels, internalformat, width, height, depth);
 }
 
+
 void GLAPIENTRY
 _mesa_TextureStorage1D(GLuint texture, GLsizei levels, GLenum internalformat,
                        GLsizei width)
@@ -584,6 +588,7 @@ _mesa_TextureStorage2D(GLuint texture, GLsizei levels,
    texturestorage(2, texture, levels, internalformat, width, height, 1);
 }
 
+
 void GLAPIENTRY
 _mesa_TextureStorage3D(GLuint texture, GLsizei levels, GLenum internalformat,
                        GLsizei width, GLsizei height, GLsizei depth)
@@ -637,7 +642,6 @@ _mesa_TextureStorage2DEXT(GLuint texture, GLenum target, GLsizei levels,
 }
 
 
-
 void GLAPIENTRY
 _mesa_TextureStorage3DEXT(GLuint texture, GLenum target, GLsizei levels,
                           GLenum internalformat,
index 033ecb7..e80a9ff 100644
@@ -111,7 +111,8 @@ _mesa_TextureStorage3DEXT(GLuint texture, GLenum target, GLsizei levels,
                           GLsizei width, GLsizei height, GLsizei depth);
 
 extern GLboolean
-_mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat);
+_mesa_is_legal_tex_storage_format(const struct gl_context *ctx,
+                                  GLenum internalformat);
 
 extern GLboolean
 _mesa_AllocTextureStorage_sw(struct gl_context *ctx,
index 5394026..e50964e 100644
@@ -727,19 +727,25 @@ texstore_rgba(TEXSTORE_PARAMS)
        */
       GLint swapSize = _mesa_sizeof_packed_type(srcType);
       if (swapSize == 2 || swapSize == 4) {
-         int bytesPerPixel = _mesa_bytes_per_pixel(srcFormat, srcType);
-         int swapsPerPixel = bytesPerPixel / swapSize;
-         int elementCount = srcWidth * srcHeight * srcDepth;
-         assert(bytesPerPixel % swapSize == 0);
-         tempImage = malloc(elementCount * bytesPerPixel);
+         int imageStride = _mesa_image_image_stride(srcPacking, srcWidth, srcHeight, srcFormat, srcType);
+         int bufferSize = imageStride * srcDepth;
+         int layer;
+         const uint8_t *src;
+         uint8_t *dst;
+
+         tempImage = malloc(bufferSize);
          if (!tempImage)
             return GL_FALSE;
-         if (swapSize == 2)
-            _mesa_swap2_copy(tempImage, (GLushort *) srcAddr,
-                             elementCount * swapsPerPixel);
-         else
-            _mesa_swap4_copy(tempImage, (GLuint *) srcAddr,
-                             elementCount * swapsPerPixel);
+         src = srcAddr;
+         dst = tempImage;
+         for (layer = 0; layer < srcDepth; layer++) {
+            _mesa_swap_bytes_2d_image(srcFormat, srcType,
+                                      srcPacking,
+                                      srcWidth, srcHeight,
+                                      dst, src);
+            src += imageStride;
+            dst += imageStride;
+         }
          srcAddr = tempImage;
       }
    }
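
The rewritten loop swaps one slice at a time, stepping by _mesa_image_image_stride instead of assuming width * height tightly packed pixels. A toy illustration of why the stride can exceed the naive size (the real helper also accounts for GL_UNPACK_IMAGE_HEIGHT and the SKIP_* packing values):

/* Illustrative only: rows pad out to the unpack alignment, so a
 * 10-pixel-wide RGB image with 4-byte alignment has 32-byte rows,
 * not 30, and one slice occupies paddedRow * height bytes. */
static int
image_stride_sketch(int width, int height, int bytesPerPixel, int alignment)
{
   int paddedRow = (width * bytesPerPixel + alignment - 1) & ~(alignment - 1);
   return paddedRow * height;
}
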
index 1026618..0bee594 100644
@@ -873,7 +873,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
                     GLuint cols, GLuint rows,
                      GLint location, GLsizei count,
                      GLboolean transpose,
-                     const GLvoid *values, GLenum type)
+                     const GLvoid *values, enum glsl_base_type basicType)
 {
    unsigned offset;
    unsigned vectors;
@@ -892,8 +892,8 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
       return;
    }
 
-   assert(type == GL_FLOAT || type == GL_DOUBLE);
-   size_mul = type == GL_DOUBLE ? 2 : 1;
+   assert(basicType == GLSL_TYPE_FLOAT || basicType == GLSL_TYPE_DOUBLE);
+   size_mul = basicType == GLSL_TYPE_DOUBLE ? 2 : 1;
 
    assert(!uni->type->is_sampler());
    vectors = uni->type->matrix_columns;
@@ -919,6 +919,31 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
       }
    }
 
+   /* Section 2.11.7 (Uniform Variables) of the OpenGL 4.2 Core Profile spec
+    * says:
+    *
+    *     "If any of the following conditions occur, an INVALID_OPERATION
+    *     error is generated by the Uniform* commands, and no uniform values
+    *     are changed:
+    *
+    *     ...
+    *
+    *     - if the uniform declared in the shader is not of type boolean and
+    *       the type indicated in the name of the Uniform* command used does
+    *       not match the type of the uniform"
+    *
+    * There are no Boolean matrix types, so we do not need to allow
+    * GLSL_TYPE_BOOL here (as _mesa_uniform does).
+    */
+   if (uni->type->base_type != basicType) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glUniformMatrix%ux%u(\"%s\"@%d is %s, not %s)",
+                  cols, rows, uni->name, location,
+                  glsl_type_name(uni->type->base_type),
+                  glsl_type_name(basicType));
+      return;
+   }
+
    if (unlikely(ctx->_Shader->Flags & GLSL_UNIFORMS)) {
       log_uniform(values, uni->type->base_type, components, vectors, count,
                  bool(transpose), shProg, location, uni);
@@ -948,7 +973,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
    if (!transpose) {
       memcpy(&uni->storage[elements * offset], values,
             sizeof(uni->storage[0]) * elements * count * size_mul);
-   } else if (type == GL_FLOAT) {
+   } else if (basicType == GLSL_TYPE_FLOAT) {
       /* Copy and transpose the matrix.
        */
       const float *src = (const float *)values;
@@ -965,7 +990,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
         src += elements;
       }
    } else {
-      assert(type == GL_DOUBLE);
+      assert(basicType == GLSL_TYPE_DOUBLE);
       const double *src = (const double *)values;
       double *dst = (double *)&uni->storage[elements * offset].f;
 
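
In client terms, the added check means the fv and dv matrix entry points are no longer interchangeable for a given uniform. A hedged usage sketch ('prog' and 'u_mat' are illustrative names, assuming a linked program declaring 'uniform dmat4 u_mat;'):

GLfloat  fzeros[16] = { 0.0f };
GLdouble dzeros[16] = { 0.0 };
GLint loc = glGetUniformLocation(prog, "u_mat");

glUniformMatrix4fv(loc, 1, GL_FALSE, fzeros);  /* wrong basic type */
assert(glGetError() == GL_INVALID_OPERATION);  /* per the quoted spec */
glUniformMatrix4dv(loc, 1, GL_FALSE, dzeros);  /* matching type: accepted */
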
index 10819e2..04cc81f 100644
@@ -553,7 +553,7 @@ _mesa_UniformMatrix2fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       2, 2, location, count, transpose, value, GL_FLOAT);
+                       2, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -562,7 +562,7 @@ _mesa_UniformMatrix3fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       3, 3, location, count, transpose, value, GL_FLOAT);
+                       3, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -571,7 +571,7 @@ _mesa_UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       4, 4, location, count, transpose, value, GL_FLOAT);
+                       4, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 /** Same as above with direct state access **/
@@ -683,7 +683,7 @@ _mesa_ProgramUniformMatrix2fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -694,7 +694,7 @@ _mesa_ProgramUniformMatrix3fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -705,7 +705,7 @@ _mesa_ProgramUniformMatrix4fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 
@@ -718,7 +718,7 @@ _mesa_UniformMatrix2x3fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       2, 3, location, count, transpose, value, GL_FLOAT);
+                       2, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -727,7 +727,7 @@ _mesa_UniformMatrix3x2fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       3, 2, location, count, transpose, value, GL_FLOAT);
+                       3, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -736,7 +736,7 @@ _mesa_UniformMatrix2x4fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       2, 4, location, count, transpose, value, GL_FLOAT);
+                       2, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -745,7 +745,7 @@ _mesa_UniformMatrix4x2fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       4, 2, location, count, transpose, value, GL_FLOAT);
+                       4, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -754,7 +754,7 @@ _mesa_UniformMatrix3x4fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       3, 4, location, count, transpose, value, GL_FLOAT);
+                       3, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -763,7 +763,7 @@ _mesa_UniformMatrix4x3fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       4, 3, location, count, transpose, value, GL_FLOAT);
+                       4, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 /** Same as above with direct state access **/
@@ -776,7 +776,7 @@ _mesa_ProgramUniformMatrix2x3fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x3fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -787,7 +787,7 @@ _mesa_ProgramUniformMatrix3x2fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x2fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -798,7 +798,7 @@ _mesa_ProgramUniformMatrix2x4fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x4fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -809,7 +809,7 @@ _mesa_ProgramUniformMatrix4x2fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x2fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -820,7 +820,7 @@ _mesa_ProgramUniformMatrix3x4fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x4fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -831,7 +831,7 @@ _mesa_ProgramUniformMatrix4x3fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x3fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 
@@ -1002,10 +1002,10 @@ _mesa_UniformBlockBinding(GLuint program,
    if (!shProg)
       return;
 
-   if (uniformBlockIndex >= shProg->NumUniformBlocks) {
+   if (uniformBlockIndex >= shProg->NumBufferInterfaceBlocks) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                  "glUniformBlockBinding(block index %u >= %u)",
-                 uniformBlockIndex, shProg->NumUniformBlocks);
+                 uniformBlockIndex, shProg->NumBufferInterfaceBlocks);
       return;
    }
 
@@ -1036,6 +1036,58 @@ _mesa_UniformBlockBinding(GLuint program,
    }
 }
 
+void GLAPIENTRY
+_mesa_ShaderStorageBlockBinding(GLuint program,
+                               GLuint shaderStorageBlockIndex,
+                               GLuint shaderStorageBlockBinding)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg;
+
+   if (!ctx->Extensions.ARB_shader_storage_buffer_object) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glShaderStorageBlockBinding");
+      return;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program,
+                                           "glShaderStorageBlockBinding");
+   if (!shProg)
+      return;
+
+   if (shaderStorageBlockIndex >= shProg->NumBufferInterfaceBlocks) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                 "glShaderStorageBlockBinding(block index %u >= %u)",
+                 shaderStorageBlockIndex, shProg->NumBufferInterfaceBlocks);
+      return;
+   }
+
+   if (shaderStorageBlockBinding >= ctx->Const.MaxShaderStorageBufferBindings) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                 "glShaderStorageBlockBinding(block binding %u >= %u)",
+                 shaderStorageBlockBinding,
+                  ctx->Const.MaxShaderStorageBufferBindings);
+      return;
+   }
+
+   if (shProg->UniformBlocks[shaderStorageBlockIndex].Binding !=
+       shaderStorageBlockBinding) {
+      int i;
+
+      FLUSH_VERTICES(ctx, 0);
+      ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+      shProg->UniformBlocks[shaderStorageBlockIndex].Binding = shaderStorageBlockBinding;
+
+      for (i = 0; i < MESA_SHADER_STAGES; i++) {
+        int stage_index = shProg->UniformBlockStageIndex[i][shaderStorageBlockIndex];
+
+        if (stage_index != -1) {
+           struct gl_shader *sh = shProg->_LinkedShaders[i];
+           sh->UniformBlocks[stage_index].Binding = shaderStorageBlockBinding;
+        }
+      }
+   }
+}
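
A hedged client-side usage sketch for the new entry point ('prog', 'ssbo_buf' and the block name are illustrative; the resource query comes from ARB_program_interface_query):

GLuint idx = glGetProgramResourceIndex(prog, GL_SHADER_STORAGE_BLOCK,
                                       "ssbo_block");
if (idx != GL_INVALID_INDEX) {
   /* Route the block to binding point 3, then attach a buffer there. */
   glShaderStorageBlockBinding(prog, idx, 3);
   glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, ssbo_buf);
}
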
 
 /**
  * Generic program resource property query.
@@ -1303,7 +1355,7 @@ _mesa_UniformMatrix2dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       2, 2, location, count, transpose, value, GL_DOUBLE);
+                       2, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1312,7 +1364,7 @@ _mesa_UniformMatrix3dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       3, 3, location, count, transpose, value, GL_DOUBLE);
+                       3, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1321,7 +1373,7 @@ _mesa_UniformMatrix4dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       4, 4, location, count, transpose, value, GL_DOUBLE);
+                       4, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1330,7 +1382,7 @@ _mesa_UniformMatrix2x3dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       2, 3, location, count, transpose, value, GL_DOUBLE);
+                       2, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1339,7 +1391,7 @@ _mesa_UniformMatrix3x2dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       3, 2, location, count, transpose, value, GL_DOUBLE);
+                       3, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1348,7 +1400,7 @@ _mesa_UniformMatrix2x4dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       2, 4, location, count, transpose, value, GL_DOUBLE);
+                       2, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1357,7 +1409,7 @@ _mesa_UniformMatrix4x2dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       4, 2, location, count, transpose, value, GL_DOUBLE);
+                       4, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1366,7 +1418,7 @@ _mesa_UniformMatrix3x4dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       3, 4, location, count, transpose, value, GL_DOUBLE);
+                       3, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1375,7 +1427,7 @@ _mesa_UniformMatrix4x3dv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-                       4, 3, location, count, transpose, value, GL_DOUBLE);
+                       4, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1481,7 +1533,7 @@ _mesa_ProgramUniformMatrix2dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1492,7 +1544,7 @@ _mesa_ProgramUniformMatrix3dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1503,7 +1555,7 @@ _mesa_ProgramUniformMatrix4dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1514,7 +1566,7 @@ _mesa_ProgramUniformMatrix2x3dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x3dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1525,7 +1577,7 @@ _mesa_ProgramUniformMatrix3x2dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x2dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1536,7 +1588,7 @@ _mesa_ProgramUniformMatrix2x4dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x4dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1547,7 +1599,7 @@ _mesa_ProgramUniformMatrix4x2dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x2dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1558,7 +1610,7 @@ _mesa_ProgramUniformMatrix3x4dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x4dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1569,5 +1621,5 @@ _mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x3dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
index e62eaa5..96172b7 100644
@@ -225,6 +225,10 @@ _mesa_UniformBlockBinding(GLuint program,
                          GLuint uniformBlockIndex,
                          GLuint uniformBlockBinding);
 void GLAPIENTRY
+_mesa_ShaderStorageBlockBinding(GLuint program,
+                                GLuint shaderStorageBlockIndex,
+                                GLuint shaderStorageBlockBinding);
+void GLAPIENTRY
 _mesa_GetActiveAtomicCounterBufferiv(GLuint program, GLuint bufferIndex,
                                      GLenum pname, GLint *params);
 void GLAPIENTRY
@@ -355,7 +359,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
                     GLuint cols, GLuint rows,
                      GLint location, GLsizei count,
                      GLboolean transpose,
-                     const GLvoid *values, GLenum type);
+                     const GLvoid *values, enum glsl_base_type basicType);
 
 void
 _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
index 3bab985..4df57c1 100644
@@ -657,7 +657,7 @@ _mesa_PointSizePointerOES(GLenum type, GLsizei stride, const GLvoid *ptr)
                   "glPointSizePointer(ES 1.x only)");
       return;
    }
-      
+
    update_array(ctx, "glPointSizePointer", VERT_ATTRIB_POINT_SIZE,
                 legalTypes, 1, 1,
                 1, type, stride, GL_FALSE, GL_FALSE, GL_FALSE, ptr);
@@ -899,12 +899,12 @@ get_vertex_array_attrib(struct gl_context *ctx,
       }
       goto error;
    case GL_VERTEX_ATTRIB_BINDING:
-      if (_mesa_is_desktop_gl(ctx)) {
+      if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles31(ctx)) {
          return array->VertexBinding - VERT_ATTRIB_GENERIC0;
       }
       goto error;
    case GL_VERTEX_ATTRIB_RELATIVE_OFFSET:
-      if (_mesa_is_desktop_gl(ctx)) {
+      if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles31(ctx)) {
          return array->RelativeOffset;
       }
       goto error;
@@ -933,7 +933,8 @@ get_current_attrib(struct gl_context *ctx, GLuint index, const char *function)
       return NULL;
    }
 
-   assert(VERT_ATTRIB_GENERIC(index) < ARRAY_SIZE(ctx->Array.VAO->VertexAttrib));
+   assert(VERT_ATTRIB_GENERIC(index) <
+          ARRAY_SIZE(ctx->Array.VAO->VertexAttrib));
 
    FLUSH_CURRENT(ctx, 0);
    return ctx->Current.Attrib[VERT_ATTRIB_GENERIC(index)];
@@ -985,7 +986,9 @@ _mesa_GetVertexAttribLdv(GLuint index, GLenum pname, GLdouble *params)
    GET_CURRENT_CONTEXT(ctx);
 
    if (pname == GL_CURRENT_VERTEX_ATTRIB_ARB) {
-      const GLdouble *v = (const GLdouble *)get_current_attrib(ctx, index, "glGetVertexAttribLdv");
+      const GLdouble *v =
+         (const GLdouble *)get_current_attrib(ctx, index,
+                                              "glGetVertexAttribLdv");
       if (v != NULL) {
          params[0] = v[0];
          params[1] = v[1];
@@ -1080,9 +1083,11 @@ _mesa_GetVertexAttribPointerv(GLuint index, GLenum pname, GLvoid **pointer)
       return;
    }
 
-   assert(VERT_ATTRIB_GENERIC(index) < ARRAY_SIZE(ctx->Array.VAO->VertexAttrib));
+   assert(VERT_ATTRIB_GENERIC(index) <
+          ARRAY_SIZE(ctx->Array.VAO->VertexAttrib));
 
-   *pointer = (GLvoid *) ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_GENERIC(index)].Ptr;
+   *pointer = (GLvoid *)
+      ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_GENERIC(index)].Ptr;
 }
 
 
@@ -1193,8 +1198,8 @@ _mesa_GetVertexArrayIndexed64iv(GLuint vaobj, GLuint index,
     * required to be the same, so in practice this doesn't matter.
     */
    if (index >= ctx->Const.MaxVertexAttribBindings) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexArrayIndexed64iv("
-                  "index %d >= the value of GL_MAX_VERTEX_ATTRIB_BINDINGS (%d))",
+      _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexArrayIndexed64iv(index "
+                  "%d >= the value of GL_MAX_VERTEX_ATTRIB_BINDINGS (%d))",
                   index, ctx->Const.MaxVertexAttribBindings);
       return;
    }
@@ -1637,7 +1642,8 @@ _mesa_primitive_restart_index(const struct gl_context *ctx, GLenum ib_type)
  * GL_ARB_vertex_attrib_binding
  */
 static void
-vertex_array_vertex_buffer(struct gl_context *ctx, struct gl_vertex_array_object *vao,
+vertex_array_vertex_buffer(struct gl_context *ctx,
+                           struct gl_vertex_array_object *vao,
                            GLuint bindingIndex, GLuint buffer, GLintptr offset,
                            GLsizei stride, const char *func)
 {
@@ -1676,14 +1682,15 @@ vertex_array_vertex_buffer(struct gl_context *ctx, struct gl_vertex_array_object
       return;
    }
 
-   if (ctx->API == API_OPENGL_CORE && ctx->Version >= 44 &&
+   if (((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) || _mesa_is_gles31(ctx)) &&
        stride > ctx->Const.MaxVertexAttribStride) {
       _mesa_error(ctx, GL_INVALID_VALUE, "%s(stride=%d > "
                   "GL_MAX_VERTEX_ATTRIB_STRIDE)", func, stride);
       return;
    }
 
-   if (buffer == vao->VertexBinding[VERT_ATTRIB_GENERIC(bindingIndex)].BufferObj->Name) {
+   if (buffer ==
+       vao->VertexBinding[VERT_ATTRIB_GENERIC(bindingIndex)].BufferObj->Name) {
       vbo = vao->VertexBinding[VERT_ATTRIB_GENERIC(bindingIndex)].BufferObj;
    } else if (buffer != 0) {
       vbo = _mesa_lookup_bufferobj(ctx, buffer);
@@ -1698,8 +1705,7 @@ vertex_array_vertex_buffer(struct gl_context *ctx, struct gl_vertex_array_object
        * Otherwise, we fall back to the same compat profile behavior as other
        * object references (automatically gen it).
        */
-      if (!_mesa_handle_bind_buffer_gen(ctx, GL_ARRAY_BUFFER, buffer,
-                                        &vbo, func))
+      if (!_mesa_handle_bind_buffer_gen(ctx, buffer, &vbo, func))
          return;
    } else {
       /* The ARB_vertex_attrib_binding spec says:
@@ -1726,7 +1732,7 @@ _mesa_BindVertexBuffer(GLuint bindingIndex, GLuint buffer, GLintptr offset,
     *    "An INVALID_OPERATION error is generated if no vertex array object
     *     is bound."
     */
-   if (ctx->API == API_OPENGL_CORE &&
+   if ((ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) &&
        ctx->Array.VAO == ctx->Array.DefaultVAO) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glBindVertexBuffer(No array object bound)");
@@ -1948,7 +1954,7 @@ vertex_attrib_format(GLuint attribIndex, GLint size, GLenum type,
     * is an oversight.  In the OpenGL 4.3 (Core Profile) spec, it applies
     * to all three functions.
     */
-   if (ctx->API == API_OPENGL_CORE &&
+   if ((ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) &&
        ctx->Array.VAO == ctx->Array.DefaultVAO) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "%s(No array object bound)", func);
@@ -2136,7 +2142,7 @@ _mesa_VertexAttribBinding(GLuint attribIndex, GLuint bindingIndex)
     *    "An INVALID_OPERATION error is generated if no vertex array object
     *     is bound."
     */
-   if (ctx->API == API_OPENGL_CORE &&
+   if ((ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) &&
        ctx->Array.VAO == ctx->Array.DefaultVAO) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glVertexAttribBinding(No array object bound)");
@@ -2210,7 +2216,7 @@ _mesa_VertexBindingDivisor(GLuint bindingIndex, GLuint divisor)
     *    "An INVALID_OPERATION error is generated if no vertex array object
     *     is bound."
     */
-   if (ctx->API == API_OPENGL_CORE &&
+   if ((ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) &&
        ctx->Array.VAO == ctx->Array.DefaultVAO) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glVertexBindingDivisor(No array object bound)");
@@ -2224,7 +2230,8 @@ _mesa_VertexBindingDivisor(GLuint bindingIndex, GLuint divisor)
 
 
 void GLAPIENTRY
-_mesa_VertexArrayBindingDivisor(GLuint vaobj, GLuint bindingIndex, GLuint divisor)
+_mesa_VertexArrayBindingDivisor(GLuint vaobj, GLuint bindingIndex,
+                                GLuint divisor)
 {
    struct gl_vertex_array_object *vao;
    GET_CURRENT_CONTEXT(ctx);
@@ -2344,7 +2351,7 @@ _mesa_print_arrays(struct gl_context *ctx)
 /**
  * Initialize vertex array state for given context.
  */
-void 
+void
 _mesa_init_varray(struct gl_context *ctx)
 {
    ctx->Array.DefaultVAO = ctx->Driver.NewArrayObject(ctx, 0);
@@ -2370,7 +2377,7 @@ delete_arrayobj_cb(GLuint id, void *data, void *userData)
 /**
  * Free vertex array state for given context.
  */
-void 
+void
 _mesa_free_varray_data(struct gl_context *ctx)
 {
    _mesa_HashDeleteAll(ctx->Array.Objects, delete_arrayobj_cb, ctx);
index f811c1a..498b2f8 100644
@@ -451,7 +451,7 @@ compute_version_es2(const struct gl_extensions *extensions)
                          extensions->ARB_compute_shader &&
                          extensions->ARB_draw_indirect &&
                          extensions->ARB_explicit_uniform_location &&
-                         false /*extensions->ARB_framebuffer_no_attachments*/ &&
+                         extensions->ARB_framebuffer_no_attachments &&
                          extensions->ARB_shader_atomic_counters &&
                          extensions->ARB_shader_image_load_store &&
                          extensions->ARB_shader_image_size &&
index b8b082e..1cfcf91 100644
@@ -1345,9 +1345,11 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
    case ir_unop_dFdy_coarse:
    case ir_unop_dFdy_fine:
    case ir_unop_subroutine_to_int:
+   case ir_unop_get_buffer_size:
       assert(!"not supported");
       break;
 
+   case ir_unop_ssbo_unsized_array_length:
    case ir_quadop_vector:
       /* This operation should have already been handled.
        */
@@ -1920,6 +1922,8 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
    case ir_query_levels:
       assert(!"Unexpected ir_query_levels opcode");
       break;
+   case ir_texture_samples:
+      unreachable("Unexpected ir_texture_samples opcode");
    }
 
    const glsl_type *sampler_type = ir->sampler->type;
@@ -2981,7 +2985,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       if (!ctx->Driver.LinkShader(ctx, prog)) {
         prog->LinkStatus = GL_FALSE;
       } else {
-         build_program_resource_list(ctx, prog);
+         build_program_resource_list(prog);
       }
    }
 
index fccd16f..1bd735a 100644
@@ -527,8 +527,7 @@ ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 static void
 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
 {
-   nir_ssa_def *dp3 = nir_fdot3(b, src[0], src[1]);
-   ptn_move_dest(b, dest, nir_fadd(b, dp3, ptn_channel(b, src[1], W)));
+   ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
 }
 
 static void
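
nir_fdph folds the fdot3 + fadd pair above into a single homogeneous dot product. As a scalar reference (an illustrative helper, not NIR API):

/* dph(a, b) = a.x*b.x + a.y*b.y + a.z*b.z + b.w,
 * i.e. dot(a.xyz, b.xyz) with a.w implicitly treated as 1.0. */
static float
dph(const float a[4], const float b[4])
{
   return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
}
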
@@ -1056,7 +1055,7 @@ setup_registers_and_variables(struct ptn_compile *c)
    c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);
 
    nir_register *reg;
-   for (int i = 0; i < c->prog->NumTemporaries; i++) {
+   for (unsigned i = 0; i < c->prog->NumTemporaries; i++) {
       reg = nir_local_reg_create(b->impl);
       if (!reg) {
          c->error = true;
index 2d03bba..e94c102 100644
@@ -226,6 +226,7 @@ init_program_struct(struct gl_program *prog, GLenum target, GLuint id)
    assert(prog);
 
    memset(prog, 0, sizeof(*prog));
+   mtx_init(&prog->Mutex, mtx_plain);
    prog->Id = id;
    prog->Target = target;
    prog->RefCount = 1;
@@ -418,6 +419,7 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog)
       ralloc_free(prog->nir);
    }
 
+   mtx_destroy(&prog->Mutex);
    free(prog);
 }
 
@@ -463,24 +465,18 @@ _mesa_reference_program_(struct gl_context *ctx,
 
    if (*ptr) {
       GLboolean deleteFlag;
+      struct gl_program *oldProg = *ptr;
 
-      /*mtx_lock(&(*ptr)->Mutex);*/
-#if 0
-      printf("Program %p ID=%u Target=%s  Refcount-- to %d\n",
-             *ptr, (*ptr)->Id,
-             ((*ptr)->Target == GL_VERTEX_PROGRAM_ARB ? "VP" :
-              ((*ptr)->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")),
-             (*ptr)->RefCount - 1);
-#endif
-      assert((*ptr)->RefCount > 0);
-      (*ptr)->RefCount--;
+      mtx_lock(&oldProg->Mutex);
+      assert(oldProg->RefCount > 0);
+      oldProg->RefCount--;
 
-      deleteFlag = ((*ptr)->RefCount == 0);
-      /*mtx_lock(&(*ptr)->Mutex);*/
+      deleteFlag = (oldProg->RefCount == 0);
+      mtx_unlock(&oldProg->Mutex);
 
       if (deleteFlag) {
          assert(ctx);
-         ctx->Driver.DeleteProgram(ctx, *ptr);
+         ctx->Driver.DeleteProgram(ctx, oldProg);
       }
 
       *ptr = NULL;
@@ -488,16 +484,9 @@ _mesa_reference_program_(struct gl_context *ctx,
 
    assert(!*ptr);
    if (prog) {
-      /*mtx_lock(&prog->Mutex);*/
+      mtx_lock(&prog->Mutex);
       prog->RefCount++;
-#if 0
-      printf("Program %p ID=%u Target=%s  Refcount++ to %d\n",
-             prog, prog->Id,
-             (prog->Target == GL_VERTEX_PROGRAM_ARB ? "VP" :
-              (prog->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")),
-             prog->RefCount);
-#endif
-      /*mtx_unlock(&prog->Mutex);*/
+      mtx_unlock(&prog->Mutex);
    }
 
    *ptr = prog;
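
[Note: the hunks above replace the commented-out locking and the #if 0 debug printfs with real mtx_lock/mtx_unlock pairs around the reference count, deciding deletion from a snapshot taken while the lock was held. A minimal standalone sketch of the same pattern follows, assuming C11 <threads.h>, which the mtx_* calls mirror; struct and function names are illustrative, not Mesa's.]

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <threads.h>

    struct object {
        mtx_t mutex;
        int refcount;
    };

    static struct object *object_create(void)
    {
        struct object *obj = calloc(1, sizeof(*obj));
        if (!obj)
            abort();
        mtx_init(&obj->mutex, mtx_plain);
        obj->refcount = 1;
        return obj;
    }

    /* In the style of the hunk above: unref the old object, taking the
     * delete decision from a snapshot made under its lock, then ref the
     * new one under its own lock. */
    static void object_reference(struct object **ptr, struct object *obj)
    {
        if (*ptr) {
            struct object *old = *ptr;
            bool delete_flag;

            mtx_lock(&old->mutex);
            assert(old->refcount > 0);
            old->refcount--;
            delete_flag = (old->refcount == 0);
            mtx_unlock(&old->mutex);

            if (delete_flag) {
                mtx_destroy(&old->mutex);
                free(old);
            }
            *ptr = NULL;
        }

        if (obj) {
            mtx_lock(&obj->mutex);
            obj->refcount++;
            mtx_unlock(&obj->mutex);
        }
        *ptr = obj;
    }

    int main(void)
    {
        struct object *a = object_create();   /* refcount 1 */
        struct object *ref = NULL;

        object_reference(&ref, a);            /* refcount 2 */
        object_reference(&ref, NULL);         /* back to 1 */
        object_reference(&a, NULL);           /* 0: destroyed */
        printf("done\n");
        return 0;
    }
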
index 56b8019..0847184 100644 (file)
@@ -567,8 +567,12 @@ setup_non_interleaved_attribs(struct st_context *st,
       unsigned src_format;
 
       array = get_client_array(vp, arrays, attr);
-      if (!array)
+      if (!array) {
+         vbuffer[attr].buffer = NULL;
+         vbuffer[attr].user_buffer = NULL;
+         vbuffer[attr].buffer_offset = 0;
          continue;
+      }
 
       stride = array->StrideB;
       bufobj = array->BufferObj;
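
[Note: the added else-branch clears the vertex-buffer slots that have no client array instead of skipping them uninitialized. The array is later consumed wholesale, so skipped slots could otherwise carry stale pointers from an earlier draw. A tiny standalone illustration; the struct layout and have_array() predicate are made up, not the gallium types.]

    #include <stdio.h>
    #include <string.h>

    struct vertex_buffer {
        void *buffer;
        const void *user_buffer;
        unsigned buffer_offset;
    };

    static int have_array(int attr)
    {
        return attr % 2 == 0;   /* made-up predicate */
    }

    int main(void)
    {
        struct vertex_buffer vbuffer[4];
        static float data[16];

        memset(vbuffer, 0xab, sizeof(vbuffer));  /* garbage from a prior draw */

        for (int attr = 0; attr < 4; attr++) {
            if (!have_array(attr)) {
                /* Clear the slot rather than leaving stale bytes. */
                vbuffer[attr].buffer = NULL;
                vbuffer[attr].user_buffer = NULL;
                vbuffer[attr].buffer_offset = 0;
                continue;
            }
            vbuffer[attr].buffer = NULL;
            vbuffer[attr].user_buffer = data;    /* client-memory binding */
            vbuffer[attr].buffer_offset = 0;
        }
        printf("slot 1 buffer = %p\n", vbuffer[1].buffer);  /* NULL now */
        return 0;
    }
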
index 31e0f6b..3e37752 100644 (file)
@@ -274,8 +274,8 @@ st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
          return NULL;
       size = MIN2(stObj->pt->width0 - base, (unsigned)stObj->base.BufferSize);
 
-      f = ((base * 8) / desc->block.bits) * desc->block.width;
-      n = ((size * 8) / desc->block.bits) * desc->block.width;
+      f = (base / (desc->block.bits / 8)) * desc->block.width;
+      n = (size / (desc->block.bits / 8)) * desc->block.width;
       if (!n)
          return NULL;
       templ.u.buf.first_element = f;
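
[Note: the first/last-element computation now divides by bytes per block before scaling, instead of multiplying the byte offset by 8 first. The two forms agree whenever block.bits is a multiple of 8, so the rewrite presumably exists to avoid 32-bit overflow of base * 8 for large buffer offsets. A standalone check of that reading, with illustrative values:]

    #include <stdio.h>

    int main(void)
    {
        /* With 32-bit unsigned arithmetic, base * 8 wraps once base
         * exceeds 512 MiB, while dividing by bytes-per-block first
         * stays in range. */
        unsigned base = 0x30000000u;   /* 768 MiB offset into the buffer */
        unsigned block_bits = 128;     /* e.g. a 16-byte block format */

        unsigned old_way = (base * 8) / block_bits;   /* wraps: wrong */
        unsigned new_way = base / (block_bits / 8);   /* correct */

        printf("old = %u, new = %u\n", old_way, new_way);
        return 0;
    }
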
index 01a96c1..230eba8 100644 (file)
@@ -349,8 +349,9 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,
       tBot = (GLfloat) height;
    }
 
-   if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
-                      vbuf_offset, vbuf, (void **) &vertices) != PIPE_OK) {
+   u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
+                  vbuf_offset, vbuf, (void **) &vertices);
+   if (!*vbuf) {
       return;
    }
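
[Note: this and the following hunks adapt callers to a u_upload_alloc/u_upload_data that no longer returns a PIPE_OK-style code in this series; failure is detected by the buffer out-pointer staying NULL. A generic sketch of that out-parameter convention follows; the allocator here is hypothetical, not the real u_upload_mgr signature.]

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical allocator in the style of the new u_upload_alloc:
     * failure is signalled by leaving the out-pointers NULL rather
     * than by a return value. */
    static void alloc_upload(size_t size, unsigned *out_offset,
                             void **out_buf, void **out_ptr)
    {
        *out_offset = 0;
        *out_buf = malloc(size);
        *out_ptr = *out_buf;   /* NULL on failure, like the buffer */
    }

    int main(void)
    {
        unsigned offset;
        void *buf, *ptr;

        alloc_upload(64, &offset, &buf, &ptr);
        if (!buf) {            /* the new-style error check */
            fprintf(stderr, "out of memory\n");
            return 1;
        }
        /* ... write vertices through ptr ... */
        free(buf);
        return 0;
    }
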
 
index 4fdef7f..a05a5af 100644 (file)
@@ -229,6 +229,7 @@ st_BlitFramebuffer(struct gl_context *ctx,
                   st_adjust_blit_for_msaa_resolve(&blit);
 
                   st->pipe->blit(st->pipe, &blit);
+                  dstRb->defined = true; /* front buffer tracking */
                }
             }
          }
@@ -266,6 +267,7 @@ st_BlitFramebuffer(struct gl_context *ctx,
                   st_adjust_blit_for_msaa_resolve(&blit);
 
                   st->pipe->blit(st->pipe, &blit);
+                  dstRb->defined = true; /* front buffer tracking */
                }
             }
          }
index 137fac8..18efd14 100644 (file)
@@ -184,9 +184,10 @@ draw_quad(struct st_context *st,
 
    vb.stride = 8 * sizeof(float);
 
-   if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
-                      &vb.buffer_offset, &vb.buffer,
-                      (void **) &vertices) != PIPE_OK) {
+   u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
+                  &vb.buffer_offset, &vb.buffer,
+                  (void **) &vertices);
+   if (!vb.buffer) {
       return;
    }
 
index b372697..152160e 100644 (file)
@@ -580,8 +580,9 @@ draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z,
    struct pipe_resource *buf = NULL;
    unsigned offset;
 
-   if (u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset,
-                      &buf, (void **) &verts) != PIPE_OK) {
+   u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset,
+                  &buf, (void **) &verts);
+   if (!buf) {
       return;
    }
 
index 2af4f6d..2634b09 100644 (file)
@@ -149,9 +149,10 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
       GLfloat *vbuf = NULL;
       GLuint attr;
 
-      if (u_upload_alloc(st->uploader, 0,
-                         numAttribs * 4 * 4 * sizeof(GLfloat),
-                         &offset, &vbuffer, (void **) &vbuf) != PIPE_OK) {
+      u_upload_alloc(st->uploader, 0,
+                     numAttribs * 4 * 4 * sizeof(GLfloat),
+                     &offset, &vbuffer, (void **) &vbuf);
+      if (!vbuffer) {
          return;
       }
       
index 5707590..9d06a23 100644 (file)
@@ -388,17 +388,6 @@ st_new_renderbuffer_fb(enum pipe_format format, int samples, boolean sw)
 
 
 /**
- * Called via ctx->Driver.BindFramebufferEXT().
- */
-static void
-st_bind_framebuffer(struct gl_context *ctx, GLenum target,
-                    struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
-{
-   /* no-op */
-}
-
-
-/**
  * Create or update the pipe_surface of a FBO renderbuffer.
  * This is usually called after st_finalize_texture.
  */
@@ -839,7 +828,6 @@ void st_init_fbo_functions(struct dd_function_table *functions)
 {
    functions->NewFramebuffer = st_new_framebuffer;
    functions->NewRenderbuffer = st_new_renderbuffer;
-   functions->BindFramebuffer = st_bind_framebuffer;
    functions->FramebufferRenderbuffer = _mesa_FramebufferRenderbuffer_sw;
    functions->RenderTexture = st_render_texture;
    functions->FinishRenderTexture = st_finish_render_texture;
index 71222e8..aafae16 100644 (file)
@@ -289,9 +289,18 @@ st_CheckQuery(struct gl_context *ctx, struct gl_query_object *q)
 static uint64_t
 st_GetTimestamp(struct gl_context *ctx)
 {
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
+   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_screen *screen = pipe->screen;
 
-   return screen->get_timestamp(screen);
+   /* Prefer the per-screen function */
+   if (screen->get_timestamp) {
+      return screen->get_timestamp(screen);
+   }
+   else {
+      /* Fall back to the per-context function */
+      assert(pipe->get_timestamp);
+      return pipe->get_timestamp(pipe);
+   }
 }
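
[Note: st_GetTimestamp now prefers the screen-wide hook and falls back to a per-context one. The same optional-function-pointer fallback in isolation, as a standalone sketch; the struct names and the clock()-based stand-in are illustrative, not gallium's types.]

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    struct screen { uint64_t (*get_timestamp)(struct screen *); };
    struct context {
        struct screen *screen;
        uint64_t (*get_timestamp)(struct context *);
    };

    static uint64_t ctx_timestamp(struct context *ctx)
    {
        (void) ctx;
        return (uint64_t) clock();
    }

    static uint64_t get_timestamp(struct context *ctx)
    {
        /* Prefer the per-screen hook when present. */
        if (ctx->screen->get_timestamp)
            return ctx->screen->get_timestamp(ctx->screen);
        /* Fall back to the per-context hook; one of the two must exist. */
        assert(ctx->get_timestamp);
        return ctx->get_timestamp(ctx);
    }

    int main(void)
    {
        struct screen scr = { .get_timestamp = NULL };
        struct context ctx = { .screen = &scr,
                               .get_timestamp = ctx_timestamp };
        printf("%llu\n", (unsigned long long) get_timestamp(&ctx));
        return 0;
    }
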
 
 
index 6ff6cf6..bb36e69 100644 (file)
@@ -238,9 +238,9 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
       GLuint row;
 
       for (row = 0; row < (unsigned) height; row++) {
-         GLvoid *dest = _mesa_image_address3d(pack, pixels,
+         GLvoid *dest = _mesa_image_address2d(pack, pixels,
                                               width, height, format,
-                                              type, 0, row, 0);
+                                              type, row, 0);
          memcpy(dest, map, bytesPerRow);
          map += tex_xfer->stride;
       }
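
[Note: the destination address for this 2D readback is now computed with the 2D image-address helper, passing (row, 0) directly instead of routing a zero slice through the 3D variant. A standalone sketch of the resulting row-by-row copy; the strides and sizes are made up for illustration.]

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        unsigned char src[4 * 16], dst[4 * 16] = { 0 };
        const unsigned height = 4, bytes_per_row = 12;
        const unsigned src_stride = 16, dst_stride = 16;

        for (unsigned i = 0; i < sizeof(src); i++)
            src[i] = (unsigned char) i;

        for (unsigned row = 0; row < height; row++) {
            /* 2D addressing: base + row * stride, no slice term. */
            unsigned char *dest = dst + row * dst_stride;
            memcpy(dest, src + row * src_stride, bytes_per_row);
        }
        printf("dst[16] == src[16]: %d\n", dst[16] == src[16]);
        return 0;
    }
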
index 93335ae..5d25fed 100644 (file)
@@ -1879,22 +1879,45 @@ st_TextureView(struct gl_context *ctx,
 static void
 st_CopyImageSubData(struct gl_context *ctx,
                     struct gl_texture_image *src_image,
+                    struct gl_renderbuffer *src_renderbuffer,
                     int src_x, int src_y, int src_z,
                     struct gl_texture_image *dst_image,
+                    struct gl_renderbuffer *dst_renderbuffer,
                     int dst_x, int dst_y, int dst_z,
                     int src_width, int src_height)
 {
    struct st_context *st = st_context(ctx);
    struct pipe_context *pipe = st->pipe;
-   struct st_texture_image *src = st_texture_image(src_image);
-   struct st_texture_image *dst = st_texture_image(dst_image);
-
+   struct pipe_resource *src_res, *dst_res;
    struct pipe_box box;
+   int src_level, dst_level;
+
+   if (src_image) {
+      struct st_texture_image *src = st_texture_image(src_image);
+      src_res = src->pt;
+      src_level = src_image->Level;
+   }
+   else {
+      struct st_renderbuffer *src = st_renderbuffer(src_renderbuffer);
+      src_res = src->texture;
+      src_level = 0;
+   }
+
+   if (dst_image) {
+      struct st_texture_image *dst = st_texture_image(dst_image);
+      dst_res = dst->pt;
+      dst_level = dst_image->Level;
+   }
+   else {
+      struct st_renderbuffer *dst = st_renderbuffer(dst_renderbuffer);
+      dst_res = dst->texture;
+      dst_level = 0;
+   }
 
    u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box);
-   pipe->resource_copy_region(pipe, dst->pt, dst_image->Level,
+   pipe->resource_copy_region(pipe, dst_res, dst_level,
                               dst_x, dst_y, dst_z,
-                              src->pt, src_image->Level,
+                              src_res, src_level,
                               &box);
 }
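
[Note: st_CopyImageSubData gains renderbuffer sources and destinations. Each side is first reduced to a (pipe resource, mip level) pair, with renderbuffers pinned to level 0 since they are never mipmapped. A sketch of that resolution step in isolation; the struct layouts are stand-ins, not Mesa's.]

    #include <stdio.h>

    struct resource { const char *name; };
    struct tex_image { struct resource *res; int level; };
    struct renderbuffer { struct resource *res; };

    static void resolve(const struct tex_image *img,
                        const struct renderbuffer *rb,
                        struct resource **out_res, int *out_level)
    {
        if (img) {
            *out_res = img->res;
            *out_level = img->level;
        } else {
            /* Renderbuffers have no mip chain: level 0 by definition. */
            *out_res = rb->res;
            *out_level = 0;
        }
    }

    int main(void)
    {
        struct resource tex = { "texture" }, rbres = { "renderbuffer" };
        struct tex_image img = { &tex, 3 };
        struct renderbuffer rb = { &rbres };
        struct resource *res;
        int level;

        resolve(&img, NULL, &res, &level);
        printf("%s level %d\n", res->name, level);  /* texture level 3 */
        resolve(NULL, &rb, &res, &level);
        printf("%s level %d\n", res->name, level);  /* renderbuffer level 0 */
        return 0;
    }
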
 
index 957fcfd..2ad679b 100644 (file)
@@ -106,9 +106,10 @@ setup_index_buffer(struct st_context *st,
    }
    else if (st->indexbuf_uploader) {
       /* upload indexes from user memory into a real buffer */
-      if (u_upload_data(st->indexbuf_uploader, 0,
-                        ib->count * ibuffer->index_size, ib->ptr,
-                        &ibuffer->offset, &ibuffer->buffer) != PIPE_OK) {
+      u_upload_data(st->indexbuf_uploader, 0,
+                    ib->count * ibuffer->index_size, ib->ptr,
+                    &ibuffer->offset, &ibuffer->buffer);
+      if (!ibuffer->buffer) {
          /* out of memory */
          return FALSE;
       }
index 17f572f..e290292 100644 (file)
@@ -449,6 +449,7 @@ void st_init_extensions(struct pipe_screen *screen,
       { o(ARB_point_sprite),                 PIPE_CAP_POINT_SPRITE                     },
       { o(ARB_seamless_cube_map),            PIPE_CAP_SEAMLESS_CUBE_MAP                },
       { o(ARB_shader_stencil_export),        PIPE_CAP_SHADER_STENCIL_EXPORT            },
+      { o(ARB_shader_texture_image_samples), PIPE_CAP_TGSI_TXQS                        },
       { o(ARB_shader_texture_lod),           PIPE_CAP_SM3                              },
       { o(ARB_shadow),                       PIPE_CAP_TEXTURE_SHADOW_MAP               },
       { o(ARB_texture_buffer_object),        PIPE_CAP_TEXTURE_BUFFER_OBJECTS           },
@@ -873,8 +874,13 @@ void st_init_extensions(struct pipe_screen *screen,
 
    consts->MaxViewports = screen->get_param(screen, PIPE_CAP_MAX_VIEWPORTS);
    if (consts->MaxViewports >= 16) {
-      consts->ViewportBounds.Min = -16384.0;
-      consts->ViewportBounds.Max = 16384.0;
+      if (glsl_feature_level >= 400) {
+         consts->ViewportBounds.Min = -32768.0;
+         consts->ViewportBounds.Max = 32767.0;
+      } else {
+         consts->ViewportBounds.Min = -16384.0;
+         consts->ViewportBounds.Max = 16383.0;
+      }
       extensions->ARB_viewport_array = GL_TRUE;
       extensions->ARB_fragment_layer_viewport = GL_TRUE;
       if (extensions->AMD_vertex_shader_layer)
index db74184..144b7d6 100644 (file)
@@ -34,6 +34,8 @@
 
 #include "main/imports.h"
 #include "main/context.h"
+#include "main/enums.h"
+#include "main/formats.h"
 #include "main/glformats.h"
 #include "main/texgetimage.h"
 #include "main/teximage.h"
@@ -1270,46 +1272,40 @@ static const struct format_mapping format_map[] = {
    /* 32-bit float formats */
    {
       { GL_RGBA32F_ARB, 0 },
-      { PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_RGB32F_ARB, 0 },
       { PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32X32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_LUMINANCE_ALPHA32F_ARB, 0 },
-      { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_ALPHA32F_ARB, 0 },
       { PIPE_FORMAT_A32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_A16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_INTENSITY32F_ARB, 0 },
       { PIPE_FORMAT_I32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_I16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_LUMINANCE32F_ARB, 0 },
       { PIPE_FORMAT_L32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_L16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_R32F, 0 },
       { PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16_FLOAT,
-        PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_RG32F, 0 },
-      { PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT,
-        PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
 
    /* R, RG formats */
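
[Note: the format_map edits above drop the 16-bit float fallbacks from the 32-bit float internal formats, so a GL_*32F request can no longer silently degrade to half float. Each table entry is an ordered candidate list and the first format the screen supports wins, roughly as in this standalone sketch; the "supported" set here is made up.]

    #include <stdio.h>

    enum fmt { FMT_NONE, FMT_R32G32B32A32_FLOAT, FMT_R16G16B16A16_FLOAT };

    /* Pretend the screen only supports half-float RGBA. */
    static int screen_supports(enum fmt f)
    {
        return f == FMT_R16G16B16A16_FLOAT;
    }

    /* First supported candidate wins, which is exactly why the
     * ordering (and trimming) of each format_map entry matters. */
    static enum fmt choose(const enum fmt *candidates)
    {
        for (; *candidates != FMT_NONE; candidates++)
            if (screen_supports(*candidates))
                return *candidates;
        return FMT_NONE;
    }

    int main(void)
    {
        const enum fmt old_rgba32f[] =
            { FMT_R32G32B32A32_FLOAT, FMT_R16G16B16A16_FLOAT, FMT_NONE };
        const enum fmt new_rgba32f[] =
            { FMT_R32G32B32A32_FLOAT, FMT_NONE };

        printf("old list -> %d (silently 16F)\n", choose(old_rgba32f));
        printf("new list -> %d (no lossy fallback)\n", choose(new_rgba32f));
        return 0;
    }
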
@@ -1944,6 +1940,7 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
 {
    struct st_context *st = st_context(ctx);
    enum pipe_format pFormat;
+   mesa_format mFormat;
    unsigned bindings;
    enum pipe_texture_target pTarget = gl_target_to_pipe(target);
 
@@ -1966,7 +1963,11 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
    else if (internalFormat == 3 || internalFormat == 4 ||
             internalFormat == GL_RGB || internalFormat == GL_RGBA ||
             internalFormat == GL_RGB8 || internalFormat == GL_RGBA8 ||
-            internalFormat == GL_BGRA)
+            internalFormat == GL_BGRA ||
+            internalFormat == GL_RGB16F ||
+            internalFormat == GL_RGBA16F ||
+            internalFormat == GL_RGB32F ||
+            internalFormat == GL_RGBA32F)
         bindings |= PIPE_BIND_RENDER_TARGET;
 
    /* GLES allows the driver to choose any format which matches
@@ -2016,7 +2017,20 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
       return MESA_FORMAT_NONE;
    }
 
-   return st_pipe_format_to_mesa_format(pFormat);
+   mFormat = st_pipe_format_to_mesa_format(pFormat);
+
+   /* Debugging aid */
+   if (0) {
+      debug_printf("%s(intFormat=%s, format=%s, type=%s) -> %s, %s\n",
+                   __func__,
+                   _mesa_enum_to_string(internalFormat),
+                   _mesa_enum_to_string(format),
+                   _mesa_enum_to_string(type),
+                   util_format_name(pFormat),
+                   _mesa_get_format_name(mFormat));
+   }
+
+   return mFormat;
 }
 
 
index 6c9f947..633e90f 100644 (file)
@@ -2217,10 +2217,15 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    case ir_triop_vector_insert:
    case ir_binop_carry:
    case ir_binop_borrow:
+   case ir_unop_ssbo_unsized_array_length:
       /* This operation is not supported, or should have already been handled.
        */
       assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
       break;
+
+   case ir_unop_get_buffer_size:
+      assert(!"Not implemented yet");
+      break;
    }
 
    this->result = result_src;
@@ -3228,6 +3233,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
    case ir_lod:
       opcode = TGSI_OPCODE_LODQ;
       break;
+   case ir_texture_samples:
+      opcode = TGSI_OPCODE_TXQS;
+      break;
    }
 
    if (ir->projector) {
@@ -3337,6 +3345,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
          emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
       } else
          inst = emit_asm(ir, opcode, result_dst, lod_info);
+   } else if (opcode == TGSI_OPCODE_TXQS) {
+      inst = emit_asm(ir, opcode, result_dst);
    } else if (opcode == TGSI_OPCODE_TXF) {
       inst = emit_asm(ir, opcode, result_dst, coord);
    } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
@@ -4139,8 +4149,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
        */
       for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
          if (inst->dst[i].file == PROGRAM_TEMPORARY &&
-             !inst->dst[i].reladdr &&
-             !inst->saturate) {
+             !inst->dst[i].reladdr) {
             for (int c = 0; c < 4; c++) {
                if (inst->dst[i].writemask & (1 << c)) {
                   if (writes[4 * inst->dst[i].index + c]) {
@@ -5028,6 +5037,7 @@ compile_tgsi_instruction(struct st_translate *t,
    case TGSI_OPCODE_TXL:
    case TGSI_OPCODE_TXP:
    case TGSI_OPCODE_TXQ:
+   case TGSI_OPCODE_TXQS:
    case TGSI_OPCODE_TXF:
    case TGSI_OPCODE_TEX2:
    case TGSI_OPCODE_TXB2:
index 5393d50..f05528d 100644 (file)
@@ -481,17 +481,17 @@ draw_rgba_pixels( struct gl_context *ctx, GLint x, GLint y,
           */
          GLint swapSize = _mesa_sizeof_packed_type(type);
          if (swapSize == 2 || swapSize == 4) {
-            int components = _mesa_components_in_format(format);
-            int elementCount = width * height * components;
-            tempImage = malloc(elementCount * swapSize);
+            int imageStride = _mesa_image_image_stride(unpack, width, height, format, type);
+
+            tempImage = malloc(imageStride);
             if (!tempImage) {
                _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
                return;
             }
-            if (swapSize == 2)
-               _mesa_swap2_copy(tempImage, (GLushort *) pixels, elementCount);
-            else
-               _mesa_swap4_copy(tempImage, (GLuint *) pixels, elementCount);
+
+            _mesa_swap_bytes_2d_image(format, type, unpack,
+                                      width, height, tempImage, pixels);
+
             pixels = tempImage;
          }
       }
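
[Note: the temporary image used for byte swapping is now sized from _mesa_image_image_stride and swapped with a 2D-aware helper, rather than sized as width * height * components. With GL_UNPACK_ALIGNMENT or GL_UNPACK_ROW_LENGTH in play, rows carry padding that the tight computation misses. A standalone check of that difference, with illustrative values:]

    #include <stdio.h>

    /* Bytes per row once padded to the unpack alignment. */
    static unsigned row_stride(unsigned width, unsigned bytes_per_pixel,
                               unsigned alignment)
    {
        unsigned bytes = width * bytes_per_pixel;
        return (bytes + alignment - 1) / alignment * alignment;
    }

    int main(void)
    {
        unsigned width = 3, height = 4;
        unsigned bpp = 2;         /* e.g. GL_UNSIGNED_SHORT, 1 component */
        unsigned alignment = 4;   /* the default GL_UNPACK_ALIGNMENT */

        unsigned tight  = width * height * bpp;                    /* 24 */
        unsigned padded = row_stride(width, bpp, alignment) * height; /* 32 */

        printf("tight=%u padded=%u\n", tight, padded);
        return 0;
    }
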
index acb06e6..27de9b3 100644 (file)
@@ -290,10 +290,26 @@ texfetch_funcs[] =
    },
 
    /* Packed signed/unsigned non-normalized integer formats */
+   FETCH_NULL(A8B8G8R8_UINT),
+   FETCH_NULL(A8R8G8B8_UINT),
+   FETCH_NULL(R8G8B8A8_UINT),
+   FETCH_NULL(B8G8R8A8_UINT),
    FETCH_NULL(B10G10R10A2_UINT),
    FETCH_NULL(R10G10B10A2_UINT),
    FETCH_NULL(A2B10G10R10_UINT),
    FETCH_NULL(A2R10G10B10_UINT),
+   FETCH_NULL(B5G6R5_UINT),
+   FETCH_NULL(R5G6B5_UINT),
+   FETCH_NULL(B2G3R3_UINT),
+   FETCH_NULL(R3G3B2_UINT),
+   FETCH_NULL(A4B4G4R4_UINT),
+   FETCH_NULL(R4G4B4A4_UINT),
+   FETCH_NULL(B4G4R4A4_UINT),
+   FETCH_NULL(A4R4G4B4_UINT),
+   FETCH_NULL(A1B5G5R5_UINT),
+   FETCH_NULL(B5G5R5A1_UINT),
+   FETCH_NULL(A1R5G5B5_UINT),
+   FETCH_NULL(R5G5B5A1_UINT),
 
    /* Array signed/unsigned non-normalized integer formats */
    FETCH_NULL(A_UINT8),
index 7be3954..e7e19a0 100644 (file)
@@ -24,7 +24,7 @@
  * Authors:
  *    Keith Whitwell <keithw@vmware.com>
  */
-
+#include <stdbool.h>
 
 /**
  * \file t_dd_dmatmp.h
  * tristrips, lineloops to linestrips), or to indexed vertices.
  */
 
-#if !defined(HAVE_TRIANGLES)
-#error "must have at least triangles to use render template"
-#endif
-
-#if !HAVE_ELTS
-#define ELTS_VARS(buf)
-#define ALLOC_ELTS(nr) 0
-#define EMIT_ELT( offset, elt )
-#define EMIT_TWO_ELTS( offset, elt0, elt1 )
-#define INCR_ELTS( nr )
-#define ELT_INIT(prim)
-#define GET_CURRENT_VB_MAX_ELTS() 0
-#define GET_SUBSEQUENT_VB_MAX_ELTS() 0
-#define RELEASE_ELT_VERTS()
-#define EMIT_INDEXED_VERTS( ctx, start, count )
+#if !HAVE_TRIANGLES || !HAVE_LINES || !HAVE_LINE_STRIPS || !HAVE_TRI_STRIPS || !HAVE_TRI_FANS
+#error "must have lines, line strips, triangles, triangle fans, and triangle strips to use render template"
 #endif
 
-#ifndef EMIT_TWO_ELTS
-#define EMIT_TWO_ELTS( offset, elt0, elt1 )    \
-do {                                           \
-   EMIT_ELT( offset, elt0 );                   \
-   EMIT_ELT( offset+1, elt1 );                         \
-} while (0)
+#if HAVE_QUAD_STRIPS || HAVE_QUADS || HAVE_ELTS
+#error "ELTs, quads, and quad strips not supported by render template"
 #endif
 
 
@@ -69,33 +52,8 @@ do {                                                 \
 /*                  Render whole begin/end objects                    */
 /**********************************************************************/
 
-
-
-
-#if (HAVE_ELTS)
-static void *TAG(emit_elts)( struct gl_context *ctx, GLuint *elts, GLuint nr,
-                            void *buf)
-{
-   GLint i;
-   LOCAL_VARS;
-   ELTS_VARS(buf);
-
-   for ( i = 0 ; i+1 < nr ; i+=2, elts += 2 ) {
-      EMIT_TWO_ELTS( 0, elts[0], elts[1] );
-      INCR_ELTS( 2 );
-   }
-   
-   if (i < nr) {
-      EMIT_ELT( 0, elts[0] );
-      INCR_ELTS( 1 );
-   }
-
-   return (void *)ELTPTR;
-}
-#endif
-
-static __inline void *TAG(emit_verts)( struct gl_context *ctx, GLuint start, 
-                                    GLuint count, void *buf )
+static inline void *TAG(emit_verts)(struct gl_context *ctx, GLuint start,
+                                    GLuint count, void *buf)
 {
    return EMIT_VERTS(ctx, start, count, buf);
 }
@@ -104,309 +62,261 @@ static __inline void *TAG(emit_verts)( struct gl_context *ctx, GLuint start,
  *                    Render non-indexed primitives.
  ***********************************************************************/
 
-static void TAG(render_points_verts)( struct gl_context *ctx,
-                                     GLuint start,
-                                     GLuint count,
-                                     GLuint flags )
+static void TAG(render_points_verts)(struct gl_context *ctx,
+                                     GLuint start,
+                                     GLuint count,
+                                     GLuint flags)
 {
    if (HAVE_POINTS) {
       LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
+      const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+      unsigned currentsz;
       GLuint j, nr;
 
-      INIT( GL_POINTS );
+      INIT(GL_POINTS);
 
       currentsz = GET_CURRENT_VB_MAX_VERTS();
       if (currentsz < 8)
-        currentsz = dmasz;
+         currentsz = dmasz;
 
-      for (j = start; j < count; j += nr ) {
-        nr = MIN2( currentsz, count - j );
-        TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
-        currentsz = dmasz;
+      for (j = 0; j < count; j += nr) {
+         nr = MIN2(currentsz, count - j);
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+         currentsz = dmasz;
       }
-
    } else {
       fprintf(stderr, "%s - cannot draw primitive\n", __func__);
       return;
    }
 }
 
-static void TAG(render_lines_verts)( struct gl_context *ctx,
-                                    GLuint start,
-                                    GLuint count,
-                                    GLuint flags )
+static void TAG(render_lines_verts)(struct gl_context *ctx,
+                                    GLuint start,
+                                    GLuint count,
+                                    GLuint flags)
 {
-   if (HAVE_LINES) {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
-      GLuint j, nr;
+   LOCAL_VARS;
+   const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1;
+   unsigned currentsz;
+   GLuint j, nr;
 
-      INIT( GL_LINES );
+   INIT(GL_LINES);
 
-      /* Emit whole number of lines in total and in each buffer:
-       */
-      count -= (count-start) & 1;
-      currentsz = GET_CURRENT_VB_MAX_VERTS();
-      currentsz -= currentsz & 1;
-      dmasz -= dmasz & 1;
-
-      if (currentsz < 8)
-        currentsz = dmasz;
+   /* Emit whole number of lines in total and in each buffer:
+    */
+   count -= count & 1;
+   currentsz = GET_CURRENT_VB_MAX_VERTS();
+   currentsz -= currentsz & 1;
 
-      for (j = start; j < count; j += nr ) {
-        nr = MIN2( currentsz, count - j );
-        TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
-        currentsz = dmasz;
-      }
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
+   for (j = 0; j < count; j += nr) {
+      nr = MIN2(currentsz, count - j);
+      TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+      currentsz = dmasz;
    }
 }
 
 
-static void TAG(render_line_strip_verts)( struct gl_context *ctx,
-                                         GLuint start,
-                                         GLuint count,
-                                         GLuint flags )
+static void TAG(render_line_strip_verts)(struct gl_context *ctx,
+                                         GLuint start,
+                                         GLuint count,
+                                         GLuint flags)
 {
-   if (HAVE_LINE_STRIPS) {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
-      GLuint j, nr;
-
-      INIT( GL_LINE_STRIP );
+   LOCAL_VARS;
+   const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   unsigned currentsz;
+   GLuint j, nr;
 
-      currentsz = GET_CURRENT_VB_MAX_VERTS();
-      if (currentsz < 8)
-        currentsz = dmasz;
+   INIT(GL_LINE_STRIP);
 
-      for (j = start; j + 1 < count; j += nr - 1 ) {
-        nr = MIN2( currentsz, count - j );
-        TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
-        currentsz = dmasz;
-      }
-      FLUSH();
+   currentsz = GET_CURRENT_VB_MAX_VERTS();
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
+   for (j = 0; j + 1 < count; j += nr - 1) {
+      nr = MIN2(currentsz, count - j);
+      TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+      currentsz = dmasz;
    }
+   FLUSH();
 }
 
 
-static void TAG(render_line_loop_verts)( struct gl_context *ctx,
-                                        GLuint start,
-                                        GLuint count,
-                                        GLuint flags )
+static void TAG(render_line_loop_verts)(struct gl_context *ctx,
+                                        GLuint start,
+                                        GLuint count,
+                                        GLuint flags)
 {
-   if (HAVE_LINE_STRIPS) {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
-      GLuint j, nr;
-
-      INIT( GL_LINE_STRIP );
+   LOCAL_VARS;
+   const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() - 1;
+   unsigned currentsz;
+   GLuint j, nr;
 
-      if (flags & PRIM_BEGIN)
-        j = start;
-      else
-        j = start + 1;
+   INIT(GL_LINE_STRIP);
 
-      /* Ensure last vertex won't wrap buffers:
-       */
-      currentsz = GET_CURRENT_VB_MAX_VERTS();
-      currentsz--;
-      dmasz--;
+   j = (flags & PRIM_BEGIN) ? 0 : 1;
 
-      if (currentsz < 8) {
-        currentsz = dmasz;
-      }
+   /* Ensure last vertex won't wrap buffers:
+    */
+   currentsz = GET_CURRENT_VB_MAX_VERTS();
+   currentsz--;
 
-      if (j + 1 < count) {
-        for ( ; j + 1 < count; j += nr - 1 ) {
-           nr = MIN2( currentsz, count - j );
-
-           if (j + nr >= count &&
-               start < count - 1 && 
-               (flags & PRIM_END)) 
-           {
-              void *tmp;
-              tmp = ALLOC_VERTS(nr+1);
-              tmp = TAG(emit_verts)( ctx, j, nr, tmp );
-              tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-              (void) tmp;
-           }
-           else {
-              TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
-              currentsz = dmasz;
-           }
-        }
+   if (currentsz < 8)
+      currentsz = dmasz;
 
+   if (j + 1 < count) {
+      for (/* empty */; j + 1 < count; j += nr - 1) {
+         nr = MIN2(currentsz, count - j);
+
+         if (j + nr >= count &&
+             count > 1 &&
+             (flags & PRIM_END)) {
+            void *tmp;
+            tmp = ALLOC_VERTS(nr+1);
+            tmp = TAG(emit_verts)(ctx, start + j, nr, tmp);
+            tmp = TAG(emit_verts)( ctx, start, 1, tmp );
+            (void) tmp;
+         } else {
+            TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+            currentsz = dmasz;
+         }
       }
-      else if (start + 1 < count && (flags & PRIM_END)) {
-        void *tmp;
-        tmp = ALLOC_VERTS(2);
-        tmp = TAG(emit_verts)( ctx, start+1, 1, tmp );
-        tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-        (void) tmp;
-      }
-
-      FLUSH();
-
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
+   } else if (count > 1 && (flags & PRIM_END)) {
+      void *tmp;
+      tmp = ALLOC_VERTS(2);
+      tmp = TAG(emit_verts)( ctx, start+1, 1, tmp );
+      tmp = TAG(emit_verts)( ctx, start, 1, tmp );
+      (void) tmp;
    }
+
+   FLUSH();
 }
 
 
-static void TAG(render_triangles_verts)( struct gl_context *ctx,
-                                        GLuint start,
-                                        GLuint count,
-                                        GLuint flags )
+static void TAG(render_triangles_verts)(struct gl_context *ctx,
+                                        GLuint start,
+                                        GLuint count,
+                                        GLuint flags)
 {
    LOCAL_VARS;
-   int dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS()/3) * 3;
-   int currentsz;
+   const unsigned dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS() / 3) * 3;
+   unsigned currentsz;
    GLuint j, nr;
 
    INIT(GL_TRIANGLES);
 
-   currentsz = (GET_CURRENT_VB_MAX_VERTS()/3) * 3;
+   currentsz = (GET_CURRENT_VB_MAX_VERTS() / 3) * 3;
 
    /* Emit whole number of tris in total.  dmasz is already a multiple
     * of 3.
     */
-   count -= (count-start)%3;
+   count -= count % 3;
 
    if (currentsz < 8)
       currentsz = dmasz;
 
-   for (j = start; j < count; j += nr) {
-      nr = MIN2( currentsz, count - j );
-      TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+   for (j = 0; j < count; j += nr) {
+      nr = MIN2(currentsz, count - j);
+      TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
       currentsz = dmasz;
    }
 }
 
 
 
-static void TAG(render_tri_strip_verts)( struct gl_context *ctx,
-                                        GLuint start,
-                                        GLuint count,
-                                        GLuint flags )
+static void TAG(render_tri_strip_verts)(struct gl_context *ctx,
+                                        GLuint start,
+                                        GLuint count,
+                                        GLuint flags)
 {
-   if (HAVE_TRI_STRIPS) {
-      LOCAL_VARS;
-      GLuint j, nr;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
-
-      INIT(GL_TRIANGLE_STRIP);
-
-      currentsz = GET_CURRENT_VB_MAX_VERTS();
+   LOCAL_VARS;
+   GLuint j, nr;
+   const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1;
+   unsigned currentsz;
 
-      if (currentsz < 8) {
-        currentsz = dmasz;
-      }
+   INIT(GL_TRIANGLE_STRIP);
 
-      /* From here on emit even numbers of tris when wrapping over buffers:
-       */
-      dmasz -= (dmasz & 1);
-      currentsz -= (currentsz & 1);
+   currentsz = GET_CURRENT_VB_MAX_VERTS();
 
-      for (j = start ; j + 2 < count; j += nr - 2 ) {
-        nr = MIN2( currentsz, count - j );
-        TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
-        currentsz = dmasz;
-      }
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-      FLUSH();
+   /* From here on emit even numbers of tris when wrapping over buffers:
+    */
+   currentsz -= (currentsz & 1);
 
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
+   for (j = 0; j + 2 < count; j += nr - 2) {
+      nr = MIN2(currentsz, count - j);
+      TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+      currentsz = dmasz;
    }
+
+   FLUSH();
 }
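
[Note: throughout the rewritten emitters, loops now run over a start-relative j and emit start + j, and primitives that span DMA buffers re-emit their shared vertices at the boundary: j += nr - 2 for triangle strips, nr - 1 for line strips. The chunking logic in isolation, as a standalone sketch with illustrative sizes; the real code also switches to the subsequent-buffer capacity after the first chunk.]

    #include <stdio.h>

    int main(void)
    {
        const unsigned start = 100, count = 10;  /* vertices [100, 110) */
        const unsigned currentsz = 6;            /* per-buffer capacity, even */
        unsigned j, nr;

        /* Each chunk restarts two vertices back, so the strip stays
         * connected and keeps its winding across buffers. */
        for (j = 0; j + 2 < count; j += nr - 2) {
            nr = currentsz < count - j ? currentsz : count - j;
            printf("emit %u verts starting at vertex %u\n", nr, start + j);
        }
        return 0;
    }
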
 
-static void TAG(render_tri_fan_verts)( struct gl_context *ctx,
-                                      GLuint start,
-                                      GLuint count,
-                                      GLuint flags )
+static void TAG(render_tri_fan_verts)(struct gl_context *ctx,
+                                      GLuint start,
+                                      GLuint count,
+                                      GLuint flags)
 {
-   if (HAVE_TRI_FANS) {
-      LOCAL_VARS;
-      GLuint j, nr;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
-
-      INIT(GL_TRIANGLE_FAN);
+   LOCAL_VARS;
+   GLuint j, nr;
+   const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+   unsigned currentsz;
 
-      currentsz = GET_CURRENT_VB_MAX_VERTS();
-      if (currentsz < 8) {
-        currentsz = dmasz;
-      }
+   INIT(GL_TRIANGLE_FAN);
 
-      for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
-        void *tmp;
-        nr = MIN2( currentsz, count - j + 1 );
-        tmp = ALLOC_VERTS( nr );
-        tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-        tmp = TAG(emit_verts)( ctx, j, nr - 1, tmp );
-        (void) tmp;
-        currentsz = dmasz;
-      }
+   currentsz = GET_CURRENT_VB_MAX_VERTS();
+   if (currentsz < 8)
+      currentsz = dmasz;
 
-      FLUSH();
-   }
-   else {
-      /* Could write code to emit these as indexed vertices (for the
-       * g400, for instance).
-       */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
+   for (j = 1; j + 1 < count; j += nr - 2) {
+      void *tmp;
+      nr = MIN2(currentsz, count - j + 1);
+      tmp = ALLOC_VERTS(nr);
+      tmp = TAG(emit_verts)(ctx, start, 1, tmp);
+      tmp = TAG(emit_verts)(ctx, start + j, nr - 1, tmp);
+      (void) tmp;
+      currentsz = dmasz;
    }
+
+   FLUSH();
 }
 
 
-static void TAG(render_poly_verts)( struct gl_context *ctx,
-                                   GLuint start,
-                                   GLuint count,
-                                   GLuint flags )
+static void TAG(render_poly_verts)(struct gl_context *ctx,
+                                   GLuint start,
+                                   GLuint count,
+                                   GLuint flags)
 {
    if (HAVE_POLYGONS) {
       LOCAL_VARS;
       GLuint j, nr;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
+      const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+      unsigned currentsz;
 
       INIT(GL_POLYGON);
 
       currentsz = GET_CURRENT_VB_MAX_VERTS();
       if (currentsz < 8) {
-        currentsz = dmasz;
+         currentsz = dmasz;
       }
 
-      for (j = start + 1 ; j + 1 < count ; j += nr - 2 ) {
-        void *tmp;
-        nr = MIN2( currentsz, count - j + 1 );
-        tmp = ALLOC_VERTS( nr );
-        tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-        tmp = TAG(emit_verts)( ctx, j, nr - 1, tmp );
-        (void) tmp;
-        currentsz = dmasz;
+      for (j = 1; j + 1 < count; j += nr - 2) {
+         void *tmp;
+         nr = MIN2(currentsz, count - j + 1);
+         tmp = ALLOC_VERTS(nr);
+         tmp = TAG(emit_verts)(ctx, start, 1, tmp);
+         tmp = TAG(emit_verts)(ctx, start + j, nr - 1, tmp);
+         (void) tmp;
+         currentsz = dmasz;
       }
 
       FLUSH();
-   }
-   else if (HAVE_TRI_FANS && ctx->Light.ShadeModel == GL_SMOOTH) {
+   } else if (ctx->Light.ShadeModel == GL_SMOOTH) {
       TAG(render_tri_fan_verts)( ctx, start, count, flags );
    } else {
       fprintf(stderr, "%s - cannot draw primitive\n", __func__);
@@ -414,252 +324,92 @@ static void TAG(render_poly_verts)( struct gl_context *ctx,
    }
 }
 
-static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
-                                         GLuint start,
-                                         GLuint count,
-                                         GLuint flags )
+static void TAG(render_quad_strip_verts)(struct gl_context *ctx,
+                                         GLuint start,
+                                         GLuint count,
+                                         GLuint flags)
 {
    GLuint j, nr;
 
-   if (HAVE_QUAD_STRIPS) {
-      LOCAL_VARS;
-      GLuint j, nr;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
-
-      INIT(GL_QUAD_STRIP);
-
-      currentsz = GET_CURRENT_VB_MAX_VERTS();
-      if (currentsz < 8) {
-        currentsz = dmasz;
-      }
-
-      dmasz -= (dmasz & 2);
-      currentsz -= (currentsz & 2);
-
-      for (j = start ; j + 3 < count; j += nr - 2 ) {
-        nr = MIN2( currentsz, count - j );
-        TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
-        currentsz = dmasz;
-      }
-
-      FLUSH();
-
-   } else if (HAVE_TRI_STRIPS && 
-             ctx->Light.ShadeModel == GL_FLAT &&
-             TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) {
-      if (HAVE_ELTS) {
-        LOCAL_VARS;
-        int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-        int currentsz;
-        GLuint j, nr;
-
-         EMIT_INDEXED_VERTS( ctx, start, count );
-
-        /* Simulate flat-shaded quadstrips using indexed vertices:
-         */
-        ELT_INIT( GL_TRIANGLES );
-
-        currentsz = GET_CURRENT_VB_MAX_ELTS();
-
-        /* Emit whole number of quads in total, and in each buffer.
-         */
-        dmasz -= dmasz & 1;
-        count -= (count-start) & 1;
-        currentsz -= currentsz & 1;
-
-        if (currentsz < 12)
-           currentsz = dmasz;
-
-        currentsz = currentsz/6*2;
-        dmasz = dmasz/6*2;
-
-        for (j = start; j + 3 < count; j += nr - 2 ) {
-           nr = MIN2( currentsz, count - j );
-           if (nr >= 4) {
-              GLint quads = (nr/2)-1;
-              GLint i;
-              ELTS_VARS( ALLOC_ELTS( quads*6 ) );
-
-              for ( i = j-start ; i < j-start+quads*2 ; i+=2 ) {
-                 EMIT_TWO_ELTS( 0, (i+0), (i+1) );
-                 EMIT_TWO_ELTS( 2, (i+2), (i+1) );
-                 EMIT_TWO_ELTS( 4, (i+3), (i+2) );
-                 INCR_ELTS( 6 );
-              }
-
-              FLUSH();
-           }
-           currentsz = dmasz;
-        }
-
-        RELEASE_ELT_VERTS();
-        FLUSH();
-      }
-      else {
-        /* Vertices won't fit in a single buffer or elts not
-         * available - should never happen.
-         */
-        fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-        return;
-      }
-   }
-   else if (HAVE_TRI_STRIPS) {
+   if (ctx->Light.ShadeModel == GL_FLAT &&
+       TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) {
+      /* Vertices won't fit in a single buffer or elts not available - should
+       * never happen.
+       */
+      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
+      return;
+   } else {
       LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
-      int currentsz;
+      const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1;
+      unsigned currentsz;
 
       /* Emit smooth-shaded quadstrips as tristrips:
        */
       FLUSH();
-      INIT( GL_TRIANGLE_STRIP );
+      INIT(GL_TRIANGLE_STRIP);
 
       /* Emit whole number of quads in total, and in each buffer.
        */
-      dmasz -= dmasz & 1;
       currentsz = GET_CURRENT_VB_MAX_VERTS();
       currentsz -= currentsz & 1;
-      count -= (count-start) & 1;
+      count -= count & 1;
 
-      if (currentsz < 8) {
-        currentsz = dmasz;
-      }
+      if (currentsz < 8)
+         currentsz = dmasz;
 
-      for (j = start; j + 3 < count; j += nr - 2 ) {
-        nr = MIN2( currentsz, count - j );
-        TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
-        currentsz = dmasz;
+      for (j = 0; j + 3 < count; j += nr - 2) {
+         nr = MIN2(currentsz, count - j);
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+         currentsz = dmasz;
       }
 
       FLUSH();
-
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
    }
 }
 
 
-static void TAG(render_quads_verts)( struct gl_context *ctx,
-                                    GLuint start,
-                                    GLuint count,
-                                    GLuint flags )
+static void TAG(render_quads_verts)(struct gl_context *ctx,
+                                    GLuint start,
+                                    GLuint count,
+                                    GLuint flags)
 {
-   if (HAVE_QUADS) {
-      LOCAL_VARS;
-      int dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS()/4) * 4;
-      int currentsz;
-      GLuint j, nr;
-
-      INIT(GL_QUADS);
-
-      /* Emit whole number of quads in total.  dmasz is already a multiple
-       * of 4.
-       */
-      count -= (count-start)%4;
-
-      currentsz = (GET_CURRENT_VB_MAX_VERTS()/4) * 4;
-      if (currentsz < 8)
-         currentsz = dmasz;
-
-      for (j = start; j < count; j += nr) {
-         nr = MIN2( currentsz, count - j );
-         TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
-         currentsz = dmasz;
-      }
-   }
-   else if (HAVE_ELTS) {
-      /* Hardware doesn't have a quad primitive type -- try to
-       * simulate it using indexed vertices and the triangle
-       * primitive:
-       */
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint j, nr;
-
-      EMIT_INDEXED_VERTS( ctx, start, count );
-
-      FLUSH();
-      ELT_INIT( GL_TRIANGLES );
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-
-      /* Emit whole number of quads in total, and in each buffer.
-       */
-      dmasz -= dmasz & 3;
-      count -= (count-start) & 3;
-      currentsz -= currentsz & 3;
+   LOCAL_VARS;
+   GLuint j;
 
-      /* Adjust for rendering as triangles:
-       */
-      currentsz = currentsz/6*4;
-      dmasz = dmasz/6*4;
+   /* Emit whole number of quads in total. */
+   count -= count & 3;
 
-      if (currentsz < 8)
-        currentsz = dmasz;
-
-      for (j = start; j < count; j += nr ) {
-        nr = MIN2( currentsz, count - j );
-        if (nr >= 4) {
-           GLint quads = nr/4;
-           GLint i;
-           ELTS_VARS( ALLOC_ELTS( quads*6 ) );
-
-           for ( i = j-start ; i < j-start+quads*4 ; i+=4 ) {
-              EMIT_TWO_ELTS( 0, (i+0), (i+1) );
-              EMIT_TWO_ELTS( 2, (i+3), (i+1) );
-              EMIT_TWO_ELTS( 4, (i+2), (i+3) );
-              INCR_ELTS( 6 );
-           }
-
-           FLUSH();
-        }
-        currentsz = dmasz;
-      }
+   /* Hardware doesn't have a quad primitive type -- try to simulate it using
+    * triangle primitive.  This is a win for gears, but is it useful in the
+    * broader world?
+    */
+   INIT(GL_TRIANGLES);
 
-      RELEASE_ELT_VERTS();
-   }
-   else if (HAVE_TRIANGLES) {
-      /* Hardware doesn't have a quad primitive type -- try to
-       * simulate it using triangle primitive.  This is a win for
-       * gears, but is it useful in the broader world?
+   for (j = 0; j + 3 < count; j += 4) {
+      void *tmp = ALLOC_VERTS(6);
+      /* Send v0, v1, v3
        */
-      LOCAL_VARS;
-      GLuint j;
-
-      INIT(GL_TRIANGLES);
-
-      for (j = start; j < count-3; j += 4) {
-        void *tmp = ALLOC_VERTS( 6 );
-        /* Send v0, v1, v3
-         */
-        tmp = EMIT_VERTS(ctx, j,     2, tmp);
-        tmp = EMIT_VERTS(ctx, j + 3, 1, tmp);
-        /* Send v1, v2, v3
-         */
-        tmp = EMIT_VERTS(ctx, j + 1, 3, tmp);
-        (void) tmp;
-      }
-   }
-   else {
-      /* Vertices won't fit in a single buffer, should never happen.
+      tmp = EMIT_VERTS(ctx, start + j,     2, tmp);
+      tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp);
+      /* Send v1, v2, v3
        */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
+      tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp);
+      (void) tmp;
    }
 }
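
[Note: with HAVE_QUADS and HAVE_ELTS support removed from the template, quads are always decomposed inline as the hunk above shows: each quad (v0,v1,v2,v3) becomes the triangles (v0,v1,v3) and (v1,v2,v3), six vertices emitted per four input vertices. A standalone illustration of that decomposition:]

    #include <stdio.h>

    int main(void)
    {
        const unsigned start = 0, count = 8;  /* two quads */
        unsigned j;

        for (j = 0; j + 3 < count; j += 4) {
            unsigned v0 = start + j, v1 = v0 + 1, v2 = v0 + 2, v3 = v0 + 3;
            /* Send v0, v1, v3 then v1, v2, v3, matching the emit order. */
            printf("tri (%u,%u,%u)  tri (%u,%u,%u)\n",
                   v0, v1, v3, v1, v2, v3);
        }
        return 0;
    }
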
 
-static void TAG(render_noop)( struct gl_context *ctx,
-                             GLuint start,
-                             GLuint count,
-                             GLuint flags )
+static void TAG(render_noop)(struct gl_context *ctx,
+                             GLuint start,
+                             GLuint count,
+                             GLuint flags)
 {
+   (void) ctx;
+   (void) start;
+   (void) count;
+   (void) flags;
 }
 
-
-
-
-static tnl_render_func TAG(render_tab_verts)[GL_POLYGON+2] =
+static const tnl_render_func TAG(render_tab_verts)[GL_POLYGON+2] =
 {
    TAG(render_points_verts),
    TAG(render_lines_verts),
@@ -674,593 +424,63 @@ static tnl_render_func TAG(render_tab_verts)[GL_POLYGON+2] =
    TAG(render_noop),
 };
 
-
-/****************************************************************************
- *                 Render elts using hardware indexed verts                 *
- ****************************************************************************/
-
-#if (HAVE_ELTS)
-static void TAG(render_points_elts)( struct gl_context *ctx,
-                                    GLuint start,
-                                    GLuint count,
-                                    GLuint flags )
-{
-   if (HAVE_POINTS) {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      GLuint j, nr;
-
-      ELT_INIT( GL_POINTS );
-
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8)
-        currentsz = dmasz;
-
-      for (j = start; j < count; j += nr ) {
-        nr = MIN2( currentsz, count - j );
-        TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
-        FLUSH();
-        currentsz = dmasz;
-      }
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
-}
-
-
-
-static void TAG(render_lines_elts)( struct gl_context *ctx,
-                                   GLuint start,
-                                   GLuint count,
-                                   GLuint flags )
-{
-   if (HAVE_LINES) {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      GLuint j, nr;
-
-      ELT_INIT( GL_LINES );
-
-      /* Emit whole number of lines in total and in each buffer:
-       */
-      count -= (count-start) & 1;
-      currentsz -= currentsz & 1;
-      dmasz -= dmasz & 1;
-
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8)
-        currentsz = dmasz;
-
-      for (j = start; j < count; j += nr ) {
-        nr = MIN2( currentsz, count - j );
-        TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
-        FLUSH();
-        currentsz = dmasz;
-      }
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
-}
-
-
-static void TAG(render_line_strip_elts)( struct gl_context *ctx,
-                                        GLuint start,
-                                        GLuint count,
-                                        GLuint flags )
-{
-   if (HAVE_LINE_STRIPS) {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      GLuint j, nr;
-
-      FLUSH(); /* always a new primitive */
-      ELT_INIT( GL_LINE_STRIP );
-
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8)
-        currentsz = dmasz;
-
-      for (j = start; j + 1 < count; j += nr - 1 ) {
-        nr = MIN2( currentsz, count - j );
-        TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
-        FLUSH();
-        currentsz = dmasz;
-      }
-   } else {
-      /* TODO: Try to emit as indexed lines.
-       */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
-}
-
-
-static void TAG(render_line_loop_elts)( struct gl_context *ctx,
-                                       GLuint start,
-                                       GLuint count,
-                                       GLuint flags )
-{
-   if (HAVE_LINE_STRIPS) {
-      LOCAL_VARS;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      GLuint j, nr;
-
-      FLUSH();
-      ELT_INIT( GL_LINE_STRIP );
-
-      if (flags & PRIM_BEGIN)
-        j = start;
-      else
-        j = start + 1;
-
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8) {
-        currentsz = dmasz;
-      }
-
-      /* Ensure last vertex doesn't wrap:
-       */
-      currentsz--;
-      dmasz--;
-
-      if (j + 1 < count) {
-        for ( ; j + 1 < count; j += nr - 1 ) {
-           nr = MIN2( currentsz, count - j );
-
-           if (j + nr >= count &&
-               start < count - 1 && 
-               (flags & PRIM_END)) 
-           {
-              void *tmp;
-              tmp = ALLOC_ELTS(nr+1);
-              tmp = TAG(emit_elts)( ctx, elts+j, nr, tmp );
-              tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-              (void) tmp;
-           }
-           else {
-              TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
-              currentsz = dmasz;
-           }
-        }
-
-      }
-      else if (start + 1 < count && (flags & PRIM_END)) {
-        void *tmp;
-        tmp = ALLOC_ELTS(2);
-        tmp = TAG(emit_elts)( ctx, elts+start+1, 1, tmp );
-        tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-        (void) tmp;
-      }
-
-      FLUSH();
-   } else {
-      /* TODO: Try to emit as indexed lines */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
-}
-
-
-/* For verts, we still eliminate the copy from main memory to dma
- * buffers.  For elts, this is probably no better (worse?) than the
- * standard path.
- */
-static void TAG(render_triangles_elts)( struct gl_context *ctx,
-                                       GLuint start,
-                                       GLuint count,
-                                       GLuint flags )
-{
-   LOCAL_VARS;
-   GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-   int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS()/3*3;
-   int currentsz;
-   GLuint j, nr;
-
-   FLUSH();
-   ELT_INIT( GL_TRIANGLES );
-
-   currentsz = GET_CURRENT_VB_MAX_ELTS();
-
-   /* Emit whole number of tris in total.  dmasz is already a multiple
-    * of 3.
-    */
-   count -= (count-start)%3;
-   currentsz -= currentsz%3;
-   if (currentsz < 8)
-      currentsz = dmasz;
-
-   for (j = start; j < count; j += nr) {
-      nr = MIN2( currentsz, count - j );
-      TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
-      FLUSH();
-      currentsz = dmasz;
-   }
-}
-
-
-
-static void TAG(render_tri_strip_elts)( struct gl_context *ctx,
-                                       GLuint start,
-                                       GLuint count,
-                                       GLuint flags )
-{
-   if (HAVE_TRI_STRIPS) {
-      LOCAL_VARS;
-      GLuint j, nr;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-
-      FLUSH();
-      ELT_INIT( GL_TRIANGLE_STRIP );
-
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8) {
-        currentsz = dmasz;
-      }
-
-      /* Keep the same winding over multiple buffers:
-       */
-      dmasz -= (dmasz & 1);
-      currentsz -= (currentsz & 1);
-
-      for (j = start ; j + 2 < count; j += nr - 2 ) {
-        nr = MIN2( currentsz, count - j );
-        TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
-        FLUSH();
-        currentsz = dmasz;
-      }
-   } else {
-      /* TODO: try to emit as indexed triangles */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
-}
-
-static void TAG(render_tri_fan_elts)( struct gl_context *ctx,
-                                     GLuint start,
-                                     GLuint count,
-                                     GLuint flags )
-{
-   if (HAVE_TRI_FANS) {
-      LOCAL_VARS;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      GLuint j, nr;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-
-      FLUSH();
-      ELT_INIT( GL_TRIANGLE_FAN );
-
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8) {
-        currentsz = dmasz;
-      }
-
-      for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
-        void *tmp;
-        nr = MIN2( currentsz, count - j + 1 );
-        tmp = ALLOC_ELTS( nr );
-        tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-        tmp = TAG(emit_elts)( ctx, elts+j, nr - 1, tmp );
-        (void) tmp;
-        FLUSH();
-        currentsz = dmasz;
-      }
-   } else {
-      /* TODO: try to emit as indexed triangles */
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
-}
-
-
-static void TAG(render_poly_elts)( struct gl_context *ctx,
-                                  GLuint start,
-                                  GLuint count,
-                                  GLuint flags )
-{
-   if (HAVE_POLYGONS) {
-      LOCAL_VARS;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      GLuint j, nr;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-
-      FLUSH();
-      ELT_INIT( GL_POLYGON );
-
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-      if (currentsz < 8) {
-        currentsz = dmasz;
-      }
-
-      for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
-        void *tmp;
-        nr = MIN2( currentsz, count - j + 1 );
-        tmp = ALLOC_ELTS( nr );
-        tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-        tmp = TAG(emit_elts)( ctx, elts+j, nr - 1, tmp );
-        (void) tmp;
-        FLUSH();
-        currentsz = dmasz;
-      }
-   } else if (HAVE_TRI_FANS && ctx->Light.ShadeModel == GL_SMOOTH) {
-      TAG(render_tri_fan_verts)( ctx, start, count, flags );
-   } else {
-      fprintf(stderr, "%s - cannot draw primitive\n", __func__);
-      return;
-   }
-}
-
-static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
-                                        GLuint start,
-                                        GLuint count,
-                                        GLuint flags )
-{
-   if (HAVE_QUAD_STRIPS && 0) {
-   }
-   else if (HAVE_TRI_STRIPS) {
-      LOCAL_VARS;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint j, nr;
-
-      FLUSH();
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-
-      /* Emit whole number of quads in total, and in each buffer.
-       */
-      dmasz -= dmasz & 1;
-      count -= (count-start) & 1;
-      currentsz -= currentsz & 1;
-
-      if (currentsz < 12)
-        currentsz = dmasz;
-
-      if (ctx->Light.ShadeModel == GL_FLAT) {
-        ELT_INIT( GL_TRIANGLES );
-
-        currentsz = currentsz/6*2;
-        dmasz = dmasz/6*2;
-
-        for (j = start; j + 3 < count; j += nr - 2 ) {
-           nr = MIN2( currentsz, count - j );
-
-           if (nr >= 4)
-           {
-              GLint i;
-              GLint quads = (nr/2)-1;
-              ELTS_VARS( ALLOC_ELTS( quads*6 ) );
-
-              for ( i = j-start ; i < j-start+quads ; i++, elts += 2 ) {
-                 EMIT_TWO_ELTS( 0, elts[0], elts[1] );
-                 EMIT_TWO_ELTS( 2, elts[2], elts[1] );
-                 EMIT_TWO_ELTS( 4, elts[3], elts[2] );
-                 INCR_ELTS( 6 );
-              }
-
-              FLUSH();
-           }
-
-           currentsz = dmasz;
-        }
-      }
-      else {
-        ELT_INIT( GL_TRIANGLE_STRIP );
-
-        for (j = start; j + 3 < count; j += nr - 2 ) {
-           nr = MIN2( currentsz, count - j );
-           TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
-           FLUSH();
-           currentsz = dmasz;
-        }
-      }
-   }
-}
-
-
-static void TAG(render_quads_elts)( struct gl_context *ctx,
-                                   GLuint start,
-                                   GLuint count,
-                                   GLuint flags )
-{
-   if (HAVE_QUADS) {
-      LOCAL_VARS;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS()/4*4;
-      int currentsz;
-      GLuint j, nr;
-
-      FLUSH();
-      ELT_INIT( GL_TRIANGLES );
-
-      currentsz = GET_CURRENT_VB_MAX_ELTS()/4*4;
-
-      count -= (count-start)%4;
-
-      if (currentsz < 8)
-        currentsz = dmasz;
-
-      for (j = start; j < count; j += nr) {
-        nr = MIN2( currentsz, count - j );
-        TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
-        FLUSH();
-        currentsz = dmasz;
-      }
-   } else {
-      LOCAL_VARS;
-      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
-      int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
-      int currentsz;
-      GLuint j, nr;
-
-      ELT_INIT( GL_TRIANGLES );
-      currentsz = GET_CURRENT_VB_MAX_ELTS();
-
-      /* Emit whole number of quads in total, and in each buffer.
-       */
-      dmasz -= dmasz & 3;
-      count -= (count-start) & 3;
-      currentsz -= currentsz & 3;
-
-      /* Adjust for rendering as triangles:
-       */
-      currentsz = currentsz/6*4;
-      dmasz = dmasz/6*4;
-
-      if (currentsz < 8)
-        currentsz = dmasz;
-
-      for (j = start; j + 3 < count; j += nr - 2 ) {
-        nr = MIN2( currentsz, count - j );
-
-        if (nr >= 4)
-        {
-           GLint quads = nr/4;
-           GLint i;
-           ELTS_VARS( ALLOC_ELTS( quads * 6 ) );
-
-           for ( i = j-start ; i < j-start+quads ; i++, elts += 4 ) {
-              EMIT_TWO_ELTS( 0, elts[0], elts[1] );
-              EMIT_TWO_ELTS( 2, elts[3], elts[1] );
-              EMIT_TWO_ELTS( 4, elts[2], elts[3] );
-              INCR_ELTS( 6 );
-           }
-
-           FLUSH();
-        }
-
-        currentsz = dmasz;
-      }
-   }
-}
-
-
-
-static tnl_render_func TAG(render_tab_elts)[GL_POLYGON+2] =
-{
-   TAG(render_points_elts),
-   TAG(render_lines_elts),
-   TAG(render_line_loop_elts),
-   TAG(render_line_strip_elts),
-   TAG(render_triangles_elts),
-   TAG(render_tri_strip_elts),
-   TAG(render_tri_fan_elts),
-   TAG(render_quads_elts),
-   TAG(render_quad_strip_elts),
-   TAG(render_poly_elts),
-   TAG(render_noop),
-};
-
-
-
-#endif
-
-
-
 /* Pre-check the primitives in the VB to prevent the need for
  * fallbacks later on.
  */
-static GLboolean TAG(validate_render)( struct gl_context *ctx,
-                                      struct vertex_buffer *VB )
+static bool TAG(validate_render)(struct gl_context *ctx,
+                                 struct vertex_buffer *VB)
 {
    GLint i;
 
    if (VB->ClipOrMask & ~CLIP_CULL_BIT)
-      return GL_FALSE;
+      return false;
 
-   if (VB->Elts && !HAVE_ELTS)
-      return GL_FALSE;
+   if (VB->Elts)
+      return false;
 
    for (i = 0 ; i < VB->PrimitiveCount ; i++) {
       GLuint prim = VB->Primitive[i].mode;
       GLuint count = VB->Primitive[i].count;
-      GLboolean ok = GL_FALSE;
+      bool ok = false;
 
       if (!count)
-        continue;
+         continue;
 
       switch (prim & PRIM_MODE_MASK) {
       case GL_POINTS:
-        ok = HAVE_POINTS;
-        break;
+         ok = HAVE_POINTS;
+         break;
       case GL_LINES:
-        ok = HAVE_LINES && !ctx->Line.StippleFlag;
-        break;
       case GL_LINE_STRIP:
-        ok = HAVE_LINE_STRIPS && !ctx->Line.StippleFlag;
-        break;
       case GL_LINE_LOOP:
-        ok = HAVE_LINE_STRIPS && !ctx->Line.StippleFlag;
-        break;
+         ok = !ctx->Line.StippleFlag;
+         break;
       case GL_TRIANGLES:
-        ok = HAVE_TRIANGLES;
-        break;
       case GL_TRIANGLE_STRIP:
-        ok = HAVE_TRI_STRIPS;
-        break;
       case GL_TRIANGLE_FAN:
-        ok = HAVE_TRI_FANS;
-        break;
+         ok = true;
+         break;
       case GL_POLYGON:
-        if (HAVE_POLYGONS) {
-           ok = GL_TRUE;
-        }
-        else {
-           ok = (HAVE_TRI_FANS && ctx->Light.ShadeModel == GL_SMOOTH);
-         }
-        break;
+         ok = (HAVE_POLYGONS) || ctx->Light.ShadeModel == GL_SMOOTH;
+         break;
       case GL_QUAD_STRIP:
-        if (VB->Elts) {
-           ok = HAVE_TRI_STRIPS;
-        }
-        else if (HAVE_QUAD_STRIPS) {
-           ok = GL_TRUE;
-        } else if (HAVE_TRI_STRIPS && 
-                   ctx->Light.ShadeModel == GL_FLAT &&
-                   VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride != 0) {
-           if (HAVE_ELTS) {
-              ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
-           }
-           else {
-              ok = GL_FALSE;
-           }
-        }
-        else 
-           ok = HAVE_TRI_STRIPS;
-        break;
+         ok = VB->Elts ||
+              (ctx->Light.ShadeModel != GL_FLAT ||
+               VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride == 0);
+         break;
       case GL_QUADS:
-        if (HAVE_QUADS) {
-           ok = GL_TRUE;
-        } else if (HAVE_ELTS) {
-           ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
-        }
-        else {
-           ok = HAVE_TRIANGLES; /* flatshading is ok. */
-        }
-        break;
+         ok = true; /* flatshading is ok. */
+         break;
       default:
-        break;
+         break;
       }
       
       if (!ok) {
-/*      fprintf(stderr, "not ok %s\n", _mesa_enum_to_string(prim & PRIM_MODE_MASK)); */
-        return GL_FALSE;
+/*          fprintf(stderr, "not ok %s\n", _mesa_enum_to_string(prim & PRIM_MODE_MASK)); */
+         return false;
       }
    }
 
-   return GL_TRUE;
+   return true;
 }
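
A note on the removed fallbacks earlier in this file: render_quads_elts()
turns each quad (e0, e1, e2, e3) into the triangles (e0, e1, e3) and
(e1, e2, e3), keeping the quad's provoking vertex e3 last in both
triangles so flat shading stays correct. A minimal standalone sketch of
that decomposition (quads_to_tris is a hypothetical helper, not a Mesa
function):

    #include <stddef.h>

    /* Expand GL_QUADS elements to GL_TRIANGLES elements, preserving
     * the provoking (last) vertex of each quad.  Mirrors the element
     * order emitted by the removed render_quads_elts() fallback. */
    static void
    quads_to_tris(const unsigned *elts, size_t quad_count, unsigned *out)
    {
       size_t i;
       for (i = 0; i < quad_count; i++, elts += 4, out += 6) {
          out[0] = elts[0]; out[1] = elts[1]; out[2] = elts[3];
          out[3] = elts[1]; out[4] = elts[2]; out[5] = elts[3];
       }
    }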
 
index 1e087b4..e05a2c5 100644
@@ -44,11 +44,7 @@ libmesautil_la_SOURCES = \
        $(MESA_UTIL_FILES) \
        $(MESA_UTIL_GENERATED_FILES)
 
-if ENABLE_SHADER_CACHE
-libmesautil_la_SOURCES += $(MESA_UTIL_SHADER_CACHE_FILES)
-
 libmesautil_la_LIBADD = $(SHA1_LIBS)
-endif
 
 roundeven_test_LDADD = -lm
 
@@ -59,5 +55,7 @@ BUILT_SOURCES = $(MESA_UTIL_GENERATED_FILES)
 CLEANFILES = $(BUILT_SOURCES)
 EXTRA_DIST = format_srgb.py SConscript
 
-format_srgb.c: $(srcdir)/format_srgb.py
-       $(AM_V_GEN) $(PYTHON2) $< > $@
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
+format_srgb.c: format_srgb.py
+       $(PYTHON_GEN) $(srcdir)/format_srgb.py > $@
index 82df3bc..ef38b5a 100644
@@ -1,7 +1,3 @@
-MESA_UTIL_SHADER_CACHE_FILES := \
-       mesa-sha1.c \
-       mesa-sha1.h
-
 MESA_UTIL_FILES :=     \
        bitset.h \
        format_srgb.h \
@@ -9,6 +5,8 @@ MESA_UTIL_FILES :=      \
        hash_table.h \
        list.h \
        macros.h \
+       mesa-sha1.c \
+       mesa-sha1.h \
        ralloc.c \
        ralloc.h \
        register_allocate.c \
@@ -19,6 +17,8 @@ MESA_UTIL_FILES :=    \
        set.c \
        set.h \
        simple_list.h \
+       strndup.c \
+       strndup.h \
        strtod.c \
        strtod.h \
        texcompress_rgtc_tmp.h \
index fa28193..faa1c87 100644
@@ -26,6 +26,8 @@
 
 #include "mesa-sha1.h"
 
+#ifdef HAVE_SHA1
+
 #if defined(HAVE_SHA1_IN_LIBMD)  /* Use libmd for SHA1 */ \
        || defined(HAVE_SHA1_IN_LIBC)   /* Use libc for SHA1 */
 
@@ -314,3 +316,5 @@ _mesa_sha1_format(char *buf, const unsigned char *sha1)
 
    return buf;
 }
+
+#endif
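
With mesa-sha1.c now compiled only when a SHA1 backend was detected, code
using it needs the same build-time guard. A hedged caller-side sketch:
_mesa_sha1_format() and its signature come from the hunk above, while the
surrounding function is illustrative only.

    #include <stdio.h>
    #include "mesa-sha1.h"

    static void
    print_key(const unsigned char sha1[20])
    {
    #ifdef HAVE_SHA1
       char hex[41];                  /* 40 hex digits + NUL */
       _mesa_sha1_format(hex, sha1);  /* signature as shown in the diff */
       printf("cache key: %s\n", hex);
    #else
       (void) sha1;                   /* no SHA1 backend configured */
       puts("cache key: <unavailable>");
    #endif
    }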
diff --git a/src/util/strndup.c b/src/util/strndup.c
new file mode 100644
index 0000000..ca1c6f5
--- /dev/null
+++ b/src/util/strndup.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#if defined(_WIN32)
+#include <stdlib.h>
+#include <string.h>
+#include "strndup.h"
+
+char *
+strndup(const char *str, size_t max)
+{
+   size_t n;
+   char *ptr;
+
+   if (!str)
+      return NULL;
+
+   n = strlen(str);
+   if (n > max)
+      n = max;
+
+   ptr = (char *) calloc(n + 1, sizeof(char));
+   if (!ptr)
+      return NULL;
+
+   memcpy(ptr, str, n);
+   return ptr;
+}
+
+#endif
diff --git a/src/util/strndup.h b/src/util/strndup.h
new file mode 100644
index 0000000..c5ed7a8
--- /dev/null
+++ b/src/util/strndup.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdlib.h> // size_t
+
+#if defined(_WIN32)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+char *strndup(const char *str, size_t max);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
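
A short usage sketch for the new fallback: strndup() copies at most max
bytes and always NUL-terminates, and the caller owns the result. On
non-Windows platforms the same semantics come from the libc strndup()
(POSIX.1-2008), which this header deliberately does not redeclare.

    #include <assert.h>
    #include <stdlib.h>
    #include <string.h>
    #include "strndup.h"

    int
    main(void)
    {
       char *s = strndup("vertex_shader", 6);   /* copies "vertex" */
       assert(s != NULL && strcmp(s, "vertex") == 0);
       free(s);                                 /* caller frees */
       return 0;
    }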
index dcd5581..8acfc06 100644
@@ -167,9 +167,7 @@ brw_vs_populate_key(struct brw_context *brw,
    /* Just upload the program verbatim for now.  Always send it all
     * the inputs it asks for, whether they are varying or not.
     */
-   key->base.program_string_id = vp->id;
-   brw_setup_vue_key_clip_info(brw, &key->base,
-                               vp->program.Base.UsesClipDistanceOut);
+   key->program_string_id = vp->id;
 
    /* _NEW_POLYGON */
    if (brw->gen < 6) {
@@ -193,7 +191,7 @@ brw_vs_populate_key(struct brw_context *brw,
 
    /* _NEW_TEXTURE */
    brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
-                                      &key->base.tex);
+                                      &key->tex);
 }
 
 static bool
@@ -250,14 +248,15 @@ really_do_vs_prog(struct brw_context *brw,
     * distance varying slots whenever clipping is enabled, even if the vertex
     * shader doesn't write to gl_ClipDistance.
     */
-   if (key->base.userclip_active) {
+   if (key->nr_userclip_plane_consts) {
       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
    }
 
    brw_compute_vue_map(brw->intelScreen->devinfo,
-                       &prog_data->base.vue_map, outputs_written);
-\
+                       &prog_data->base.vue_map, outputs_written,
+                       prog ? prog->SeparateShader : false);
+
    set_binding_table_layout(&prog_data->base.base, pipeline,
                             VK_SHADER_STAGE_VERTEX);
 
@@ -558,18 +557,11 @@ brw_gs_populate_key(struct brw_context *brw,
 
    memset(key, 0, sizeof(*key));
 
-   key->base.program_string_id = gp->id;
-   brw_setup_vue_key_clip_info(brw, &key->base,
-                               gp->program.Base.UsesClipDistanceOut);
+   key->program_string_id = gp->id;
 
    /* _NEW_TEXTURE */
    brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
-                                      &key->base.tex);
-
-   struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
-
-   /* BRW_NEW_VUE_MAP_VS */
-   key->input_varyings = prog_data->base.vue_map.slots_valid;
+                                      &key->tex);
 }
 
 static bool
index 367c4f8..1f6c64a 100644
@@ -108,8 +108,7 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state)
       nir_builder_instr_insert(&state->builder, &new_load->instr);
 
       nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                               nir_src_for_ssa(&new_load->dest.ssa),
-                               state->shader);
+                               nir_src_for_ssa(&new_load->dest.ssa));
 
       nir_instr_remove(&intrin->instr);
    }
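
The hunk above tracks an API change: nir_ssa_def_rewrite_uses() no longer
takes the shader, presumably because it can be recovered from the def
itself. A hedged sketch of the resulting call-site pattern in a lowering
pass (replace_with is illustrative, not a Mesa helper):

    #include "nir.h"

    static void
    replace_with(nir_intrinsic_instr *old, nir_ssa_def *repl)
    {
       /* Point every use of the old result at the replacement, then
        * drop the dead instruction, as in the hunk above. */
       nir_ssa_def_rewrite_uses(&old->dest.ssa, nir_src_for_ssa(repl));
       nir_instr_remove(&old->instr);
    }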