-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)
LOCAL_CFLAGS += \
+ -D__STDC_LIMIT_MACROS \
-DHAVE___BUILTIN_EXPECT \
-DHAVE___BUILTIN_FFS \
-DHAVE___BUILTIN_FFSLL \
ifeq ($(MESA_ENABLE_LLVM),true)
LOCAL_CFLAGS += \
- -DHAVE_LLVM=0x0305 -DLLVM_VERSION_PATCH=2 \
+ -DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2 \
-D__STDC_CONSTANT_MACROS \
-D__STDC_FORMAT_MACROS \
-D__STDC_LIMIT_MACROS
LIBDRM_INTEL_REQUIRED=2.4.61
LIBDRM_NVVIEUX_REQUIRED=2.4.33
LIBDRM_NOUVEAU_REQUIRED=2.4.62
-LIBDRM_FREEDRENO_REQUIRED=2.4.64
+LIBDRM_FREEDRENO_REQUIRED=2.4.65
DRI2PROTO_REQUIRED=2.6
DRI3PROTO_REQUIRED=1.0
PRESENTPROTO_REQUIRED=1.0
dnl
dnl library names
dnl
+dnl Unfortunately we need to do a few things that libtool can't help us with,
+dnl so we need some knowledge of shared library filenames:
+dnl
+dnl LIB_EXT is the extension used when creating symlinks for alternate
+dnl filenames for a shared library which will be dynamically loaded
+dnl
+dnl IMP_LIB_EXT is the extension used when checking for the presence of
+dnl the file for a shared library we wish to link with
+dnl
case "$host_os" in
darwin* )
- LIB_EXT='dylib' ;;
+ LIB_EXT='dylib'
+ IMP_LIB_EXT=$LIB_EXT
+ ;;
cygwin* )
- LIB_EXT='dll' ;;
+ LIB_EXT='dll'
+ IMP_LIB_EXT='dll.a'
+ ;;
aix* )
- LIB_EXT='a' ;;
+ LIB_EXT='a'
+ IMP_LIB_EXT=$LIB_EXT
+ ;;
* )
- LIB_EXT='so' ;;
+ LIB_EXT='so'
+ IMP_LIB_EXT=$LIB_EXT
+ ;;
esac
AC_SUBST([LIB_EXT])
AC_SUBST(SHA1_LIBS)
AC_SUBST(SHA1_CFLAGS)
+# Enable a define for SHA1
+if test "x$with_sha1" != "x"; then
+ DEFINES="$DEFINES -DHAVE_SHA1"
+fi
+
# Allow user to configure out the shader-cache feature
AC_ARG_ENABLE([shader-cache],
AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
[DEFINES="${DEFINES} -DGLX_USE_TLS"])
+dnl Read-only text section on x86 hardened platforms
+AC_ARG_ENABLE([glx-read-only-text],
+ [AS_HELP_STRING([--enable-glx-read-only-text],
+ [Disable writable .text section on x86 (decreases performance) @<:@default=disabled@:>@])],
+ [enable_glx_read_only_text="$enableval"],
+ [enable_glx_read_only_text=no])
+if test "x$enable_glx_read_only_text" = xyes; then
+ DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
+fi
+
dnl
dnl More DRI setup
dnl
if test "x$enable_gallium_llvm" != "xyes"; then
AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
fi
- llvm_check_version_for "3" "4" "2" $1
+ llvm_check_version_for "3" "5" "0" $1
if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then
AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.])
fi
gallium_require_drm "vc4"
gallium_require_drm_loader
- case "$host_cpu" in
- i?86 | x86_64 | amd64)
- USE_VC4_SIMULATOR=yes
- ;;
- esac
+ PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
+ [USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
;;
*)
AC_MSG_ERROR([Unknown Gallium driver: $driver])
LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
+ dnl llvm-config may not give the right answer when llvm is built as a
+ dnl single shared library, so we must work the library name out for
+ dnl ourselves.
+ dnl (See https://llvm.org/bugs/show_bug.cgi?id=6823)
if test "x$enable_llvm_shared_libs" = xyes; then
dnl We can't use $LLVM_VERSION because it has 'svn' stripped out,
LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
- AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.so"], [llvm_have_one_so=yes])
+ AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.$IMP_LIB_EXT"], [llvm_have_one_so=yes])
if test "x$llvm_have_one_so" = xyes; then
dnl LLVM was built using auto*, so there is only one shared object.
else
dnl If LLVM was built with CMake, there will be one shared object per
dnl component.
- AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.so"],
+ AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.$IMP_LIB_EXT"],
[AC_MSG_ERROR([Could not find llvm shared libraries:
Please make sure you have built llvm with the --enable-shared option
and that your llvm libraries are installed in $LLVM_LIBDIR
- Enhanced per-sample shading DONE (r600)
- Interpolation functions DONE (r600)
- New overload resolution rules DONE
- GL_ARB_gpu_shader_fp64 DONE (llvmpipe, softpipe)
+ GL_ARB_gpu_shader_fp64 DONE (r600, llvmpipe, softpipe)
GL_ARB_sample_shading DONE (i965, nv50, r600)
GL_ARB_shader_subroutine DONE (i965, nv50, r600, llvmpipe, softpipe)
GL_ARB_tessellation_shader DONE ()
GL_ARB_texture_buffer_object_rgb32 DONE (i965, r600, llvmpipe, softpipe)
GL_ARB_texture_cube_map_array DONE (i965, nv50, r600, llvmpipe, softpipe)
GL_ARB_texture_gather DONE (i965, nv50, r600, llvmpipe, softpipe)
- GL_ARB_texture_query_lod DONE (i965, nv50, r600)
+ GL_ARB_texture_query_lod DONE (i965, nv50, r600, softpipe)
GL_ARB_transform_feedback2 DONE (i965, nv50, r600, llvmpipe, softpipe)
GL_ARB_transform_feedback3 DONE (i965, nv50, r600, llvmpipe, softpipe)
GL_ARB_get_program_binary DONE (0 binary formats)
GL_ARB_separate_shader_objects DONE (all drivers)
GL_ARB_shader_precision DONE (all drivers that support GLSL 4.10)
- GL_ARB_vertex_attrib_64bit DONE (llvmpipe, softpipe)
+ GL_ARB_vertex_attrib_64bit DONE (r600, llvmpipe, softpipe)
GL_ARB_viewport_array DONE (i965, nv50, r600, llvmpipe)
GL_ARB_program_interface_query DONE (all drivers)
GL_ARB_robust_buffer_access_behavior not started
GL_ARB_shader_image_size DONE (i965)
- GL_ARB_shader_storage_buffer_object in progress (Iago Toral, Samuel Iglesias)
+ GL_ARB_shader_storage_buffer_object DONE (i965)
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600, radeonsi)
GL_ARB_direct_state_access DONE (all drivers)
GL_ARB_get_texture_sub_image DONE (all drivers)
- GL_ARB_shader_texture_image_samples not started
- GL_ARB_texture_barrier DONE (nv50, nvc0, r600, radeonsi)
+ GL_ARB_shader_texture_image_samples DONE (i965, nv50, nvc0, r600, radeonsi)
+ GL_ARB_texture_barrier DONE (i965, nv50, nvc0, r600, radeonsi)
GL_KHR_context_flush_control DONE (all - but needs GLX/EGL extension to be useful)
GL_KHR_robust_buffer_access_behavior not started
GL_KHR_robustness 90% done (the ARB variant)
GL_ARB_shader_atomic_counters DONE (i965)
GL_ARB_shader_image_load_store DONE (i965)
GL_ARB_shader_image_size DONE (i965)
- GL_ARB_shader_storage_buffer_object in progress (Iago Toral, Samuel Iglesias)
+ GL_ARB_shader_storage_buffer_object DONE (i965)
GL_ARB_shading_language_packing DONE (all drivers)
GL_ARB_separate_shader_objects DONE (all drivers)
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GS5 Packing/bitfield/conversion functions DONE (i965, nvc0, r600, radeonsi)
GL_EXT_shader_integer_mix DONE (all drivers that support GLSL)
- Additional functions not covered above:
- glMemoryBarrierByRegion
- glGetTexLevelParameter[fi]v - needs updates to restrict to GLES enums
- glGetBooleani_v - needs updates to restrict to GLES enums
+ Additional functionality not covered above:
+ glMemoryBarrierByRegion DONE
+ glGetTexLevelParameter[fi]v - needs updates DONE
+ glGetBooleani_v - restrict to GLES enums
+ gl_HelperInvocation support
+
+GLES3.2, GLSL ES 3.2
+ GL_EXT_color_buffer_float DONE (all drivers)
+ GL_KHR_blend_equation_advanced not started
+ GL_KHR_debug DONE (all drivers)
+ GL_KHR_robustness 90% done (the ARB variant)
+ GL_KHR_texture_compression_astc_ldr DONE (i965/gen9+)
+ GL_OES_copy_image not started (based on GL_ARB_copy_image, which is done for some drivers)
+ GL_OES_draw_buffers_indexed not started
+ GL_OES_draw_elements_base_vertex not started (based on GL_ARB_draw_elements_base_vertex, which is done for all drivers)
+ GL_OES_geometry_shader not started (based on GL_ARB_geometry_shader4, which is done for all drivers)
+ GL_OES_gpu_shader5 not started (based on parts of GL_ARB_gpu_shader5, which is done for some drivers)
+ GL_OES_primitive_bounding_box not started
+ GL_OES_sample_shading not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
+ GL_OES_sample_variables not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
+ GL_OES_shader_image_atomic not started (based on parts of GL_ARB_shader_image_load_store, which is done for some drivers)
+ GL_OES_shader_io_blocks not started (based on parts of GLSL 1.50, which is done)
+ GL_OES_shader_multisample_interpolation not started (based on parts of GL_ARB_gpu_shader5, which is done)
+ GL_OES_tessellation_shader not started (based on GL_ARB_tessellation_shader, which is done for some drivers)
+ GL_OES_texture_border_clamp not started (based on GL_ARB_texture_border_clamp, which is done)
+ GL_OES_texture_buffer not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done)
+ GL_OES_texture_cube_map_array not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers)
+ GL_OES_texture_stencil8 not started (based on GL_ARB_texture_stencil8, which is done for some drivers)
+ GL_OES_texture_storage_multisample_2d_array DONE (all drivers that support GL_ARB_texture_multisample)
More info about these features and the work involved can be found at
http://dri.freedesktop.org/wiki/MissingFunctionality
tree.</p>
</dd>
+<dt><code>--sysconfdir=DIR</code></dt>
+<dd><p>This option specifies the directory where the configuration
+files will be installed. The default is <code>${prefix}/etc</code>.
+Currently there's only one config file provided when dri drivers are
+enabled - it's <code>drirc</code>.</p>
+</dd>
+
<dt><code>--enable-static, --disable-shared</code></dt>
<dd><p>By default, Mesa
will build shared libraries. Either of these options will force static
<dt><code>--with-expat=DIR</code></dt>
<dd><p><strong>DEPRECATED</strong>, use <code>PKG_CONFIG_PATH</code> instead.</p>
<p>The DRI-enabled libGL uses expat to
-parse the DRI configuration files in <code>/etc/drirc</code> and
+parse the DRI configuration files in <code>${sysconfdir}/drirc</code> and
<code>~/.drirc</code>. This option allows a specific expat installation
to be used. For example, <code>--with-expat=/usr/local</code> will
search for expat headers and libraries in <code>/usr/local/include</code>
<li>no16 - suppress generation of 16-wide fragment shaders. useful for debugging broken shaders</li>
<li>blorp - emit messages about the blorp operations (blits &amp; clears)</li>
<li>nodualobj - suppress generation of dual-object geometry shader code</li>
+ <li>optimizer - dump shader assembly to files at each optimization pass and iteration that makes progress</li>
</ul>
</ul>
<h1>News</h1>
-<h2>August 22 2015</h2>
+<h2>September 28, 2015</h2>
+<p>
+<a href="relnotes/11.0.2.html">Mesa 11.0.2</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 26, 2015</h2>
+<p>
+<a href="relnotes/11.0.1.html">Mesa 11.0.1</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 20, 2015</h2>
+<p>
+<a href="relnotes/10.6.8.html">Mesa 10.6.8</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 12, 2015</h2>
+<p>
+<a href="relnotes/11.0.0.html">Mesa 11.0.0</a> is released. This is a new
+development release. See the release notes for more information about
+the release.
+</p>
+
+<h2>September 10, 2015</h2>
+<p>
+<a href="relnotes/10.6.7.html">Mesa 10.6.7</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 4, 2015</h2>
+<p>
+<a href="relnotes/10.6.6.html">Mesa 10.6.6</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>August 22, 2015</h2>
<p>
<a href="relnotes/10.6.5.html">Mesa 10.6.5</a> is released.
This is a bug-fix release.
</p>
-<h2>August 11 2015</h2>
+<h2>August 11, 2015</h2>
<p>
<a href="relnotes/10.6.4.html">Mesa 10.6.4</a> is released.
This is a bug-fix release.
</p>
-<h2>July 26 2015</h2>
+<h2>July 26, 2015</h2>
<p>
<a href="relnotes/10.6.3.html">Mesa 10.6.3</a> is released.
This is a bug-fix release.
</p>
-<h2>July 11 2015</h2>
+<h2>July 11, 2015</h2>
<p>
<a href="relnotes/10.6.2.html">Mesa 10.6.2</a> is released.
This is a bug-fix release.
</p>
<ul>
+<li><a href="relnotes/11.0.2.html">11.0.2 release notes</a>
+<li><a href="relnotes/11.0.1.html">11.0.1 release notes</a>
+<li><a href="relnotes/10.6.8.html">10.6.8 release notes</a>
+<li><a href="relnotes/11.0.0.html">11.0.0 release notes</a>
+<li><a href="relnotes/10.6.7.html">10.6.7 release notes</a>
+<li><a href="relnotes/10.6.6.html">10.6.6 release notes</a>
<li><a href="relnotes/10.6.5.html">10.6.5 release notes</a>
<li><a href="relnotes/10.6.4.html">10.6.4 release notes</a>
<li><a href="relnotes/10.6.3.html">10.6.3 release notes</a>
--- /dev/null
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Mesa Release Notes</title>
+ <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+ <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.6 Release Notes / September 04, 2015</h1>
+
+<p>
+Mesa 10.6.6 is a bug fix release which fixes bugs found since the 10.6.5 release.
+</p>
+<p>
+Mesa 10.6.6 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3. OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+416517aa9df4791f97d34451a9e4da33c966afcd18c115c5769b92b15b018ef5 mesa-10.6.6.tar.gz
+570f2154b7340ff5db61ff103bc6e85165b8958798b78a50fa2df488e98e5778 mesa-10.6.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is > 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: "Found invalid pixel values"</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - "high fidelity": Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chris Wilson (2):</p>
+<ul>
+ <li>i965: Prevent coordinate overflow in intel_emit_linear_blit</li>
+ <li>i965: Always re-emit the pipeline select during invariant state emission</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+ <li>mesa: add missing queries for ARB_direct_state_access</li>
+</ul>
+
+<p>Dave Airlie (8):</p>
+<ul>
+ <li>mesa/arb_gpu_shader_fp64: add support for glGetUniformdv</li>
+ <li>mesa/texgetimage: fix missing stencil check</li>
+ <li>st/readpixels: fix accel path for skipimages.</li>
+ <li>texcompress_s3tc/fxt1: fix stride checks (v1.1)</li>
+ <li>mesa/readpixels: check strides are equal before skipping conversion</li>
+ <li>mesa: enable texture stencil8 for multisample</li>
+ <li>r600/sb: update last_cf for finalize if.</li>
+ <li>r600g: fix calculation for gpr allocation</li>
+</ul>
+
+<p>David Heidelberg (1):</p>
+<ul>
+ <li>st/nine: Require gcc >= 4.6</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+ <li>docs: add sha256 checksums for 10.6.5</li>
+ <li>get-pick-list.sh: Require explicit "10.6" for nominating stable patches</li>
+</ul>
+
+<p>Glenn Kennard (4):</p>
+<ul>
+ <li>r600g: Fix assert in tgsi_cmp</li>
+ <li>r600g/sb: Handle undef in read port tracker</li>
+ <li>r600g/sb: Don't read junk after EOP</li>
+ <li>r600g/sb: Don't crash on empty if jump target</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+ <li>st/mesa: fix assignments with 4-operand arguments (i.e. BFI)</li>
+ <li>st/mesa: pass through 4th opcode argument in bitmap/pixel visitors</li>
+ <li>nv50,nvc0: disable depth bounds test on blit</li>
+ <li>nv50: fix 2d engine blits for 64- and 128-bit formats</li>
+ <li>mesa: only copy the requested teximage faces</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+ <li>i965/fs: Split VGRFs after lowering pull constants</li>
+</ul>
+
+<p>Kenneth Graunke (3):</p>
+<ul>
+ <li>i965: Fix copy propagation type changes.</li>
+ <li>Revert "i965: Advertise a line width of 40.0 on Cherryview and Skylake."</li>
+ <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+ <li>gallium/radeon: fix the ADDRESS_HI mask for EVENT_WRITE CIK packets</li>
+ <li>mesa: create multisample fallback textures like normal textures</li>
+ <li>radeonsi: fix a Unigine Heaven hang when drirc is missing</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+ <li>i965/fs: Handle MRF destinations in lower_integer_multiplication().</li>
+</ul>
+
+<p>Neil Roberts (2):</p>
+<ul>
+ <li>i965: Swap the order of the vertex ID and edge flag attributes</li>
+ <li>i965/bdw: Fix 3DSTATE_VF_INSTANCING when the edge flag is used</li>
+</ul>
+
+<p>Tapani Pälli (5):</p>
+<ul>
+ <li>mesa: update fbo state in glTexStorage</li>
+ <li>glsl: build stageref mask using IR, not symbol table</li>
+ <li>glsl: expose build_program_resource_list function</li>
+ <li>glsl: create program resource list after LinkShader</li>
+ <li>mesa: add GL_RED, GL_RG support for floating point textures</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- /dev/null
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Mesa Release Notes</title>
+ <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+ <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.7 Release Notes / September 10, 2015</h1>
+
+<p>
+Mesa 10.6.7 is a bug fix release which fixes bugs found since the 10.6.6 release.
+</p>
+<p>
+Mesa 10.6.7 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3. OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+4ba10c59abee30d72476543a57afd2f33803dabf4620dc333b335d47966ff842 mesa-10.6.7.tar.gz
+feb1f640b915dada88a7c793dfaff0ae23580f8903f87a6b76469253de0d28d8 mesa-10.6.7.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Dave Airlie (1):</p>
+<ul>
+ <li>mesa/teximage: use correct extension for accept stencil texture.</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+ <li>docs: add sha256 checksums for 10.6.6</li>
+ <li>Revert "i965: Momentarily pretend to support ARB_texture_stencil8 for blits."</li>
+ <li>Update version to 10.6.7</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+ <li>glsl: Handle attribute aliasing in attribute storage limit check.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- /dev/null
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Mesa Release Notes</title>
+ <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+ <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.8 Release Notes / September 20, 2015</h1>
+
+<p>
+Mesa 10.6.8 is a bug fix release which fixes bugs found since the 10.6.7 release.
+</p>
+<p>
+Mesa 10.6.8 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3. OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+1f34dba2a8059782e3e4e0f18b9628004e253b2c69085f735b846d2e63c9e250 mesa-10.6.8.tar.gz
+e36ee5ceeadb3966fb5ce5b4cf18322dbb76a4f075558ae49c3bba94f57d58fd mesa-10.6.8.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alejandro Piñeiro (1):</p>
+<ul>
+ <li>i965/vec4: fill src_reg type using the constructor type parameter</li>
+</ul>
+
+<p>Antia Puentes (1):</p>
+<ul>
+ <li>i965/vec4: Fix saturation errors when coalescing registers</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+ <li>docs: add sha256 checksums for 10.6.7</li>
+ <li>cherry-ignore: add commit non applicable for 10.6</li>
+</ul>
+
+<p>Hans de Goede (4):</p>
+<ul>
+ <li>nv30: Fix creation of scanout buffers</li>
+ <li>nv30: Implement color resolve for msaa</li>
+ <li>nv30: Fix max width / height checks in nv30 sifm code</li>
+ <li>nv30: Disable msaa unless requested from the env by NV30_MAX_MSAA</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+ <li>mesa: Pass the type to _mesa_uniform_matrix as a glsl_base_type</li>
+ <li>mesa: Don't allow wrong type setters for matrix uniforms</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+ <li>st/mesa: don't fall back to 16F when 32F is requested</li>
+ <li>nvc0: always emit a full shader colormask</li>
+ <li>nvc0: remove BGRA4 format support</li>
+ <li>st/mesa: avoid integer overflows with buffers >= 512MB</li>
+ <li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+ <li>i965/vec4: Don't reswizzle hardware registers</li>
+</ul>
+
+<p>Jose Fonseca (1):</p>
+<ul>
+ <li>gallivm: Workaround LLVM PR23628.</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+ <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Oded Gabbay (1):</p>
+<ul>
+ <li>llvmpipe: convert double to long long instead of unsigned long long</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+ <li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
+</ul>
+
+<p>Ulrich Weigand (1):</p>
+<ul>
+ <li>mesa: Fix texture compression on big-endian systems</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+ <li>gallivm: Do not use NoFramePointerElim with LLVM 3.7.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
<iframe src="../contents.html"></iframe>
<div class="content">
-<h1>Mesa 11.0.0 Release Notes / TBD</h1>
+<h1>Mesa 11.0.0 Release Notes / September 12, 2015</h1>
<p>
Mesa 11.0.0 is a new development release.
<h2>SHA256 checksums</h2>
<pre>
-TBD.
+7d7e4ddffa3b162506efa01e2cc41e329caa4995336b92e5cc21f2e1fb36c1b3 mesa-11.0.0.tar.gz
+e095a3eb2eca9dfde7efca8946527c8ae20a0cc938a8c78debc7f158ad44af32 mesa-11.0.0.tar.xz
</pre>
<li>EGL 1.5 on r600, radeonsi, nv50, nvc0</li>
</ul>
+
<h2>Bug fixes</h2>
-TBD.
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=51658">Bug 51658</a> - r200 (&amp; possibly radeon) DRI fixes for gnome shell on Mesa 8.0.3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65525">Bug 65525</a> - [llvmpipe] lp_scene.h:210:lp_scene_alloc: Assertion `size &lt;= (64 * 1024)' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66346">Bug 66346</a> - shader_query.cpp:49: error: invalid conversion from 'void*' to 'GLuint'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73512">Bug 73512</a> - [clover] mesa.icd. should contain full path</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73528">Bug 73528</a> - Deferred lighting in Second Life causes system hiccups and screen flickering</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74329">Bug 74329</a> - Please expose OES_texture_float and OES_texture_half_float on the ES3 context</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80500">Bug 80500</a> - Flickering shadows in unreleased title trace</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82186">Bug 82186</a> - [r600g] BARTS GPU lockup with minecraft shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84225">Bug 84225</a> - Allow constant-index-expression sampler array indexing with GLSL-ES &lt; 300</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85252">Bug 85252</a> - Segfault in compiler while processing ternary operator with void arguments</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89131">Bug 89131</a> - [Bisected] Graphical corruption in Weston, shows old framebuffer pieces</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90000">Bug 90000</a> - [i965 Bisected NIR] Piglit/gglean_fragprog1-z-write_test fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90073">Bug 90073</a> - Leaks in xcb_dri3_open_reply_fds() and get_render_node_from_id_path_tag</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90249">Bug 90249</a> - Fails to build egl_dri2 on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90310">Bug 90310</a> - Fails to build gallium_dri.so at linking stage with clang because of multiple redefinitions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90347">Bug 90347</a> - [NVE0+] Failure to insert texbar under some circumstances (causing bad colors in Terasology)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90466">Bug 90466</a> - arm: linker error ndefined reference to `nir_metadata_preserve'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90520">Bug 90520</a> - Register spilling clobbers registers used elsewhere in the shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90537">Bug 90537</a> - radeonsi bo/va conflict on RADEON_GEM_VA (rscreen->ws->buffer_from_handle returns NULL)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90547">Bug 90547</a> - [BDW/BSW/SKL Bisected]Piglit/glean@vertprog1-rsq_test_2_(reciprocal_square_root_of_negative_value) fais</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90580">Bug 90580</a> - [HSW bisected] integer multiplication bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90600">Bug 90600</a> - IOError: [Errno 2] No such file or directory: 'gl_API.xml'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90629">Bug 90629</a> - [i965] SIMD16 dual_source_blend assertion `src[i].file != GRF || src[i].width == dst.width' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90691">Bug 90691</a> - [BSW]Piglit/spec/nv_conditional_render/dlist fails intermittently</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90728">Bug 90728</a> - dvd playback with vlc and vdpau causes segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is > 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90749">Bug 90749</a> - [BDW Bisected]dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_max.primitives.lines_wide fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90797">Bug 90797</a> - [ALL bisected] Mesa change cause performance case manhattan fail.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90817">Bug 90817</a> - swrast fails to load with certain remote X servers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90830">Bug 90830</a> - [bsw bisected regression] GPU hang for spec.arb_gpu_shader5.execution.sampler_array_indexing.vs-nonzero-base</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90839">Bug 90839</a> - [10.5.5/10.6 regression, bisected] PBO glDrawPixels no longer using blit fastpath</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90873">Bug 90873</a> - Kernel hang, TearFree On, Mate desktop environment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90887">Bug 90887</a> - PhiMovesPass in register allocator broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90895">Bug 90895</a> - [IVB/HSW/BDW/BSW Bisected] GLB2.7 Egypt, GfxBench3.0 T-Rex &amp; ALU and many SynMark cases performance reduced by 10-23%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: "Found invalid pixel values"</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90903">Bug 90903</a> - egl_dri2.c:dri2_load fails to load libglapi on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90904">Bug 90904</a> - OSX: EXC_BAD_ACCESS when using translate_sse + gallium + softpipe/llvmpipe</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90905">Bug 90905</a> - mesa: Finish subdir-objects transition</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - "high fidelity": Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91022">Bug 91022</a> - [g45 g965 bisected] assertions generated from textureGrad cube samplers fix</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91047">Bug 91047</a> - [SNB Bisected] Messed up Fog in Super Smash Bros. Melee in Dolphin</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91056">Bug 91056</a> - The Bard's Tale (2005, native) has rendering issues</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91077">Bug 91077</a> - dri2_glx.c:1186: undefined reference to `loader_open_device'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91099">Bug 91099</a> - [llvmpipe] piglit glsl-max-varyings >max_varying_components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91101">Bug 91101</a> - [softpipe] piglit glsl-1.50@execution@geometry@max-input-components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91117">Bug 91117</a> - Nimbus (running in wine) has rendering issues, objects are semi-transparent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91124">Bug 91124</a> - Civilization V (in Wine) has rendering issues: text missing, menu bar corrupted</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91173">Bug 91173</a> - Oddworld: Stranger's Wrath HD: disfigured models in wrong colors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91193">Bug 91193</a> - [290x] Dota2 reborn ingame rendering breaks with git-af4b9c7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91222">Bug 91222</a> - lp_test_format regression on CentOS 7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91226">Bug 91226</a> - Crash in glLinkProgram (NEW)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91231">Bug 91231</a> - [NV92] Psychonauts (native) segfaults on start when DRI3 enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91290">Bug 91290</a> - SIGSEGV glcpp/glcpp-parse.y:1077</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91337">Bug 91337</a> - OSMesaGetProcAdress("OSMesaPixelStore") returns nil</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91418">Bug 91418</a> - Visual Studio 2015 vsnprintf build error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91425">Bug 91425</a> - [regression, bisected] Piglit spec/ext_packed_float/ getteximage-invalid-format-for-packed-type fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91441">Bug 91441</a> - make check DispatchSanity_test.GL30 regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91444">Bug 91444</a> - regression bisected radeonsi: don't change pipe_resource in resource_copy_region</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91461">Bug 91461</a> - gl_TessLevel* writes have no effect for all but the last TCS invocation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91513">Bug 91513</a> - [IVB/HSW/BDW/SKL Bisected] Lightsmark performance reduced by 7%-10%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91544">Bug 91544</a> - [i965, regression, bisected] regression of several tests in 93977d3a151675946c03e</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91551">Bug 91551</a> - DXTn compressed normal maps produce severe artifacts on all NV5x and NVDx chipsets</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91570">Bug 91570</a> - Upgrading mesa to 10.6 causes segfault in OpenGL applications with GeForce4 MX 440 / AGP 8X</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91591">Bug 91591</a> - rounding.h:102:2: error: #error "Unsupported or undefined LONG_BIT"</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91610">Bug 91610</a> - [BSW] GPU hang for spec.shaders.point-vertex-id gl_instanceid divisor</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91847">Bug 91847</a> - glGenerateTextureMipmap not working (no errors) unless glActiveTexture(GL_TEXTURE1) is called before</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91857">Bug 91857</a> - Mesa 10.6.3 linker is slow</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91881">Bug 91881</a> - regression: GPU lockups since mesa-11.0.0_rc1 on RV620 (r600) driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91890">Bug 91890</a> - [nve7] witcher2: blurry image & DATA_ERRORs (class 0xa097 mthd 0x2380/0x238c)</li>
+
+</ul>
+
<h2>Changes</h2>
-TBD.
+<ul>
+<li>Removed the EGL loader from the Linux SCons build.</li>
+</ul>
</div>
</body>
--- /dev/null
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Mesa Release Notes</title>
+ <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+ <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.1 Release Notes / September 26, 2015</h1>
+
+<p>
+Mesa 11.0.1 is a bug fix release which fixes bugs found since the 11.0.0 release.
+</p>
+<p>
+Mesa 11.0.1 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1. OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+6dab262877e12c0546a0e2970c6835a0f217e6d4026ccecb3cd5dd733d1ce867 mesa-11.0.1.tar.gz
+43d0dfcd1f1e36f07f8228cd76d90175d3fc74c1ed25d7071794a100a98ef2a6 mesa-11.0.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91114">Bug 91114</a> - ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91716">Bug 91716</a> - [bisected] piglit.shaders.glsl-vs-int-attrib regresses on 32 bit BYT, HSW, IVB, SNB</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92009">Bug 92009</a> - ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels fails</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Antia Puentes (2):</p>
+<ul>
+ <li>i965/vec4: Fix saturation errors when coalescing registers</li>
+ <li>i965/vec4_nir: Load constants as integers</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+ <li>meta: Abort meta pbo path if TexSubImage need signed unsigned conversion</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+ <li>docs: add sha256 checksums for 11.0.0</li>
+ <li>Update version to 11.0.1</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+ <li>mesa: Fix GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for default framebuffer.</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+ <li>t_dd_dmatmp: Make "count" actually be the count</li>
+ <li>t_dd_dmatmp: Clean up improper code formatting from previous patch</li>
+ <li>t_dd_dmatmp: Use '& 3' instead of '% 4' everywhere</li>
+ <li>t_dd_dmatmp: Pull out common 'count -= count & 3' code</li>
+ <li>t_dd_dmatmp: Use addition instead of subtraction in loop bounds</li>
+</ul>
+
+<p>Ilia Mirkin (6):</p>
+<ul>
+ <li>st/mesa: avoid integer overflows with buffers >= 512MB</li>
+ <li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
+ <li>freedreno/a3xx: fix blending of L8 format</li>
+ <li>nv50,nvc0: detect underlying resource changes and update tic</li>
+ <li>nv50,nvc0: flush texture cache in presence of coherent bufs</li>
+ <li>radeonsi: load fmask ptr relative to the resources array</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+ <li>nir: Fix a bunch of ralloc parenting errors</li>
+ <li>i965/vec4: Don't reswizzle hardware registers</li>
+</ul>
+
+<p>Jeremy Huddleston (1):</p>
+<ul>
+ <li>configure.ac: Add support to enable read-only text segment on x86.</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+ <li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
+</ul>
+
+<p>Tapani Pälli (2):</p>
+<ul>
+ <li>mesa: fix errors when reading depth with glReadPixels</li>
+ <li>i965: fix textureGrad for cubemaps</li>
+</ul>
+
+<p>Ulrich Weigand (1):</p>
+<ul>
+ <li>mesa: Fix texture compression on big-endian systems</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- /dev/null
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Mesa Release Notes</title>
+ <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+ <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.2 Release Notes / September 28, 2015</h1>
+
+<p>
+Mesa 11.0.2 is a bug fix release which fixes bugs found since the 11.0.1 release.
+</p>
+<p>
+Mesa 11.0.2 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1. OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+45170773500d6ae2f9eb93fc85efee69f7c97084411ada4eddf92f78bca56d20 mesa-11.0.2.tar.gz
+fce11fb27eb87adf1e620a76455d635c6136dfa49ae58c53b34ef8d0c7b7eae4 mesa-11.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91582">Bug 91582</a> - [bisected] Regression in DEQP gles2.functional.negative_api.texture.texsubimage2d_neg_offset</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91970">Bug 91970</a> - [BSW regression] dEQP-GLES3.functional.shaders.precision.int.highp_mul_vertex</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92095">Bug 92095</a> - [Regression, bisected] arb_shader_atomic_counters.compiler.builtins.frag</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Eduardo Lima Mitev (3):</p>
+<ul>
+ <li>mesa: Fix order of format+type and internal format checks for glTexImageXD ops</li>
+ <li>mesa: Move _mesa_base_tex_format() from teximage to glformats files</li>
+ <li>mesa: Use the effective internal format instead for validation</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+ <li>docs: add sha256 checksums for 11.0.1</li>
+ <li>Update version to 11.0.2</li>
+</ul>
+
+<p>Kristian Høgsberg Kristensen (1):</p>
+<ul>
+ <li>i965: Respect stride and subreg_offset for ATTR registers</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+ <li>glsl: Expose gl_MaxTess{Control,Evaluation}AtomicCounters.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
</p>
<ul>
-TBD.
+<li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
+<li>GL_ARB_shader_storage_buffer_object on i965</li>
+<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
+<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
+<li>GL_ARB_texture_query_lod on softpipe</li>
+<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
</ul>
<h2>Bug fixes</h2>
Example: export MESA_GLSL=dump,nopt
</p>
+<p>
+Shaders can be dumped and replaced at runtime for debugging purposes. Mesa
+needs to be configured with '--with-sha1' to enable this functionality. This
+feature is not currently supported by the SCons build.
+
+This is controlled via the following environment variables:
+<ul>
+<li><b>MESA_SHADER_DUMP_PATH</b> - path where shader sources are dumped
+<li><b>MESA_SHADER_READ_PATH</b> - path where replacement shaders are read
+</ul>
+Note that any path set here must already exist for dumping or replacing to work.
+When both are set, these paths should be different so the dumped shaders do
+not clobber the replacement shaders.
+</p>
<h2 id="support">GLSL Version</h2>
</p>
<p>
+With the August 2015 Workstation 12 / Fusion 8 releases, OpenGL 3.3
+is supported in the guest.
+This requires:
+<ul>
+<li>The VM is configured for virtual hardware version 12.
+<li>The host OS, GPU and graphics driver support DX11 (Windows) or
+ OpenGL 4.0 (Linux, Mac)
+<li>On Linux, the vmwgfx kernel module must be version 2.9.0 or later.
+<li>A recent version of Mesa with the updated svga gallium driver.
+</ul>
+</p>
+
+<p>
+Otherwise, OpenGL 2.1 is supported.
+</p>
+
+<p>
+OpenGL 3.3 support can be disabled by setting the environment variable
+SVGA_VGPU10=0.
+You will then have OpenGL 2.1 support.
+This may be useful to work around application bugs (such as incorrect use
+of the OpenGL 3.x core profile).
+</p>
+
+<p>
Most modern Linux distros include the SVGA3D driver so end users shouldn't
be concerned with this information.
But if your distro lacks the driver or you want to update to the latest code
then rerun glxinfo and examine the output for error messages.
</p>
+<p>
+If OpenGL 3.3 is not working (you only get OpenGL 2.1):
+</p>
+<ul>
+<li>Make sure the VM uses hardware version 12.
+<li>Make sure the vmwgfx kernel module is version 2.9.0 or later.
+<li>Check the vmware.log file for errors.
+<li>Run 'dmesg | grep vmwgfx' and look for "DX: yes".
+</ul>
+
</div>
</body>
</html>
static inline int
cnd_broadcast(cnd_t *cond)
{
- if (!cond) return thrd_error;
- pthread_cond_broadcast(cond);
- return thrd_success;
+ assert(cond != NULL);
+ return (pthread_cond_broadcast(cond) == 0) ? thrd_success : thrd_error;
}
// 7.25.3.2
static inline int
cnd_init(cnd_t *cond)
{
- if (!cond) return thrd_error;
- pthread_cond_init(cond, NULL);
- return thrd_success;
+ assert(cond != NULL);
+ return (pthread_cond_init(cond, NULL) == 0) ? thrd_success : thrd_error;
}
// 7.25.3.4
static inline int
cnd_signal(cnd_t *cond)
{
- if (!cond) return thrd_error;
- pthread_cond_signal(cond);
- return thrd_success;
+ assert(cond != NULL);
+ return (pthread_cond_signal(cond) == 0) ? thrd_success : thrd_error;
}
// 7.25.3.5
{
struct timespec abs_time;
int rt;
- if (!cond || !mtx || !xt) return thrd_error;
+ assert(mtx != NULL);
+ assert(cond != NULL);
rt = pthread_cond_timedwait(cond, mtx, &abs_time);
if (rt == ETIMEDOUT)
return thrd_busy;
static inline int
cnd_wait(cnd_t *cond, mtx_t *mtx)
{
- if (!cond || !mtx) return thrd_error;
- pthread_cond_wait(cond, mtx);
- return thrd_success;
+ assert(mtx != NULL);
+ assert(cond != NULL);
+ return (pthread_cond_wait(cond, mtx) == 0) ? thrd_success : thrd_error;
}
static inline void
mtx_destroy(mtx_t *mtx)
{
- assert(mtx);
+ assert(mtx != NULL);
pthread_mutex_destroy(mtx);
}
mtx_init(mtx_t *mtx, int type)
{
pthread_mutexattr_t attr;
- if (!mtx) return thrd_error;
+ assert(mtx != NULL);
if (type != mtx_plain && type != mtx_timed && type != mtx_try
&& type != (mtx_plain|mtx_recursive)
&& type != (mtx_timed|mtx_recursive)
static inline int
mtx_lock(mtx_t *mtx)
{
- if (!mtx) return thrd_error;
- pthread_mutex_lock(mtx);
- return thrd_success;
+ assert(mtx != NULL);
+ return (pthread_mutex_lock(mtx) == 0) ? thrd_success : thrd_error;
}
static inline int
static inline int
mtx_timedlock(mtx_t *mtx, const xtime *xt)
{
- if (!mtx || !xt) return thrd_error;
+ assert(mtx != NULL);
+ assert(xt != NULL);
+
{
#ifdef EMULATED_THREADS_USE_NATIVE_TIMEDLOCK
struct timespec ts;
static inline int
mtx_trylock(mtx_t *mtx)
{
- if (!mtx) return thrd_error;
+ assert(mtx != NULL);
return (pthread_mutex_trylock(mtx) == 0) ? thrd_success : thrd_busy;
}
static inline int
mtx_unlock(mtx_t *mtx)
{
- if (!mtx) return thrd_error;
- pthread_mutex_unlock(mtx);
- return thrd_success;
+ assert(mtx != NULL);
+ return (pthread_mutex_unlock(mtx) == 0) ? thrd_success : thrd_error;
}
thrd_create(thrd_t *thr, thrd_start_t func, void *arg)
{
struct impl_thrd_param *pack;
- if (!thr) return thrd_error;
+ assert(thr != NULL);
pack = (struct impl_thrd_param *)malloc(sizeof(struct impl_thrd_param));
if (!pack) return thrd_nomem;
pack->func = func;
static inline int
tss_create(tss_t *key, tss_dtor_t dtor)
{
- if (!key) return thrd_error;
+ assert(key != NULL);
return (pthread_key_create(key, dtor) == 0) ? thrd_success : thrd_error;
}
noinst_LTLIBRARIES = libglsl_util.la
libglsl_util_la_SOURCES = \
+ glsl/shader_enums.c \
mesa/main/imports.c \
mesa/program/prog_hash_table.c \
mesa/program/symbol_table.c \
env.Append(CPPPATH = [
'#/include',
+ '#/include/HaikuGL',
'#/src/egl/main',
'#/src',
])
#define WL_HIDE_DEPRECATED
+#include <stdbool.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
__DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
disp->Extensions.KHR_gl_colorspace = EGL_TRUE;
- if (dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) {
+ if ((dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) ||
+ (dri2_dpy->swrast && dri2_dpy->swrast->base.version >= 3)) {
disp->Extensions.KHR_create_context = EGL_TRUE;
if (dri2_dpy->robustness)
if (dri2_dpy->own_dri_screen)
dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
- if (dri2_dpy->fd)
+ if (dri2_dpy->fd >= 0)
close(dri2_dpy->fd);
if (dri2_dpy->driver)
dlclose(dri2_dpy->driver);
_eglError(egl_error, "dri2_create_context");
}
+/**
+ * Build the attribute list that the callers pass on to the DRI
+ * createContextAttribs() entry points, from the EGL state in dri2_ctx.
+ *
+ * ctx_attribs receives key/value pairs: the client major and minor version
+ * always, the context flags when they are non-zero, and a reset strategy when
+ * a strategy other than EGL_NO_RESET_NOTIFICATION_KHR was requested.
+ *
+ * On entry *num_attribs must be at least 8 (asserted below). On success it is
+ * overwritten with the number of uint32_t entries written, counting keys and
+ * values separately.
+ *
+ * Returns false, after raising an EGL error through _eglError(), when
+ * robust access or a reset strategy is requested but dri2_dpy->robustness is
+ * not set; *num_attribs is left untouched in that case. Returns true
+ * otherwise.
+ */
+static bool
+dri2_fill_context_attribs(struct dri2_egl_context *dri2_ctx,
+ struct dri2_egl_display *dri2_dpy,
+ uint32_t *ctx_attribs,
+ unsigned *num_attribs)
+{
+ int pos = 0;
+
+ /* Up to four key/value pairs (8 entries) can be written below. */
+ assert(*num_attribs >= 8);
+
+ ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
+ ctx_attribs[pos++] = dri2_ctx->base.ClientMajorVersion;
+ ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MINOR_VERSION;
+ ctx_attribs[pos++] = dri2_ctx->base.ClientMinorVersion;
+
+ if (dri2_ctx->base.Flags != 0) {
+ /* If the implementation doesn't support the __DRI2_ROBUSTNESS
+ * extension, don't even try to send it the robust-access flag.
+ * It may explode. Instead, generate the required EGL error here.
+ */
+ if ((dri2_ctx->base.Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) != 0
+ && !dri2_dpy->robustness) {
+ _eglError(EGL_BAD_MATCH, "eglCreateContext");
+ return false;
+ }
+
+ ctx_attribs[pos++] = __DRI_CTX_ATTRIB_FLAGS;
+ ctx_attribs[pos++] = dri2_ctx->base.Flags;
+ }
+
+ if (dri2_ctx->base.ResetNotificationStrategy != EGL_NO_RESET_NOTIFICATION_KHR) {
+ /* If the implementation doesn't support the __DRI2_ROBUSTNESS
+ * extension, don't even try to send it a reset strategy. It may
+ * explode. Instead, generate the required EGL error here.
+ */
+ if (!dri2_dpy->robustness) {
+ _eglError(EGL_BAD_CONFIG, "eglCreateContext");
+ return false;
+ }
+
+ ctx_attribs[pos++] = __DRI_CTX_ATTRIB_RESET_STRATEGY;
+ ctx_attribs[pos++] = __DRI_CTX_RESET_LOSE_CONTEXT;
+ }
+
+ /* Report the number of array entries filled, not the number of pairs. */
+ *num_attribs = pos;
+
+ return true;
+}
+
/**
* Called via eglCreateContext(), drv->API.CreateContext().
*/
if (dri2_dpy->dri2) {
if (dri2_dpy->dri2->base.version >= 3) {
unsigned error;
- unsigned num_attribs = 0;
+ unsigned num_attribs = 8;
uint32_t ctx_attribs[8];
- ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
- ctx_attribs[num_attribs++] = dri2_ctx->base.ClientMajorVersion;
- ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_MINOR_VERSION;
- ctx_attribs[num_attribs++] = dri2_ctx->base.ClientMinorVersion;
-
- if (dri2_ctx->base.Flags != 0) {
- /* If the implementation doesn't support the __DRI2_ROBUSTNESS
- * extension, don't even try to send it the robust-access flag.
- * It may explode. Instead, generate the required EGL error here.
- */
- if ((dri2_ctx->base.Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) != 0
- && !dri2_dpy->robustness) {
- _eglError(EGL_BAD_MATCH, "eglCreateContext");
- goto cleanup;
- }
-
- ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_FLAGS;
- ctx_attribs[num_attribs++] = dri2_ctx->base.Flags;
- }
-
- if (dri2_ctx->base.ResetNotificationStrategy != EGL_NO_RESET_NOTIFICATION_KHR) {
- /* If the implementation doesn't support the __DRI2_ROBUSTNESS
- * extension, don't even try to send it a reset strategy. It may
- * explode. Instead, generate the required EGL error here.
- */
- if (!dri2_dpy->robustness) {
- _eglError(EGL_BAD_CONFIG, "eglCreateContext");
- goto cleanup;
- }
-
- ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_RESET_STRATEGY;
- ctx_attribs[num_attribs++] = __DRI_CTX_RESET_LOSE_CONTEXT;
- }
-
- assert(num_attribs <= ARRAY_SIZE(ctx_attribs));
+ if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+ &num_attribs))
+ goto cleanup;
dri2_ctx->dri_context =
dri2_dpy->dri2->createContextAttribs(dri2_dpy->dri_screen,
}
} else {
assert(dri2_dpy->swrast);
- dri2_ctx->dri_context =
- dri2_dpy->swrast->createNewContextForAPI(dri2_dpy->dri_screen,
- api,
- dri_config,
- shared,
- dri2_ctx);
+ if (dri2_dpy->swrast->base.version >= 3) {
+ unsigned error;
+ unsigned num_attribs = 8;
+ uint32_t ctx_attribs[8];
+
+ if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+ &num_attribs))
+ goto cleanup;
+
+ dri2_ctx->dri_context =
+ dri2_dpy->swrast->createContextAttribs(dri2_dpy->dri_screen,
+ api,
+ dri_config,
+ shared,
+ num_attribs / 2,
+ ctx_attribs,
+ & error,
+ dri2_ctx);
+ dri2_create_context_attribs_error(error);
+ } else {
+ dri2_ctx->dri_context =
+ dri2_dpy->swrast->createNewContextForAPI(dri2_dpy->dri_screen,
+ api,
+ dri_config,
+ shared,
+ dri2_ctx);
+ }
}
if (!dri2_ctx->dri_context)
dri2_dpy->own_device = 1;
gbm = gbm_create_device(fd);
if (gbm == NULL)
- return EGL_FALSE;
+ goto cleanup;
+ } else {
+ fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
+ if (fd < 0)
+ goto cleanup;
}
- if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) {
- free(dri2_dpy);
- return EGL_FALSE;
- }
+ if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0)
+ goto cleanup;
dri2_dpy->gbm_dri = gbm_dri_device(gbm);
- if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) {
- free(dri2_dpy);
- return EGL_FALSE;
- }
-
- if (fd < 0) {
- fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
- if (fd < 0) {
- free(dri2_dpy);
- return EGL_FALSE;
- }
- }
+ if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI)
+ goto cleanup;
dri2_dpy->fd = fd;
dri2_dpy->device_name = loader_get_device_name_for_fd(dri2_dpy->fd);
dri2_dpy->vtbl = &dri2_drm_display_vtbl;
return EGL_TRUE;
+
+cleanup:
+ if (fd >= 0)
+ close(fd);
+
+ free(dri2_dpy);
+ return EGL_FALSE;
}
if (roundtrip(dri2_dpy) < 0 || dri2_dpy->formats == 0)
goto cleanup_shm;
+ dri2_dpy->fd = -1;
dri2_dpy->driver_name = strdup("swrast");
if (!dri2_load_driver_swrast(disp))
goto cleanup_shm;
* Every hardware driver_name is set using strdup. Doing the same in
* here will allow is to simply free the memory at dri2_terminate().
*/
+ dri2_dpy->fd = -1;
dri2_dpy->driver_name = strdup("swrast");
if (!dri2_load_driver_swrast(disp))
goto cleanup_conn;
wl_resource_post_event(resource, WL_DRM_AUTHENTICATED);
}
-const static struct wl_drm_interface drm_interface = {
+static const struct wl_drm_interface drm_interface = {
drm_authenticate,
drm_create_buffer,
drm_create_planar_buffer,
+/*
+ * Copyright © 2011 Kristian Høgsberg
+ * Copyright © 2011 Benjamin Franzke
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Kristian Høgsberg <krh@bitplanet.net>
+ * Benjamin Franzke <benjaminfranzke@googlemail.com>
+ */
+
#include <stdlib.h>
#include <wayland-client.h>
endif
-indices/u_indices_gen.c: $(srcdir)/indices/u_indices_gen.py
- $(AM_V_at)$(MKDIR_P) indices
- $(AM_V_GEN) $(PYTHON2) $< > $@
-
-indices/u_unfilled_gen.c: $(srcdir)/indices/u_unfilled_gen.py
- $(AM_V_at)$(MKDIR_P) indices
- $(AM_V_GEN) $(PYTHON2) $< > $@
-
-util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv
- $(AM_V_at)$(MKDIR_P) util
- $(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
-
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
+indices/u_indices_gen.c: indices/u_indices_gen.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/indices/u_indices_gen.py > $@
+
+indices/u_unfilled_gen.c: indices/u_unfilled_gen.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/indices/u_unfilled_gen.py > $@
+
+util/u_format_table.c: util/u_format_table.py \
+ util/u_format_pack.py \
+ util/u_format_parse.py \
+ util/u_format.csv
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
noinst_LTLIBRARIES += libgalliumvl_stub.la
libgalliumvl_stub_la_SOURCES = \
rtasm/rtasm_execmem.h \
rtasm/rtasm_x86sse.c \
rtasm/rtasm_x86sse.h \
+ tgsi/tgsi_aa_point.c \
+ tgsi/tgsi_aa_point.h \
tgsi/tgsi_build.c \
tgsi/tgsi_build.h \
tgsi/tgsi_dump.c \
tgsi/tgsi_opcode_tmp.h \
tgsi/tgsi_parse.c \
tgsi/tgsi_parse.h \
+ tgsi/tgsi_point_sprite.c \
+ tgsi/tgsi_point_sprite.h \
tgsi/tgsi_sanity.c \
tgsi/tgsi_sanity.h \
tgsi/tgsi_scan.c \
tgsi/tgsi_text.h \
tgsi/tgsi_transform.c \
tgsi/tgsi_transform.h \
+ tgsi/tgsi_two_side.c \
+ tgsi/tgsi_two_side.h \
tgsi/tgsi_ureg.c \
tgsi/tgsi_ureg.h \
tgsi/tgsi_util.c \
util/u_pack_color.h \
util/u_pointer.h \
util/u_prim.h \
+ util/u_prim_restart.c \
+ util/u_prim_restart.h \
util/u_pstipple.c \
util/u_pstipple.h \
util/u_range.h \
TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W);
/* KILL_IF -tmp0.yyyy; # if -tmp0.y < 0, KILL */
- tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y);
+ tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
+ TGSI_SWIZZLE_Y, TRUE);
/* compute coverage factor = (1-d)/(1-k) */
/* KILL_IF -texTemp.wwww; # if -texTemp < 0, KILL fragment */
tgsi_transform_kill_inst(ctx,
- TGSI_FILE_TEMPORARY, pctx->texTemp, TGSI_SWIZZLE_W);
+ TGSI_FILE_TEMPORARY, pctx->texTemp,
+ TGSI_SWIZZLE_W, TRUE);
}
else {
double dscale = lp_const_scale(type);
- elem = LLVMConstInt(elem_type, round(val*dscale), 0);
+ elem = LLVMConstInt(elem_type, (long long) round(val*dscale), 0);
}
return elem;
#define ttn_channel(b, src, swiz) \
nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false)
+/**
+ * Translate a TGSI semantic name/index pair into a VARYING_SLOT_* value.
+ *
+ * For COLOR, BCOLOR and CLIPDIST only index 0 is distinguished; any other
+ * index selects the second slot (COL1, BFC1, CLIP_DIST1). GENERIC and
+ * TEXCOORD add the index to VARYING_SLOT_VAR0 / VARYING_SLOT_TEX0.
+ *
+ * An unrecognized semantic name is reported on stderr and aborts the
+ * process.
+ */
+static gl_varying_slot
+tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
+{
+   switch (semantic) {
+   case TGSI_SEMANTIC_POSITION:
+      return VARYING_SLOT_POS;
+   case TGSI_SEMANTIC_COLOR:
+      if (index == 0)
+         return VARYING_SLOT_COL0;
+      else
+         return VARYING_SLOT_COL1;
+   case TGSI_SEMANTIC_BCOLOR:
+      if (index == 0)
+         return VARYING_SLOT_BFC0;
+      else
+         return VARYING_SLOT_BFC1;
+   case TGSI_SEMANTIC_FOG:
+      return VARYING_SLOT_FOGC;
+   case TGSI_SEMANTIC_PSIZE:
+      return VARYING_SLOT_PSIZ;
+   case TGSI_SEMANTIC_GENERIC:
+      return VARYING_SLOT_VAR0 + index;
+   case TGSI_SEMANTIC_FACE:
+      return VARYING_SLOT_FACE;
+   case TGSI_SEMANTIC_EDGEFLAG:
+      return VARYING_SLOT_EDGE;
+   case TGSI_SEMANTIC_PRIMID:
+      return VARYING_SLOT_PRIMITIVE_ID;
+   case TGSI_SEMANTIC_CLIPDIST:
+      if (index == 0)
+         return VARYING_SLOT_CLIP_DIST0;
+      else
+         return VARYING_SLOT_CLIP_DIST1;
+   case TGSI_SEMANTIC_CLIPVERTEX:
+      return VARYING_SLOT_CLIP_VERTEX;
+   case TGSI_SEMANTIC_TEXCOORD:
+      return VARYING_SLOT_TEX0 + index;
+   case TGSI_SEMANTIC_PCOORD:
+      return VARYING_SLOT_PNTC;
+   case TGSI_SEMANTIC_VIEWPORT_INDEX:
+      return VARYING_SLOT_VIEWPORT;
+   case TGSI_SEMANTIC_LAYER:
+      return VARYING_SLOT_LAYER;
+   default:
+      /* Both arguments are unsigned, so print them with %u. */
+      fprintf(stderr, "Bad TGSI semantic: %u/%u\n", semantic, index);
+      abort();
+   }
+}
+
+/* Temporary helper to remap back to TGSI style semantic name/index
+ * values, for use in drivers that haven't been converted to using
+ * VARYING_SLOT_
+ *
+ * Writes the TGSI semantic name to *semantic_name and the semantic index
+ * to *semantic_index. Generic and texcoord slots are computed from their
+ * offset; the other slots come from the lookup table below. A slot beyond
+ * the end of the table is reported on stderr and aborts the process.
+ */
+void
+varying_slot_to_tgsi_semantic(gl_varying_slot slot,
+ unsigned *semantic_name, unsigned *semantic_index)
+{
+ /* NOTE(review): any slot below the table size that has no explicit entry
+ * here reads as { 0, 0 }; confirm against the gl_varying_slot enum that
+ * no such slot reaches the table lookup.
+ */
+ static const unsigned map[][2] = {
+ [VARYING_SLOT_POS] = { TGSI_SEMANTIC_POSITION, 0 },
+ [VARYING_SLOT_COL0] = { TGSI_SEMANTIC_COLOR, 0 },
+ [VARYING_SLOT_COL1] = { TGSI_SEMANTIC_COLOR, 1 },
+ [VARYING_SLOT_BFC0] = { TGSI_SEMANTIC_BCOLOR, 0 },
+ [VARYING_SLOT_BFC1] = { TGSI_SEMANTIC_BCOLOR, 1 },
+ [VARYING_SLOT_FOGC] = { TGSI_SEMANTIC_FOG, 0 },
+ [VARYING_SLOT_PSIZ] = { TGSI_SEMANTIC_PSIZE, 0 },
+ [VARYING_SLOT_FACE] = { TGSI_SEMANTIC_FACE, 0 },
+ [VARYING_SLOT_EDGE] = { TGSI_SEMANTIC_EDGEFLAG, 0 },
+ [VARYING_SLOT_PRIMITIVE_ID] = { TGSI_SEMANTIC_PRIMID, 0 },
+ [VARYING_SLOT_CLIP_DIST0] = { TGSI_SEMANTIC_CLIPDIST, 0 },
+ [VARYING_SLOT_CLIP_DIST1] = { TGSI_SEMANTIC_CLIPDIST, 1 },
+ [VARYING_SLOT_CLIP_VERTEX] = { TGSI_SEMANTIC_CLIPVERTEX, 0 },
+ [VARYING_SLOT_PNTC] = { TGSI_SEMANTIC_PCOORD, 0 },
+ [VARYING_SLOT_VIEWPORT] = { TGSI_SEMANTIC_VIEWPORT_INDEX, 0 },
+ [VARYING_SLOT_LAYER] = { TGSI_SEMANTIC_LAYER, 0 },
+ };
+
+ /* Generic varyings: the index is the offset from VARYING_SLOT_VAR0. */
+ if (slot >= VARYING_SLOT_VAR0) {
+ *semantic_name = TGSI_SEMANTIC_GENERIC;
+ *semantic_index = slot - VARYING_SLOT_VAR0;
+ return;
+ }
+
+ /* Texture coordinates: the index is the offset from VARYING_SLOT_TEX0. */
+ if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
+ *semantic_name = TGSI_SEMANTIC_TEXCOORD;
+ *semantic_index = slot - VARYING_SLOT_TEX0;
+ return;
+ }
+
+ /* Guard the table lookup against slots past the last entry. */
+ if (slot >= ARRAY_SIZE(map)) {
+ fprintf(stderr, "Unknown varying slot %d\n", slot);
+ abort();
+ }
+
+ *semantic_name = map[slot][0];
+ *semantic_index = map[slot][1];
+}
+
+/* Temporary helper to remap back to TGSI style semantic name/index
+ * values, for use in drivers that haven't been converted to using
+ * FRAG_RESULT_
+ *
+ * Writes the TGSI semantic name to *semantic_name and the semantic index
+ * to *semantic_index. FRAG_RESULT_COLOR yields index -1, which is stored
+ * as the wrapped unsigned value. A slot beyond the end of the table is
+ * reported on stderr and aborts the process, matching
+ * varying_slot_to_tgsi_semantic().
+ */
+void
+frag_result_to_tgsi_semantic(gl_frag_result slot,
+                             unsigned *semantic_name, unsigned *semantic_index)
+{
+   /* NOTE(review): any slot below the table size without an explicit entry
+    * reads as { 0, 0 }; confirm against the gl_frag_result enum whether
+    * such slots can be passed in.
+    */
+   static const unsigned map[][2] = {
+      [FRAG_RESULT_DEPTH] = { TGSI_SEMANTIC_POSITION, 0 },
+      [FRAG_RESULT_COLOR] = { TGSI_SEMANTIC_COLOR, -1 },
+      [FRAG_RESULT_DATA0 + 0] = { TGSI_SEMANTIC_COLOR, 0 },
+      [FRAG_RESULT_DATA0 + 1] = { TGSI_SEMANTIC_COLOR, 1 },
+      [FRAG_RESULT_DATA0 + 2] = { TGSI_SEMANTIC_COLOR, 2 },
+      [FRAG_RESULT_DATA0 + 3] = { TGSI_SEMANTIC_COLOR, 3 },
+      [FRAG_RESULT_DATA0 + 4] = { TGSI_SEMANTIC_COLOR, 4 },
+      [FRAG_RESULT_DATA0 + 5] = { TGSI_SEMANTIC_COLOR, 5 },
+      [FRAG_RESULT_DATA0 + 6] = { TGSI_SEMANTIC_COLOR, 6 },
+      [FRAG_RESULT_DATA0 + 7] = { TGSI_SEMANTIC_COLOR, 7 },
+   };
+
+   /* Never index past the table: an out-of-range slot would otherwise read
+    * unrelated memory and hand garbage semantics back to the driver.
+    */
+   if (slot >= ARRAY_SIZE(map)) {
+      fprintf(stderr, "Unknown frag result %d\n", (int) slot);
+      abort();
+   }
+
+   *semantic_name = map[slot][0];
+   *semantic_index = map[slot][1];
+}
+
static nir_ssa_def *
ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest)
{
var->data.mode = nir_var_shader_in;
var->name = ralloc_asprintf(var, "in_%d", idx);
- /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
- * instead, but nothing in NIR core is looking at the value
- * currently, and this is less change to drivers.
- */
- var->data.location = decl->Semantic.Name;
- var->data.index = decl->Semantic.Index;
+ if (c->scan->processor == TGSI_PROCESSOR_FRAGMENT) {
+ var->data.location =
+ tgsi_varying_semantic_to_slot(decl->Semantic.Name,
+ decl->Semantic.Index);
+ } else {
+ assert(!decl->Declaration.Semantic);
+ var->data.location = VERT_ATTRIB_GENERIC0 + idx;
+ }
+ var->data.index = 0;
/* We definitely need to translate the interpolation field, because
* nir_print will decode it.
exec_list_push_tail(&b->shader->inputs, &var->node);
break;
case TGSI_FILE_OUTPUT: {
+ int semantic_name = decl->Semantic.Name;
+ int semantic_index = decl->Semantic.Index;
/* Since we can't load from outputs in the IR, we make temporaries
* for the outputs and emit stores to the real outputs at the end of
* the shader.
var->data.mode = nir_var_shader_out;
var->name = ralloc_asprintf(var, "out_%d", idx);
-
- var->data.location = decl->Semantic.Name;
- if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
- decl->Semantic.Index == 0 &&
- c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
- var->data.index = -1;
- else
- var->data.index = decl->Semantic.Index;
+ var->data.index = 0;
+
+ if (c->scan->processor == TGSI_PROCESSOR_FRAGMENT) {
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_COLOR: {
+ /* TODO tgsi loses some information, so we cannot
+ * actually differentiate here between DSB and MRT
+ * at this point. But so far no drivers using tgsi-
+ * to-nir support dual source blend:
+ */
+ bool dual_src_blend = false;
+ if (dual_src_blend && (semantic_index == 1)) {
+ var->data.location = FRAG_RESULT_DATA0;
+ var->data.index = 1;
+ } else {
+ if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
+ var->data.location = FRAG_RESULT_COLOR;
+ else
+ var->data.location = FRAG_RESULT_DATA0 + semantic_index;
+ }
+ break;
+ }
+ case TGSI_SEMANTIC_POSITION:
+ var->data.location = FRAG_RESULT_DEPTH;
+ break;
+ default:
+ fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
+ decl->Semantic.Name, decl->Semantic.Index);
+ abort();
+ }
+ } else {
+ var->data.location =
+ tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
+ }
if (is_array) {
unsigned j;
{
nir_builder *b = &c->build;
- /* Save the outside-of-the-if-statement node list. */
- c->if_stack[c->if_stack_pos] = b->cursor;
- c->if_stack_pos++;
-
src = ttn_channel(b, src, X);
nir_if *if_stmt = nir_if_create(b->shader);
}
nir_builder_cf_insert(b, &if_stmt->cf_node);
+ c->if_stack[c->if_stack_pos] = nir_after_cf_node(&if_stmt->cf_node);
+ c->if_stack_pos++;
+
b->cursor = nir_after_cf_list(&if_stmt->then_list);
c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
{
nir_builder *b = &c->build;
- /* Save the outside-of-the-loop node list. */
- c->loop_stack[c->loop_stack_pos] = b->cursor;
- c->loop_stack_pos++;
-
nir_loop *loop = nir_loop_create(b->shader);
nir_builder_cf_insert(b, &loop->cf_node);
+ c->loop_stack[c->loop_stack_pos] = nir_after_cf_node(&loop->cf_node);
+ c->loop_stack_pos++;
+
b->cursor = nir_after_cf_list(&loop->body);
}
struct nir_shader *
tgsi_to_nir(const void *tgsi_tokens,
const struct nir_shader_compiler_options *options);
+void
+varying_slot_to_tgsi_semantic(gl_varying_slot slot,
+ unsigned *semantic_name, unsigned *semantic_index);
+void
+frag_result_to_tgsi_semantic(gl_frag_result slot,
+ unsigned *semantic_name, unsigned *semantic_index);
}
+#if !defined(PIPE_SUBSYSTEM_EMBEDDED)
const char *
os_get_option(const char *name)
{
return getenv(name);
}
+#endif /* !PIPE_SUBSYSTEM_EMBEDDED */
/**
unsigned bypass_usage,
uint64_t maximum_cache_size);
+/**
+ * Remove a buffer from the cache, but keep it alive.
+ */
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *buf);
struct pb_fence_ops;
}
+/**
+ * Detach a cache buffer from its manager.
+ *
+ * If the buffer is currently linked into a list (head.next is non-NULL) it
+ * is unlinked and the manager's delayed-buffer count and cache size are
+ * reduced accordingly.  In every case buf->mgr is cleared afterwards.
+ *
+ * pb_cache_manager_remove_buffer() calls this with mgr->mutex held.
+ */
+static void
+_pb_cache_manager_remove_buffer_locked(struct pb_cache_buffer *buf)
+{
+   if (buf->head.next != NULL) {
+      struct pb_cache_manager *mgr = buf->mgr;
+
+      LIST_DEL(&buf->head);
+      assert(mgr->numDelayed);
+      mgr->numDelayed -= 1;
+      mgr->cache_size -= buf->base.size;
+   }
+
+   buf->mgr = NULL;
+}
+
+
+/**
+ * Public entry point: detach pb_buf from its cache manager while keeping
+ * the buffer itself alive.  A buffer that has no manager is left untouched.
+ */
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *pb_buf)
+{
+   struct pb_cache_buffer *cache_buf = (struct pb_cache_buffer *) pb_buf;
+   struct pb_cache_manager *owner = cache_buf->mgr;
+
+   if (owner) {
+      /* The helper clears cache_buf->mgr, so keep our own copy for unlock. */
+      pipe_mutex_lock(owner->mutex);
+      _pb_cache_manager_remove_buffer_locked(cache_buf);
+      pipe_mutex_unlock(owner->mutex);
+   }
+}
+
+
/**
* Actually destroy the buffer.
*/
static inline void
_pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
{
- struct pb_cache_manager *mgr = buf->mgr;
-
- LIST_DEL(&buf->head);
- assert(mgr->numDelayed);
- --mgr->numDelayed;
- mgr->cache_size -= buf->base.size;
+ if (buf->mgr)
+ _pb_cache_manager_remove_buffer_locked(buf);
assert(!pipe_is_referenced(&buf->base.reference));
pb_reference(&buf->buffer, NULL);
FREE(buf);
struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
struct pb_cache_manager *mgr = buf->mgr;
+ if (!mgr) {
+ pb_reference(&buf->buffer, NULL);
+ FREE(buf);
+ return;
+ }
+
pipe_mutex_lock(mgr->mutex);
assert(!pipe_is_referenced(&buf->base.reference));
--- /dev/null
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms the fragment shader to support anti-aliasing points.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_aa_point.h"
+#include "tgsi_transform.h"
+
+#define INVALID_INDEX 9999
+
+struct aa_transform_context
+{
+ struct tgsi_transform_context base; // kept first: aa_transform_context() casts a base pointer back to this struct
+
+ unsigned tmp; // index of the scratch temp; its .w ends up holding the coverage value
+ unsigned color_out; // index of the frag color output (COLOR, semantic index 0); INVALID_INDEX until seen
+ unsigned color_tmp; // index of the temp that stands in for color_out inside the shader body
+ unsigned num_tmp; // one past the highest temp index declared so far
+ unsigned num_imm; // number of immediates emitted so far
+ unsigned num_input; // input count; aa_prolog uses it as the index of the new input
+ unsigned aa_point_coord_index; // semantic index of the GENERIC input declared by aa_prolog
+};
+
+/**
+ * Downcast helper: recover the aa_transform_context that embeds the given
+ * base transform context as its first member.
+ */
+static inline struct aa_transform_context *
+aa_transform_context(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *derived = (struct aa_transform_context *) ctx;
+
+   return derived;
+}
+
+/**
+ * TGSI declaration transform callback.
+ *
+ * Records what aa_prolog/aa_inst/aa_epilog need to know about the
+ * original shader, then forwards the declaration unchanged:
+ *  - the register index of the COLOR output with semantic index 0,
+ *  - one past the highest input register index,
+ *  - one past the highest temporary register index.
+ */
+static void
+aa_decl(struct tgsi_transform_context *ctx,
+        struct tgsi_full_declaration *decl)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
+       decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+       decl->Semantic.Index == 0) {
+      ts->color_out = decl->Range.First;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      /* Track the highest register index instead of counting declarations:
+       * a declaration may cover a range of registers, or inputs may be
+       * declared sparsely, and aa_prolog uses num_input as the index of
+       * the input it adds, which must not collide with an existing one.
+       */
+      ts->num_input = MAX2(ts->num_input, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+
+/**
+ * TGSI immediate declaration transform callback.
+ *
+ * Passes the immediate through unchanged and counts it, so that aa_prolog
+ * knows which index its own immediate will receive.
+ */
+static void
+aa_immediate(struct tgsi_transform_context *ctx,
+             struct tgsi_full_immediate *imm)
+{
+   struct aa_transform_context *aa = aa_transform_context(ctx);
+
+   ctx->emit_immediate(ctx, imm);
+   aa->num_imm += 1;
+}
+
+
+/**
+ * TGSI transform prolog callback.
+ *
+ * Declares two new temporaries, one new GENERIC input and one new
+ * immediate, then emits the instructions that leave the point coverage
+ * value in the .w channel of the first new temporary (ts->tmp).
+ */
+static void
+aa_prolog(struct tgsi_transform_context *ctx)
+{
+ struct aa_transform_context *ts = aa_transform_context(ctx);
+ unsigned tmp0;
+ unsigned texIn;
+ unsigned imm;
+
+ /* Declare two temporary registers, one for temporary and
+ * one for color.
+ */
+ ts->tmp = ts->num_tmp++;
+ ts->color_tmp = ts->num_tmp++;
+
+ tgsi_transform_temps_decl(ctx, ts->tmp, ts->color_tmp);
+
+ /* Declare new generic input/texcoord */
+ texIn = ts->num_input++;
+ tgsi_transform_input_decl(ctx, texIn, TGSI_SEMANTIC_GENERIC,
+ ts->aa_point_coord_index, TGSI_INTERPOLATE_LINEAR);
+
+ /* Declare extra immediates: (0.5, 0.5, 0.45, 1.0).
+ * The instructions below use .xy as the centre offset, .x as the
+ * constant 0.5 and .w as the constant 1.0.
+ */
+ imm = ts->num_imm++;
+ tgsi_transform_immediate_decl(ctx, 0.5, 0.5, 0.45, 1.0);
+
+ /*
+ * Emit code to compute fragment coverage.
+ * The point always has radius 0.5. The threshold value will be a
+ * value less than, but close to 0.5, such as 0.45.
+ * We compute a coverage factor from the distance and threshold.
+ * If the coverage is negative, the fragment is outside the circle and
+ * it's discarded.
+ * If the coverage is >= 1, the fragment is fully inside the threshold
+ * distance. We limit/clamp the coverage to 1.
+ * Otherwise, the fragment is between the threshold value and 0.5 and we
+ * compute a coverage value in [0,1].
+ *
+ * Input reg (texIn) usage:
+ * texIn.x = x point coord in [0,1]
+ * texIn.y = y point coord in [0,1]
+ * texIn.z = "k" the smoothing threshold distance
+ * texIn.w = unused
+ *
+ * Temp reg (t0) usage:
+ * t0.x = distance of fragment from center point
+ * t0.y = 0.5 - distance (numerator of the coverage factor)
+ * t0.z = not written by the instructions below
+ * t0.w = 0.5 - k first, then the final coverage value
+ */
+
+ tmp0 = ts->tmp;
+
+ /* SUB t0.xy, texIn, (0.5, 0.5) */
+ tgsi_transform_op2_inst(ctx, TGSI_OPCODE_SUB,
+ TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY,
+ TGSI_FILE_INPUT, texIn,
+ TGSI_FILE_IMMEDIATE, imm);
+
+ /* DP2 t0.x, t0.xy, t0.xy; # t0.x = x^2 + y^2 */
+ tgsi_transform_op2_inst(ctx, TGSI_OPCODE_DP2,
+ TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
+ TGSI_FILE_TEMPORARY, tmp0,
+ TGSI_FILE_TEMPORARY, tmp0);
+
+ /* SQRT t0.x, t0.x */
+ tgsi_transform_op1_inst(ctx, TGSI_OPCODE_SQRT,
+ TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
+ TGSI_FILE_TEMPORARY, tmp0);
+
+ /* compute coverage factor = (0.5-d)/(0.5-k) */
+
+ /* SUB t0.w, 0.5, texIn.z; # t0.w = 0.5-k */
+ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+ TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+ TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X,
+ TGSI_FILE_INPUT, texIn, TGSI_SWIZZLE_Z);
+
+ /* SUB t0.y, 0.5, t0.x; # t0.y = 0.5-d */
+ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+ TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
+ TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X,
+ TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X);
+
+ /* DIV t0.w, t0.y, t0.w; # coverage = (0.5-d)/(0.5-k) */
+ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
+ TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+ TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y,
+ TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W);
+
+ /* If the coverage value is negative, it means the fragment is outside
+ * the point's circular boundary. Kill it.
+ */
+ /* KILL_IF tmp0.w; # if tmp0.w < 0 KILL */
+ tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
+ TGSI_SWIZZLE_W, FALSE);
+
+ /* If the distance is less than the threshold, the coverage/alpha value
+ * will be greater than one. Clamp to one here.
+ */
+ /* MIN tmp0.w, tmp0.w, 1.0 */
+ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
+ TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+ TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W,
+ TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_W);
+}
+
+/**
+ * TGSI instruction transform callback.
+ *
+ * Redirects every destination that targets the color output register to
+ * the color temporary, then emits the (possibly modified) instruction.
+ * aa_epilog later copies the temporary back to the real output.
+ */
+static void
+aa_inst(struct tgsi_transform_context *ctx,
+        struct tgsi_full_instruction *inst)
+{
+   struct aa_transform_context *aa = aa_transform_context(ctx);
+   unsigned d;
+
+   for (d = 0; d < inst->Instruction.NumDstRegs; d++) {
+      struct tgsi_full_dst_register *reg = &inst->Dst[d];
+
+      /* Only writes to the color output are rerouted. */
+      if (reg->Register.File != TGSI_FILE_OUTPUT)
+         continue;
+      if (reg->Register.Index != aa->color_out)
+         continue;
+
+      reg->Register.File = TGSI_FILE_TEMPORARY;
+      reg->Register.Index = aa->color_tmp;
+   }
+
+   ctx->emit_instruction(ctx, inst);
+}
+
+
+/**
+ * TGSI transform epilog callback.
+ *
+ * Appends the alpha modulation at the tail of the program: the color
+ * temporary's xyz is copied to the color output, and the output's w is
+ * written by a MUL of the color temporary and the coverage temporary.
+ */
+static void
+aa_epilog(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+   unsigned out_reg, shadow_reg, coverage_reg;
+
+   assert(ts->color_out != INVALID_INDEX);
+   assert(ts->color_tmp != INVALID_INDEX);
+
+   out_reg = ts->color_out;
+   shadow_reg = ts->color_tmp;
+   coverage_reg = ts->tmp;
+
+   /* MOV output.color.xyz colorTmp */
+   tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                           TGSI_FILE_OUTPUT, out_reg, TGSI_WRITEMASK_XYZ,
+                           TGSI_FILE_TEMPORARY, shadow_reg);
+
+   /* MUL output.color.w colorTmp.w tmp0.w */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
+                           TGSI_FILE_OUTPUT, out_reg, TGSI_WRITEMASK_W,
+                           TGSI_FILE_TEMPORARY, shadow_reg,
+                           TGSI_FILE_TEMPORARY, coverage_reg);
+}
+
+
+/**
+ * TGSI utility to transform a fragment shader to support antialiasing point.
+ *
+ * \param tokens_in  the original token string of the shader
+ * \param aa_point_coord_index  the semantic index of the generic register
+ *                              that contains the point sprite texture coord
+ * \return a newly allocated token buffer holding the transformed shader,
+ *         or NULL if the buffer could not be allocated
+ *
+ * Per fragment, the emitted code measures the distance from the point
+ * center using the point sprite texture coordinates.  Fragments farther
+ * than 0.5 away are discarded.  For the rest a coverage value is computed
+ * that approximates how much of the fragment lies inside the point's
+ * bounding circle: 1 when the distance is below 'k', otherwise a value
+ * between 0 and 1.  The alpha channel of the final fragment color is
+ * multiplied by that coverage.
+ */
+struct tgsi_token *
+tgsi_add_aa_point(const struct tgsi_token *tokens_in,
+                  const int aa_point_coord_index)
+{
+   struct aa_transform_context xform;
+   const uint extra_tokens = 200; /* should be enough */
+   const uint out_len = tgsi_num_tokens(tokens_in) + extra_tokens;
+   struct tgsi_token *tokens_out = tgsi_alloc_tokens(out_len);
+
+   if (tokens_out == NULL)
+      return NULL;
+
+   /* Start from an all-zero context; this also leaves the num_tmp,
+    * num_imm and num_input counters at 0.
+    */
+   memset(&xform, 0, sizeof(xform));
+
+   /* callbacks */
+   xform.base.prolog = aa_prolog;
+   xform.base.epilog = aa_epilog;
+   xform.base.transform_declaration = aa_decl;
+   xform.base.transform_immediate = aa_immediate;
+   xform.base.transform_instruction = aa_inst;
+
+   /* register indices are unknown until the shader has been scanned */
+   xform.color_out = INVALID_INDEX;
+   xform.color_tmp = INVALID_INDEX;
+   xform.tmp = INVALID_INDEX;
+
+   assert(aa_point_coord_index != -1);
+   xform.aa_point_coord_index = (unsigned)aa_point_coord_index;
+
+   tgsi_transform_shader(tokens_in, tokens_out, out_len, &xform.base);
+
+   return tokens_out;
+}
--- /dev/null
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_AA_POINT_H
+#define TGSI_AA_POINT_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_aa_point(const struct tgsi_token *tokens_in,
+ const int aa_point_coord_index);
+
+#endif /* TGSI_AA_POINT_H */
/*
* execute a texture instruction.
*
- * modifier is used to control the channel routing for the\
+ * modifier is used to control the channel routing for the
* instruction variants like proj, lod, and texture with lod bias.
* sampler indicates which src register the sampler is contained in.
*/
{
const union tgsi_exec_channel *args[5], *proj = NULL;
union tgsi_exec_channel r[5];
- enum tgsi_sampler_control control = tgsi_sampler_lod_none;
+ enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
uint chan;
uint unit;
int8_t offsets[3];
args[i] = &ZeroVec;
if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
- control = tgsi_sampler_lod_explicit;
+ control = TGSI_SAMPLER_LOD_EXPLICIT;
else if (modifier == TEX_MODIFIER_LOD_BIAS)
- control = tgsi_sampler_lod_bias;
+ control = TGSI_SAMPLER_LOD_BIAS;
else if (modifier == TEX_MODIFIER_GATHER)
- control = tgsi_sampler_gather;
+ control = TGSI_SAMPLER_GATHER;
}
else {
for (i = dim; i < Elements(args); i++)
}
}
+/**
+ * Execute a LODQ instruction.
+ *
+ * src[0] supplies the texture coordinates and src[1] the sampler unit.
+ * The coordinate channels the texture target uses are fetched from src[0];
+ * the remaining arguments point at ZeroVec.  The sampler's query_lod hook
+ * fills two result channels, which are stored to dst.x and dst.y subject
+ * to the destination write mask.
+ */
+static void
+exec_lodq(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   union tgsi_exec_channel coords[4];
+   const union tgsi_exec_channel *args[Elements(coords)];
+   union tgsi_exec_channel r[2];
+   const uint unit = fetch_sampler_unit(mach, inst, 1);
+   const int dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL);
+   unsigned writemask;
+   int i;
+
+   assert(dim <= Elements(coords));
+
+   /* fetch the used coordinates, zero-fill the rest */
+   for (i = 0; i < Elements(coords); i++) {
+      if (i < dim) {
+         FETCH(&coords[i], 0, TGSI_CHAN_X + i);
+         args[i] = &coords[i];
+      }
+      else {
+         args[i] = &ZeroVec;
+      }
+   }
+
+   mach->Sampler->query_lod(mach->Sampler, unit, unit,
+                            args[0]->f, args[1]->f, args[2]->f, args[3]->f,
+                            TGSI_SAMPLER_LOD_NONE,
+                            r[0].f, r[1].f);
+
+   writemask = inst->Dst[0].Register.WriteMask;
+
+   if (writemask & TGSI_WRITEMASK_X) {
+      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+   if (writemask & TGSI_WRITEMASK_Y) {
+      store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+}
static void
exec_txd(struct tgsi_exec_machine *mach,
fetch_texel(mach->Sampler, unit, unit,
&r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
fetch_texel(mach->Sampler, unit, unit,
&r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
fetch_texel(mach->Sampler, unit, unit,
&r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
fetch_texel(mach->Sampler, unit, unit,
&r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
fetch_texel(mach->Sampler, unit, unit,
&r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
const uint sampler_unit = inst->Src[2].Register.Index;
union tgsi_exec_channel r[5], c1;
const union tgsi_exec_channel *lod = &ZeroVec;
- enum tgsi_sampler_control control = tgsi_sampler_lod_none;
+ enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
uint chan;
unsigned char swizzles[4];
int8_t offsets[3];
if (modifier == TEX_MODIFIER_LOD_BIAS) {
FETCH(&c1, 3, TGSI_CHAN_X);
lod = &c1;
- control = tgsi_sampler_lod_bias;
+ control = TGSI_SAMPLER_LOD_BIAS;
}
else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
FETCH(&c1, 3, TGSI_CHAN_X);
lod = &c1;
- control = tgsi_sampler_lod_explicit;
+ control = TGSI_SAMPLER_LOD_EXPLICIT;
}
else {
assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
- control = tgsi_sampler_lod_zero;
+ control = TGSI_SAMPLER_LOD_ZERO;
}
}
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
&r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
&r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
fetch_texel(mach->Sampler, resource_unit, sampler_unit,
&r[0], &r[1], &r[2], &r[3], &ZeroVec,
- derivs, offsets, tgsi_sampler_derivs_explicit,
+ derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
&r[0], &r[1], &r[2], &r[3]);
break;
exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
break;
+ case TGSI_OPCODE_LODQ:
+ /* src[0] = texcoord */
+ /* src[1] = sampler unit */
+ exec_lodq(mach, inst);
+ break;
+
case TGSI_OPCODE_UP2H:
assert (0);
break;
float dady[TGSI_NUM_CHANNELS];
};
-enum tgsi_sampler_control {
- tgsi_sampler_lod_none,
- tgsi_sampler_lod_bias,
- tgsi_sampler_lod_explicit,
- tgsi_sampler_lod_zero,
- tgsi_sampler_derivs_explicit,
- tgsi_sampler_gather,
+enum tgsi_sampler_control
+{
+ TGSI_SAMPLER_LOD_NONE,
+ TGSI_SAMPLER_LOD_BIAS,
+ TGSI_SAMPLER_LOD_EXPLICIT,
+ TGSI_SAMPLER_LOD_ZERO,
+ TGSI_SAMPLER_DERIVS_EXPLICIT,
+ TGSI_SAMPLER_GATHER,
};
/**
const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+ void (*query_lod)(const struct tgsi_sampler *tgsi_sampler,
+ const unsigned sview_index,
+ const unsigned sampler_index,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float c0[TGSI_QUAD_SIZE],
+ const enum tgsi_sampler_control control,
+ float mipmap[TGSI_QUAD_SIZE],
+ float lod[TGSI_QUAD_SIZE]);
};
#define TGSI_EXEC_NUM_TEMPS 4096
{ 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
{ 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
{ 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
- { 0, 0, 0, 0, 0, 0, NONE, "", 104 }, /* removed */
+ { 1, 1, 1, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
{ 0, 0, 0, 0, 0, 0, NONE, "", 105 }, /* removed */
{ 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
{ 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */
case TGSI_OPCODE_TXQ:
case TGSI_OPCODE_TXQ_LZ:
+ case TGSI_OPCODE_TXQS:
case TGSI_OPCODE_F2U:
case TGSI_OPCODE_UDIV:
case TGSI_OPCODE_UMAD:
--- /dev/null
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms the geometry shader to emulate point sprite by
+ * drawing a quad. It also adds an extra output for the original point position
+ * if the point position is to be written to a stream output buffer.
+ * Note: It assumes the driver will add a constant for the inverse viewport
+ * after the user defined constants.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_point_sprite.h"
+#include "tgsi_transform.h"
+#include "pipe/p_state.h"
+
+#define INVALID_INDEX 9999
+
+/* Build a packed swizzle (2 bits per component, x in the low bits) that
+ * selects, for each component, the channel of the point immediate
+ * (0, 1, 0.5, -1) holding the requested value:
+ * -1 selects W, 0 selects X and 1 selects Y.
+ * Each argument must be -1, 0 or 1.
+ */
+static inline unsigned
+set_swizzle(int x, int y, int z, int w)
+{
+   /* indexed by (value + 1) */
+   static const unsigned map[3] = {TGSI_SWIZZLE_W, TGSI_SWIZZLE_X,
+                                   TGSI_SWIZZLE_Y};
+   unsigned sx, sy, sz, sw;
+
+   assert(x >= -1);
+   assert(x <= 1);
+   assert(y >= -1);
+   assert(y <= 1);
+   assert(z >= -1);
+   assert(z <= 1);
+   assert(w >= -1);
+   assert(w <= 1);
+
+   sx = map[x + 1];
+   sy = map[y + 1];
+   sz = map[z + 1];
+   sw = map[w + 1];
+
+   return sx | (sy << 2) | (sz << 4) | (sw << 6);
+}
+
+
+/* Extract the 2-bit selector for one component (0..3) from a packed
+ * swizzle that stores component 0 in the lowest two bits.
+ */
+static inline unsigned
+get_swizzle(unsigned swizzle, unsigned component)
+{
+   unsigned shift;
+
+   assert(component < 4);
+
+   shift = 2 * component;
+   return (swizzle >> shift) & 0x3;
+}
+
+struct psprite_transform_context
+{
+ struct tgsi_transform_context base; // kept first: psprite_transform_context() casts a base pointer back to this struct
+ unsigned num_tmp; // one past the highest temp index declared or allocated so far
+ unsigned num_out; // one past the highest output index declared or allocated so far
+ unsigned num_orig_out; // value of num_out when the prolog starts, before it adds outputs
+ unsigned num_const; // one past the highest constant index declared or allocated so far
+ unsigned num_imm; // number of immediates emitted by the original shader
+ unsigned point_size_in; // point size input
+ unsigned point_size_out; // point size output
+ unsigned point_size_tmp; // point size temp
+ unsigned point_pos_in; // point pos input
+ unsigned point_pos_out; // point pos output
+ unsigned point_pos_sout; // original point pos for streamout
+ unsigned point_pos_tmp; // point pos temp
+ unsigned point_scale_tmp; // point scale temp
+ unsigned point_color_out; // point color output
+ unsigned point_color_tmp; // point color temp
+ unsigned point_imm; // index of the point immediate (0, 1, 0.5, -1)
+ unsigned point_ivp; // index of the point constant (inverseViewport.xy, point size, max point size)
+ unsigned point_dir_swz[4]; // point direction swizzle
+ unsigned point_coord_swz[4]; // point coord swizzle
+ unsigned point_coord_enable; // point coord enable mask
+ unsigned point_coord_decl; // point coord output declared mask
+ unsigned point_coord_out; // point coord output starting index
+ unsigned point_coord_aa; // aa point coord semantic index
+ unsigned point_coord_k; // temp register for the aa point coord threshold distance
+ unsigned stream_out_point_pos:1; // set if to stream out original point pos
+ unsigned aa_point:1; // set if doing aa point
+ unsigned out_tmp_index[PIPE_MAX_SHADER_OUTPUTS]; // temp index standing in for each original output (INVALID_INDEX for the position output)
+ int max_generic; // highest GENERIC output semantic index declared or added so far
+};
+
+/**
+ * Downcast helper: recover the psprite_transform_context that embeds the
+ * given base transform context as its first member.
+ */
+static inline struct psprite_transform_context *
+psprite_transform_context(struct tgsi_transform_context *ctx)
+{
+   struct psprite_transform_context *derived =
+      (struct psprite_transform_context *) ctx;
+
+   return derived;
+}
+
+
+/**
+ * TGSI declaration transform callback.
+ *
+ * Records what the prolog and the emit-vertex rewrite need to know about
+ * the original shader, then forwards the declaration unchanged:
+ *  - first register of the PSIZE and POSITION inputs and outputs,
+ *  - a bit mask of GENERIC output semantic indices below 32 already
+ *    declared, together with the highest such index,
+ *  - one past the highest output, temporary and constant register index.
+ */
+static void
+psprite_decl(struct tgsi_transform_context *ctx,
+             struct tgsi_full_declaration *decl)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) {
+         ts->point_size_in = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+         ts->point_pos_in = decl->Range.First;
+      }
+   }
+   else if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) {
+         ts->point_size_out = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+         ts->point_pos_out = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
+               decl->Semantic.Index < 32) {
+         /* Shift an unsigned 1: index 31 is allowed by the check above and
+          * "1 << 31" on a signed int is undefined behaviour.
+          */
+         ts->point_coord_decl |= 1u << decl->Semantic.Index;
+         ts->max_generic = MAX2(ts->max_generic, decl->Semantic.Index);
+      }
+      ts->num_out = MAX2(ts->num_out, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_CONSTANT) {
+      ts->num_const = MAX2(ts->num_const, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+
+/**
+ * TGSI immediate declaration transform callback.
+ *
+ * Passes the immediate through unchanged and counts it, so that the prolog
+ * knows which index its own immediate will receive.
+ */
+static void
+psprite_immediate(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_immediate *imm)
+{
+   struct psprite_transform_context *ps = psprite_transform_context(ctx);
+
+   ctx->emit_immediate(ctx, imm);
+   ps->num_imm += 1;
+}
+
+
+
+/**
+ * TGSI transform prolog callback.
+ *
+ * Allocates the registers the point sprite emulation needs and emits
+ * their declarations:
+ *  - one temporary per original output (the original position output's
+ *    temporary is remembered separately as point_pos_tmp),
+ *  - temporaries for the point scale, the point size (if the shader has no
+ *    PSIZE output) and, for aa points, the threshold distance,
+ *  - optional extra outputs: the stream-out copy of the position, the
+ *    enabled-but-undeclared point coords, and the aa point coord,
+ *  - the point immediate (0, 1, 0.5, -1) and the point constant.
+ * If the shader has no PSIZE output, a MOV is emitted that loads the point
+ * size temporary from the .z channel of the point constant.
+ */
+static void
+psprite_prolog(struct tgsi_transform_context *ctx)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   unsigned point_coord_enable, en;
+   int i;
+
+   /* Replace output registers with temporary registers */
+   for (i = 0; i < ts->num_out; i++) {
+      ts->out_tmp_index[i] = ts->num_tmp++;
+   }
+   ts->num_orig_out = ts->num_out;
+
+   /* Declare a tmp register for point scale */
+   ts->point_scale_tmp = ts->num_tmp++;
+
+   if (ts->point_size_out != INVALID_INDEX)
+      ts->point_size_tmp = ts->out_tmp_index[ts->point_size_out];
+   else
+      ts->point_size_tmp = ts->num_tmp++;
+
+   assert(ts->point_pos_out != INVALID_INDEX);
+   ts->point_pos_tmp = ts->out_tmp_index[ts->point_pos_out];
+   ts->out_tmp_index[ts->point_pos_out] = INVALID_INDEX;
+
+   /* Declare one more tmp register for point coord threshold distance
+    * if we are generating anti-aliased point.
+    */
+   if (ts->aa_point)
+      ts->point_coord_k = ts->num_tmp++;
+
+   /* NOTE(review): the declared range starts at point_size_tmp.  When the
+    * shader has a PSIZE output that is out_tmp_index[point_size_out], so
+    * the output temporaries allocated before it are not covered by this
+    * declaration -- confirm this is intended.
+    */
+   tgsi_transform_temps_decl(ctx, ts->point_size_tmp, ts->num_tmp-1);
+
+   /* Declare an extra output for the original point position for stream out */
+   if (ts->stream_out_point_pos) {
+      ts->point_pos_sout = ts->num_out++;
+      tgsi_transform_output_decl(ctx, ts->point_pos_sout,
+                                 TGSI_SEMANTIC_GENERIC, 0, 0);
+   }
+
+   /* point coord outputs to be declared */
+   point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
+
+   /* Declare outputs for those point coord that are enabled but are not
+    * already declared in this shader.
+    */
+   ts->point_coord_out = ts->num_out;
+   if (point_coord_enable) {
+      /* i stays signed: it is compared against the signed max_generic. */
+      for (i = 0, en = point_coord_enable; en; en>>=1, i++) {
+         if (en & 0x1) {
+            tgsi_transform_output_decl(ctx, ts->num_out++,
+                                       TGSI_SEMANTIC_GENERIC, i, 0);
+            ts->max_generic = MAX2(ts->max_generic, i);
+         }
+      }
+   }
+
+   /* add an extra generic output for aa point texcoord */
+   if (ts->aa_point) {
+      ts->point_coord_aa = ts->max_generic + 1;
+      /* The index is used as a shift count on a 32-bit mask below; use an
+       * unsigned 1 so that bit 31 is well defined, and catch larger counts.
+       */
+      assert(ts->point_coord_aa < 32);
+      assert((ts->point_coord_enable & (1u << ts->point_coord_aa)) == 0);
+      ts->point_coord_enable |= 1u << (ts->point_coord_aa);
+      tgsi_transform_output_decl(ctx, ts->num_out++, TGSI_SEMANTIC_GENERIC,
+                                 ts->point_coord_aa, 0);
+   }
+
+   /* Declare extra immediates */
+   ts->point_imm = ts->num_imm;
+   tgsi_transform_immediate_decl(ctx, 0, 1, 0.5, -1);
+
+   /* Declare point constant -
+    * constant.xy -- inverseViewport
+    * constant.z -- current point size
+    * constant.w -- max point size
+    * The driver needs to add this constant to the constant buffer
+    */
+   ts->point_ivp = ts->num_const++;
+   tgsi_transform_const_decl(ctx, ts->point_ivp, ts->point_ivp);
+
+   /* If this geometry shader does not specify point size,
+    * get the current point size from the point constant.
+    */
+   if (ts->point_size_out == INVALID_INDEX) {
+      struct tgsi_full_instruction inst;
+
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+      inst.Instruction.NumDstRegs = 1;
+      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+                             ts->point_size_tmp, TGSI_WRITEMASK_XYZW);
+      inst.Instruction.NumSrcRegs = 1;
+      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_CONSTANT,
+                             ts->point_ivp, TGSI_SWIZZLE_Z,
+                             TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+      ctx->emit_instruction(ctx, &inst);
+   }
+}
+
+
+/**
+ * Add the point sprite emulation instructions at the emit vertex instruction.
+ *
+ * Called for every EMIT in the original shader.  Instead of one vertex it
+ * emits four vertices (one per entry of point_dir_swz / point_coord_swz)
+ * followed by an ENDPRIM.  For each vertex the saved outputs are copied from
+ * their temporaries, the position is offset by the scaled direction, the
+ * newly declared point coord outputs are written and vert_inst is re-emitted.
+ *
+ * ctx:       the transform context (cast to psprite_transform_context).
+ * vert_inst: the EMIT instruction from the shader; it is emitted once per
+ *            generated vertex and its Src[0] is reused for the ENDPRIM.
+ */
+static void
+psprite_emit_vertex_inst(struct tgsi_transform_context *ctx,
+ struct tgsi_full_instruction *vert_inst)
+{
+ struct psprite_transform_context *ts = psprite_transform_context(ctx);
+ struct tgsi_full_instruction inst;
+ unsigned point_coord_enable, en;
+ unsigned i, j, s;
+
+ /* new point coord outputs: enabled, but not declared by the shader itself */
+ point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
+
+ /* OUTPUT[pos_sout] = TEMP[pos] */
+ if (ts->point_pos_sout != INVALID_INDEX) {
+ tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+ TGSI_FILE_OUTPUT, ts->point_pos_sout,
+ TGSI_WRITEMASK_XYZW,
+ TGSI_FILE_TEMPORARY, ts->point_pos_tmp);
+ }
+
+ /**
+ * Set up the point scale vector
+ * scale = pointSize * pos.w * inverseViewport
+ */
+
+ /* MUL point_scale.x, point_size.x, point_pos.w */
+ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
+ TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_X,
+ TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+ TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W);
+
+ /* MUL point_scale.xy, point_scale.xx, inverseViewport.xy */
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+ inst.Instruction.NumDstRegs = 1;
+ tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+ ts->point_scale_tmp, TGSI_WRITEMASK_XY);
+ inst.Instruction.NumSrcRegs = 2;
+ tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
+ ts->point_scale_tmp, TGSI_SWIZZLE_X,
+ TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+ tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_CONSTANT,
+ ts->point_ivp, TGSI_SWIZZLE_X,
+ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+ ctx->emit_instruction(ctx, &inst);
+
+ /**
+ * Set up the point coord threshold distance
+ * k = 0.5 - 1 / pointsize
+ *
+ * The immediate at point_imm is declared as (0, 1, 0.5, -1), so
+ * swizzle Y supplies the 1 and swizzle Z supplies the 0.5 below.
+ */
+ if (ts->aa_point) {
+ /* DIV point_coord_k.x, point_imm.y, point_size.x */
+ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
+ TGSI_FILE_TEMPORARY, ts->point_coord_k,
+ TGSI_WRITEMASK_X,
+ TGSI_FILE_IMMEDIATE, ts->point_imm,
+ TGSI_SWIZZLE_Y,
+ TGSI_FILE_TEMPORARY, ts->point_size_tmp,
+ TGSI_SWIZZLE_X);
+
+ /* SUB point_coord_k.x, point_imm.z, point_coord_k.x */
+ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+ TGSI_FILE_TEMPORARY, ts->point_coord_k,
+ TGSI_WRITEMASK_X,
+ TGSI_FILE_IMMEDIATE, ts->point_imm,
+ TGSI_SWIZZLE_Z,
+ TGSI_FILE_TEMPORARY, ts->point_coord_k,
+ TGSI_SWIZZLE_X);
+ }
+
+
+ /* One iteration per vertex of the quad */
+ for (i = 0; i < 4; i++) {
+ unsigned point_dir_swz = ts->point_dir_swz[i];
+ unsigned point_coord_swz = ts->point_coord_swz[i];
+
+ /* All outputs need to be emitted for each vertex */
+ for (j = 0; j < ts->num_orig_out; j++) {
+ if (ts->out_tmp_index[j] != INVALID_INDEX) {
+ tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+ TGSI_FILE_OUTPUT, j,
+ TGSI_WRITEMASK_XYZW,
+ TGSI_FILE_TEMPORARY, ts->out_tmp_index[j]);
+ }
+ }
+
+ /* pos = point_scale * point_dir + point_pos
+ * point_dir is selected from the immediate through point_dir_swz.
+ */
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = TGSI_OPCODE_MAD;
+ inst.Instruction.NumDstRegs = 1;
+ tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT, ts->point_pos_out,
+ TGSI_WRITEMASK_XYZW);
+ inst.Instruction.NumSrcRegs = 3;
+ tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY, ts->point_scale_tmp,
+ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X,
+ TGSI_SWIZZLE_X);
+ tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE, ts->point_imm,
+ get_swizzle(point_dir_swz, 0),
+ get_swizzle(point_dir_swz, 1),
+ get_swizzle(point_dir_swz, 2),
+ get_swizzle(point_dir_swz, 3));
+ tgsi_transform_src_reg(&inst.Src[2], TGSI_FILE_TEMPORARY, ts->point_pos_tmp,
+ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+ TGSI_SWIZZLE_W);
+ ctx->emit_instruction(ctx, &inst);
+
+ /* point coord
+ * s is the bit position within point_coord_enable; j counts the enabled
+ * bits seen so far and is the offset from point_coord_out.
+ */
+ for (j = 0, s = 0, en = point_coord_enable; en; en>>=1, s++) {
+ unsigned dstReg;
+
+ if (en & 0x1) {
+ dstReg = ts->point_coord_out + j;
+
+ /* MOV OUTPUT[dstReg], point_imm swizzled by point_coord_swz */
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+ inst.Instruction.NumDstRegs = 1;
+ tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT,
+ dstReg, TGSI_WRITEMASK_XYZW);
+ inst.Instruction.NumSrcRegs = 1;
+ tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_IMMEDIATE, ts->point_imm,
+ get_swizzle(point_coord_swz, 0),
+ get_swizzle(point_coord_swz, 1),
+ get_swizzle(point_coord_swz, 2),
+ get_swizzle(point_coord_swz, 3));
+ ctx->emit_instruction(ctx, &inst);
+
+ /* MOV point_coord.z point_coord_k.x
+ * Only for the output whose bit position is point_coord_aa.
+ */
+ if (s == ts->point_coord_aa) {
+ tgsi_transform_op1_swz_inst(ctx, TGSI_OPCODE_MOV,
+ TGSI_FILE_OUTPUT, dstReg, TGSI_WRITEMASK_Z,
+ TGSI_FILE_TEMPORARY, ts->point_coord_k,
+ TGSI_SWIZZLE_X);
+ }
+ j++; /* the next point coord output offset */
+ }
+ }
+
+ /* Emit the EMIT instruction for each vertex of the quad */
+ ctx->emit_instruction(ctx, vert_inst);
+ }
+
+ /* Emit the ENDPRIM instruction for the quad; it reuses the source
+ * operand of the original EMIT instruction.
+ */
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = TGSI_OPCODE_ENDPRIM;
+ inst.Instruction.NumDstRegs = 0;
+ inst.Instruction.NumSrcRegs = 1;
+ inst.Src[0] = vert_inst->Src[0];
+ ctx->emit_instruction(ctx, &inst);
+}
+
+
+/**
+ * TGSI instruction transform callback.
+ *
+ * - EMIT is handed to psprite_emit_vertex_inst().
+ * - An instruction whose first destination is an OUTPUT register is
+ *   redirected to the matching temporary register.  A write to the point
+ *   size output is additionally followed by a MAX/MIN clamp.
+ * - Every other instruction is emitted unchanged.
+ */
+static void
+psprite_inst(struct tgsi_transform_context *ctx,
+             struct tgsi_full_instruction *inst)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   struct tgsi_full_dst_register *dst = &inst->Dst[0];
+
+   if (inst->Instruction.Opcode == TGSI_OPCODE_EMIT) {
+      psprite_emit_vertex_inst(ctx, inst);
+      return;
+   }
+
+   if (dst->Register.File != TGSI_FILE_OUTPUT) {
+      /* Not an output write: pass the instruction through as is. */
+      ctx->emit_instruction(ctx, inst);
+      return;
+   }
+
+   if (dst->Register.Index == ts->point_size_out) {
+      /* The point size goes to a temporary which is later read as a
+       * source when computing the point scale factor.
+       */
+      dst->Register.File = TGSI_FILE_TEMPORARY;
+      dst->Register.Index = ts->point_size_tmp;
+      ctx->emit_instruction(ctx, inst);
+
+      /* Clamp the point size:
+       * MAX point_size_tmp.x, point_size_tmp.x, point_imm.y
+       */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MAX,
+                  TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
+                  TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                  TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y);
+
+      /* MIN point_size_tmp.x, point_size_tmp.x, point_ivp.w */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
+                  TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
+                  TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                  TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W);
+      return;
+   }
+
+   if (dst->Register.Index == ts->point_pos_out) {
+      /* The point position goes to its dedicated temporary. */
+      dst->Register.File = TGSI_FILE_TEMPORARY;
+      dst->Register.Index = ts->point_pos_tmp;
+   }
+   else {
+      /* Any other output goes to the temporary recorded for it. */
+      dst->Register.File = TGSI_FILE_TEMPORARY;
+      dst->Register.Index = ts->out_tmp_index[dst->Register.Index];
+   }
+   ctx->emit_instruction(ctx, inst);
+}
+
+
+/**
+ * TGSI property transform callback.
+ *
+ * Each point becomes a 4-vertex triangle strip, so the GS output primitive
+ * is forced to PIPE_PRIM_TRIANGLE_STRIP and the maximum output vertex count
+ * is multiplied by 4.  All other properties are forwarded untouched.
+ */
+static void
+psprite_property(struct tgsi_transform_context *ctx,
+                 struct tgsi_full_property *prop)
+{
+   const unsigned name = prop->Property.PropertyName;
+
+   if (name == TGSI_PROPERTY_GS_OUTPUT_PRIM) {
+      prop->u[0].Data = PIPE_PRIM_TRIANGLE_STRIP;
+   }
+   else if (name == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
+      prop->u[0].Data *= 4;
+   }
+
+   ctx->emit_property(ctx, prop);
+}
+
+/**
+ * TGSI utility to transform a geometry shader to support point sprite.
+ *
+ * tokens_in:                the shader to transform (not modified).
+ * point_coord_enable:       bitmask of point coord outputs to generate.
+ * sprite_origin_lower_left: selects which of the two point coord swizzle
+ *                           sets (differing in the y coordinate) is used.
+ * stream_out_point_pos:     stored in the transform context for the prolog.
+ * aa_point_coord_index:     when non-NULL, aa point handling is enabled and
+ *                           the chosen aa point coord index is stored here.
+ *
+ * Returns the new token buffer, or NULL if it could not be allocated.
+ */
+struct tgsi_token *
+tgsi_add_point_sprite(const struct tgsi_token *tokens_in,
+                      const unsigned point_coord_enable,
+                      const bool sprite_origin_lower_left,
+                      const bool stream_out_point_pos,
+                      int *aa_point_coord_index)
+{
+   const uint num_new_tokens = 200; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   /* y coordinate of the first / second vertex of each quad edge */
+   const int first_y = sprite_origin_lower_left ? 0 : 1;
+   const int second_y = sprite_origin_lower_left ? 1 : 0;
+   struct psprite_transform_context transform;
+   struct tgsi_token *new_tokens;
+
+   /* setup transformation context: callbacks first */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.prolog = psprite_prolog;
+   transform.base.transform_declaration = psprite_decl;
+   transform.base.transform_immediate = psprite_immediate;
+   transform.base.transform_instruction = psprite_inst;
+   transform.base.transform_property = psprite_property;
+
+   /* no register has been found or allocated yet */
+   transform.point_size_in = INVALID_INDEX;
+   transform.point_size_out = INVALID_INDEX;
+   transform.point_size_tmp = INVALID_INDEX;
+   transform.point_pos_in = INVALID_INDEX;
+   transform.point_pos_out = INVALID_INDEX;
+   transform.point_pos_sout = INVALID_INDEX;
+   transform.point_pos_tmp = INVALID_INDEX;
+   transform.point_scale_tmp = INVALID_INDEX;
+   transform.point_imm = INVALID_INDEX;
+   transform.point_coord_aa = INVALID_INDEX;
+   transform.point_coord_k = INVALID_INDEX;
+
+   /* caller supplied options */
+   transform.point_coord_enable = point_coord_enable;
+   transform.stream_out_point_pos = stream_out_point_pos;
+   transform.aa_point = aa_point_coord_index != NULL;
+   transform.max_generic = -1;
+
+   /* point sprite directions based on the immediates (0, 1, 0.5, -1):
+    * (-1, -1, 0, 0), (-1, 1, 0, 0), (1, -1, 0, 0), (1, 1, 0, 0)
+    */
+   transform.point_dir_swz[0] = set_swizzle(-1, -1, 0, 0);
+   transform.point_dir_swz[1] = set_swizzle(-1, 1, 0, 0);
+   transform.point_dir_swz[2] = set_swizzle(1, -1, 0, 0);
+   transform.point_dir_swz[3] = set_swizzle(1, 1, 0, 0);
+
+   /* point coord based on the immediates (0, 1, 0.5, -1).
+    * lower-left origin:  (0,0,0,1) (0,1,0,1) (1,0,0,1) (1,1,0,1)
+    * otherwise:          (0,1,0,1) (0,0,0,1) (1,1,0,1) (1,0,0,1)
+    */
+   transform.point_coord_swz[0] = set_swizzle(0, first_y, 0, 1);
+   transform.point_coord_swz[1] = set_swizzle(0, second_y, 0, 1);
+   transform.point_coord_swz[2] = set_swizzle(1, first_y, 0, 1);
+   transform.point_coord_swz[3] = set_swizzle(1, second_y, 0, 1);
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (new_tokens == NULL)
+      return NULL;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   if (aa_point_coord_index != NULL)
+      *aa_point_coord_index = transform.point_coord_aa;
+
+   return new_tokens;
+}
--- /dev/null
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_POINT_SPRITE_H
+#define TGSI_POINT_SPRITE_H
+
+struct tgsi_token;
+
+/**
+ * Transform a geometry shader so that each emitted point becomes a
+ * 4-vertex quad (point sprite emulation).
+ *
+ * tokens_in:                shader to transform.
+ * point_coord_enable:       bitmask of point coord outputs to generate.
+ * sprite_origin_lower_left: selects the point coord orientation.
+ * stream_out_point_pos:     request an extra output carrying the original
+ *                           point position for stream out.
+ * aa_point_coord_index:     may be NULL; when non-NULL, aa point support is
+ *                           enabled and the aa point coord index is returned
+ *                           through it.
+ *
+ * Returns a newly allocated token buffer, or NULL on allocation failure.
+ * NOTE(review): presumably the caller owns and frees the result - confirm.
+ */
+struct tgsi_token *
+tgsi_add_point_sprite(const struct tgsi_token *tokens_in,
+ const unsigned point_coord_enable,
+ const bool sprite_origin_lower_left,
+ const bool stream_out_point_pos,
+ int *aa_point_coord_index);
+
+#endif /* TGSI_POINT_SPRITE_H */
{
uint procType, i;
struct tgsi_parse_context parse;
+ unsigned current_depth = 0;
memset(info, 0, sizeof(*info));
for (i = 0; i < TGSI_FILE_COUNT; i++)
assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
info->opcode_count[fullinst->Instruction.Opcode]++;
+ switch (fullinst->Instruction.Opcode) {
+ case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_UIF:
+ case TGSI_OPCODE_BGNLOOP:
+ current_depth++;
+ info->max_depth = MAX2(info->max_depth, current_depth);
+ break;
+ case TGSI_OPCODE_ENDIF:
+ case TGSI_OPCODE_ENDLOOP:
+ current_depth--;
+ break;
+ default:
+ break;
+ }
+
+ if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
+ fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
+ info->uses_doubles = true;
+
for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *src =
&fullinst->Src[i];
boolean writes_viewport_index;
boolean writes_layer;
boolean is_msaa_sampler[PIPE_MAX_SAMPLERS];
-
+ boolean uses_doubles; /**< uses any of the double instructions */
unsigned clipdist_writemask;
unsigned culldist_writemask;
unsigned num_written_culldistance;
unsigned indirect_files_written;
unsigned properties[TGSI_PROPERTY_COUNT]; /* index with TGSI_PROPERTY_ */
+
+ /**
+ * Max nesting limit of loops/if's
+ */
+ unsigned max_depth;
};
extern void
* Helper for emitting temporary register declarations.
*/
static inline void
-tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
- unsigned index)
+tgsi_transform_temps_decl(struct tgsi_transform_context *ctx,
+ unsigned firstIdx, unsigned lastIdx)
{
struct tgsi_full_declaration decl;
decl = tgsi_default_full_declaration();
decl.Declaration.File = TGSI_FILE_TEMPORARY;
- decl.Range.First =
- decl.Range.Last = index;
+ decl.Range.First = firstIdx;
+ decl.Range.Last = lastIdx;
ctx->emit_declaration(ctx, &decl);
}
+/**
+ * Helper for emitting a single temporary register declaration.
+ * Forwards to tgsi_transform_temps_decl() with index as both the first
+ * and the last register of the range.
+ */
+static inline void
+tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
+ unsigned index)
+{
+ tgsi_transform_temps_decl(ctx, index, index);
+}
static inline void
+tgsi_transform_const_decl(struct tgsi_transform_context *ctx,
+ unsigned firstIdx, unsigned lastIdx)
+{
+ struct tgsi_full_declaration decl;
+
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_CONSTANT;
+ decl.Range.First = firstIdx;
+ decl.Range.Last = lastIdx;
+ ctx->emit_declaration(ctx, &decl);
+}
+
+static inline void
tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
unsigned index,
unsigned sem_name, unsigned sem_index,
ctx->emit_declaration(ctx, &decl);
}
+/**
+ * Helper for emitting an output register declaration.
+ *
+ * Declares the single register OUTPUT[index] with the given semantic
+ * name/index and interpolation mode, then hands it to
+ * ctx->emit_declaration().
+ */
+static inline void
+tgsi_transform_output_decl(struct tgsi_transform_context *ctx,
+                           unsigned index,
+                           unsigned sem_name, unsigned sem_index,
+                           unsigned interp)
+{
+   struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+   /* register file and (one element) range */
+   decl.Declaration.File = TGSI_FILE_OUTPUT;
+   decl.Range.Last = index;
+   decl.Range.First = index;
+
+   /* semantic */
+   decl.Declaration.Semantic = 1;
+   decl.Semantic.Name = sem_name;
+   decl.Semantic.Index = sem_index;
+
+   /* interpolation */
+   decl.Declaration.Interpolate = 1;
+   decl.Interp.Interpolate = interp;
+
+   ctx->emit_declaration(ctx, &decl);
+}
static inline void
tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx,
ctx->emit_immediate(ctx, &immed);
}
+/**
+ * Helper for filling in a destination register: sets the register file,
+ * the register index and the write mask.  Other fields are left as is.
+ */
+static inline void
+tgsi_transform_dst_reg(struct tgsi_full_dst_register *reg,
+                       unsigned file, unsigned index, unsigned writemask)
+{
+   reg->Register.WriteMask = writemask;
+   reg->Register.Index = index;
+   reg->Register.File = file;
+}
+
+/**
+ * Helper for filling in a source register: sets the register file, the
+ * register index and the four per-channel swizzles.  Other fields are
+ * left as is.
+ */
+static inline void
+tgsi_transform_src_reg(struct tgsi_full_src_register *reg,
+                       unsigned file, unsigned index,
+                       unsigned swizzleX, unsigned swizzleY,
+                       unsigned swizzleZ, unsigned swizzleW)
+{
+   /* per-channel swizzles */
+   reg->Register.SwizzleW = swizzleW;
+   reg->Register.SwizzleZ = swizzleZ;
+   reg->Register.SwizzleY = swizzleY;
+   reg->Register.SwizzleX = swizzleX;
+
+   /* which register */
+   reg->Register.Index = index;
+   reg->Register.File = file;
+}
/**
* Helper for emitting 1-operand instructions.
tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
unsigned src_file,
unsigned src_index,
- unsigned src_swizzle)
+ unsigned src_swizzle,
+ boolean negate)
{
struct tgsi_full_instruction inst;
inst.Src[0].Register.SwizzleY =
inst.Src[0].Register.SwizzleZ =
inst.Src[0].Register.SwizzleW = src_swizzle;
- inst.Src[0].Register.Negate = 1;
+ inst.Src[0].Register.Negate = negate;
ctx->emit_instruction(ctx, &inst);
}
--- /dev/null
+/*
+ * Copyright 2013 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms fragment shaders to facilitate two-sided lighting.
+ *
+ * Basically, if the FS has any color inputs (TGSI_SEMANTIC_COLOR) we'll:
+ * 1. create corresponding back-color inputs (TGSI_SEMANTIC_BCOLOR)
+ * 2. use the FACE register to choose between front/back colors and put the
+ * selected color in new temp regs.
+ * 3. replace reads of the original color inputs with the new temp regs.
+ *
+ * Then, the driver just needs to link the VS front/back output colors to
+ * the FS front/back input colors.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_two_side.h"
+#include "tgsi_transform.h"
+
+
+#define INVALID_INDEX 9999
+
+
+/**
+ * State for the two-sided lighting transform.
+ * 'base' must stay the first member: two_side_transform_context() casts a
+ * tgsi_transform_context pointer to this type.
+ */
+struct two_side_transform_context
+{
+ struct tgsi_transform_context base;
+ uint num_temps; /**< highest declared TEMP Range.Last + 1 */
+ uint num_inputs; /**< highest declared INPUT Range.Last + 1 */
+ uint face_input; /**< index of the FACE input */
+ uint front_color_input[2]; /**< INPUT regs */
+ uint front_color_interp[2];/**< TGSI_INTERPOLATE_x */
+ uint back_color_input[2]; /**< INPUT regs */
+ uint new_colors[2]; /**< TEMP regs */
+};
+
+
+/**
+ * Cast helper: converts the generic transform context pointer into the
+ * two_side_transform_context pointer (plain pointer cast, no checking).
+ */
+static inline struct two_side_transform_context *
+two_side_transform_context(struct tgsi_transform_context *ctx)
+{
+ return (struct two_side_transform_context *) ctx;
+}
+
+
+/**
+ * Declaration callback.
+ *
+ * Records the register index and interpolation mode of each front COLOR
+ * input, the index of the FACE input, and keeps track of the number of
+ * INPUT and TEMPORARY registers declared so far.  The declaration itself
+ * is always emitted unchanged.
+ */
+static void
+xform_decl(struct tgsi_transform_context *ctx,
+           struct tgsi_full_declaration *decl)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+
+   switch (decl->Declaration.File) {
+   case TGSI_FILE_INPUT:
+      switch (decl->Semantic.Name) {
+      case TGSI_SEMANTIC_COLOR:
+         /* found a front color */
+         assert(decl->Semantic.Index < 2);
+         ts->front_color_input[decl->Semantic.Index] = decl->Range.First;
+         ts->front_color_interp[decl->Semantic.Index] =
+            decl->Interp.Interpolate;
+         break;
+      case TGSI_SEMANTIC_FACE:
+         ts->face_input = decl->Range.First;
+         break;
+      default:
+         break;
+      }
+      ts->num_inputs = MAX2(ts->num_inputs, decl->Range.Last + 1);
+      break;
+   case TGSI_FILE_TEMPORARY:
+      ts->num_temps = MAX2(ts->num_temps, decl->Range.Last + 1);
+      break;
+   default:
+      break;
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+
+/**
+ * Prolog callback: emits the extra declarations and the front/back color
+ * selection instructions.
+ *
+ * For every front COLOR input that was seen:
+ *  - a BCOLOR input with the same semantic index and interpolation mode is
+ *    declared,
+ *  - a new temporary is declared to hold the selected color,
+ *  - a CMP instruction picks the back or front color based on FACE.
+ * A FACE input is declared as well if the shader did not declare one.
+ * Nothing is emitted when the shader has no COLOR inputs.
+ */
+static void
+emit_prolog(struct tgsi_transform_context *ctx)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+   uint num_colors = 0;
+   uint next_temp;
+   uint i;
+
+   /* Declare 0, 1 or 2 new BCOLOR inputs */
+   for (i = 0; i < 2; i++) {
+      if (ts->front_color_input[i] != INVALID_INDEX) {
+         decl = tgsi_default_full_declaration();
+         decl.Declaration.File = TGSI_FILE_INPUT;
+         decl.Declaration.Interpolate = 1;
+         decl.Declaration.Semantic = 1;
+         decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
+         decl.Semantic.Index = i;
+         decl.Range.First = decl.Range.Last = ts->num_inputs++;
+         decl.Interp.Interpolate = ts->front_color_interp[i];
+         ctx->emit_declaration(ctx, &decl);
+         ts->back_color_input[i] = decl.Range.First;
+         num_colors++;
+      }
+   }
+
+   if (num_colors == 0)
+      return;
+
+   /* Declare 1 or 2 temp registers */
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_TEMPORARY;
+   decl.Range.First = ts->num_temps;
+   decl.Range.Last = ts->num_temps + num_colors - 1;
+   ctx->emit_declaration(ctx, &decl);
+
+   /* Hand out the declared temps to the colors that are actually present.
+    * If only COLOR[1] exists, exactly one temp was declared above, so
+    * new_colors[1] must be the first temp and not num_temps + 1, which
+    * would be an undeclared register.
+    */
+   next_temp = ts->num_temps;
+   for (i = 0; i < 2; i++) {
+      if (ts->front_color_input[i] != INVALID_INDEX)
+         ts->new_colors[i] = next_temp++;
+   }
+
+   if (ts->face_input == INVALID_INDEX) {
+      /* declare FACE INPUT register */
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_INPUT;
+      decl.Declaration.Semantic = 1;
+      decl.Semantic.Name = TGSI_SEMANTIC_FACE;
+      decl.Semantic.Index = 0;
+      decl.Range.First = decl.Range.Last = ts->num_inputs++;
+      ctx->emit_declaration(ctx, &decl);
+      ts->face_input = decl.Range.First;
+   }
+
+   /* CMP temp[c0], face, bcolor[c0], fcolor[c0]
+    * temp[c0] = face < 0.0 ? bcolor[c0] : fcolor[c0]
+    */
+   for (i = 0; i < 2; i++) {
+      if (ts->front_color_input[i] != INVALID_INDEX) {
+         inst = tgsi_default_full_instruction();
+         inst.Instruction.Opcode = TGSI_OPCODE_CMP;
+         inst.Instruction.NumDstRegs = 1;
+         inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+         inst.Dst[0].Register.Index = ts->new_colors[i];
+         inst.Instruction.NumSrcRegs = 3;
+         inst.Src[0].Register.File = TGSI_FILE_INPUT;
+         inst.Src[0].Register.Index = ts->face_input;
+         inst.Src[1].Register.File = TGSI_FILE_INPUT;
+         inst.Src[1].Register.Index = ts->back_color_input[i];
+         inst.Src[2].Register.File = TGSI_FILE_INPUT;
+         inst.Src[2].Register.Index = ts->front_color_input[i];
+
+         ctx->emit_instruction(ctx, &inst);
+      }
+   }
+}
+
+
+/**
+ * Instruction callback.
+ *
+ * Rewrites every source operand that reads one of the recorded front color
+ * INPUT registers so that it reads the corresponding new TEMP register
+ * instead, then emits the instruction.
+ */
+static void
+xform_inst(struct tgsi_transform_context *ctx,
+           struct tgsi_full_instruction *inst)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+   const struct tgsi_opcode_info *info =
+      tgsi_get_opcode_info(inst->Instruction.Opcode);
+   uint s, c;
+
+   for (s = 0; s < info->num_src; s++) {
+      struct tgsi_full_src_register *src = &inst->Src[s];
+
+      if (src->Register.File != TGSI_FILE_INPUT)
+         continue;
+
+      /* Is this input one of the (up to two) front colors? */
+      for (c = 0; c < 2; c++) {
+         if (src->Register.Index == ts->front_color_input[c]) {
+            /* replace color input with temp reg */
+            src->Register.File = TGSI_FILE_TEMPORARY;
+            src->Register.Index = ts->new_colors[c];
+            break;
+         }
+      }
+   }
+
+   ctx->emit_instruction(ctx, inst);
+}
+
+
+/**
+ * Transform a shader for two-sided lighting.
+ *
+ * Sets up a two_side_transform_context, allocates an output buffer that is
+ * 100 tokens larger than the input and runs tgsi_transform_shader() on it.
+ *
+ * Returns the new token buffer, or NULL if it could not be allocated.
+ */
+struct tgsi_token *
+tgsi_add_two_side(const struct tgsi_token *tokens_in)
+{
+   const uint num_new_tokens = 100; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct two_side_transform_context transform;
+   struct tgsi_token *new_tokens;
+   uint c;
+
+   /* setup transformation context */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.prolog = emit_prolog;
+   transform.base.transform_declaration = xform_decl;
+   transform.base.transform_instruction = xform_inst;
+
+   /* nothing has been found in the shader yet */
+   transform.face_input = INVALID_INDEX;
+   for (c = 0; c < 2; c++) {
+      transform.front_color_input[c] = INVALID_INDEX;
+      transform.front_color_interp[c] = TGSI_INTERPOLATE_COLOR;
+      transform.back_color_input[c] = INVALID_INDEX;
+   }
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (new_tokens == NULL)
+      return NULL;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   return new_tokens;
+}
--- /dev/null
+/*
+ * Copyright 2013 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_TWO_SIDE_H
+#define TGSI_TWO_SIDE_H
+
+struct tgsi_token;
+
+/**
+ * Transform a fragment shader for two-sided lighting: front color inputs
+ * get matching back color inputs and reads of the front colors are replaced
+ * by a face-selected temporary.
+ *
+ * Returns a newly allocated token buffer, or NULL on allocation failure.
+ * NOTE(review): presumably the caller owns and frees the result - confirm.
+ */
+struct tgsi_token *
+tgsi_add_two_side(const struct tgsi_token *tokens_in);
+
+#endif /* TGSI_TWO_SIDE_H */
return dim;
}
+
+
+/**
+ * Return TRUE if the given texture target is one of the
+ * TGSI_TEXTURE_SHADOW* targets, FALSE otherwise.
+ */
+boolean
+tgsi_is_shadow_target(unsigned target)
+{
+   if (target == TGSI_TEXTURE_SHADOW1D ||
+       target == TGSI_TEXTURE_SHADOW2D ||
+       target == TGSI_TEXTURE_SHADOWRECT ||
+       target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
+       target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
+       target == TGSI_TEXTURE_SHADOWCUBE ||
+       target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
+      return TRUE;
+   }
+
+   return FALSE;
+}
int
tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample);
+boolean
+tgsi_is_shadow_target(unsigned target);
+
#if defined __cplusplus
}
#endif
u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), ctx->vertices,
&vb.buffer_offset, &vb.buffer);
+ if (!vb.buffer)
+ return;
u_upload_unmap(ctx->upload);
pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
struct pipe_vertex_buffer vb = {0};
- struct pipe_stream_output_target *so_target;
+ struct pipe_stream_output_target *so_target = NULL;
unsigned offsets[PIPE_MAX_SO_BUFFERS] = {0};
assert(num_channels >= 1);
u_upload_data(ctx->upload, 0, num_channels*4, clear_value,
&vb.buffer_offset, &vb.buffer);
+ if (!vb.buffer)
+ goto out;
+
vb.stride = 0;
blitter_set_running_flag(ctx);
util_draw_arrays(pipe, PIPE_PRIM_POINTS, 0, size / 4);
+out:
blitter_restore_vertex_states(ctx);
blitter_restore_render_cond(ctx);
blitter_unset_running_flag(ctx);
*dst_count = util_last_bit(enabled_buffers);
}
+
+
+/**
+ * Copy an index buffer binding from src to dst.
+ *
+ * dst->buffer is updated through pipe_resource_reference() before the rest
+ * of the structure is overwritten.  A NULL src releases dst's buffer
+ * reference and clears dst to all zeros.
+ */
+void
+util_set_index_buffer(struct pipe_index_buffer *dst,
+                      const struct pipe_index_buffer *src)
+{
+   if (!src) {
+      /* unbind */
+      pipe_resource_reference(&dst->buffer, NULL);
+      memset(dst, 0, sizeof(*dst));
+      return;
+   }
+
+   pipe_resource_reference(&dst->buffer, src->buffer);
+   memcpy(dst, src, sizeof(*dst));
+}
const struct pipe_vertex_buffer *src,
unsigned start_slot, unsigned count);
+void util_set_index_buffer(struct pipe_index_buffer *dst,
+ const struct pipe_index_buffer *src);
+
#ifdef __cplusplus
}
#endif
#define ffs __builtin_ffs
#endif
+#ifdef HAVE___BUILTIN_FFSLL
+#define ffsll __builtin_ffsll
+#else
+/**
+ * Fallback ffsll(): returns the 1-based position of the least significant
+ * set bit of a 64-bit value, or 0 if no bit is set.  Built from two 32-bit
+ * ffs() calls: the low word is searched first, then the high word.
+ */
+static inline int
+ffsll(long long int val)
+{
+   const unsigned low_word = (unsigned) (val & 0xffffffff);
+   const unsigned high_word = (unsigned) (val >> 32);
+   int pos;
+
+   pos = ffs(low_word);
+   if (pos != 0)
+      return pos;
+
+   pos = ffs(high_word);
+   return (pos != 0) ? 32 + pos : 0;
+}
+#endif
+
#endif /* FFS_DEFINED */
/**
}
#endif
+/* For looping over a bitmask when you want to loop over consecutive bits
+ * manually, for example:
+ *
+ * while (mask) {
+ *    int start, count, i;
+ *
+ *    u_bit_scan_consecutive_range(&mask, &start, &count);
+ *
+ *    for (i = 0; i < count; i++)
+ *       ... process element (start+i)
+ * }
+ *
+ * On return *start is the position of the lowest set bit, *count is the
+ * length of the run of set bits beginning there, and that run has been
+ * cleared from *mask.  *mask must be non-zero on entry.
+ */
+static inline void
+u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count)
+{
+   /* A completely full mask has no zero bit for ffs(~mask) to find, which
+    * would produce count == -1 and an undefined shift below.
+    */
+   if (*mask == 0xffffffff) {
+      *start = 0;
+      *count = 32;
+      *mask = 0;
+      return;
+   }
+
+   *start = ffs(*mask) - 1;
+   *count = ffs(~(*mask >> *start)) - 1;
+   /* Shift an unsigned 1 so that runs reaching bit 31 don't overflow int. */
+   *mask &= ~(((1u << *count) - 1) << *start);
+}
+
/**
* Return float bits.
*/
--- /dev/null
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+#include "u_inlines.h"
+#include "u_memory.h"
+#include "u_prim_restart.h"
+
+
+/**
+ * Translate an index buffer for primitive restart.
+ * Create a new index buffer which is a copy of the original index buffer
+ * except that instances of 'restart_index' are converted to 0xffff or
+ * 0xffffffff.
+ * Also, index buffers using 1-byte indexes are converted to 2-byte indexes.
+ *
+ * \param context        context used to create and map the buffers
+ * \param src_buffer     the original index buffer (user buffers are not
+ *                       supported)
+ * \param dst_buffer     returns the newly created index buffer; it is set
+ *                       to NULL when the function fails
+ * \param num_indexes    number of indexes to translate
+ * \param restart_index  index value that marks a primitive restart
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if the new buffer
+ *         could not be created or either buffer could not be mapped
+ */
+enum pipe_error
+util_translate_prim_restart_ib(struct pipe_context *context,
+                               struct pipe_index_buffer *src_buffer,
+                               struct pipe_resource **dst_buffer,
+                               unsigned num_indexes,
+                               unsigned restart_index)
+{
+   struct pipe_screen *screen = context->screen;
+   struct pipe_transfer *src_transfer = NULL, *dst_transfer = NULL;
+   void *src_map = NULL, *dst_map = NULL;
+   const unsigned src_index_size = src_buffer->index_size;
+   unsigned dst_index_size;
+
+   /* 1-byte indexes are converted to 2-byte indexes, 4-byte stays 4-byte */
+   dst_index_size = MAX2(2, src_index_size);
+   assert(dst_index_size == 2 || dst_index_size == 4);
+
+   /* no user buffers for now */
+   assert(src_buffer->user_buffer == NULL);
+
+   /* Create new index buffer */
+   *dst_buffer = pipe_buffer_create(screen, PIPE_BIND_INDEX_BUFFER,
+                                    PIPE_USAGE_STREAM,
+                                    num_indexes * dst_index_size);
+   if (!*dst_buffer)
+      goto error;
+
+   /* Map new / dest index buffer */
+   dst_map = pipe_buffer_map(context, *dst_buffer,
+                             PIPE_TRANSFER_WRITE, &dst_transfer);
+   if (!dst_map)
+      goto error;
+
+   /* Map original / src index buffer */
+   src_map = pipe_buffer_map_range(context, src_buffer->buffer,
+                                   src_buffer->offset,
+                                   num_indexes * src_index_size,
+                                   PIPE_TRANSFER_READ,
+                                   &src_transfer);
+   if (!src_map)
+      goto error;
+
+   if (src_index_size == 1 && dst_index_size == 2) {
+      /* widen ubyte indexes to ushort while translating */
+      uint8_t *src = (uint8_t *) src_map;
+      uint16_t *dst = (uint16_t *) dst_map;
+      unsigned i;
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffff : src[i];
+      }
+   }
+   else if (src_index_size == 2 && dst_index_size == 2) {
+      uint16_t *src = (uint16_t *) src_map;
+      uint16_t *dst = (uint16_t *) dst_map;
+      unsigned i;
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffff : src[i];
+      }
+   }
+   else {
+      uint32_t *src = (uint32_t *) src_map;
+      uint32_t *dst = (uint32_t *) dst_map;
+      unsigned i;
+      assert(src_index_size == 4);
+      assert(dst_index_size == 4);
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffffffff : src[i];
+      }
+   }
+
+   pipe_buffer_unmap(context, src_transfer);
+   pipe_buffer_unmap(context, dst_transfer);
+
+   return PIPE_OK;
+
+error:
+   if (src_transfer)
+      pipe_buffer_unmap(context, src_transfer);
+   if (dst_transfer)
+      pipe_buffer_unmap(context, dst_transfer);
+   if (*dst_buffer) {
+      screen->resource_destroy(screen, *dst_buffer);
+      /* don't hand a pointer to the destroyed resource back to the caller */
+      *dst_buffer = NULL;
+   }
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+/** Helper structs for util_draw_vbo_without_prim_restart() */
+
+/* One run of indexes to draw; add_range() stores its arguments here. */
+struct range {
+ unsigned start, count; /* first index of the run / number of indexes */
+};
+
+/* Growable array of ranges, allocated and filled in by add_range(). */
+struct range_info {
+ struct range *ranges; /* heap array; NULL until the first add_range() */
+ unsigned count, max; /* entries in use / entries allocated */
+};
+
+
+/**
+ * Helper function for util_draw_vbo_without_prim_restart()
+ *
+ * Appends the range (start, count) to info->ranges, allocating the array
+ * on first use and doubling its capacity whenever it is full.  If growing
+ * fails, the existing array and its contents are left untouched so the
+ * caller can still free them.
+ *
+ * \return true for success, false if out of memory
+ */
+static boolean
+add_range(struct range_info *info, unsigned start, unsigned count)
+{
+   if (info->max == 0) {
+      info->max = 10;
+      info->ranges = MALLOC(info->max * sizeof(struct range));
+      if (!info->ranges) {
+         return FALSE;
+      }
+   }
+   else if (info->count == info->max) {
+      /* grow the ranges[] array; keep the old pointer until we know the
+       * reallocation succeeded, otherwise the old storage would be leaked
+       */
+      struct range *grown = REALLOC(info->ranges,
+                                    info->max * sizeof(struct range),
+                                    2 * info->max * sizeof(struct range));
+      if (!grown) {
+         return FALSE;
+      }
+
+      info->ranges = grown;
+      info->max *= 2;
+   }
+
+   /* save the range */
+   info->ranges[info->count].start = start;
+   info->ranges[info->count].count = count;
+   info->count++;
+
+   return TRUE;
+}
+
+
+/**
+ * Implement primitive restart by breaking an indexed primitive into
+ * pieces which do not contain restart indexes.  Each piece is then
+ * drawn by calling pipe_context::draw_vbo().
+ *
+ * \param context  context used to map the index buffer and issue the draws
+ * \param ib       index buffer to scan (a resource or a user-space pointer)
+ * \param info     the original draw; must be indexed and have primitive
+ *                 restart enabled
+ * \return PIPE_OK if no error, an error code otherwise.
+ */
+enum pipe_error
+util_draw_vbo_without_prim_restart(struct pipe_context *context,
+                                   const struct pipe_index_buffer *ib,
+                                   const struct pipe_draw_info *info)
+{
+   const void *src_map;
+   struct range_info ranges = {0};
+   struct pipe_draw_info new_info;
+   struct pipe_transfer *src_transfer = NULL;
+   enum pipe_error ret = PIPE_OK;
+   unsigned i, start, count;
+
+   assert(info->indexed);
+   assert(info->primitive_restart);
+
+   /* Get pointer to the index data */
+   if (ib->buffer) {
+      /* map the index buffer (only the range we need to scan) */
+      src_map = pipe_buffer_map_range(context, ib->buffer,
+                                      ib->offset + info->start * ib->index_size,
+                                      info->count * ib->index_size,
+                                      PIPE_TRANSFER_READ,
+                                      &src_transfer);
+      if (!src_map) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+   }
+   else {
+      if (!ib->user_buffer) {
+         debug_printf("User-space index buffer is null!");
+         return PIPE_ERROR_BAD_INPUT;
+      }
+      src_map = (const uint8_t *) ib->user_buffer
+         + ib->offset
+         + info->start * ib->index_size;
+   }
+
+   /* Scan the indexes, recording each run that contains no restart index.
+    * 'start' and 'i' are relative to src_map, which already points at
+    * element info->start, hence the info->start bias when saving a range.
+    * On allocation failure the loop stops with 'ret' set; cleanup is done
+    * in one place after the switch.
+    */
+#define SCAN_INDEXES(TYPE) \
+   for (i = 0; i <= info->count; i++) { \
+      if (i == info->count || \
+          ((const TYPE *) src_map)[i] == info->restart_index) { \
+         /* cut / restart */ \
+         if (count > 0 && \
+             !add_range(&ranges, info->start + start, count)) { \
+            ret = PIPE_ERROR_OUT_OF_MEMORY; \
+            break; \
+         } \
+         start = i + 1; \
+         count = 0; \
+      } \
+      else { \
+         count++; \
+      } \
+   }
+
+   /* the first run begins at the first mapped index */
+   start = 0;
+   count = 0;
+   switch (ib->index_size) {
+   case 1:
+      SCAN_INDEXES(uint8_t);
+      break;
+   case 2:
+      SCAN_INDEXES(uint16_t);
+      break;
+   case 4:
+      SCAN_INDEXES(uint32_t);
+      break;
+   default:
+      assert(!"Bad index size");
+      ret = PIPE_ERROR_BAD_INPUT;
+      break;
+   }
+
+#undef SCAN_INDEXES
+
+   /* unmap index buffer */
+   if (src_transfer)
+      pipe_buffer_unmap(context, src_transfer);
+
+   if (ret != PIPE_OK) {
+      /* nothing is drawn on failure; release whatever was collected */
+      FREE(ranges.ranges);
+      return ret;
+   }
+
+   /* draw ranges between the restart indexes */
+   new_info = *info;
+   new_info.primitive_restart = FALSE;
+   for (i = 0; i < ranges.count; i++) {
+      new_info.start = ranges.ranges[i].start;
+      new_info.count = ranges.ranges[i].count;
+      context->draw_vbo(context, &new_info);
+   }
+
+   FREE(ranges.ranges);
+
+   return PIPE_OK;
+}
--- /dev/null
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef U_PRIM_RESTART_H
+#define U_PRIM_RESTART_H
+
+
+#include "pipe/p_defines.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct pipe_context;
+struct pipe_draw_info;
+struct pipe_index_buffer;
+struct pipe_resource;
+
+
+enum pipe_error
+util_translate_prim_restart_ib(struct pipe_context *context,
+ struct pipe_index_buffer *src_buffer,
+ struct pipe_resource **dst_buffer,
+ unsigned num_indexes,
+ unsigned restart_index);
+
+enum pipe_error
+util_draw_vbo_without_prim_restart(struct pipe_context *context,
+ const struct pipe_index_buffer *ib,
+ const struct pipe_draw_info *info);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
/* KILL_IF -texTemp; # if -texTemp < 0, kill fragment */
tgsi_transform_kill_inst(ctx,
TGSI_FILE_TEMPORARY, texTemp,
- TGSI_SWIZZLE_W);
+ TGSI_SWIZZLE_W, TRUE);
}
};
/* Do two rectangles intersect?
+ * Note: empty rectangles are valid as inputs (and never intersect).
*/
static inline boolean
u_rect_test_intersection(const struct u_rect *a,
return (!(a->x1 < b->x0 ||
b->x1 < a->x0 ||
a->y1 < b->y0 ||
- b->y1 < a->y0));
+ b->y1 < a->y0 ||
+ a->x1 < a->x0 ||
+ a->y1 < a->y0 ||
+ b->x1 < b->x0 ||
+ b->y1 < b->y0));
}
/* Find the intersection of two rectangles known to intersect.
u_rect_find_intersection(a,b);
}
else {
- b->x0 = b->x1 = b->y0 = b->y1 = 0;
+ /*
+ * Note the u_rect_xx tests deal with inclusive coordinates
+ * hence all-zero would not be an empty box.
+ */
+ b->x0 = b->y0 = 0;
+ b->x1 = b->y1 = -1;
}
}
return ureg_create_shader_and_destroy(ureg, pipe);
}
+
+/**
+ * Build a geometry shader that copies each of its inputs to an output
+ * with the same semantic name and index, then emits a single vertex.
+ * Input and output primitive types are both declared as points.
+ *
+ * \param pipe              passed to ureg_create_shader_and_destroy()
+ * \param num_attribs       number of attributes to pass through
+ * \param semantic_names    semantic name of each attribute
+ * \param semantic_indexes  semantic index of each attribute
+ * \return the result of ureg_create_shader_and_destroy(), or NULL if the
+ *         ureg program could not be created
+ */
+void *
+util_make_geometry_passthrough_shader(struct pipe_context *pipe,
+ uint num_attribs,
+ const ubyte *semantic_names,
+ const ubyte *semantic_indexes)
+{
+ static const unsigned zero[4] = {0, 0, 0, 0};
+
+ struct ureg_program *ureg;
+ /* NOTE(review): num_attribs is not checked against the sizes of dst[]
+ * and src[] - confirm callers stay within both limits.
+ */
+ struct ureg_dst dst[PIPE_MAX_SHADER_OUTPUTS];
+ struct ureg_src src[PIPE_MAX_SHADER_INPUTS];
+ struct ureg_src imm;
+
+ unsigned i;
+
+ ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
+ if (ureg == NULL)
+ return NULL;
+
+ /* points in, points out, one vertex per invocation, one invocation */
+ ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_POINTS);
+ ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_POINTS);
+ ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 1);
+ ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, 1);
+ /* all-zero immediate, used as the operand of EMIT below */
+ imm = ureg_DECL_immediate_uint(ureg, zero, 4);
+
+ /**
+ * Loop over all the attribs and declare the corresponding
+ * declarations in the geometry shader
+ */
+ for (i = 0; i < num_attribs; i++) {
+ src[i] = ureg_DECL_input(ureg, semantic_names[i],
+ semantic_indexes[i], 0, 1);
+ /* address the input through dimension index 0 */
+ src[i] = ureg_src_dimension(src[i], 0);
+ dst[i] = ureg_DECL_output(ureg, semantic_names[i], semantic_indexes[i]);
+ }
+
+ /* MOV dst[i] src[i] */
+ for (i = 0; i < num_attribs; i++) {
+ ureg_MOV(ureg, dst[i], src[i]);
+ }
+
+ /* EMIT IMM[0] */
+ ureg_insn(ureg, TGSI_OPCODE_EMIT, NULL, 0, &imm, 1);
+
+ /* END */
+ ureg_END(ureg);
+
+ return ureg_create_shader_and_destroy(ureg, pipe);
+}
+
unsigned tgsi_tex, unsigned nr_samples,
enum tgsi_return_type stype);
+extern void *
+util_make_geometry_passthrough_shader(struct pipe_context *pipe,
+ uint num_attribs,
+ const ubyte *semantic_names,
+ const ubyte *semantic_indexes);
+
#ifdef __cplusplus
}
#endif
}
+#define util_strcasecmp stricmp
+
#else
#define util_vsnprintf vsnprintf
#define util_strncat strncat
#define util_strstr strstr
#define util_memmove memmove
+#define util_strcasecmp strcasecmp
#endif
}
-static enum pipe_error
-u_upload_alloc_buffer( struct u_upload_mgr *upload,
- unsigned min_size )
+static void
+u_upload_alloc_buffer(struct u_upload_mgr *upload,
+ unsigned min_size)
{
struct pipe_screen *screen = upload->pipe->screen;
struct pipe_resource buffer;
}
upload->buffer = screen->resource_create(screen, &buffer);
- if (upload->buffer == NULL) {
- return PIPE_ERROR_OUT_OF_MEMORY;
- }
+ if (upload->buffer == NULL)
+ return;
/* Map the new buffer. */
upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
if (upload->map == NULL) {
upload->transfer = NULL;
pipe_resource_reference(&upload->buffer, NULL);
- return PIPE_ERROR_OUT_OF_MEMORY;
+ return;
}
upload->offset = 0;
- return PIPE_OK;
}
-enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
- unsigned min_out_offset,
- unsigned size,
- unsigned *out_offset,
- struct pipe_resource **outbuf,
- void **ptr )
+void
+u_upload_alloc(struct u_upload_mgr *upload,
+ unsigned min_out_offset,
+ unsigned size,
+ unsigned *out_offset,
+ struct pipe_resource **outbuf,
+ void **ptr)
{
- unsigned alloc_size = align( size, upload->alignment );
+ unsigned alloc_size = align(size, upload->alignment);
unsigned alloc_offset = align(min_out_offset, upload->alignment);
+ unsigned buffer_size = upload->buffer ? upload->buffer->width0 : 0;
unsigned offset;
- /* Init these return values here in case we fail below to make
- * sure the caller doesn't get garbage values.
- */
- *out_offset = ~0;
- pipe_resource_reference(outbuf, NULL);
- *ptr = NULL;
-
/* Make sure we have enough space in the upload buffer
* for the sub-allocation. */
- if (!upload->buffer ||
- MAX2(upload->offset, alloc_offset) + alloc_size > upload->buffer->width0) {
- enum pipe_error ret = u_upload_alloc_buffer(upload,
- alloc_offset + alloc_size);
- if (ret != PIPE_OK)
- return ret;
+ if (unlikely(MAX2(upload->offset, alloc_offset) + alloc_size > buffer_size)) {
+ u_upload_alloc_buffer(upload, alloc_offset + alloc_size);
+
+ if (unlikely(!upload->buffer)) {
+ *out_offset = ~0;
+ pipe_resource_reference(outbuf, NULL);
+ *ptr = NULL;
+ return;
+ }
+
+ buffer_size = upload->buffer->width0;
}
offset = MAX2(upload->offset, alloc_offset);
- if (!upload->map) {
+ if (unlikely(!upload->map)) {
upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
offset,
- upload->buffer->width0 - offset,
+ buffer_size - offset,
upload->map_flags,
&upload->transfer);
- if (!upload->map) {
+ if (unlikely(!upload->map)) {
upload->transfer = NULL;
- return PIPE_ERROR_OUT_OF_MEMORY;
+ *out_offset = ~0;
+ pipe_resource_reference(outbuf, NULL);
+ *ptr = NULL;
+ return;
}
upload->map -= offset;
/* Emit the return values: */
*ptr = upload->map + offset;
- pipe_resource_reference( outbuf, upload->buffer );
+ pipe_resource_reference(outbuf, upload->buffer);
*out_offset = offset;
upload->offset = offset + alloc_size;
- return PIPE_OK;
}
-enum pipe_error u_upload_data( struct u_upload_mgr *upload,
- unsigned min_out_offset,
- unsigned size,
- const void *data,
- unsigned *out_offset,
- struct pipe_resource **outbuf)
+void u_upload_data(struct u_upload_mgr *upload,
+ unsigned min_out_offset,
+ unsigned size,
+ const void *data,
+ unsigned *out_offset,
+ struct pipe_resource **outbuf)
{
uint8_t *ptr;
- enum pipe_error ret = u_upload_alloc(upload, min_out_offset, size,
- out_offset, outbuf,
- (void**)&ptr);
- if (ret != PIPE_OK)
- return ret;
-
- memcpy(ptr, data, size);
- return PIPE_OK;
-}
+ u_upload_alloc(upload, min_out_offset, size,
+ out_offset, outbuf,
+ (void**)&ptr);
+ if (ptr)
+ memcpy(ptr, data, size);
+}
-/* As above, but upload the full contents of a buffer. Useful for
- * uploading user buffers, avoids generating an explosion of GPU
- * buffers if you have an app that does lots of small vertex buffer
- * renders or DrawElements calls.
- */
-enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
- unsigned min_out_offset,
- unsigned offset,
- unsigned size,
- struct pipe_resource *inbuf,
- unsigned *out_offset,
- struct pipe_resource **outbuf)
+/* XXX: Remove. It's basically a CPU fallback of resource_copy_region. */
+void u_upload_buffer(struct u_upload_mgr *upload,
+ unsigned min_out_offset,
+ unsigned offset,
+ unsigned size,
+ struct pipe_resource *inbuf,
+ unsigned *out_offset,
+ struct pipe_resource **outbuf)
{
- enum pipe_error ret = PIPE_OK;
struct pipe_transfer *transfer = NULL;
const char *map = NULL;
&transfer);
if (map == NULL) {
- return PIPE_ERROR_OUT_OF_MEMORY;
+ pipe_resource_reference(outbuf, NULL);
+ return;
}
if (0)
debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size);
- ret = u_upload_data( upload,
- min_out_offset,
- size,
- map,
- out_offset,
- outbuf);
-
+ u_upload_data(upload, min_out_offset, size, map, out_offset, outbuf);
pipe_buffer_unmap( upload->pipe, transfer );
-
- return ret;
}
* \param outbuf Pointer to where the upload buffer will be returned.
* \param ptr Pointer to the allocated memory that is returned.
*/
-enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
- unsigned min_out_offset,
- unsigned size,
- unsigned *out_offset,
- struct pipe_resource **outbuf,
- void **ptr );
+void u_upload_alloc(struct u_upload_mgr *upload,
+ unsigned min_out_offset,
+ unsigned size,
+ unsigned *out_offset,
+ struct pipe_resource **outbuf,
+ void **ptr);
/**
* Same as u_upload_alloc, but in addition to that, it copies "data"
* to the pointer returned from u_upload_alloc.
*/
-enum pipe_error u_upload_data( struct u_upload_mgr *upload,
- unsigned min_out_offset,
- unsigned size,
- const void *data,
- unsigned *out_offset,
- struct pipe_resource **outbuf);
+void u_upload_data(struct u_upload_mgr *upload,
+ unsigned min_out_offset,
+ unsigned size,
+ const void *data,
+ unsigned *out_offset,
+ struct pipe_resource **outbuf);
/**
* Same as u_upload_data, except that the input data comes from a buffer
* instead of a user pointer.
*/
-enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
- unsigned min_out_offset,
- unsigned offset,
- unsigned size,
- struct pipe_resource *inbuf,
- unsigned *out_offset,
- struct pipe_resource **outbuf);
+void u_upload_buffer(struct u_upload_mgr *upload,
+ unsigned min_out_offset,
+ unsigned offset,
+ unsigned size,
+ struct pipe_resource *inbuf,
+ unsigned *out_offset,
+ struct pipe_resource **outbuf);
struct pipe_resource *out_buffer = NULL;
uint8_t *out_map;
unsigned out_offset, mask;
- enum pipe_error err;
/* Get a translate object. */
tr = translate_cache_find(mgr->translate_cache, key);
assert((ib->buffer || ib->user_buffer) && ib->index_size);
/* Create and map the output buffer. */
- err = u_upload_alloc(mgr->uploader, 0,
- key->output_stride * num_indices,
- &out_offset, &out_buffer,
- (void**)&out_map);
- if (err != PIPE_OK)
- return err;
+ u_upload_alloc(mgr->uploader, 0,
+ key->output_stride * num_indices,
+ &out_offset, &out_buffer,
+ (void**)&out_map);
+ if (!out_buffer)
+ return PIPE_ERROR_OUT_OF_MEMORY;
if (ib->user_buffer) {
map = (uint8_t*)ib->user_buffer + offset;
}
} else {
/* Create and map the output buffer. */
- err = u_upload_alloc(mgr->uploader,
- key->output_stride * start_vertex,
- key->output_stride * num_vertices,
- &out_offset, &out_buffer,
- (void**)&out_map);
- if (err != PIPE_OK)
- return err;
+ u_upload_alloc(mgr->uploader,
+ key->output_stride * start_vertex,
+ key->output_stride * num_vertices,
+ &out_offset, &out_buffer,
+ (void**)&out_map);
+ if (!out_buffer)
+ return PIPE_ERROR_OUT_OF_MEMORY;
out_offset -= key->output_stride * start_vertex;
unsigned start, end;
struct pipe_vertex_buffer *real_vb;
const uint8_t *ptr;
- enum pipe_error err;
i = u_bit_scan(&buffer_mask);
real_vb = &mgr->real_vertex_buffer[i];
ptr = mgr->vertex_buffer[i].user_buffer;
- err = u_upload_data(mgr->uploader, start, end - start, ptr + start,
- &real_vb->buffer_offset, &real_vb->buffer);
- if (err != PIPE_OK)
- return err;
+ u_upload_data(mgr->uploader, start, end - start, ptr + start,
+ &real_vb->buffer_offset, &real_vb->buffer);
+ if (!real_vb->buffer)
+ return PIPE_ERROR_OUT_OF_MEMORY;
real_vb->buffer_offset -= start;
}
* ``PIPE_CAP_DEPTH_BOUNDS_TEST``: Whether bounds_test, bounds_min, and
bounds_max states of pipe_depth_stencil_alpha_state behave according
to the GL_EXT_depth_bounds_test specification.
+* ``PIPE_CAP_TGSI_TXQS``: Whether the ``TXQS`` opcode is supported.
.. _pipe_capf:
For components which don't return a resource dimension, their value
is undefined.
-
.. math::
lod = src0.x
dst.w = texture\_levels(unit)
+
+.. opcode:: TXQS - Texture Samples Query
+
+ This retrieves the number of samples in the texture, and stores it
+ into the x component. The other components are undefined.
+
+.. math::
+
+ dst.x = texture\_samples(unit)
+
+
.. opcode:: TG4 - Texture Gather
As per ARB_texture_gather, gathers the four texels to be used in a bi-linear
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
enum a3xx_intp_mode {
SMOOTH = 0,
FLAT = 1,
+ ZERO = 2,
+ ONE = 3,
};
enum a3xx_repl_mode {
#define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD 0x00800000
#define A3XX_GRAS_CL_CLIP_CNTL_WCOORD 0x01000000
#define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE 0x02000000
+#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK 0x1c000000
+#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT 26
+/* Place val in the NUM_USER_CLIP_PLANES field; bits outside the field's mask are dropped. */
+static inline uint32_t A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(uint32_t val)
+{
+	const uint32_t field = val << A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT;
+
+	return field & A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK;
+}
#define REG_A3XX_GRAS_CL_GB_CLIP_ADJ 0x00002044
#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff
#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT 0
static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val)
{
- return ((((int32_t)(val * 16384.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
+ return ((((int32_t)(val * 1048576.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
}
#define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000206d
#define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE 0x00010000
#define REG_A3XX_RB_RENDER_CONTROL 0x000020c1
+#define A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE 0x00000001
+#define A3XX_RB_RENDER_CONTROL_YUV_IN_ENABLE 0x00000002
+#define A3XX_RB_RENDER_CONTROL_COV_VALUE_INPUT_ENABLE 0x00000004
#define A3XX_RB_RENDER_CONTROL_FACENESS 0x00000008
#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK 0x00000ff0
#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT 4
#define A3XX_RB_RENDER_CONTROL_YCOORD 0x00008000
#define A3XX_RB_RENDER_CONTROL_ZCOORD 0x00010000
#define A3XX_RB_RENDER_CONTROL_WCOORD 0x00020000
+#define A3XX_RB_RENDER_CONTROL_I_CLAMP_ENABLE 0x00080000
+#define A3XX_RB_RENDER_CONTROL_COV_VALUE_OUTPUT_ENABLE 0x00100000
#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST 0x00400000
#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK 0x07000000
#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT 24
{
return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK;
}
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_COVERAGE 0x40000000
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_ONE 0x80000000
#define REG_A3XX_RB_MSAA_CONTROL 0x000020c2
#define A3XX_RB_MSAA_CONTROL_DISABLE 0x00000400
#include "pipe/p_state.h"
#include "util/u_blend.h"
+#include "util/u_dual_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h"
so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
}
+ if (cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0))
+ so->rb_render_control = A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE;
+
return so;
}
struct fd3_blend_stateobj {
struct pipe_blend_state base;
+ uint32_t rb_render_control;
struct {
/* Blend control bits for color if there is an alpha channel */
uint32_t blend_control_rgb;
*/
struct fd_vertex_state blit_vbuf_state;
-
- /*
- * Border color layout *appears* to be as arrays of 0x40 byte
- * elements, with frag shader elements starting at (16 x 0x40).
- * But at some point I should probably experiment more with
- * samplers in vertex shaders to be sure. Unclear about why
- * there is this offset when there are separate VS and FS base
- * addr regs.
- *
- * The first 8 bytes of each entry are the requested border
- * color in fp16. Unclear about the rest.. could be used for
- * other formats, or could simply be for aligning the pitch
- * to 32 pixels.
- */
-#define BORDERCOLOR_SIZE 0x40
-
struct u_upload_mgr *border_color_uploader;
struct pipe_resource *border_color_buf;
&fd3_ctx->border_color_buf,
&ptr);
+ fd_setup_border_colors(tex, ptr, tex_off[sb]);
+
if (tex->num_samplers > 0) {
/* output sampler state: */
OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers));
const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
fd3_sampler_stateobj(tex->samplers[i]) :
&dummy_sampler;
- uint16_t *bcolor = (uint16_t *)((uint8_t *)ptr +
- (BORDERCOLOR_SIZE * tex_off[sb]) +
- (BORDERCOLOR_SIZE * i));
- uint32_t *bcolor32 = (uint32_t *)&bcolor[16];
-
- /*
- * XXX HACK ALERT XXX
- *
- * The border colors need to be swizzled in a particular
- * format-dependent order. Even though samplers don't know about
- * formats, we can assume that with a GL state tracker, there's a
- * 1:1 correspondence between sampler and texture. Take advantage
- * of that knowledge.
- */
- if (i < tex->num_textures && tex->textures[i]) {
- const struct util_format_description *desc =
- util_format_description(tex->textures[i]->format);
- for (j = 0; j < 4; j++) {
- if (desc->swizzle[j] >= 4)
- continue;
-
- const struct util_format_channel_description *chan =
- &desc->channel[desc->swizzle[j]];
- int size = chan->size;
-
- /* The Z16 texture format we use seems to look in the
- * 32-bit border color slots
- */
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
- size = 32;
-
- /* Formats like R11G11B10 or RGB9_E5 don't specify
- * per-channel sizes properly.
- */
- if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
- size = 16;
-
- if (chan->pure_integer && size > 16)
- bcolor32[desc->swizzle[j] + 4] =
- sampler->base.border_color.i[j];
- else if (size > 16)
- bcolor32[desc->swizzle[j]] =
- fui(sampler->base.border_color.f[j]);
- else if (chan->pure_integer)
- bcolor[desc->swizzle[j] + 8] =
- sampler->base.border_color.i[j];
- else
- bcolor[desc->swizzle[j]] =
- util_float_to_half(sampler->base.border_color.f[j]);
- }
- }
OUT_RING(ring, sampler->texsamp0);
OUT_RING(ring, sampler->texsamp1);
unsigned vtxcnt_regid = regid(63, 0);
for (i = 0; i < vp->inputs_count; i++) {
- uint8_t semantic = sem2name(vp->inputs[i].semantic);
- if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
- vertex_regid = vp->inputs[i].regid;
- else if (semantic == TGSI_SEMANTIC_INSTANCEID)
- instance_regid = vp->inputs[i].regid;
- else if (semantic == IR3_SEMANTIC_VTXCNT)
- vtxcnt_regid = vp->inputs[i].regid;
- else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask)
+ if (vp->inputs[i].sysval) {
+ switch(vp->inputs[i].slot) {
+ case SYSTEM_VALUE_BASE_VERTEX:
+ /* handled elsewhere */
+ break;
+ case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+ vertex_regid = vp->inputs[i].regid;
+ break;
+ case SYSTEM_VALUE_INSTANCE_ID:
+ instance_regid = vp->inputs[i].regid;
+ break;
+ case SYSTEM_VALUE_VERTEX_CNT:
+ vtxcnt_regid = vp->inputs[i].regid;
+ break;
+ default:
+ unreachable("invalid system value");
+ break;
+ }
+ } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
last = i;
+ }
}
/* hw doesn't like to be configured for zero vbo's, it seems: */
return;
for (i = 0, j = 0; i <= last; i++) {
- assert(sem2name(vp->inputs[i].semantic) == 0);
+ assert(!vp->inputs[i].sysval);
if (vp->inputs[i].compmask) {
struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
const struct pipe_vertex_buffer *vb =
A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
}
- if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
- uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control;
+ if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG | FD_DIRTY_BLEND_DUAL)) &&
+ !emit->key.binning_pass) {
+ uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control |
+ fd3_blend_stateobj(ctx->blend)->rb_render_control;
val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
val |= COND(fp->frag_coord, A3XX_RB_RENDER_CONTROL_XCOORD |
val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
/* TODO only use if prog doesn't use clipvertex/clipdist */
- val |= MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6) << 26;
+ val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
+ MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6));
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, val);
}
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
}
- if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) {
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- fd3_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
+ int nr_cbufs = pfb->nr_cbufs;
+ if (fd3_blend_stateobj(ctx->blend)->rb_render_control &
+ A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE)
+ nr_cbufs++;
+ fd3_program_emit(ring, emit, nr_cbufs, pfb->cbufs);
}
/* TODO we should not need this or fd_wfi() before emit_constants():
case PIPE_FORMAT_R16G16_FLOAT:
case PIPE_FORMAT_R11G11B10_FLOAT:
return RB_R16G16B16A16_FLOAT;
+ case PIPE_FORMAT_L8_UNORM:
+ return RB_R8G8B8A8_UNORM;
default:
return fd3_pipe2color(format);
}
/* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
- pos_regid = ir3_find_output_regid(vp,
- ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
- posz_regid = ir3_find_output_regid(fp,
- ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
- psize_regid = ir3_find_output_regid(vp,
- ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
+ pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
+ posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
+ psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
if (fp->color0_mrt) {
color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
- ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+ ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
} else {
- for (i = 0; i < fp->outputs_count; i++) {
- ir3_semantic sem = fp->outputs[i].semantic;
- unsigned idx = sem2idx(sem);
- if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
- continue;
- debug_assert(idx < ARRAY_SIZE(color_regid));
- color_regid[idx] = fp->outputs[i].regid;
- }
+ color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
+ color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
+ color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
+ color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
}
/* adjust regids for alpha output formats. there is no alpha render
j = ir3_next_varying(fp, j);
if (j < fp->inputs_count) {
- k = ir3_find_output(vp, fp->inputs[j].semantic);
+ k = ir3_find_output(vp, fp->inputs[j].slot);
reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid);
reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask);
}
j = ir3_next_varying(fp, j);
if (j < fp->inputs_count) {
- k = ir3_find_output(vp, fp->inputs[j].semantic);
+ k = ir3_find_output(vp, fp->inputs[j].slot);
reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid);
reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask);
}
/* figure out VARYING_INTERP / FLAT_SHAD register values: */
for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
- uint32_t interp = fp->inputs[j].interpolate;
/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
* instead.. rather than -8 everywhere else..
*/
debug_assert((inloc % 4) == 0);
- if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
- ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
+ if ((fp->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) ||
+ (fp->inputs[j].rasterflat && emit->rasterflat)) {
uint32_t loc = inloc;
for (i = 0; i < 4; i++, loc++) {
vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
}
}
- /* Replace the .xy coordinates with S/T from the point sprite. Set
- * interpolation bits for .zw such that they become .01
- */
- if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) {
- vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
- << ((inloc % 16) * 2);
- vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
- vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+ gl_varying_slot slot = fp->inputs[j].slot;
+
+ /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
+ if (slot >= VARYING_SLOT_VAR0) {
+ unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
+ /* Replace the .xy coordinates with S/T from the point sprite. Set
+ * interpolation bits for .zw such that they become .01
+ */
+ if (emit->sprite_coord_enable & texmask) {
+ vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
+ << ((inloc % 16) * 2);
+ vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+ vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+ }
}
}
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
A4XX_TEX_REPEAT = 0,
A4XX_TEX_CLAMP_TO_EDGE = 1,
A4XX_TEX_MIRROR_REPEAT = 2,
- A4XX_TEX_CLAMP_NONE = 3,
+ A4XX_TEX_CLAMP_TO_BORDER = 3,
+ A4XX_TEX_MIRROR_CLAMP = 4,
};
enum a4xx_tex_aniso {
pipe_resource_reference(&fd4_ctx->solid_vbuf, NULL);
pipe_resource_reference(&fd4_ctx->blit_texcoord_vbuf, NULL);
+ u_upload_destroy(fd4_ctx->border_color_uploader);
+
fd_context_destroy(pctx);
}
fd4_query_context_init(pctx);
+ fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096,
+ 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, 0);
+
return pctx;
}
#ifndef FD4_CONTEXT_H_
#define FD4_CONTEXT_H_
+#include "util/u_upload_mgr.h"
+
#include "freedreno_drmif.h"
#include "freedreno_context.h"
*/
struct fd_vertex_state blit_vbuf_state;
+ struct u_upload_mgr *border_color_uploader;
+ struct pipe_resource *border_color_buf;
+
/* if *any* of bits are set in {v,f}saturate_{s,t,r} */
bool vsaturate, fsaturate;
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
+ .ucp_enables = ctx->rasterizer ? ctx->rasterizer->clip_plane_enable : 0,
.has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate),
.vsaturate_s = fd4_ctx->vsaturate_s,
.vsaturate_t = fd4_ctx->vsaturate_t,
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum adreno_state_block sb, struct fd_texture_stateobj *tex)
{
- unsigned i;
+ static const uint32_t bcolor_reg[] = {
+ [SB_VERT_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
+ [SB_FRAG_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
+ };
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ unsigned i, off;
+ void *ptr;
+
+ u_upload_alloc(fd4_ctx->border_color_uploader,
+ 0, 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, &off,
+ &fd4_ctx->border_color_buf,
+ &ptr);
+
+ fd_setup_border_colors(tex, ptr, 0);
if (tex->num_samplers > 0) {
int num_samplers;
OUT_RING(ring, 0x00000000);
}
}
+
+ OUT_PKT0(ring, bcolor_reg[sb], 1);
+ OUT_RELOC(ring, fd_resource(fd4_ctx->border_color_buf)->bo, off, 0, 0);
+
+ u_upload_unmap(fd4_ctx->border_color_uploader);
}
/* emit texture state for mem->gmem restore operation.. eventually it would
unsigned vtxcnt_regid = regid(63, 0);
for (i = 0; i < vp->inputs_count; i++) {
- uint8_t semantic = sem2name(vp->inputs[i].semantic);
- if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
- vertex_regid = vp->inputs[i].regid;
- else if (semantic == TGSI_SEMANTIC_INSTANCEID)
- instance_regid = vp->inputs[i].regid;
- else if (semantic == IR3_SEMANTIC_VTXCNT)
- vtxcnt_regid = vp->inputs[i].regid;
- else if ((i < vtx->vtx->num_elements) && vp->inputs[i].compmask)
+ if (vp->inputs[i].sysval) {
+ switch(vp->inputs[i].slot) {
+ case SYSTEM_VALUE_BASE_VERTEX:
+ /* handled elsewhere */
+ break;
+ case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+ vertex_regid = vp->inputs[i].regid;
+ break;
+ case SYSTEM_VALUE_INSTANCE_ID:
+ instance_regid = vp->inputs[i].regid;
+ break;
+ case SYSTEM_VALUE_VERTEX_CNT:
+ vtxcnt_regid = vp->inputs[i].regid;
+ break;
+ default:
+ unreachable("invalid system value");
+ break;
+ }
+ } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
last = i;
+ }
}
+
/* hw doesn't like to be configured for zero vbo's, it seems: */
if ((vtx->vtx->num_elements == 0) &&
(vertex_regid == regid(63, 0)) &&
return;
for (i = 0, j = 0; i <= last; i++) {
- assert(sem2name(vp->inputs[i].semantic) == 0);
+ assert(!vp->inputs[i].sysval);
if (vp->inputs[i].compmask) {
struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
const struct pipe_vertex_buffer *vb =
_T(L8_UNORM, 8_UNORM, R8_UNORM, WZYX),
_T(I8_UNORM, 8_UNORM, NONE, WZYX),
- /* NOTE: should be TFMT_8_UINT (which then gets remapped to
- * TFMT_8_UNORM for mem2gmem in _gmem_restore_format()), but
- * we don't know TFMT_8_UINT yet.. so just use TFMT_8_UNORM
- * for now.. sampling from stencil as a texture might not
- * work right, but at least should be fine for zsbuf..
- */
- _T(S8_UINT, 8_UNORM, R8_UNORM, WZYX),
+ _T(A8_UINT, 8_UINT, NONE, WZYX),
+ _T(A8_SINT, 8_SINT, NONE, WZYX),
+ _T(L8_UINT, 8_UINT, NONE, WZYX),
+ _T(L8_SINT, 8_SINT, NONE, WZYX),
+ _T(I8_UINT, 8_UINT, NONE, WZYX),
+ _T(I8_SINT, 8_SINT, NONE, WZYX),
+
+ _T(S8_UINT, 8_UINT, R8_UNORM, WZYX),
/* 16-bit */
V_(R16_UNORM, 16_UNORM, NONE, WZYX),
/* blob seems to always use constmode currently: */
constmode = 1;
- pos_regid = ir3_find_output_regid(s[VS].v,
- ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
- posz_regid = ir3_find_output_regid(s[FS].v,
- ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
- psize_regid = ir3_find_output_regid(s[VS].v,
- ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
+ pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
+ posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH);
+ psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
if (s[FS].v->color0_mrt) {
color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
- ir3_find_output_regid(s[FS].v, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+ ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
} else {
- const struct ir3_shader_variant *fp = s[FS].v;
- memset(color_regid, 0, sizeof(color_regid));
- for (i = 0; i < fp->outputs_count; i++) {
- ir3_semantic sem = fp->outputs[i].semantic;
- unsigned idx = sem2idx(sem);
- if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
- continue;
- debug_assert(idx < ARRAY_SIZE(color_regid));
- color_regid[idx] = fp->outputs[i].regid;
- }
+ color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
+ color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
+ color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
+ color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
+ color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
+ color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
+ color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
+ color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
}
/* adjust regids for alpha output formats. there is no alpha render
if (util_format_is_alpha(pipe_surface_format(bufs[i])))
color_regid[i] += 3;
-
/* TODO get these dynamically: */
face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count) {
- k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
+ k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot);
reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid);
reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask);
}
j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count) {
- k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
+ k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot);
reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid);
reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask);
}
*/
/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
- uint32_t interp = s[FS].v->inputs[j].interpolate;
/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
* instead.. rather than -8 everywhere else..
*/
debug_assert((inloc % 4) == 0);
- if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
- ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
+ if ((s[FS].v->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) ||
+ (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
uint32_t loc = inloc;
for (i = 0; i < 4; i++, loc++) {
}
}
- /* Replace the .xy coordinates with S/T from the point sprite. Set
- * interpolation bits for .zw such that they become .01
- */
- if (emit->sprite_coord_enable & (1 << sem2idx(s[FS].v->inputs[j].semantic))) {
- vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
- << ((inloc % 16) * 2);
- vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
- vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+ gl_varying_slot slot = s[FS].v->inputs[j].slot;
+
+ /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
+ if (slot >= VARYING_SLOT_VAR0) {
+ unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
+ /* Replace the .xy coordinates with S/T from the point sprite. Set
+ * interpolation bits for .zw such that they become .01
+ */
+ if (emit->sprite_coord_enable & texmask) {
+ vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
+ << ((inloc % 16) * 2);
+ vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+ vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+ }
}
}
#include "fd4_texture.h"
#include "fd4_format.h"
-/* TODO do we need to emulate clamp-to-edge like a3xx? */
static enum a4xx_tex_clamp
-tex_clamp(unsigned wrap)
+tex_clamp(unsigned wrap, bool clamp_to_edge)
{
- /* hardware probably supports more, but we can't coax all the
- * wrap/clamp modes out of the GLESv2 blob driver.
- *
- * TODO once we have basics working, go back and just try
- * different values and see what happens
- */
+ /* Hardware does not support _CLAMP, but we emulate it: */
+ if (wrap == PIPE_TEX_WRAP_CLAMP) {
+ wrap = (clamp_to_edge) ?
+ PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+ }
+
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
return A4XX_TEX_REPEAT;
- case PIPE_TEX_WRAP_CLAMP:
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return A4XX_TEX_CLAMP_TO_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-// TODO
-// return A4XX_TEX_CLAMP_TO_BORDER;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return A4XX_TEX_CLAMP_TO_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-// TODO
-// return A4XX_TEX_MIRROR_CLAMP;
+ /* only works for PoT.. need to emulate otherwise! */
+ return A4XX_TEX_MIRROR_CLAMP;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
return A4XX_TEX_MIRROR_REPEAT;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ /* these two we could perhaps emulate, but we currently
+ * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
+ */
default:
DBG("invalid wrap: %u", wrap);
return 0;
struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
bool miplinear = false;
+ bool clamp_to_edge;
if (!so)
return NULL;
so->base = *cso;
+ /*
+ * For nearest filtering, _CLAMP means _CLAMP_TO_EDGE; for linear
+ * filtering, _CLAMP means _CLAMP_TO_BORDER while additionally
+ * clamping the texture coordinates to [0.0, 1.0].
+ *
+ * The clamping will be taken care of in the shaders. There are two
+ * filters here, but let the minification one have the say.
+ */
+ clamp_to_edge = (cso->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+ if (!clamp_to_edge) {
+ so->saturate_s = (cso->wrap_s == PIPE_TEX_WRAP_CLAMP);
+ so->saturate_t = (cso->wrap_t == PIPE_TEX_WRAP_CLAMP);
+ so->saturate_r = (cso->wrap_r == PIPE_TEX_WRAP_CLAMP);
+ }
+
so->texsamp0 =
COND(miplinear, A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A4XX_TEX_SAMP_0_ANISO(aniso) |
- A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
- A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
- A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
+ A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, clamp_to_edge)) |
+ A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, clamp_to_edge)) |
+ A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, clamp_to_edge));
so->texsamp1 =
// COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
return so;
}
+static void
+fd4_sampler_states_bind(struct pipe_context *pctx,
+ unsigned shader, unsigned start,
+ unsigned nr, void **hwcso)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ uint16_t saturate_s = 0, saturate_t = 0, saturate_r = 0;
+ unsigned i;
+ /* a4xx wrapper around fd_sampler_states_bind(): besides forwarding
+  * the bind, collect one bit per non-NULL entry of hwcso[] whose
+  * state object has saturate_s / saturate_t / saturate_r set, and
+  * store the resulting masks (plus an "any bit set" flag) in the fd4
+  * context for the stage being bound.  Only PIPE_SHADER_FRAGMENT and
+  * PIPE_SHADER_VERTEX update the masks; any other stage just gets the
+  * generic bind.
+  *
+  * NOTE(review): the bit index is the position within hwcso[], not
+  * 'start + i', and the stage's masks are overwritten rather than
+  * merged with what was there before -- this looks like it assumes
+  * every bind begins at slot 0; confirm against callers.  The masks
+  * are uint16_t, so entries at index 16 and above cannot be recorded.
+  */
+ for (i = 0; i < nr; i++) {
+ if (hwcso[i]) { /* NULL entries leave their bit clear */
+ struct fd4_sampler_stateobj *sampler =
+ fd4_sampler_stateobj(hwcso[i]);
+ if (sampler->saturate_s)
+ saturate_s |= (1 << i);
+ if (sampler->saturate_t)
+ saturate_t |= (1 << i);
+ if (sampler->saturate_r)
+ saturate_r |= (1 << i);
+ }
+ }
+
+ fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
+ /* publish the masks for this stage; the bool is true iff any mask is non-zero */
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ fd4_ctx->fsaturate =
+ (saturate_s != 0) ||
+ (saturate_t != 0) ||
+ (saturate_r != 0);
+ fd4_ctx->fsaturate_s = saturate_s;
+ fd4_ctx->fsaturate_t = saturate_t;
+ fd4_ctx->fsaturate_r = saturate_r;
+ } else if (shader == PIPE_SHADER_VERTEX) {
+ fd4_ctx->vsaturate =
+ (saturate_s != 0) ||
+ (saturate_t != 0) ||
+ (saturate_r != 0);
+ fd4_ctx->vsaturate_s = saturate_s;
+ fd4_ctx->vsaturate_t = saturate_t;
+ fd4_ctx->vsaturate_r = saturate_r;
+ }
+}
+
static enum a4xx_tex_type
tex_type(unsigned target)
{
fd4_texture_init(struct pipe_context *pctx)
{
pctx->create_sampler_state = fd4_sampler_state_create;
- pctx->bind_sampler_states = fd_sampler_states_bind;
+ pctx->bind_sampler_states = fd4_sampler_states_bind;
pctx->create_sampler_view = fd4_sampler_view_create;
pctx->set_sampler_views = fd_set_sampler_views;
}
struct fd4_sampler_stateobj {
struct pipe_sampler_state base;
uint32_t texsamp0, texsamp1;
+ bool saturate_s, saturate_t, saturate_r; /* per-axis: wrap mode is PIPE_TEX_WRAP_CLAMP and the min filter is not NEAREST */
};
static inline struct fd4_sampler_stateobj *
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
FACTOR_CONSTANT_ALPHA = 14,
FACTOR_ONE_MINUS_CONSTANT_ALPHA = 15,
FACTOR_SRC_ALPHA_SATURATE = 16,
+ FACTOR_SRC1_COLOR = 20,
+ FACTOR_ONE_MINUS_SRC1_COLOR = 21,
+ FACTOR_SRC1_ALPHA = 22,
+ FACTOR_ONE_MINUS_SRC1_ALPHA = 23,
};
enum adreno_rb_surface_endian {
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
FD_DIRTY_SCISSOR = (1 << 17),
FD_DIRTY_STREAMOUT = (1 << 18),
FD_DIRTY_UCP = (1 << 19),
+ FD_DIRTY_BLEND_DUAL = (1 << 20),
} dirty;
struct pipe_blend_state *blend;
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_COMPUTE:
return 0;
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
/* Render targets. */
case PIPE_CAP_MAX_RENDER_TARGETS:
return screen->max_rts;
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ return is_a3xx(screen) ? 1 : 0;
/* Queries. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
pscreen = &screen->base;
screen->dev = dev;
+ screen->refcnt = 1;
// maybe this should be in context?
screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D);
struct fd_screen {
struct pipe_screen base;
+ /* it would be tempting to use pipe_reference here, but that
+ * really doesn't work well if it isn't the first member of
+ * the struct, so not quite so awesome to be adding refcnting
+ * further down the inheritance hierarchy:
+ */
+ int refcnt;
+
+ /* place for winsys to stash its own stuff: */
+ void *winsys_priv;
+
uint32_t gmemsize_bytes;
uint32_t device_id;
uint32_t gpu_id; /* 220, 305, etc */
*/
#include "pipe/p_state.h"
+#include "util/u_dual_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_helpers.h"
fd_blend_state_bind(struct pipe_context *pctx, void *hwcso)
{
struct fd_context *ctx = fd_context(pctx);
+ struct pipe_blend_state *cso = hwcso; /* typed view of the incoming state; may be NULL */
+ bool old_is_dual = ctx->blend ?
+ ctx->blend->rt[0].blend_enable && util_blend_state_is_dual(ctx->blend, 0) :
+ false; /* nothing bound yet counts as "not dual" */
+ bool new_is_dual = cso ?
+ cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0) :
+ false; /* only RT0 is examined, and only when its blending is enabled */
ctx->blend = hwcso;
ctx->dirty |= FD_DIRTY_BLEND;
+ if (old_is_dual != new_is_dual) /* flag a change in dual-source usage, not every rebind */
+ ctx->dirty |= FD_DIRTY_BLEND_DUAL;
}
static void
pctx->sampler_view_destroy = fd_sampler_view_destroy;
}
+
+/* helper for setting up border-color buffer for a3xx/a4xx:
+ *
+ * For each sampler i in 'tex', writes that sampler's border color into
+ * the BORDERCOLOR_SIZE-byte record located at
+ *   ptr + BORDERCOLOR_SIZE * (offset + i)
+ * so 'offset' is counted in records, not bytes.
+ *
+ * Each color component j is stored at the index given by the paired
+ * texture format's swizzle[j], in one of four slot groups chosen by
+ * channel size and integer-ness (see the if/else chain below).
+ *
+ * Nothing is written for a NULL sampler, for a sampler that has no
+ * texture at the same index, or for components whose swizzle is >= 4;
+ * this function does not clear the remainder of the record.
+ */
+void
+fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
+ unsigned offset)
+{
+ unsigned i, j;
+
+ for (i = 0; i < tex->num_samplers; i++) {
+ struct pipe_sampler_state *sampler = tex->samplers[i];
+ uint16_t *bcolor = (uint16_t *)((uint8_t *)ptr +
+ (BORDERCOLOR_SIZE * offset) +
+ (BORDERCOLOR_SIZE * i));
+ uint32_t *bcolor32 = (uint32_t *)&bcolor[16]; /* 32-bit slots begin 32 bytes into the record */
+
+ if (!sampler)
+ continue;
+
+ /*
+ * XXX HACK ALERT XXX
+ *
+ * The border colors need to be swizzled in a particular
+ * format-dependent order. Even though samplers don't know about
+ * formats, we can assume that with a GL state tracker, there's a
+ * 1:1 correspondence between sampler and texture. Take advantage
+ * of that knowledge.
+ */
+ if (i < tex->num_textures && tex->textures[i]) {
+ const struct util_format_description *desc =
+ util_format_description(tex->textures[i]->format);
+ for (j = 0; j < 4; j++) {
+ if (desc->swizzle[j] >= 4) /* presumably a constant/unused swizzle rather than a channel index -- TODO confirm */
+ continue;
+
+ const struct util_format_channel_description *chan =
+ &desc->channel[desc->swizzle[j]];
+ int size = chan->size;
+
+ /* The Z16 texture format we use seems to look in the
+ * 32-bit border color slots
+ */
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
+ size = 32;
+
+ /* Formats like R11G11B10 or RGB9_E5 don't specify
+ * per-channel sizes properly.
+ */
+ if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
+ size = 16;
+
+ if (chan->pure_integer && size > 16) /* 32-bit integer slots: bcolor32[4..7] */
+ bcolor32[desc->swizzle[j] + 4] =
+ sampler->border_color.i[j];
+ else if (size > 16) /* 32-bit float slots: bcolor32[0..3] */
+ bcolor32[desc->swizzle[j]] =
+ fui(sampler->border_color.f[j]);
+ else if (chan->pure_integer) /* 16-bit integer slots: bcolor[8..11] */
+ bcolor[desc->swizzle[j] + 8] =
+ sampler->border_color.i[j];
+ else /* half-float slots: bcolor[0..3] */
+ bcolor[desc->swizzle[j]] =
+ util_float_to_half(sampler->border_color.f[j]);
+ }
+ }
+ }
+}
void fd_texture_init(struct pipe_context *pctx);
+struct fd_texture_stateobj;
+
+/* Both a3xx/a4xx share the same layout for the border-color buffer,
+ * which contains the pre-swizzled (based on texture format) border
+ * color value, with the following layout (per sampler):
+ *
+ * offset | description
+ * -------+-------------
+ * 0x00: | fp16[0] \
+ * | fp16[1] |___ swizzled fp16 channel values for "small float"
+ * | fp16[2] | formats (<= 16 bits per component, !integer)
+ * | fp16[3] /
+ * 0x08: | padding
+ * 0x10: | int16[0] \
+ * | int16[1] |___ swizzled int16 channels for "small integer"
+ * | int16[2] | formats (<= 16 bits per component, integer)
+ * | int16[3] /
+ * 0x18: | padding
+ * 0x20: | fp32[0] \
+ * | fp32[1] |___ swizzled fp32 channel values for "large float"
+ * | fp32[2] | formats (> 16 bits per component, !integer)
+ * | fp32[3] /
+ * 0x30: | int32[0] \
+ * | int32[1] |___ swizzled int32 channel values for "large int"
+ * | int32[2] | formats (> 16 bits per component, integer)
+ * | int32[3] /
+ */
+#define BORDERCOLOR_SIZE 0x40
+void fd_setup_border_colors(struct fd_texture_stateobj *tex, void *ptr,
+ unsigned offset);
+
#endif /* FREEDRENO_TEXTURE_H_ */
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
return FACTOR_ONE_MINUS_CONSTANT_ALPHA;
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ return FACTOR_ONE_MINUS_SRC1_COLOR;
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ return FACTOR_ONE_MINUS_SRC1_ALPHA;
case PIPE_BLENDFACTOR_SRC1_COLOR:
+ return FACTOR_SRC1_COLOR;
case PIPE_BLENDFACTOR_SRC1_ALPHA:
- /* I don't think these are supported */
+ return FACTOR_SRC1_ALPHA;
default:
DBG("invalid blend factor: %x", factor);
return 0;
printf(" --saturate-t MASK - bitmask of samplers to saturate T coord\n");
printf(" --saturate-r MASK - bitmask of samplers to saturate R coord\n");
printf(" --stream-out - enable stream-out (aka transform feedback)\n");
+ printf(" --ucp MASK - bitmask of enabled user-clip-planes\n");
+ printf(" --gpu GPU_ID - specify gpu-id (default 320)\n");
printf(" --help - show this message\n");
}
struct ir3_shader_variant v;
struct ir3_shader s;
struct ir3_shader_key key = {};
+ unsigned gpu_id = 320;
const char *info;
void *ptr;
size_t size;
continue;
}
+ if (!strcmp(argv[n], "--ucp")) {
+ debug_printf(" %s %s", argv[n], argv[n+1]);
+ key.ucp_enables = strtol(argv[n+1], NULL, 0);
+ n += 2;
+ continue;
+ }
+
+ if (!strcmp(argv[n], "--gpu")) {
+ debug_printf(" %s %s", argv[n], argv[n+1]);
+ gpu_id = strtol(argv[n+1], NULL, 0);
+ n += 2;
+ continue;
+ }
+
if (!strcmp(argv[n], "--help")) {
print_usage();
return 0;
}
/* TODO cmdline option to target different gpus: */
- compiler = ir3_compiler_create(320);
+ compiler = ir3_compiler_create(gpu_id);
info = "NIR compiler";
ret = ir3_compile_shader_nir(compiler, &v);
static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
-static struct nir_shader *to_nir(const struct tgsi_token *tokens)
+static struct nir_shader *to_nir(struct ir3_compile *ctx,
+ const struct tgsi_token *tokens, struct ir3_shader_variant *so)
{
- struct nir_shader_compiler_options options = {
+ static const nir_shader_compiler_options options = {
.lower_fpow = true,
.lower_fsat = true,
.lower_scmp = true,
.lower_flrp = true,
+ .lower_ffract = true,
.native_integers = true,
};
+ struct nir_lower_tex_options tex_options = {
+ .lower_rect = 0,
+ };
bool progress;
+ switch (so->type) {
+ case SHADER_FRAGMENT:
+ case SHADER_COMPUTE:
+ tex_options.saturate_s = so->key.fsaturate_s;
+ tex_options.saturate_t = so->key.fsaturate_t;
+ tex_options.saturate_r = so->key.fsaturate_r;
+ break;
+ case SHADER_VERTEX:
+ tex_options.saturate_s = so->key.vsaturate_s;
+ tex_options.saturate_t = so->key.vsaturate_t;
+ tex_options.saturate_r = so->key.vsaturate_r;
+ break;
+ }
+
+ if (ctx->compiler->gpu_id >= 400) {
+ /* a4xx seems to have *no* sam.p */
+ tex_options.lower_txp = ~0; /* lower all txp */
+ } else {
+ /* a3xx just needs to avoid sam.p for 3d tex */
+ tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D);
+ }
+
struct nir_shader *s = tgsi_to_nir(tokens, &options);
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
nir_opt_global_to_local(s);
nir_convert_to_ssa(s);
+ if (s->stage == MESA_SHADER_VERTEX) {
+ nir_lower_clip_vs(s, so->key.ucp_enables);
+ } else if (s->stage == MESA_SHADER_FRAGMENT) {
+ nir_lower_clip_fs(s, so->key.ucp_enables);
+ }
+ nir_lower_tex(s, &tex_options);
+ if (so->key.color_two_side)
+ nir_lower_two_sided_color(s);
nir_lower_idiv(s);
nir_lower_load_const_to_scalar(s);
return s;
}
-/* TODO nir doesn't lower everything for us yet, but ideally it would: */
-static const struct tgsi_token *
-lower_tgsi(struct ir3_compile *ctx, const struct tgsi_token *tokens,
- struct ir3_shader_variant *so)
-{
- struct tgsi_shader_info info;
- struct tgsi_lowering_config lconfig = {
- .color_two_side = so->key.color_two_side,
- .lower_FRC = true,
- };
-
- switch (so->type) {
- case SHADER_FRAGMENT:
- case SHADER_COMPUTE:
- lconfig.saturate_s = so->key.fsaturate_s;
- lconfig.saturate_t = so->key.fsaturate_t;
- lconfig.saturate_r = so->key.fsaturate_r;
- break;
- case SHADER_VERTEX:
- lconfig.saturate_s = so->key.vsaturate_s;
- lconfig.saturate_t = so->key.vsaturate_t;
- lconfig.saturate_r = so->key.vsaturate_r;
- break;
- }
-
- if (ctx->compiler->gpu_id >= 400) {
- /* a4xx seems to have *no* sam.p */
- lconfig.lower_TXP = ~0; /* lower all txp */
- } else {
- /* a3xx just needs to avoid sam.p for 3d tex */
- lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
- }
-
- return tgsi_transform_lowering(&lconfig, tokens, &info);
-}
-
static struct ir3_compile *
compile_init(struct ir3_compiler *compiler,
struct ir3_shader_variant *so,
const struct tgsi_token *tokens)
{
struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile);
- const struct tgsi_token *lowered_tokens;
if (compiler->gpu_id >= 400) {
/* need special handling for "flat" */
ctx->block_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
- lowered_tokens = lower_tgsi(ctx, tokens, so);
- if (!lowered_tokens)
- lowered_tokens = tokens;
- ctx->s = to_nir(lowered_tokens);
-
- if (lowered_tokens != tokens)
- free((void *)lowered_tokens);
+ ctx->s = to_nir(ctx, tokens, so);
so->first_driver_param = so->first_immediate = ctx->s->num_uniforms;
* num_uniform * vec4 - user consts
* 4 * vec4 - UBO addresses
* if (vertex shader) {
- * 1 * vec4 - driver params (IR3_DP_*)
+ * N * vec4 - driver params (IR3_DP_*)
* 1 * vec4 - stream-out addresses
* }
*
so->first_immediate += 4;
if (so->type == SHADER_VERTEX) {
- /* one (vec4) slot for driver params (see ir3_driver_param): */
- so->first_immediate++;
+ /* driver params (see ir3_driver_param): */
+ so->first_immediate += IR3_DP_COUNT/4; /* convert to vec4 */
/* one (vec4) slot for stream-output base addresses: */
so->first_immediate++;
}
create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp)
{
/* first four vec4 sysval's reserved for UBOs: */
- unsigned r = regid(ctx->so->first_driver_param + 4, dp);
+ /* NOTE: dp is in scalar, but there can be >4 dp components: */
+ unsigned n = ctx->so->first_driver_param + IR3_DRIVER_PARAM_OFF;
+ unsigned r = regid(n + dp / 4, dp % 4);
return create_uniform(ctx, r);
}
struct ir3_block *b = ctx->block;
struct ir3_instruction *addr, *src0, *src1;
/* UBO addresses are the first driver params: */
- unsigned ubo = regid(ctx->so->first_driver_param, 0);
+ unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
unsigned off = intr->const_index[0];
/* First src is ubo index, which could either be an immed or not: */
}
}
-static void add_sysval_input(struct ir3_compile *ctx, unsigned name,
+static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
struct ir3_instruction *instr)
{
struct ir3_shader_variant *so = ctx->so;
unsigned r = regid(so->inputs_count, 0);
unsigned n = so->inputs_count++;
- so->inputs[n].semantic = ir3_semantic_name(name, 0);
+ so->inputs[n].sysval = true;
+ so->inputs[n].slot = slot;
so->inputs[n].compmask = 1;
so->inputs[n].regid = r;
- so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
+ so->inputs[n].interpolate = INTERP_QUALIFIER_FLAT;
so->total_in++;
ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1);
case nir_intrinsic_load_base_vertex:
if (!ctx->basevertex) {
ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE);
- add_sysval_input(ctx, TGSI_SEMANTIC_BASEVERTEX,
+ add_sysval_input(ctx, SYSTEM_VALUE_BASE_VERTEX,
ctx->basevertex);
}
dst[0] = ctx->basevertex;
case nir_intrinsic_load_vertex_id_zero_base:
if (!ctx->vertex_id) {
ctx->vertex_id = create_input(ctx->block, 0);
- add_sysval_input(ctx, TGSI_SEMANTIC_VERTEXID_NOBASE,
+ add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
ctx->vertex_id);
}
dst[0] = ctx->vertex_id;
case nir_intrinsic_load_instance_id:
if (!ctx->instance_id) {
ctx->instance_id = create_input(ctx->block, 0);
- add_sysval_input(ctx, TGSI_SEMANTIC_INSTANCEID,
+ add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID,
ctx->instance_id);
}
dst[0] = ctx->instance_id;
break;
+ case nir_intrinsic_load_user_clip_plane:
+ for (int i = 0; i < intr->num_components; i++) {
+ unsigned n = idx * 4 + i;
+ dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
+ }
+ break;
case nir_intrinsic_discard_if:
case nir_intrinsic_discard: {
struct ir3_instruction *cond, *kill;
case nir_texop_lod:
case nir_texop_tg4:
case nir_texop_query_levels:
+ case nir_texop_texture_samples:
compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op);
return;
}
* of the shader:
*/
vtxcnt = create_input(ctx->in_block, 0);
- add_sysval_input(ctx, IR3_SEMANTIC_VTXCNT, vtxcnt);
+ add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_CNT, vtxcnt);
maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX);
unsigned stride = strmout->stride[i];
struct ir3_instruction *base, *off;
- base = create_uniform(ctx, regid(v->first_driver_param + 5, i));
+ base = create_uniform(ctx, regid(v->first_driver_param + IR3_TFBOS_OFF, i));
/* 24-bit should be enough: */
off = ir3_MUL_U(ctx->block, vtxcnt, 0,
struct ir3_shader_variant *so = ctx->so;
unsigned array_len = MAX2(glsl_get_length(in->type), 1);
unsigned ncomp = glsl_get_components(in->type);
- /* XXX: map loc slots to semantics */
- unsigned semantic_name = in->data.location;
- unsigned semantic_index = in->data.index;
unsigned n = in->data.driver_location;
+ unsigned slot = in->data.location;
- DBG("; in: %u:%u, len=%ux%u, loc=%u",
- semantic_name, semantic_index, array_len,
- ncomp, n);
+ DBG("; in: slot=%u, len=%ux%u, drvloc=%u",
+ slot, array_len, ncomp, n);
- so->inputs[n].semantic =
- ir3_semantic_name(semantic_name, semantic_index);
+ so->inputs[n].slot = slot;
so->inputs[n].compmask = (1 << ncomp) - 1;
so->inputs[n].inloc = ctx->next_inloc;
- so->inputs[n].interpolate = 0;
+ so->inputs[n].interpolate = INTERP_QUALIFIER_NONE;
so->inputs_count = MAX2(so->inputs_count, n + 1);
+ so->inputs[n].interpolate = in->data.interpolation;
- /* the fdN_program_emit() code expects tgsi consts here, so map
- * things back to tgsi for now:
- */
- switch (in->data.interpolation) {
- case INTERP_QUALIFIER_FLAT:
- so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
- break;
- case INTERP_QUALIFIER_NOPERSPECTIVE:
- so->inputs[n].interpolate = TGSI_INTERPOLATE_LINEAR;
- break;
- case INTERP_QUALIFIER_SMOOTH:
- so->inputs[n].interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
- break;
- }
-
- for (int i = 0; i < ncomp; i++) {
- struct ir3_instruction *instr = NULL;
- unsigned idx = (n * 4) + i;
+ if (ctx->so->type == SHADER_FRAGMENT) {
+ for (int i = 0; i < ncomp; i++) {
+ struct ir3_instruction *instr = NULL;
+ unsigned idx = (n * 4) + i;
- if (ctx->so->type == SHADER_FRAGMENT) {
- if (semantic_name == TGSI_SEMANTIC_POSITION) {
+ if (slot == VARYING_SLOT_POS) {
so->inputs[n].bary = false;
so->frag_coord = true;
instr = create_frag_coord(ctx, i);
- } else if (semantic_name == TGSI_SEMANTIC_FACE) {
+ } else if (slot == VARYING_SLOT_FACE) {
so->inputs[n].bary = false;
so->frag_face = true;
instr = create_frag_face(ctx, i);
} else {
bool use_ldlv = false;
- /* with NIR, we need to infer TGSI_INTERPOLATE_COLOR
- * from the semantic name:
+ /* detect the special case for front/back colors where
+ * we need to do flat vs smooth shading depending on
+ * rast state:
*/
- if ((in->data.interpolation == INTERP_QUALIFIER_NONE) &&
- ((semantic_name == TGSI_SEMANTIC_COLOR) ||
- (semantic_name == TGSI_SEMANTIC_BCOLOR)))
- so->inputs[n].interpolate = TGSI_INTERPOLATE_COLOR;
+ if (in->data.interpolation == INTERP_QUALIFIER_NONE) {
+ switch (slot) {
+ case VARYING_SLOT_COL0:
+ case VARYING_SLOT_COL1:
+ case VARYING_SLOT_BFC0:
+ case VARYING_SLOT_BFC1:
+ so->inputs[n].rasterflat = true;
+ break;
+ default:
+ break;
+ }
+ }
if (ctx->flat_bypass) {
- /* with NIR, we need to infer TGSI_INTERPOLATE_COLOR
- * from the semantic name:
- */
- switch (so->inputs[n].interpolate) {
- case TGSI_INTERPOLATE_COLOR:
- if (!ctx->so->key.rasterflat)
- break;
- /* fallthrough */
- case TGSI_INTERPOLATE_CONSTANT:
+ if ((so->inputs[n].interpolate == INTERP_QUALIFIER_FLAT) ||
+ (so->inputs[n].rasterflat && ctx->so->key.rasterflat))
use_ldlv = true;
- break;
- }
}
so->inputs[n].bary = true;
instr = create_frag_input(ctx,
so->inputs[n].inloc + i - 8, use_ldlv);
}
- } else {
- instr = create_input(ctx->block, idx);
- }
- ctx->ir->inputs[idx] = instr;
+ ctx->ir->inputs[idx] = instr;
+ }
+ } else if (ctx->so->type == SHADER_VERTEX) {
+ for (int i = 0; i < ncomp; i++) {
+ unsigned idx = (n * 4) + i;
+ ctx->ir->inputs[idx] = create_input(ctx->block, idx);
+ }
+ } else {
+ compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
}
if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) {
struct ir3_shader_variant *so = ctx->so;
unsigned array_len = MAX2(glsl_get_length(out->type), 1);
unsigned ncomp = glsl_get_components(out->type);
- /* XXX: map loc slots to semantics */
- unsigned semantic_name = out->data.location;
- unsigned semantic_index = out->data.index;
unsigned n = out->data.driver_location;
+ unsigned slot = out->data.location;
unsigned comp = 0;
- DBG("; out: %u:%u, len=%ux%u, loc=%u",
- semantic_name, semantic_index, array_len,
- ncomp, n);
+ DBG("; out: slot=%u, len=%ux%u, drvloc=%u",
+ slot, array_len, ncomp, n);
- if (ctx->so->type == SHADER_VERTEX) {
- switch (semantic_name) {
- case TGSI_SEMANTIC_POSITION:
+ if (ctx->so->type == SHADER_FRAGMENT) {
+ switch (slot) {
+ case FRAG_RESULT_DEPTH:
+ comp = 2; /* tgsi will write to .z component */
so->writes_pos = true;
break;
- case TGSI_SEMANTIC_PSIZE:
- so->writes_psize = true;
- break;
- case TGSI_SEMANTIC_COLOR:
- case TGSI_SEMANTIC_BCOLOR:
- case TGSI_SEMANTIC_GENERIC:
- case TGSI_SEMANTIC_FOG:
- case TGSI_SEMANTIC_TEXCOORD:
+ case FRAG_RESULT_COLOR:
+ so->color0_mrt = 1;
break;
default:
- compile_error(ctx, "unknown VS semantic name: %s\n",
- tgsi_semantic_names[semantic_name]);
+ if (slot >= FRAG_RESULT_DATA0)
+ break;
+ compile_error(ctx, "unknown FS output name: %s\n",
+ gl_frag_result_name(slot));
}
- } else {
- switch (semantic_name) {
- case TGSI_SEMANTIC_POSITION:
- comp = 2; /* tgsi will write to .z component */
+ } else if (ctx->so->type == SHADER_VERTEX) {
+ switch (slot) {
+ case VARYING_SLOT_POS:
so->writes_pos = true;
break;
- case TGSI_SEMANTIC_COLOR:
- if (semantic_index == -1) {
- semantic_index = 0;
- so->color0_mrt = 1;
- }
+ case VARYING_SLOT_PSIZ:
+ so->writes_psize = true;
+ break;
+ case VARYING_SLOT_COL0:
+ case VARYING_SLOT_COL1:
+ case VARYING_SLOT_BFC0:
+ case VARYING_SLOT_BFC1:
+ case VARYING_SLOT_FOGC:
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
break;
default:
- compile_error(ctx, "unknown FS semantic name: %s\n",
- tgsi_semantic_names[semantic_name]);
+ if (slot >= VARYING_SLOT_VAR0)
+ break;
+ if ((VARYING_SLOT_TEX0 <= slot) && (slot <= VARYING_SLOT_TEX7))
+ break;
+ compile_error(ctx, "unknown VS output name: %s\n",
+ gl_varying_slot_name(slot));
}
+ } else {
+ compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);
}
compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
- so->outputs[n].semantic =
- ir3_semantic_name(semantic_name, semantic_index);
+ so->outputs[n].slot = slot;
so->outputs[n].regid = regid(n, comp);
so->outputs_count = MAX2(so->outputs_count, n + 1);
/* at this point, for binning pass, throw away unneeded outputs: */
if (so->key.binning_pass) {
for (i = 0, j = 0; i < so->outputs_count; i++) {
- unsigned name = sem2name(so->outputs[i].semantic);
- unsigned idx = sem2idx(so->outputs[i].semantic);
+ unsigned slot = so->outputs[i].slot;
/* throw away everything but first position/psize */
- if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) ||
- (name == TGSI_SEMANTIC_PSIZE))) {
+ if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) {
if (i != j) {
so->outputs[j] = so->outputs[i];
ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0];
* but what we give the hw is the scalar register:
*/
if ((so->type == SHADER_FRAGMENT) &&
- (sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION))
+ (so->outputs[i].slot == FRAG_RESULT_DEPTH))
so->outputs[i].regid += 2;
}
sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
- nir_src_for_ssa(&sel->dest.dest.ssa),
- state->mem_ctx);
+ nir_src_for_ssa(&sel->dest.dest.ssa));
nir_instr_insert_before(&phi->instr, &sel->instr);
nir_instr_remove(&phi->instr);
debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
}
-static void dump_semantic(struct ir3_shader_variant *so,
- unsigned sem, const char *name)
+static void dump_output(struct ir3_shader_variant *so,
+ unsigned slot, const char *name)
{
uint32_t regid;
- regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0));
+ regid = ir3_find_output_regid(so, slot); /* register assigned to this output slot, printed below under 'name' */
dump_reg(name, regid);
}
disasm_a3xx(bin, so->info.sizedwords, 0, so->type);
- debug_printf("; %s: outputs:", type);
- for (i = 0; i < so->outputs_count; i++) {
- uint8_t regid = so->outputs[i].regid;
- ir3_semantic sem = so->outputs[i].semantic;
- debug_printf(" r%d.%c (%u:%u)",
- (regid >> 2), "xyzw"[regid & 0x3],
- sem2name(sem), sem2idx(sem));
- }
- debug_printf("\n");
- debug_printf("; %s: inputs:", type);
- for (i = 0; i < so->inputs_count; i++) {
- uint8_t regid = so->inputs[i].regid;
- ir3_semantic sem = so->inputs[i].semantic;
- debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)",
- (regid >> 2), "xyzw"[regid & 0x3],
- sem2name(sem), sem2idx(sem),
- so->inputs[i].compmask,
- so->inputs[i].inloc,
- so->inputs[i].bary);
+ switch (so->type) {
+ case SHADER_VERTEX:
+ debug_printf("; %s: outputs:", type);
+ for (i = 0; i < so->outputs_count; i++) {
+ uint8_t regid = so->outputs[i].regid;
+ debug_printf(" r%d.%c (%s)",
+ (regid >> 2), "xyzw"[regid & 0x3],
+ gl_varying_slot_name(so->outputs[i].slot));
+ }
+ debug_printf("\n");
+ debug_printf("; %s: inputs:", type);
+ for (i = 0; i < so->inputs_count; i++) {
+ uint8_t regid = so->inputs[i].regid;
+ debug_printf(" r%d.%c (cm=%x,il=%u,b=%u)",
+ (regid >> 2), "xyzw"[regid & 0x3],
+ so->inputs[i].compmask,
+ so->inputs[i].inloc,
+ so->inputs[i].bary);
+ }
+ debug_printf("\n");
+ break;
+ case SHADER_FRAGMENT:
+ debug_printf("; %s: outputs:", type);
+ for (i = 0; i < so->outputs_count; i++) {
+ uint8_t regid = so->outputs[i].regid;
+ debug_printf(" r%d.%c (%s)",
+ (regid >> 2), "xyzw"[regid & 0x3],
+ gl_frag_result_name(so->outputs[i].slot));
+ }
+ debug_printf("\n");
+ debug_printf("; %s: inputs:", type);
+ for (i = 0; i < so->inputs_count; i++) {
+ uint8_t regid = so->inputs[i].regid;
+ debug_printf(" r%d.%c (%s,cm=%x,il=%u,b=%u)",
+ (regid >> 2), "xyzw"[regid & 0x3],
+ gl_varying_slot_name(so->inputs[i].slot),
+ so->inputs[i].compmask,
+ so->inputs[i].inloc,
+ so->inputs[i].bary);
+ }
+ debug_printf("\n");
+ break;
+ case SHADER_COMPUTE:
+ break;
}
- debug_printf("\n");
/* print generic shader info: */
debug_printf("; %s prog %d/%d: %u instructions, %d half, %d full\n",
/* print shader type specific info: */
switch (so->type) {
case SHADER_VERTEX:
- dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos");
- dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize");
+ dump_output(so, VARYING_SLOT_POS, "pos");
+ dump_output(so, VARYING_SLOT_PSIZ, "psize");
break;
case SHADER_FRAGMENT:
dump_reg("pos (bary)", so->pos_regid);
- dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz");
- dump_semantic(so, TGSI_SEMANTIC_COLOR, "color");
+ dump_output(so, FRAG_RESULT_DEPTH, "posz");
+ if (so->color0_mrt) {
+ dump_output(so, FRAG_RESULT_COLOR, "color");
+ } else {
+ dump_output(so, FRAG_RESULT_DATA0, "data0");
+ dump_output(so, FRAG_RESULT_DATA1, "data1");
+ dump_output(so, FRAG_RESULT_DATA2, "data2");
+ dump_output(so, FRAG_RESULT_DATA3, "data3");
+ dump_output(so, FRAG_RESULT_DATA4, "data4");
+ dump_output(so, FRAG_RESULT_DATA5, "data5");
+ dump_output(so, FRAG_RESULT_DATA6, "data6");
+ dump_output(so, FRAG_RESULT_DATA7, "data7");
+ }
/* these two are hard-coded since we don't know how to
* program them to anything but all 0's...
*/
emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
struct fd_constbuf_stateobj *constbuf)
{
- uint32_t offset = v->first_driver_param; /* UBOs after user consts */
+ uint32_t offset = v->first_driver_param + IR3_UBOS_OFF;
if (v->constlen > offset) {
struct fd_context *ctx = fd_context(v->shader->pctx);
uint32_t params = MIN2(4, v->constlen - offset) * 4;
static void
emit_tfbos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
{
- uint32_t offset = v->first_driver_param + 5; /* streamout addresses after driver-params*/
+ /* streamout addresses after driver-params: */
+ uint32_t offset = v->first_driver_param + IR3_TFBOS_OFF;
if (v->constlen > offset) {
struct fd_context *ctx = fd_context(v->shader->pctx);
struct fd_streamout_stateobj *so = &ctx->streamout;
/* emit driver params every time: */
/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
if (info && (v->type == SHADER_VERTEX)) {
- uint32_t offset = v->first_driver_param + 4; /* driver params after UBOs */
+ uint32_t offset = v->first_driver_param + IR3_DRIVER_PARAM_OFF;
if (v->constlen >= offset) {
- uint32_t vertex_params[4] = {
+ uint32_t vertex_params[IR3_DP_COUNT] = {
[IR3_DP_VTXID_BASE] = info->indexed ?
info->index_bias : info->start,
[IR3_DP_VTXCNT_MAX] = max_tf_vtx(v),
};
+ /* if no user-clip-planes, we don't need to emit the
+ * entire thing:
+ */
+ uint32_t vertex_params_size = 4;
+
+ if (v->key.ucp_enables) {
+ struct pipe_clip_state *ucp = &ctx->ucp;
+ unsigned pos = IR3_DP_UCP0_X;
+ for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
+ for (unsigned j = 0; j < 4; j++) {
+ vertex_params[pos] = fui(ucp->ucp[i][j]);
+ pos++;
+ }
+ }
+ vertex_params_size = ARRAY_SIZE(vertex_params);
+ }
fd_wfi(ctx, ring);
ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
- ARRAY_SIZE(vertex_params), vertex_params, NULL);
+ vertex_params_size, vertex_params, NULL);
/* if needed, emit stream-out buffer addresses: */
if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
#define IR3_SHADER_H_
#include "pipe/p_state.h"
+#include "glsl/shader_enums.h"
#include "ir3.h"
#include "disasm.h"
enum ir3_driver_param {
IR3_DP_VTXID_BASE = 0,
IR3_DP_VTXCNT_MAX = 1,
+ /* user-clip-plane components, up to 8x vec4's (one scalar index per component, 4..35): */
+ IR3_DP_UCP0_X = 4,
+ /* .... intermediate components are addressed as IR3_DP_UCP0_X + n and have no named constant */
+ IR3_DP_UCP7_W = 35,
+ IR3_DP_COUNT = 36 /* counted in scalars; must be aligned to vec4 because users convert with IR3_DP_COUNT/4 */
};
-/* internal semantic used for passing vtxcnt to vertex shader to
- * implement transform feedback:
+/* Layout of constant registers:
+ *
+ * num_uniform * vec4 - user consts
+ * 4 * vec4 - UBO addresses
+ * if (vertex shader) {
+ * N * vec4 - driver params (IR3_DP_*)
+ * 1 * vec4 - stream-out addresses
+ * }
+ *
+ * TODO this could be made more dynamic, to at least skip sections
+ * that we don't need..
*/
-#define IR3_SEMANTIC_VTXCNT (TGSI_SEMANTIC_COUNT + 0)
-
-typedef uint16_t ir3_semantic; /* semantic name + index */
-static inline ir3_semantic
-ir3_semantic_name(uint8_t name, uint16_t index)
-{
- return (name << 8) | (index & 0xff);
-}
-
-static inline uint8_t sem2name(ir3_semantic sem)
-{
- return sem >> 8;
-}
-
-static inline uint16_t sem2idx(ir3_semantic sem)
-{
- return sem & 0xff;
-}
+#define IR3_UBOS_OFF 0 /* UBOs after user consts */
+#define IR3_DRIVER_PARAM_OFF 4 /* driver params after UBOs */
+#define IR3_TFBOS_OFF (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4)
/* Configuration key used to identify a shader variant.. different
* shader variants can be used to implement features not supported
struct ir3_shader_key {
union {
struct {
+ /*
+ * Combined Vertex/Fragment shader parameters:
+ */
+ unsigned ucp_enables : 8;
+
/* do we need to check {v,f}saturate_{s,t,r}? */
unsigned has_per_samp : 1;
*/
unsigned color_two_side : 1;
unsigned half_precision : 1;
- /* used when shader needs to handle flat varyings (a4xx),
- * for TGSI_INTERPOLATE_COLOR:
+ /* used when shader needs to handle flat varyings (a4xx)
+ * for front/back color inputs to frag shader:
*/
unsigned rasterflat : 1;
};
uint8_t pos_regid;
bool frag_coord, frag_face, color0_mrt;
+ /* NOTE: for input/outputs, slot is:
+ * gl_vert_attrib - for VS inputs
+ * gl_varying_slot - for VS output / FS input
+ * gl_frag_result - for FS output
+ */
+
/* varyings/outputs: */
unsigned outputs_count;
struct {
- ir3_semantic semantic;
+ uint8_t slot;
uint8_t regid;
} outputs[16 + 2]; /* +POSITION +PSIZE */
bool writes_pos, writes_psize;
/* vertices/inputs: */
unsigned inputs_count;
struct {
- ir3_semantic semantic;
+ uint8_t slot;
uint8_t regid;
uint8_t compmask;
uint8_t ncomp;
* spots where inloc is used.
*/
uint8_t inloc;
- uint8_t bary;
- uint8_t interpolate;
+ /* vertex shader specific: */
+ bool sysval : 1; /* slot is a gl_system_value */
+ /* fragment shader specific: */
+ bool bary : 1; /* fetched varying (vs one loaded into reg) */
+ bool rasterflat : 1; /* special handling for emit->rasterflat */
+ enum glsl_interp_qualifier interpolate;
} inputs[16 + 2]; /* +POSITION +FACE */
unsigned total_in; /* sum of inputs (scalar) */
#include "pipe/p_shader_tokens.h"
static inline int
-ir3_find_output(const struct ir3_shader_variant *so, ir3_semantic semantic)
+ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot)
{
int j;
for (j = 0; j < so->outputs_count; j++)
- if (so->outputs[j].semantic == semantic)
+ if (so->outputs[j].slot == slot)
return j;
/* it seems optional to have a OUT.BCOLOR[n] for each OUT.COLOR[n]
* OUT.COLOR[n] to IN.BCOLOR[n]. And visa versa if there is only
* a OUT.BCOLOR[n] but no matching OUT.COLOR[n]
*/
- if (sem2name(semantic) == TGSI_SEMANTIC_BCOLOR) {
- unsigned idx = sem2idx(semantic);
- semantic = ir3_semantic_name(TGSI_SEMANTIC_COLOR, idx);
- } else if (sem2name(semantic) == TGSI_SEMANTIC_COLOR) {
- unsigned idx = sem2idx(semantic);
- semantic = ir3_semantic_name(TGSI_SEMANTIC_BCOLOR, idx);
+ if (slot == VARYING_SLOT_BFC0) {
+ slot = VARYING_SLOT_COL0;
+ } else if (slot == VARYING_SLOT_BFC1) {
+ slot = VARYING_SLOT_COL1;
+ } else if (slot == VARYING_SLOT_COL0) {
+ slot = VARYING_SLOT_BFC0;
+ } else if (slot == VARYING_SLOT_COL1) {
+ slot = VARYING_SLOT_BFC1;
} else {
return 0;
}
for (j = 0; j < so->outputs_count; j++)
- if (so->outputs[j].semantic == semantic)
+ if (so->outputs[j].slot == slot)
return j;
debug_assert(0);
}
static inline uint32_t
-ir3_find_output_regid(const struct ir3_shader_variant *so, ir3_semantic semantic)
+ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot)
{
int j;
for (j = 0; j < so->outputs_count; j++)
- if (so->outputs[j].semantic == semantic)
+ if (so->outputs[j].slot == slot) /* slot may be a gl_varying_slot (VS) or gl_frag_result (FS) value */
return so->outputs[j].regid;
return regid(63, 0);
}
d1->Register.Negate == d2->Register.Negate);
}
-const static struct {
+static const struct {
boolean is_texture;
boolean commutes;
unsigned neutral_element;
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
return 0;
case PIPE_CAP_VENDOR_ID:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
return 0;
}
/* should only get here on unhandled cases */
enum TexQuery
{
- TXQ_DIMS,
- TXQ_TYPE,
+ TXQ_DIMS, /* x, y, z, levels */
+ TXQ_TYPE, /* ?, ?, samples, ? */
TXQ_SAMPLE_POSITION,
TXQ_FILTER,
TXQ_LOD,
defId(i->def(0), 2);
srcAddr8(i->src(0), 16);
- if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
+ if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
code[0] |= 1 << 8;
} else {
if (i->op == OP_PINTERP) {
}
if (i->encSize == 8) {
- code[1] =
- (code[0] & (3 << 24)) >> (24 - 16) |
- (code[0] & (1 << 8)) << (18 - 8);
- code[0] &= ~0x03000100;
+ if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
+ code[1] = 4 << 16;
+ else
+ code[1] = (code[0] & (3 << 24)) >> (24 - 16);
+ code[0] &= ~0x03000000;
code[0] |= 1;
emitFlagsRd(i);
}
NV50_IR_OPCODE_CASE(SAD, SAD);
NV50_IR_OPCODE_CASE(TXF, TXF);
NV50_IR_OPCODE_CASE(TXQ, TXQ);
+ NV50_IR_OPCODE_CASE(TXQS, TXQ);
NV50_IR_OPCODE_CASE(TG4, TXG);
NV50_IR_OPCODE_CASE(LODQ, TXLQ);
void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
void handleTXF(Value *dst0[4], int R, int L_M);
- void handleTXQ(Value *dst0[4], enum TexQuery);
+ void handleTXQ(Value *dst0[4], enum TexQuery, int R);
void handleLIT(Value *dst0[4]);
void handleUserClipPlanes();
}
void
-Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
+Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)
{
TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
tex->tex.query = query;
tex->tex.mask |= 1 << c;
tex->setDef(d++, dst0[c]);
}
- tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+ if (query == TXQ_DIMS)
+ tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
+ else
+ tex->setSrc((c = 0), zero);
- setTexRS(tex, ++c, 1, -1);
+ setTexRS(tex, ++c, R, -1);
bb->insertTail(tex);
}
break;
case TGSI_OPCODE_TXQ:
case TGSI_OPCODE_SVIEWINFO:
- handleTXQ(dst0, TXQ_DIMS);
+ handleTXQ(dst0, TXQ_DIMS, 1);
+ break;
+ case TGSI_OPCODE_TXQS:
+ // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to
+ // be in .x
+ dst0[1] = dst0[2] = dst0[3] = NULL;
+ std::swap(dst0[0], dst0[2]);
+ handleTXQ(dst0, TXQ_TYPE, 0);
+ std::swap(dst0[0], dst0[2]);
break;
case TGSI_OPCODE_F2I:
case TGSI_OPCODE_F2U:
Program *prog = fn->getProgram();
r63 = new_LValue(fn, FILE_GPR);
- r63->reg.data.id = 63;
+ if (prog->maxGPR < 63)
+ r63->reg.data.id = 63;
+ else
+ r63->reg.data.id = 127;
// this is actually per-program, but we can do it all on visiting main()
std::list<Instruction *> *outWrites =
bool handleTXL(TexInstruction *); // hate
bool handleTXD(TexInstruction *); // these 3
bool handleTXLQ(TexInstruction *);
+ bool handleTXQ(TexInstruction *);
bool handleCALL(Instruction *);
bool handlePRECONT(Instruction *);
}
bool
+NV50LoweringPreSSA::handleTXQ(TexInstruction *i) // replaces a TXQ_TYPE query with a shift; TXQ_DIMS passes through
+{
+ Value *ms, *ms_x, *ms_y;
+ if (i->tex.query == TXQ_DIMS) // dimension queries are left untouched
+ return true;
+ assert(i->tex.query == TXQ_TYPE); // the only other query kind handled here
+ assert(i->tex.mask == 4); // just component 2, where TXQ_TYPE reports samples
+
+ loadTexMsInfo(i->tex.r * 4 * 2, &ms, &ms_x, &ms_y); // NOTE(review): offset looks like 8 bytes of ms info per texture slot -- confirm
+ bld.mkOp2(OP_SHL, TYPE_U32, i->getDef(0), bld.loadImm(NULL, 1), ms); // def = 1 << ms; presumably ms is log2 of the sample count
+ i->bb->remove(i); // the TXQ itself is dropped; its def is now written by the SHL
+
+ return true;
+}
+
+
+bool
NV50LoweringPreSSA::handleSET(Instruction *i)
{
if (i->dType == TYPE_F32) {
return handleTXD(i->asTex());
case OP_TXLQ:
return handleTXLQ(i->asTex());
+ case OP_TXQ:
+ return handleTXQ(i->asTex());
case OP_EX2:
bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
i->setSrc(0, i->getDef(0));
bool
NVC0LoweringPass::handleTXQ(TexInstruction *txq)
{
+ const int chipset = prog->getTarget()->getChipset();
+ if (chipset >= NVISA_GK104_CHIPSET && txq->tex.rIndirectSrc < 0)
+ txq->tex.r += prog->driver->io.texBindBase / 4;
+
if (txq->tex.rIndirectSrc < 0)
return true;
Value *ticRel = txq->getIndirectR();
- const int chipset = prog->getTarget()->getChipset();
txq->setIndirectS(NULL);
txq->tex.sIndirectSrc = -1;
!isFloatType(i->dType))
break;
+ if (i->getDef(0)->reg.data.id >= 64 ||
+ i->getSrc(0)->reg.data.id >= 64)
+ break;
+
def = i->getSrc(1)->getInsn();
if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
vtmp = i->getSrc(1);
case TYPE_U64:
case TYPE_S64:
default:
- PRINT("0x%016"PRIx64, reg.data.u64);
+ PRINT("0x%016" PRIx64, reg.data.u64);
break;
}
return pos;
#include <stack>
#include <limits>
+#include <tr1/unordered_map>
namespace nv50_ir {
private:
virtual bool visit(BasicBlock *);
inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
+ inline void splitEdges(BasicBlock *b);
};
class ArgumentMovesPass : public Pass {
return (n == 2);
}
-// For each operand of each PHI in b, generate a new value by inserting a MOV
-// at the end of the block it is coming from and replace the operand with its
-// result. This eliminates liveness conflicts and enables us to let values be
-// copied to the right register if such a conflict exists nonetheless.
+struct PhiMapHash { // hash functor for the (phi instruction, predecessor block) keys of PhiMap
+ size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const {
+ return std::tr1::hash<Instruction*>()(val.first) * 31 + // combine both pointer hashes so either member affects the result
+ std::tr1::hash<BasicBlock*>()(val.second);
+ }
+};
+
+typedef std::tr1::unordered_map<
+ std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;
+
+// Critical edges need to be split up so that work can be inserted along
+// specific edge transitions. Unfortunately manipulating incident edges into a
+// BB invalidates all the PHI nodes since their sources are implicitly ordered
+// by incident edge order.
//
-// These MOVs are also crucial in making sure the live intervals of phi srces
-// are extended until the end of the loop, since they are not included in the
-// live-in sets.
-bool
-RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+// TODO: Make it so that that is not the case, and PHI nodes store pointers to
+// the original BBs.
+void
+RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb)
{
- Instruction *phi, *mov;
BasicBlock *pb, *pn;
-
+ Instruction *phi;
+ Graph::EdgeIterator ei;
std::stack<BasicBlock *> stack;
+ int j = 0;
- for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+ for (ei = bb->cfg.incident(); !ei.end(); ei.next()) {
pb = BasicBlock::get(ei.getNode());
assert(pb);
if (needNewElseBlock(bb, pb))
stack.push(pb);
}
+
+ // No critical edges were found, no need to perform any work.
+ if (stack.empty())
+ return;
+
+ // We're about to, potentially, reorder the inbound edges. This means that
+ // we need to hold on to the (phi, bb) -> src mapping, and fix up the phi
+ // nodes after the graph has been modified.
+ PhiMap phis;
+
+ j = 0;
+ for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
+ pb = BasicBlock::get(ei.getNode());
+ for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next)
+ phis.insert(std::make_pair(std::make_pair(phi, pb), phi->getSrc(j)));
+ }
+
while (!stack.empty()) {
pb = stack.top();
pn = new BasicBlock(func);
assert(pb->getExit()->op != OP_CALL);
if (pb->getExit()->asFlow()->target.bb == bb)
pb->getExit()->asFlow()->target.bb = pn;
+
+ for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+    // Re-key this phi's recorded source from the old predecessor (pb) to
+    // the newly created block (pn).
+    PhiMap::iterator it = phis.find(std::make_pair(phi, pb));
+    assert(it != phis.end());
+    // Copy the value out and erase before inserting: insert() on an
+    // unordered_map may rehash and invalidate 'it', so erasing through
+    // it after the insert would be undefined behaviour.
+    Value *src = it->second;
+    phis.erase(it);
+    phis.insert(std::make_pair(std::make_pair(phi, pn), src));
+ }
}
+ // Now go through and fix up all of the phi node sources.
+ j = 0;
+ for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
+ pb = BasicBlock::get(ei.getNode());
+ for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+ PhiMap::const_iterator it = phis.find(std::make_pair(phi, pb));
+ assert(it != phis.end());
+
+ phi->setSrc(j, it->second);
+ }
+ }
+}
+
+// For each operand of each PHI in b, generate a new value by inserting a MOV
+// at the end of the block it is coming from and replace the operand with its
+// result. This eliminates liveness conflicts and enables us to let values be
+// copied to the right register if such a conflict exists nonetheless.
+//
+// These MOVs are also crucial in making sure the live intervals of phi srces
+// are extended until the end of the loop, since they are not included in the
+// live-in sets.
+bool
+RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+{
+ Instruction *phi, *mov;
+
+ splitEdges(bb);
+
// insert MOVs (phi->src(j) should stem from j-th in-BB)
int j = 0;
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
- pb = BasicBlock::get(ei.getNode());
+ BasicBlock *pb = BasicBlock::get(ei.getNode());
if (!pb->isTerminated())
pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
inline void
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
{
- nouveau_bo_ref(NULL, &buf->bo);
+ if (buf->fence && buf->fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
+ nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
+ buf->bo = NULL;
+ } else {
+ nouveau_bo_ref(NULL, &buf->bo);
+ }
if (buf->mm)
release_allocation(&buf->mm, buf->fence);
nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
else
- if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
- nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
+ if (nv->push_cb && can_cb)
+ nv->push_cb(nv, buf,
base, size / 4, (const uint32_t *)data);
else
nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);
{
if (tx->map) {
if (likely(tx->bo)) {
- nouveau_bo_ref(NULL, &tx->bo);
+ nouveau_fence_work(nv->screen->fence.current,
+ nouveau_fence_unref_bo, tx->bo);
if (tx->mm)
release_allocation(&tx->mm, nv->screen->fence.current);
} else {
struct nv04_resource *buf = nv04_resource(transfer->resource);
if (tx->base.usage & PIPE_TRANSFER_WRITE) {
- if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map)
- nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+ if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+ if (tx->map)
+ nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+
+ util_range_add(&buf->valid_buffer_range,
+ tx->base.box.x, tx->base.box.x + tx->base.box.width);
+ }
if (likely(buf->domain)) {
const uint8_t bind = buf->base.bind;
if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
nv->vbo_dirty = true;
}
-
- util_range_add(&buf->valid_buffer_range,
- tx->base.box.x, tx->base.box.x + tx->base.box.width);
}
if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
nv->copy_data(nv, buf->bo, buf->offset, new_domain,
bo, offset, old_domain, buf->base.width0);
- nouveau_bo_ref(NULL, &bo);
+ nouveau_fence_work(screen->fence.current, nouveau_fence_unref_bo, bo);
if (mm)
release_allocation(&mm, screen->fence.current);
} else
uint8_t status;
uint8_t domain;
+ uint16_t cb_bindings[6]; /* per-shader per-slot bindings */
+
struct nouveau_fence *fence;
struct nouveau_fence *fence_wr;
#define NOUVEAU_MAX_SCRATCH_BUFS 4
+struct nv04_resource;
+
struct nouveau_context {
struct pipe_context pipe;
struct nouveau_screen *screen;
unsigned, const void *);
/* base, size refer to the whole constant buffer */
void (*push_cb)(struct nouveau_context *,
- struct nouveau_bo *, unsigned domain,
- unsigned base, unsigned size,
+ struct nv04_resource *,
unsigned offset, unsigned words, const uint32_t *);
/* @return: @ref reduced by nr of references found in context */
nouveau_fence_new(screen, &screen->fence.current, false);
}
+
+void
+nouveau_fence_unref_bo(void *data) /* generic fence-work callback; data is the struct nouveau_bo * to release */
+{
+ struct nouveau_bo *bo = data;
+
+ nouveau_bo_ref(NULL, &bo); /* drops the reference held on the bo (same call the callers used to make directly) */
+}
bool nouveau_fence_wait(struct nouveau_fence *);
bool nouveau_fence_signalled(struct nouveau_fence *);
+void nouveau_fence_unref_bo(void *data); /* generic unref bo callback */
+
+
static inline void
nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
{
#include "util/u_surface.h"
#include "nv_m2mf.xml.h"
+#include "nv_object.xml.h"
#include "nv30/nv30_screen.h"
#include "nv30/nv30_context.h"
#include "nv30/nv30_resource.h"
nv30_transfer_rect(nv30, NEAREST, &src, &dst);
}
-void
-nv30_resource_resolve(struct pipe_context *pipe,
- const struct pipe_resolve_info *info)
+static void
+nv30_resource_resolve(struct nv30_context *nv30,
+ const struct pipe_blit_info *info) /* resolves info->src into info->dst via tiled BILINEAR transfers */
{
-#if 0
- struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_miptree *src_mt = nv30_miptree(info->src.resource); /* supplies the ms_x/ms_y shifts used below */
struct nv30_rect src, dst;
-
- define_rect(info->src.res, 0, 0, info->src.x0, info->src.y0,
- info->src.x1 - info->src.x0, info->src.y1 - info->src.y0, &src);
- define_rect(info->dst.res, info->dst.level, 0, info->dst.x0, info->dst.y0,
- info->dst.x1 - info->dst.x0, info->dst.y1 - info->dst.y0, &dst);
-
- nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
-#endif
+ unsigned x, x0, x1, y, y1, w, h;
+
+ define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x,
+ info->src.box.y, info->src.box.width, info->src.box.height, &src);
+ define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x,
+ info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst);
+
+ x0 = src.x0; /* save the full source bounds: src.x0/x1/y0/y1 are overwritten per tile below */
+ x1 = src.x1;
+ y1 = src.y1;
+
+ /* On nv3x we must use sifm which is restricted to 1024x1024 tiles */
+ for (y = src.y0; y < y1; y += h) {
+ h = y1 - y; /* remaining rows, clamped to the 1024 tile limit */
+ if (h > 1024)
+ h = 1024;
+
+ src.y0 = 0; /* tile-local rect; the tile position is carried by src.offset */
+ src.y1 = h;
+ src.h = h;
+
+ dst.y1 = dst.y0 + (h >> src_mt->ms_y); /* destination height is the source height shifted right by ms_y */
+ dst.h = h >> src_mt->ms_y;
+
+ for (x = x0; x < x1; x += w) {
+ w = x1 - x; /* remaining columns, clamped to the 1024 tile limit */
+ if (w > 1024)
+ w = 1024;
+
+ src.offset = y * src.pitch + x * src.cpp; /* byte offset of this tile's top-left in the source */
+ src.x0 = 0;
+ src.x1 = w;
+ src.w = w;
+
+ dst.offset = (y >> src_mt->ms_y) * dst.pitch +
+ (x >> src_mt->ms_x) * dst.cpp; /* source position shifted by ms_y/ms_x, in destination pitch/cpp units */
+ dst.x1 = dst.x0 + (w >> src_mt->ms_x);
+ dst.w = w >> src_mt->ms_x;
+
+ nv30_transfer_rect(nv30, BILINEAR, &src, &dst); /* one transfer per tile */
+ }
+ }
}
void
info.dst.resource->nr_samples <= 1 &&
!util_format_is_depth_or_stencil(info.src.resource->format) &&
!util_format_is_pure_integer(info.src.resource->format)) {
- debug_printf("nv30: color resolve unimplemented\n");
+ nv30_resource_resolve(nv30, blit_info);
return;
}
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_transfer *tx = nv30_transfer(ptx);
- if (ptx->usage & PIPE_TRANSFER_WRITE)
+ if (ptx->usage & PIPE_TRANSFER_WRITE) {
nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img);
- nouveau_bo_ref(NULL, &tx->tmp.bo);
+ /* Allow the copies above to finish executing before freeing the source */
+ nouveau_fence_work(nv30->screen->base.fence.current,
+ nouveau_fence_unref_bo, tx->tmp.bo);
+ } else {
+ nouveau_bo_ref(NULL, &tx->tmp.bo);
+ }
pipe_resource_reference(&ptx->resource, NULL);
FREE(tx);
}
blocksz = util_format_get_blocksize(pt->format);
if ((pt->target == PIPE_TEXTURE_RECT) ||
+ (pt->bind & PIPE_BIND_SCANOUT) ||
!util_is_power_of_two(pt->width0) ||
!util_is_power_of_two(pt->height0) ||
!util_is_power_of_two(pt->depth0) ||
util_format_is_float(pt->format) || mt->ms_mode) {
mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz;
mt->uniform_pitch = align(mt->uniform_pitch, 64);
+ if (pt->bind & PIPE_BIND_SCANOUT) {
+ struct nv30_screen *screen = nv30_screen(pscreen);
+ int pitch_align = MAX2(
+ screen->eng3d->oclass >= NV40_3D_CLASS ? 1024 : 256,
+ /* round_down_pow2(mt->uniform_pitch / 4) */
+ 1 << (util_last_bit(mt->uniform_pitch / 4) - 1));
+ mt->uniform_pitch = align(mt->uniform_pitch, pitch_align);
+ }
}
if (!mt->uniform_pitch)
const struct pipe_box *src_box);
void
-nv30_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *);
-
-void
nv30_blit(struct pipe_context *pipe,
const struct pipe_blit_info *blit_info);
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_TGSI_TXQS:
return 0;
case PIPE_CAP_VENDOR_ID:
unsigned sample_count,
unsigned bindings)
{
- if (sample_count > 4)
+ if (sample_count > nv30_screen(pscreen)->max_sample_count)
return false;
+
if (!(0x00000017 & (1 << sample_count)))
return false;
return NULL;
}
+ /*
+ * Some modern apps try to use msaa without keeping in mind the
+ * restrictions on videomem of older cards. Resulting in dmesg saying:
+ * [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
+ * [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
+ * [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
+ *
+ * Because we are running out of video memory, after which the program
+ * using the msaa visual freezes, and eventually the entire system freezes.
+ *
+ * To work around this we do not allow msaa visuals by default and allow
+ * the user to override this via NV30_MAX_MSAA.
+ */
+ screen->max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0);
+ if (screen->max_sample_count > 4)
+ screen->max_sample_count = 4;
+
pscreen = &screen->base.base;
pscreen->destroy = nv30_screen_destroy;
pscreen->get_param = nv30_screen_get_param;
/*XXX: nvfx state */
struct nouveau_heap *vp_exec_heap;
struct nouveau_heap *vp_data_heap;
+
+ unsigned max_sample_count;
};
static inline struct nv30_screen *
static bool
nv30_transfer_sifm(XFER_ARGS)
{
- if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
+ if (!src->pitch || src->w > 1024 || src->h > 1024 || src->w < 2 || src->h < 2)
return false;
if (src->d > 1 || dst->d > 1)
return false;
if (!dst->pitch) {
- if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
+ if (dst->w > 2048 || dst->h > 2048 || dst->w < 2 || dst->h < 2)
return false;
} else {
if (dst->domain != NOUVEAU_BO_VRAM)
}
}
- if (nv50->idxbuf.buffer == res)
+ if (nv50->idxbuf.buffer == res) {
+ /* Just rebind to the bufctx as there is no separate dirty bit */
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
+ BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD);
if (!--ref)
return ref;
+ }
for (s = 0; s < 3; ++s) {
assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
/* nv50_query.c */
void nv50_init_query_functions(struct nv50_context *);
-void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
+void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
struct pipe_query *, unsigned result_offset);
void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
void nva0_so_target_save_offset(struct pipe_context *,
F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
+#if NOUVEAU_DRIVER != 0xc0
C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
+#endif
F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),
C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
{
struct nv50_miptree *mt = nv50_miptree(pt);
- nouveau_bo_ref(NULL, &mt->base.bo);
+ if (mt->base.fence && mt->base.fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
+ nouveau_fence_work(mt->base.fence, nouveau_fence_unref_bo, mt->base.bo);
+ else
+ nouveau_bo_ref(NULL, &mt->base.bo);
nouveau_fence_ref(NULL, &mt->base.fence);
nouveau_fence_ref(NULL, &mt->base.fence_wr);
case TGSI_SEMANTIC_VERTEXID:
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
+ prog->vp.vertexid = 1;
continue;
default:
break;
ubyte psiz; /* output slot of point size */
ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */
ubyte edgeflag;
+ ubyte vertexid;
ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */
ubyte clpd_nr;
} vp;
nv50_query_get(push, q, 0, 0x1000f010);
break;
case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+ q->sequence++;
nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
}
void
-nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
+nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, /* method: 3D method that receives the query value */
struct pipe_query *pq, unsigned result_offset)
{
struct nv50_query *q = nv50_query(pq);
- /* XXX: does this exist ? */
-#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
+ nv50_query_update(q);
+ if (q->state != NV50_QUERY_STATE_READY)
+ nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client); /* NOTE(review): presumably blocks until q->bo can be read by the CPU - confirm */
+ q->state = NV50_QUERY_STATE_READY;
- PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
- nouveau_pushbuf_space(push, 0, 0, 1);
- nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
- NV50_IB_ENTRY_1_NO_PREFETCH);
+ BEGIN_NV04(push, SUBC_3D(method), 1); /* the method header is now emitted here rather than by the caller */
+ PUSH_DATA (push, q->data[result_offset / 4]); /* value is read on the CPU and pushed inline; assumes result_offset is in bytes and q->data holds 32-bit words - TODO confirm */
}
void
case PIPE_CAP_MAX_TEXEL_OFFSET:
return 7;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
- return 65536;
+ return 128 * 1024 * 1024;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
return 330;
case PIPE_CAP_MAX_RENDER_TARGETS:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
PUSH_DATA (push, so->num_attribs[i]);
if (n == 4) {
PUSH_DATA(push, targ->pipe.buffer_size);
-
- BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
if (!targ->clean) {
assert(targ->pq);
- nv50_query_pushbuf_submit(push, targ->pq, 0x4);
+ nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
+ targ->pq, 0x4);
} else {
+ BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
PUSH_DATA(push, 0);
targ->clean = false;
}
(so->stride[i] * nv50->state.prim_size);
prims = MIN2(prims, limit);
}
+ targ->stride = so->stride[i];
BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
}
if (prims != ~0) {
{ nv50_validate_samplers, NV50_NEW_SAMPLERS },
{ nv50_stream_output_validate, NV50_NEW_STRMOUT |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
- { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
+ { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
+ NV50_NEW_VERTPROG },
{ nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES },
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
return &view->pipe;
}
+static void
+nv50_update_tic(struct nv50_context *nv50, struct nv50_tic_entry *tic,
+ struct nv04_resource *res) /* re-syncs the address stored in a buffer TIC entry with res->address */
+{
+ uint64_t address = res->address;
+ if (res->base.target != PIPE_BUFFER)
+ return; /* only PIPE_BUFFER resources are handled here */
+ address += tic->pipe.u.buf.first_element *
+ util_format_get_blocksize(tic->pipe.format); /* byte offset of the view's first element */
+ if (tic->tic[1] == (uint32_t)address &&
+ (tic->tic[2] & 0xff) == address >> 32)
+ return; /* stored address already matches: nothing to do */
+
+ nv50_screen_tic_unlock(nv50->screen, tic);
+ tic->id = -1; /* negative id makes the caller's (tic->id < 0) check allocate a new slot */
+ tic->tic[1] = address; /* low 32 bits of the address (implicit truncation) */
+ tic->tic[2] &= 0xffffff00; /* low byte of tic[2] holds address bits 32 and up */
+ tic->tic[2] |= address >> 32;
+}
+
static bool
nv50_validate_tic(struct nv50_context *nv50, int s)
{
continue;
}
res = &nv50_miptree(tic->pipe.texture)->base;
+ nv50_update_tic(nv50, tic, res);
if (tic->id < 0) {
tic->id = nv50_screen_tic_alloc(nv50->screen, tic);
tx->rect[0].base += mt->layer_stride;
tx->rect[1].base += tx->nblocksy * tx->base.stride;
}
+
+ /* Allow the copies above to finish executing before freeing the source */
+ nouveau_fence_work(nv50->screen->base.fence.current,
+ nouveau_fence_unref_bo, tx->rect[1].bo);
+ } else {
+ nouveau_bo_ref(NULL, &tx->rect[1].bo);
}
- nouveau_bo_ref(NULL, &tx->rect[1].bo);
pipe_resource_reference(&transfer->resource, NULL);
FREE(tx);
uint64_t addrs[PIPE_MAX_ATTRIBS];
uint32_t limits[PIPE_MAX_ATTRIBS];
struct nouveau_pushbuf *push = nv50->base.pushbuf;
- struct nv50_vertex_stateobj *vertex = nv50->vertex;
+ struct nv50_vertex_stateobj dummy = {};
+ struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy;
struct pipe_vertex_buffer *vb;
struct nv50_vertex_element *ve;
uint32_t mask;
unsigned i;
const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
+ /* A vertexid is not generated for inline data uploads. Have to use a
+ * VBO. This check must come after the vertprog has been validated,
+ * otherwise vertexid may be unset.
+ */
+ assert(nv50->vertprog->translated);
+ if (nv50->vertprog->vp.vertexid)
+ nv50->vbo_push_hint = 0;
+
if (unlikely(vertex->need_conversion))
nv50->vbo_fifo = ~0;
else
if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
nv50->base.vbo_dirty = true;
- break;
}
}
}
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
- PUSH_DATA (push, 0);
- BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
- nv50_query_pushbuf_submit(push, so->pq, 0x4);
+ PUSH_DATA (push, so->stride);
+ nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4);
BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
PUSH_DATA (push, 0);
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ bool tex_dirty = false;
int i, s;
/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
push->kick_notify = nv50_draw_vbo_kick_notify;
+ /* TODO: Instead of iterating over all the buffer resources looking for
+ * coherent buffers, keep track of a context-wide count.
+ */
for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
uint32_t valid = nv50->constbuf_valid[s];
nv50->cb_dirty = false;
}
+ for (s = 0; s < 3 && !tex_dirty; ++s) {
+ for (i = 0; i < nv50->num_textures[s] && !tex_dirty; ++i) {
+ if (!nv50->textures[s][i] ||
+ nv50->textures[s][i]->texture->target != PIPE_BUFFER)
+ continue;
+ if (nv50->textures[s][i]->texture->flags &
+ PIPE_RESOURCE_FLAG_MAP_COHERENT)
+ tex_dirty = true;
+ }
+ }
+ if (tex_dirty) {
+ BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
+ PUSH_DATA (push, 0x20);
+ }
+
if (nv50->vbo_fifo) {
nv50_push_vbo(nv50, info);
push->kick_notify = nv50_default_kick_notify;
nv50->base.vbo_dirty = true;
}
- if (!nv50->base.vbo_dirty && nv50->idxbuf.buffer &&
- nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv50->base.vbo_dirty = true;
-
if (nv50->base.vbo_dirty) {
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
PUSH_DATA (push, 0);
struct nouveau_bo *dst, unsigned offset, unsigned domain,
unsigned size, const void *data);
void
-nvc0_cb_push(struct nouveau_context *,
- struct nouveau_bo *bo, unsigned domain,
- unsigned base, unsigned size,
- unsigned offset, unsigned words, const uint32_t *data);
+nvc0_cb_bo_push(struct nouveau_context *,
+ struct nouveau_bo *bo, unsigned domain,
+ unsigned base, unsigned size,
+ unsigned offset, unsigned words, const uint32_t *data);
/* nvc0_vbo.c */
void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
for (i = 0; i < info->numOutputs; ++i) {
if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
- fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
+ fp->hdr[18] |= 0xf << info->out[i].slot[0];
}
fp->fp.early_z = info->prop.fp.earlyFragTests;
case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
return 31;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
- return 65536;
+ return 128 * 1024 * 1024;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
return 410;
case PIPE_CAP_MAX_RENDER_TARGETS:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
}
nvc0->constbuf_dirty[s] |= 1 << i;
+ if (nvc0->constbuf[s][i].u.buf)
+ nv04_resource(nvc0->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i);
pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);
nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false;
BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
PUSH_DATA (push, (0 << 4) | 1);
}
- nvc0_cb_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
+ nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
base, nvc0->state.uniform_buffer_bound[s],
0, (size + 3) / 4,
nvc0->constbuf[s][0].u.data);
BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
+ res->cb_bindings[s] |= 1 << i;
} else {
BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
PUSH_DATA (push, (i << 4) | 0);
return &view->pipe;
}
+static void
+nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
+ struct nv04_resource *res) /* re-syncs the address stored in a buffer TIC entry with res->address */
+{
+ uint64_t address = res->address;
+ if (res->base.target != PIPE_BUFFER)
+ return; /* only PIPE_BUFFER resources are handled here */
+ address += tic->pipe.u.buf.first_element *
+ util_format_get_blocksize(tic->pipe.format); /* byte offset of the view's first element */
+ if (tic->tic[1] == (uint32_t)address &&
+ (tic->tic[2] & 0xff) == address >> 32)
+ return; /* stored address already matches: nothing to do */
+
+ nvc0_screen_tic_unlock(nvc0->screen, tic);
+ tic->id = -1; /* negative id makes the caller's (tic->id < 0) check allocate a new slot */
+ tic->tic[1] = address; /* low 32 bits of the address (implicit truncation) */
+ tic->tic[2] &= 0xffffff00; /* low byte of tic[2] holds address bits 32 and up */
+ tic->tic[2] |= address >> 32;
+}
+
static bool
nvc0_validate_tic(struct nvc0_context *nvc0, int s)
{
continue;
}
res = nv04_resource(tic->pipe.texture);
+ nvc0_update_tic(nvc0, tic, res);
if (tic->id < 0) {
tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
continue;
}
res = nv04_resource(tic->pipe.texture);
+ nvc0_update_tic(nvc0, tic, res);
if (tic->id < 0) {
tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
tx->rect[1].base += tx->nblocksy * tx->base.stride;
}
NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_wr, 1);
+
+ /* Allow the copies above to finish executing before freeing the source */
+ nouveau_fence_work(nvc0->screen->base.fence.current,
+ nouveau_fence_unref_bo, tx->rect[1].bo);
+ } else {
+ nouveau_bo_ref(NULL, &tx->rect[1].bo);
}
if (tx->base.usage & PIPE_TRANSFER_READ)
NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_rd, 1);
- nouveau_bo_ref(NULL, &tx->rect[1].bo);
pipe_resource_reference(&transfer->resource, NULL);
FREE(tx);
}
/* This happens rather often with DTD9/st. */
-void
+static void
nvc0_cb_push(struct nouveau_context *nv,
- struct nouveau_bo *bo, unsigned domain,
- unsigned base, unsigned size,
+ struct nv04_resource *res,
unsigned offset, unsigned words, const uint32_t *data)
{
+ struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
+ struct nvc0_constbuf *cb = NULL; /* binding that covers the updated range, if one is found */
+ int s;
+
+ /* Go through all the constbuf binding points of this buffer and try to
+ * find one which contains the region to be updated.
+ */
+ for (s = 0; s < 6 && !cb; s++) { /* 6 matches the length of res->cb_bindings[] */
+ uint16_t bindings = res->cb_bindings[s];
+ while (bindings) {
+ int i = ffs(bindings) - 1; /* slot index of the lowest set bit */
+ uint32_t cb_offset = nvc0->constbuf[s][i].offset;
+
+ bindings &= ~(1 << i); /* mark slot i as visited */
+ if (cb_offset <= offset &&
+ cb_offset + nvc0->constbuf[s][i].size >= offset + words * 4) { /* binding contains [offset, offset + words * 4) */
+ cb = &nvc0->constbuf[s][i];
+ break;
+ }
+ }
+ }
+
+ if (cb) {
+ nvc0_cb_bo_push(nv, res->bo, res->domain,
+ res->offset + cb->offset, cb->size,
+ offset - cb->offset, words, data); /* offset is made relative to the binding's start */
+ } else {
+ nv->push_data(nv, res->bo, res->offset + offset, res->domain,
+ words * 4, data); /* no binding covers the range: fall back to a plain data push */
+ }
+}
+
+void
+nvc0_cb_bo_push(struct nouveau_context *nv,
+ struct nouveau_bo *bo, unsigned domain,
+ unsigned base, unsigned size,
+ unsigned offset, unsigned words, const uint32_t *data)
+{
struct nouveau_pushbuf *push = nv->pushbuf;
NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_count, 1);
assert(!(offset & 3));
size = align(size, 0x100);
+ assert(offset < size);
+ assert(offset + words * 4 <= size);
+
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, size);
PUSH_DATAh(push, bo->offset + base);
push->kick_notify = nvc0_draw_vbo_kick_notify;
+ /* TODO: Instead of iterating over all the buffer resources looking for
+ * coherent buffers, keep track of a context-wide count.
+ */
for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
uint32_t valid = nvc0->constbuf_valid[s];
nvc0->cb_dirty = false;
}
+ for (s = 0; s < 5; ++s) {
+ for (int i = 0; i < nvc0->num_textures[s]; ++i) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+ struct pipe_resource *res;
+ if (!tic)
+ continue;
+ res = nvc0->textures[s][i]->texture;
+ if (res->target != PIPE_BUFFER ||
+ !(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
+ continue;
+
+ BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
+ PUSH_DATA (push, (tic->id << 4) | 1);
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
+ }
+ }
+
if (nvc0->state.vbo_mode) {
nvc0_push_vbo(nvc0, info);
push->kick_notify = nvc0_default_kick_notify;
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
return 0;
/* SWTCL-only features. */
S_SQ_CF_WORD1_BARRIER(1) |
S_SQ_CF_WORD1_COND(cf->cond) |
S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+ S_SQ_CF_WORD1_COUNT(cf->count) |
S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
}
"allocating %u dwords lds.\n",
num_pipes, num_waves, lds_size);
- r600_write_config_reg(cs, R_008970_VGT_NUM_INDICES, group_size);
+ radeon_set_config_reg(cs, R_008970_VGT_NUM_INDICES, group_size);
- r600_write_config_reg_seq(cs, R_00899C_VGT_COMPUTE_START_X, 3);
+ radeon_set_config_reg_seq(cs, R_00899C_VGT_COMPUTE_START_X, 3);
radeon_emit(cs, 0); /* R_00899C_VGT_COMPUTE_START_X */
radeon_emit(cs, 0); /* R_0089A0_VGT_COMPUTE_START_Y */
radeon_emit(cs, 0); /* R_0089A4_VGT_COMPUTE_START_Z */
- r600_write_config_reg(cs, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE,
+ radeon_set_config_reg(cs, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE,
group_size);
- r600_write_compute_context_reg_seq(cs, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, 3);
+ radeon_compute_set_context_reg_seq(cs, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, 3);
radeon_emit(cs, block_layout[0]); /* R_0286EC_SPI_COMPUTE_NUM_THREAD_X */
radeon_emit(cs, block_layout[1]); /* R_0286F0_SPI_COMPUTE_NUM_THREAD_Y */
radeon_emit(cs, block_layout[2]); /* R_0286F4_SPI_COMPUTE_NUM_THREAD_Z */
assert(lds_size <= 8160);
}
- r600_write_compute_context_reg(cs, CM_R_0288E8_SQ_LDS_ALLOC,
+ radeon_compute_set_context_reg(cs, CM_R_0288E8_SQ_LDS_ALLOC,
lds_size | (num_waves << 14));
/* Dispatch packet */
/* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */
for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
- unsigned reloc = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx,
+ unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx,
(struct r600_resource*)cb->base.texture,
RADEON_USAGE_READWRITE,
RADEON_PRIO_SHADER_RESOURCE_RW);
- r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
+ radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */
radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */
radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */
}
if (ctx->keep_tiling_flags) {
for (; i < 8 ; i++) {
- r600_write_compute_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
+ radeon_compute_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
S_028C70_FORMAT(V_028C70_COLOR_INVALID));
}
for (; i < 12; i++) {
- r600_write_compute_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C,
+ radeon_compute_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C,
S_028C70_FORMAT(V_028C70_COLOR_INVALID));
}
}
/* Set CB_TARGET_MASK XXX: Use cb_misc_state */
- r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
+ radeon_compute_set_context_reg(cs, R_028238_CB_TARGET_MASK,
ctx->compute_cb_target_mask);
nstack = shader->bc.nstack;
#endif
- r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
+ radeon_compute_set_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
radeon_emit(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */
S_0288D4_NUM_GPRS(ngpr)
radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
code_bo, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA));
}
for (i = 0; i < ncopy; i++) {
csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
/* emit reloc before writing cs so that cs is always in consistent state */
- r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
RADEON_PRIO_MIN);
- r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize);
cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
}
/* This must be done after r600_need_cs_space. */
- reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+ reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
(struct r600_resource*)dst, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
struct pipe_clip_state *state = &rctx->clip_state.state;
- r600_write_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
+ radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
radeon_emit_array(cs, (unsigned*)state, 6*4);
}
const struct pipe_scissor_state *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
+ struct r600_scissor_state *rstate = &rctx->scissor;
int i;
- for (i = start_slot; i < start_slot + num_scissors; i++) {
- rctx->scissor[i].scissor = state[i - start_slot];
- r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
- }
+ for (i = start_slot; i < start_slot + num_scissors; i++)
+ rstate->scissor[i] = state[i - start_slot];
+ rstate->dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+ rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 4;
+ r600_mark_atom_dirty(rctx, &rstate->atom);
}
static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
- struct r600_scissor_state *rstate = (struct r600_scissor_state *)atom;
- struct pipe_scissor_state *state = &rstate->scissor;
- unsigned offset = rstate->idx * 4 * 2;
+ struct r600_scissor_state *rstate = &rctx->scissor; /* single shared scissor state; the atom argument is no longer cast */
+ struct pipe_scissor_state *state;
+ uint32_t dirty_mask;
+ unsigned i, offset;
uint32_t tl, br;
- evergreen_get_scissor_rect(rctx, state->minx, state->miny, state->maxx, state->maxy, &tl, &br);
+ dirty_mask = rstate->dirty_mask; /* iterate over a copy; the stored mask is cleared after the loop */
+ while (dirty_mask != 0) {
+ i = u_bit_scan(&dirty_mask); /* NOTE(review): presumably returns and clears one set bit - confirm against u_bit_scan */
+ state = &rstate->scissor[i];
+ evergreen_get_scissor_rect(rctx, state->minx, state->miny, state->maxx, state->maxy, &tl, &br);
- r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
- radeon_emit(cs, tl);
- radeon_emit(cs, br);
+ offset = i * 4 * 2; /* two registers (TL, BR) per scissor slot; 4 is presumably the register size in bytes */
+ radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
+ radeon_emit(cs, tl);
+ radeon_emit(cs, br);
+ }
+ rstate->dirty_mask = 0;
+ rstate->atom.num_dw = 0; /* nothing left to emit; recomputed from dirty_mask when scissors are next set */
}
/**
nr_samples = 0;
break;
case 2:
- r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(eg_sample_locs_2x));
+ radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(eg_sample_locs_2x));
radeon_emit_array(cs, eg_sample_locs_2x, Elements(eg_sample_locs_2x));
max_dist = eg_max_dist_2x;
break;
case 4:
- r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(eg_sample_locs_4x));
+ radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(eg_sample_locs_4x));
radeon_emit_array(cs, eg_sample_locs_4x, Elements(eg_sample_locs_4x));
max_dist = eg_max_dist_4x;
break;
case 8:
- r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(sample_locs_8x));
+ radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, Elements(sample_locs_8x));
radeon_emit_array(cs, sample_locs_8x, Elements(sample_locs_8x));
max_dist = max_dist_8x;
break;
}
if (nr_samples > 1) {
- r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
+ radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, S_028C00_LAST_PIXEL(1) |
S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
- r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
+ radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
} else {
- r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
+ radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
- r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
+ radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
}
}
cb = (struct r600_surface*)state->cbufs[i];
if (!cb) {
- r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
+ radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
S_028C70_FORMAT(V_028C70_COLOR_INVALID));
continue;
}
tex = (struct r600_texture *)cb->base.texture;
- reloc = r600_context_bo_reloc(&rctx->b,
+ reloc = radeon_add_to_buffer_list(&rctx->b,
&rctx->b.rings.gfx,
(struct r600_resource*)cb->base.texture,
RADEON_USAGE_READWRITE,
RADEON_PRIO_COLOR_BUFFER);
if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
- cmask_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+ cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
tex->cmask_buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_COLOR_META);
} else {
cmask_reloc = reloc;
}
- r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
+ radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */
radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */
radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */
}
/* set CB_COLOR1_INFO for possible dual-src blending */
if (i == 1 && state->cbufs[0]) {
- r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
+ radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
cb->cb_color_info | tex->cb_color_info);
if (!rctx->keep_tiling_flags) {
- unsigned reloc = r600_context_bo_reloc(&rctx->b,
+ unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
&rctx->b.rings.gfx,
(struct r600_resource*)state->cbufs[0]->texture,
RADEON_USAGE_READWRITE,
}
if (rctx->keep_tiling_flags) {
for (; i < 8 ; i++) {
- r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
+ radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
}
for (; i < 12; i++) {
- r600_write_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, 0);
+ radeon_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, 0);
}
}
/* ZS buffer. */
if (state->zsbuf) {
struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
- unsigned reloc = r600_context_bo_reloc(&rctx->b,
+ unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
&rctx->b.rings.gfx,
(struct r600_resource*)state->zsbuf->texture,
RADEON_USAGE_READWRITE,
RADEON_PRIO_DEPTH_BUFFER_MSAA :
RADEON_PRIO_DEPTH_BUFFER);
- r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+ radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
zb->pa_su_poly_offset_db_fmt_cntl);
- r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
+ radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
- r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 8);
+ radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 8);
radeon_emit(cs, zb->db_z_info); /* R_028040_DB_Z_INFO */
radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */
radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */
} else if (rctx->screen->b.info.drm_minor >= 18) {
/* DRM 2.6.18 allows the INVALID format to disable depth/stencil.
* Older kernels are out of luck. */
- r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
+ radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
}
/* Framebuffer dimensions. */
evergreen_get_scissor_rect(rctx, 0, 0, state->width, state->height, &tl, &br);
- r600_write_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
+ radeon_set_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
radeon_emit(cs, tl); /* R_028204_PA_SC_WINDOW_SCISSOR_TL */
radeon_emit(cs, br); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */
default:;
}
- r600_write_context_reg_seq(cs, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ radeon_set_context_reg_seq(cs, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
radeon_emit(cs, fui(offset_scale));
radeon_emit(cs, fui(offset_units));
radeon_emit(cs, fui(offset_scale));
unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
- r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
+ radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */
/* This must match the used export instructions exactly.
* Other values may lead to undefined behavior and hangs.
struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
unsigned reloc_idx;
- r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
- r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
- r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
- r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
- reloc_idx = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
+ radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
+ radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
+ radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
+ radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
+ reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] = reloc_idx;
} else {
- r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
- r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
+ radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
+ radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
}
}
db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(1);
}
- r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
+ radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
radeon_emit(cs, db_render_control); /* R_028000_DB_RENDER_CONTROL */
radeon_emit(cs, db_count_control); /* R_028004_DB_COUNT_CONTROL */
- r600_write_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
- r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
+ radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
+ radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
}
static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
radeon_emit(cs, (resource_offset + buffer_index) * 8);
radeon_emit(cs, va); /* RESOURCEi_WORD0 */
- radeon_emit(cs, rbuffer->buf->size - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+ radeon_emit(cs, rbuffer->b.b.width0 - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
radeon_emit(cs, /* RESOURCEi_WORD2 */
S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
S_030008_STRIDE(vb->stride) |
radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
}
state->dirty_mask = 0;
static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
{
- evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, 992, 0);
+ evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_FS, 0);
}
static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
{
- evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, 816,
+ evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_CS,
RADEON_CP_PACKET3_COMPUTE_MODE);
}
va = rbuffer->gpu_address + cb->buffer_offset;
if (!gs_ring_buffer) {
- r600_write_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
+ radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags);
- r600_write_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
+ radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
pkt_flags);
}
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
radeon_emit(cs, va); /* RESOURCEi_WORD0 */
- radeon_emit(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+ radeon_emit(cs, rbuffer->b.b.width0 - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
radeon_emit(cs, /* RESOURCEi_WORD2 */
S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
S_030008_STRIDE(gs_ring_buffer ? 4 : 16) |
S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
dirty_mask &= ~(1 << buffer_index);
static void evergreen_emit_vs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX], 176,
+ evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX],
+ EG_FETCH_CONSTANTS_OFFSET_VS,
R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
R_028980_ALU_CONST_CACHE_VS_0,
0 /* PKT3 flags */);
static void evergreen_emit_gs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY], 336,
+ evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY],
+ EG_FETCH_CONSTANTS_OFFSET_GS,
R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0,
R_0289C0_ALU_CONST_CACHE_GS_0,
0 /* PKT3 flags */);
static void evergreen_emit_ps_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT], 0,
- R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
- R_028940_ALU_CONST_CACHE_PS_0,
- 0 /* PKT3 flags */);
+ evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT],
+ EG_FETCH_CONSTANTS_OFFSET_PS,
+ R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
+ R_028940_ALU_CONST_CACHE_PS_0,
+ 0 /* PKT3 flags */);
}
static void evergreen_emit_cs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE], 816,
+ evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE],
+ EG_FETCH_CONSTANTS_OFFSET_CS,
R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0,
R_028F40_ALU_CONST_CACHE_LS_0,
RADEON_CP_PACKET3_COMPUTE_MODE);
radeon_emit(cs, (resource_id_base + resource_index) * 8);
radeon_emit_array(cs, rview->tex_resource_words, 8);
- reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
+ reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
RADEON_USAGE_READ,
rview->tex_resource->b.b.nr_samples > 1 ?
RADEON_PRIO_SHADER_TEXTURE_MSAA :
static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views,
- 176 + R600_MAX_CONST_BUFFERS, 0);
+ EG_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS, 0);
}
static void evergreen_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views,
- 336 + R600_MAX_CONST_BUFFERS, 0);
+ EG_FETCH_CONSTANTS_OFFSET_GS + R600_MAX_CONST_BUFFERS, 0);
}
static void evergreen_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views,
- R600_MAX_CONST_BUFFERS, 0);
+ EG_FETCH_CONSTANTS_OFFSET_PS + R600_MAX_CONST_BUFFERS, 0);
}
static void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views,
- 816 + 2, RADEON_CP_PACKET3_COMPUTE_MODE);
+ EG_FETCH_CONSTANTS_OFFSET_CS + 2, RADEON_CP_PACKET3_COMPUTE_MODE);
}
static void evergreen_emit_sampler_states(struct r600_context *rctx,
radeon_emit_array(cs, rstate->tex_sampler_words, 3);
if (rstate->border_color_use) {
- r600_write_config_reg_seq(cs, border_index_reg, 5);
+ radeon_set_config_reg_seq(cs, border_index_reg, 5);
radeon_emit(cs, i);
radeon_emit_array(cs, rstate->border_color.ui, 4);
}
struct r600_sample_mask *s = (struct r600_sample_mask*)a;
uint8_t mask = s->sample_mask;
- r600_write_context_reg(rctx->b.rings.gfx.cs, R_028C3C_PA_SC_AA_MASK,
+ radeon_set_context_reg(rctx->b.rings.gfx.cs, R_028C3C_PA_SC_AA_MASK,
mask | (mask << 8) | (mask << 16) | (mask << 24));
}
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
uint16_t mask = s->sample_mask;
- r600_write_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
+ radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
radeon_emit(cs, mask | (mask << 16)); /* X0Y0_X1Y0 */
radeon_emit(cs, mask | (mask << 16)); /* X0Y1_X1Y1 */
}
struct r600_cso_state *state = (struct r600_cso_state*)a;
struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
- r600_write_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
+ radeon_set_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
(shader->buffer->gpu_address + shader->offset) >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
}
primid = 1;
}
- r600_write_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v);
- r600_write_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
- r600_write_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
+ radeon_set_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v);
+ radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
+ radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
}
static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
struct r600_resource *rbuffer;
- r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+ radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
if (state->enable) {
rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
- r600_write_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
+ radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
rbuffer->gpu_address >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READWRITE,
RADEON_PRIO_SHADER_RESOURCE_RW));
- r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
+ radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
state->esgs_ring.buffer_size >> 8);
rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
- r600_write_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
+ radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
rbuffer->gpu_address >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READWRITE,
RADEON_PRIO_SHADER_RESOURCE_RW));
- r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
+ radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
state->gsvs_ring.buffer_size >> 8);
} else {
- r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
- r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
+ radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
+ radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
}
- r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+ radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
}
struct r600_command_buffer *cb = &shader->command_buffer;
struct r600_shader *rshader = &shader->shader;
struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
- unsigned gsvs_itemsize =
- (cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2;
+ unsigned gsvs_itemsizes[4] = {
+ (cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2,
+ (cp_shader->ring_item_sizes[1] * shader->selector->gs_max_out_vertices) >> 2,
+ (cp_shader->ring_item_sizes[2] * shader->selector->gs_max_out_vertices) >> 2,
+ (cp_shader->ring_item_sizes[3] * shader->selector->gs_max_out_vertices) >> 2
+ };
r600_init_command_buffer(cb, 64);
S_028B90_ENABLE(shader->selector->gs_num_invocations > 0));
}
r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
- r600_store_value(cb, cp_shader->ring_item_size >> 2);
- r600_store_value(cb, 0);
- r600_store_value(cb, 0);
- r600_store_value(cb, 0);
+ r600_store_value(cb, cp_shader->ring_item_sizes[0] >> 2);
+ r600_store_value(cb, cp_shader->ring_item_sizes[1] >> 2);
+ r600_store_value(cb, cp_shader->ring_item_sizes[2] >> 2);
+ r600_store_value(cb, cp_shader->ring_item_sizes[3] >> 2);
r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE,
- (rshader->ring_item_size) >> 2);
+ (rshader->ring_item_sizes[0]) >> 2);
r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE,
- gsvs_itemsize);
+ gsvs_itemsizes[0] +
+ gsvs_itemsizes[1] +
+ gsvs_itemsizes[2] +
+ gsvs_itemsizes[3]);
r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3);
- r600_store_value(cb, gsvs_itemsize);
- r600_store_value(cb, gsvs_itemsize);
- r600_store_value(cb, gsvs_itemsize);
+ r600_store_value(cb, gsvs_itemsizes[0]);
+ r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1]);
+ r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1] + gsvs_itemsizes[2]);
/* FIXME calculate these values somehow ??? */
r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3);
}
size = (cheight * pitch) / 4;
/* emit reloc before writing cs so that cs is always in consistent state */
- r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rsrc->resource,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource,
RADEON_USAGE_READ, RADEON_PRIO_MIN);
- r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rdst->resource,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource,
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size);
cs->buf[cs->cdw++] = base >> 8;
void evergreen_init_state_functions(struct r600_context *rctx)
{
- unsigned id = 4;
- int i;
+ unsigned id = 1;
+
/* !!!
* To avoid GPU lockup registers must be emited in a specific order
* (no kidding ...). The order below is important and have been
r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, evergreen_emit_polygon_offset, 6);
r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
- for (i = 0; i < R600_MAX_VIEWPORTS; i++) {
- r600_init_atom(rctx, &rctx->viewport[i].atom, id++, r600_emit_viewport_state, 8);
- r600_init_atom(rctx, &rctx->scissor[i].atom, id++, evergreen_emit_scissor_state, 4);
- rctx->viewport[i].idx = i;
- rctx->scissor[i].idx = i;
- }
+ r600_init_atom(rctx, &rctx->scissor.atom, id++, evergreen_emit_scissor_state, 0);
+ r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 0);
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
#define DMA_PACKET_CONSTANT_FILL 0xd
#define DMA_PACKET_NOP 0xf
+#define EG_FETCH_CONSTANTS_OFFSET_PS 0
+#define EG_FETCH_CONSTANTS_OFFSET_VS 176
+#define EG_FETCH_CONSTANTS_OFFSET_GS 336
+#define EG_FETCH_CONSTANTS_OFFSET_HS 496
+#define EG_FETCH_CONSTANTS_OFFSET_LS 656
+#define EG_FETCH_CONSTANTS_OFFSET_CS 816
+#define EG_FETCH_CONSTANTS_OFFSET_FS 992
+
#endif
return 0;
}
+/* Report whether an ALU instruction is flagged as a 64-bit operation.
+ *
+ * Fetches the op info record for alu->op through r600_isa_alu() and returns
+ * its flags masked with AF_64: non-zero when the flag is set, 0 otherwise.
+ * The result is the masked flag value, not a normalized 0/1.
+ * bc is not used by the body. */
+static int is_alu_64bit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+{
+ const struct alu_op_info *op = r600_isa_alu(alu->op);
+ return (op->flags & AF_64);
+}
+
static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
{
unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
for (i = 0; i < max_slots; ++i) {
if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && !prev[i]->dst.rel) {
+
+ if (is_alu_64bit_inst(bc, prev[i])) {
+ gpr[i] = -1;
+ continue;
+ }
+
gpr[i] = prev[i]->dst.sel;
/* cube writes more than PV.X */
if (is_alu_reduction_inst(bc, prev[i]))
if(!alu)
continue;
+ if (is_alu_64bit_inst(bc, alu))
+ continue;
num_src = r600_bytecode_get_num_operands(bc, alu);
for (src = 0; src < num_src; ++src) {
if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
fprintf(stderr, "CND:%X ", cf->cond);
if (cf->pop_count)
fprintf(stderr, "POP:%X ", cf->pop_count);
+ if (cf->count && (cfop->flags & CF_EMIT))
+ fprintf(stderr, "STREAM%d ", cf->count);
if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
unsigned id;
unsigned cond;
unsigned pop_count;
+ unsigned count;
unsigned cf_addr; /* control flow addr */
struct r600_bytecode_kcache kcache[4];
unsigned r6xx_uses_waterfall;
void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
unsigned *num_format, unsigned *format_comp, unsigned *endian);
+
+/* Map an index in 0..3 to the other member of its pair: 0 <-> 1 and 2 <-> 3.
+ * Any other input returns 0.
+ * NOTE(review): presumably used to swap the two 32-bit channels that hold a
+ * 64-bit (fp64) value -- confirm against the callers. */
+static inline int fp64_switch(int i)
+{
+ switch (i) {
+ case 0:
+ return 1;
+ case 1:
+ return 0;
+ case 2:
+ return 3;
+ case 3:
+ return 2;
+ }
+ /* not one of 0..3 */
+ return 0;
+}
#endif
util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso);
if (op & R600_SAVE_FRAGMENT_STATE) {
- util_blitter_save_viewport(rctx->blitter, &rctx->viewport[0].state);
- util_blitter_save_scissor(rctx->blitter, &rctx->scissor[0].scissor);
+ util_blitter_save_viewport(rctx->blitter, &rctx->viewport.state[0]);
+ util_blitter_save_scissor(rctx->blitter, &rctx->scissor.scissor[0]);
util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
util_blitter_save_blend(rctx->blitter, rctx->blend_state.cso);
util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa_state.cso);
if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) {
evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom,
- &buffers, color);
+ &buffers, NULL, color);
if (!buffers)
return; /* all buffers have been fast cleared */
}
num_dw += ctx->b.rings.gfx.cs->cdw;
if (count_draw_in) {
- unsigned i;
+ uint64_t mask;
/* The number of dwords all the dirty states would take. */
- i = r600_next_dirty_atom(ctx, 0);
- while (i < R600_NUM_ATOMS) {
- num_dw += ctx->atoms[i]->num_dw;
+ mask = ctx->dirty_atoms;
+ while (mask != 0) {
+ num_dw += ctx->atoms[u_bit_scan64(&mask)]->num_dw;
if (ctx->screen->b.trace_bo) {
num_dw += R600_TRACE_CS_DWORDS;
}
- i = r600_next_dirty_atom(ctx, i + 1);
}
/* The upper-bound of how much space a draw command would take. */
/* Use of WAIT_UNTIL is deprecated on Cayman+ */
if (rctx->b.family < CHIP_CAYMAN) {
/* wait for things to settle */
- r600_write_config_reg(cs, R_008040_WAIT_UNTIL, wait_until);
+ radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, wait_until);
}
}
/* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
if (ctx->b.chip_class == R600) {
- r600_write_context_reg(cs, R_028350_SX_MISC, 0);
+ radeon_set_context_reg(cs, R_028350_SX_MISC, 0);
}
/* force to keep tiling flags */
void r600_begin_new_cs(struct r600_context *ctx)
{
unsigned shader;
- int i;
+
ctx->b.flags = 0;
ctx->b.gtt = 0;
ctx->b.vram = 0;
r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
- for (i = 0; i < R600_MAX_VIEWPORTS; i++) {
- r600_mark_atom_dirty(ctx, &ctx->scissor[i].atom);
- r600_mark_atom_dirty(ctx, &ctx->viewport[i].atom);
- }
+ ctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+ ctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4;
+ r600_mark_atom_dirty(ctx, &ctx->scissor.atom);
+ ctx->viewport.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+ ctx->viewport.atom.num_dw = R600_MAX_VIEWPORTS * 8;
+ r600_mark_atom_dirty(ctx, &ctx->viewport.atom);
if (ctx->b.chip_class < EVERGREEN) {
r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
}
}
/* This must be done after r600_need_cs_space. */
- src_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src,
+ src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src,
RADEON_USAGE_READ, RADEON_PRIO_MIN);
- dst_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst,
+ dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst,
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
for (i = 0; i < ncopy; i++) {
csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
/* emit reloc before writing cs so that cs is always in consistent state */
- r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
RADEON_PRIO_MIN);
- r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize);
cs->buf[cs->cdw++] = dst_offset & 0xfffffffc;
{"PRED_SETGT_64", 2, { 0x7C, 0xC7 },{ AF_V, AF_V, AF_V, AF_V}, AF_PRED | AF_CC_GT | AF_64 },
{"PRED_SETE_64", 2, { 0x7D, 0xC8 },{ AF_V, AF_V, AF_V, AF_V}, AF_PRED | AF_CC_E | AF_64 },
{"PRED_SETGE_64", 2, { 0x7E, 0xC9 },{ AF_V, AF_V, AF_V, AF_V}, AF_PRED | AF_CC_GE | AF_64 },
- {"MUL_64", 2, { 0x1B, 0xCA },{ AF_V, AF_V, AF_V, AF_V}, AF_64 },
+ {"MUL_64", 2, { 0x1B, 0xCA },{ AF_V, AF_V, AF_V, AF_4V}, AF_64 },
{"ADD_64", 2, { 0x17, 0xCB },{ AF_V, AF_V, AF_V, AF_V}, AF_64 },
{"MOVA_INT", 1, { 0x18, 0xCC },{ AF_V, AF_V, AF_V, AF_V}, AF_MOVA },
- {"FLT64_TO_FLT32", 1, { 0x1C, 0xCD },{ AF_V, AF_V, AF_V, AF_V}, 0 },
- {"FLT32_TO_FLT64", 1, { 0x1D, 0xCE },{ AF_V, AF_V, AF_V, AF_V}, 0 },
+ {"FLT64_TO_FLT32", 1, { 0x1C, 0xCD },{ AF_V, AF_V, AF_V, AF_V}, AF_64 },
+ {"FLT32_TO_FLT64", 1, { 0x1D, 0xCE },{ AF_V, AF_V, AF_V, AF_V}, AF_64 },
{"SAD_ACCUM_PREV_UINT", 2, { -1, 0xCF },{ 0, 0, AF_V, AF_V}, AF_UINT_DST | AF_PREV_NEXT },
{"DOT", 2, { -1, 0xD0 },{ 0, 0, AF_V, AF_V}, AF_PREV_NEXT },
{"MUL_PREV", 1, { -1, 0xD1 },{ 0, 0, AF_V, AF_V}, AF_PREV_INTERLEAVE },
{"FMA", 3, { -1, 0x07 },{ 0, 0, AF_V, AF_V}, 0 },
{"MULADD_INT24", 3, { -1, 0x08 },{ 0, 0, 0, AF_V}, AF_INT_DST | AF_24 },
{"CNDNE_64", 3, { -1, 0x09 },{ 0, 0, AF_V, AF_V}, AF_CMOV | AF_64 },
- {"FMA_64", 3, { -1, 0x0A },{ 0, 0, AF_V, AF_V}, AF_64 },
+ {"FMA_64", 3, { -1, 0x0A },{ 0, 0, AF_V, AF_4V}, AF_64 },
{"LERP_UINT", 3, { -1, 0x0B },{ 0, 0, AF_V, AF_V}, AF_UINT_DST },
{"BIT_ALIGN_INT", 3, { -1, 0x0C },{ 0, 0, AF_V, AF_V}, AF_INT_DST },
{"BYTE_ALIGN_INT", 3, { -1, 0x0D },{ 0, 0, AF_V, AF_V}, AF_INT_DST },
#if defined R600_USE_LLVM || defined HAVE_OPENCL
#define CONSTANT_BUFFER_0_ADDR_SPACE 8
-#define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_UCP_CONST_BUFFER)
+#define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
#define LLVM_R600_BUFFER_INFO_CONST_BUFFER \
(CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
default: assert(!"unknown system value");
}
-#if HAVE_LLVM >= 0x0304
ctx->system_values[index] = LLVMBuildExtractElement(ctx->gallivm.builder,
LLVMGetParam(ctx->main_fn, 0), lp_build_const_int32(&(ctx->gallivm), chan),
"");
-#else
- LLVMValueRef reg = lp_build_const_int32(
- ctx->soa.bld_base.base.gallivm, chan);
- ctx->system_values[index] = lp_build_intrinsic(
- ctx->soa.bld_base.base.gallivm->builder,
- "llvm.R600.load.input",
- ctx->soa.bld_base.base.elem_type, &reg, 1,
- LLVMReadNoneAttribute);
-#endif
}
-#if HAVE_LLVM >= 0x0304
static LLVMValueRef
llvm_load_input_vector(
struct radeon_llvm_context * ctx, unsigned location, unsigned ijregs,
VecType, Args, ArgCount, LLVMReadNoneAttribute);
}
}
-#else
-static LLVMValueRef
-llvm_load_input_helper(
- struct radeon_llvm_context * ctx,
- unsigned idx, int interp, int ij_index)
-{
- const struct lp_build_context * bb = &ctx->soa.bld_base.base;
- LLVMValueRef arg[2];
- int arg_count;
- const char * intrinsic;
-
- arg[0] = lp_build_const_int32(bb->gallivm, idx);
-
- if (interp) {
- intrinsic = "llvm.R600.interp.input";
- arg[1] = lp_build_const_int32(bb->gallivm, ij_index);
- arg_count = 2;
- } else {
- intrinsic = "llvm.R600.load.input";
- arg_count = 1;
- }
-
- return lp_build_intrinsic(bb->gallivm->builder, intrinsic,
- bb->elem_type, &arg[0], arg_count, LLVMReadNoneAttribute);
-}
-#endif
-#if HAVE_LLVM >= 0x0304
static LLVMValueRef
llvm_face_select_helper(
struct radeon_llvm_context * ctx,
return LLVMBuildSelect(bb->gallivm->builder, is_front,
front_color, back_color, "");
}
-#else
-static LLVMValueRef
-llvm_face_select_helper(
- struct radeon_llvm_context * ctx,
- unsigned face_loc, LLVMValueRef front_color, LLVMValueRef back_color)
-{
- const struct lp_build_context * bb = &ctx->soa.bld_base.base;
- LLVMValueRef face = llvm_load_input_helper(ctx, face_loc, 0, 0);
- LLVMValueRef is_front = LLVMBuildFCmp(
- bb->gallivm->builder, LLVMRealUGT, face,
- lp_build_const_float(bb->gallivm, 0.0f), "");
- return LLVMBuildSelect(bb->gallivm->builder, is_front,
- front_color, back_color, "");
-}
-#endif
static void llvm_load_input(
struct radeon_llvm_context * ctx,
{
const struct r600_shader_io * input = &ctx->r600_inputs[input_index];
unsigned chan;
-#if HAVE_LLVM < 0x0304
- unsigned interp = 0;
- int ij_index;
-#endif
int two_side = (ctx->two_side && input->name == TGSI_SEMANTIC_COLOR);
LLVMValueRef v;
-#if HAVE_LLVM >= 0x0304
boolean require_interp_intrinsic = ctx->chip_class >= EVERGREEN &&
ctx->type == TGSI_PROCESSOR_FRAGMENT;
-#endif
-#if HAVE_LLVM >= 0x0304
if (require_interp_intrinsic && input->spi_sid) {
v = llvm_load_input_vector(ctx, input->lds_pos, input->ij_index,
(input->interpolate > 0));
lp_build_const_float(&(ctx->gallivm), 1.0f),
ctx->inputs[soa_index], "");
}
-}
-#else
- if (ctx->chip_class >= EVERGREEN && ctx->type == TGSI_PROCESSOR_FRAGMENT &&
- input->spi_sid) {
- interp = 1;
- ij_index = (input->interpolate > 0) ? input->ij_index : -1;
}
-
- for (chan = 0; chan < 4; chan++) {
- unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
- int loc;
-
- if (interp) {
- loc = 4 * input->lds_pos + chan;
- } else {
- if (input->name == TGSI_SEMANTIC_FACE)
- loc = 4 * ctx->face_gpr;
- else
- loc = 4 * input->gpr + chan;
- }
-
- v = llvm_load_input_helper(ctx, loc, interp, ij_index);
-
- if (two_side) {
- struct r600_shader_io * back_input =
- &ctx->r600_inputs[input->back_color_input];
- int back_loc = interp ? back_input->lds_pos : back_input->gpr;
- LLVMValueRef v2;
-
- back_loc = 4 * back_loc + chan;
- v2 = llvm_load_input_helper(ctx, back_loc, interp, ij_index);
- v = llvm_face_select_helper(ctx, 4 * ctx->face_gpr, v, v2);
- } else if (input->name == TGSI_SEMANTIC_POSITION &&
- ctx->type == TGSI_PROCESSOR_FRAGMENT && chan == 3) {
- /* RCP for fragcoord.w */
- v = LLVMBuildFDiv(ctx->gallivm.builder,
- lp_build_const_float(&(ctx->gallivm), 1.0f),
- v, "");
- }
-
- ctx->inputs[soa_index] = v;
- }
-#endif
}
static void llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
struct tgsi_shader_info shader_info;
struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
radeon_llvm_context_init(ctx);
-#if HAVE_LLVM >= 0x0304
LLVMTypeRef Arguments[32];
unsigned ArgumentsCount = 0;
for (unsigned i = 0; i < ctx->inputs_count; i++)
LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
LLVMAddAttribute(P, LLVMInRegAttribute);
}
-#else
- radeon_llvm_create_func(ctx, NULL, 0);
-#endif
tgsi_scan_shader(tokens, &shader_info);
bld_base->info = &shader_info;
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_TGSI_TXQS:
return 1;
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
return 16384;
case PIPE_CAP_MAX_VERTEX_STREAMS:
- return 1;
+ return family >= CHIP_CEDAR ? 4 : 1;
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
return 2047;
return PIPE_SHADER_IR_TGSI;
}
case PIPE_SHADER_CAP_DOUBLES:
+ if (rscreen->b.family == CHIP_CYPRESS ||
+ rscreen->b.family == CHIP_CAYMAN || rscreen->b.family == CHIP_ARUBA)
+ return 1;
return 0;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
#include "tgsi/tgsi_scan.h"
-#define R600_NUM_ATOMS 75
+#define R600_NUM_ATOMS 42
#define R600_MAX_VIEWPORTS 16
#define R600_TRACE_CS_DWORDS 7
#define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 3
+#define R600_MAX_DRIVER_CONST_BUFFERS 2
#define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
/* start driver buffers after user buffers */
-#define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
-#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
+#define R600_UCP_SIZE (4*4*8)
+#define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
+
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
/* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
* of 16 const buffers.
* UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
* In order to support d3d 11 mandated minimum of 15 user const buffers
* we'd have to squash all use cases into one driver buffer.
*/
-#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-
#define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4]))
#ifdef PIPE_ARCH_BIG_ENDIAN
#define R600_BIG_ENDIAN 0
#endif
-#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8)
-#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS)
-
struct r600_context;
struct r600_bytecode;
union r600_shader_key;
struct r600_viewport_state {
struct r600_atom atom;
- struct pipe_viewport_state state;
- int idx;
+ struct pipe_viewport_state state[R600_MAX_VIEWPORTS];
+ uint32_t dirty_mask;
};
struct r600_shader_stages_state {
struct r600_samplerview_state views;
struct r600_sampler_states states;
bool is_array_sampler[NUM_TEX_UNITS];
+};
- /* cube array txq workaround */
- uint32_t *txq_constants;
- /* buffer related workarounds */
- uint32_t *buffer_constants;
+struct r600_shader_driver_constants_info {
+ /* currently 128 bytes for UCP/samplepos + sampler buffer constants */
+ uint32_t *constants;
+ uint32_t alloc_size;
+ bool vs_ucp_dirty;
+ bool texture_const_dirty;
+ bool ps_sample_pos_dirty;
};
struct r600_constbuf_state
struct r600_scissor_state
{
struct r600_atom atom;
- struct pipe_scissor_state scissor;
+ struct pipe_scissor_state scissor[R600_MAX_VIEWPORTS];
+ uint32_t dirty_mask;
bool enable; /* r6xx only */
- int idx;
};
struct r600_fetch_shader {
/* State binding slots are here. */
struct r600_atom *atoms[R600_NUM_ATOMS];
/* Dirty atom bitmask for fast tests */
- unsigned long dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN];
+ uint64_t dirty_atoms;
/* States for CS initialization. */
struct r600_command_buffer start_cs_cmd; /* invariant state mostly */
/** Compute specific registers initializations. The start_cs_cmd atom
struct r600_poly_offset_state poly_offset_state;
struct r600_cso_state rasterizer_state;
struct r600_sample_mask sample_mask;
- struct r600_scissor_state scissor[R600_MAX_VIEWPORTS];
+ struct r600_scissor_state scissor;
struct r600_seamless_cube_map seamless_cube_map;
struct r600_config_state config_state;
struct r600_stencil_ref_state stencil_ref;
struct r600_vgt_state vgt_state;
- struct r600_viewport_state viewport[R600_MAX_VIEWPORTS];
+ struct r600_viewport_state viewport;
/* Shaders and shader resources. */
struct r600_cso_state vertex_fetch_shader;
struct r600_shader_state vertex_shader;
struct r600_gs_rings_state gs_rings;
struct r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES];
struct r600_textures_info samplers[PIPE_SHADER_TYPES];
+
+ struct r600_shader_driver_constants_info driver_consts[PIPE_SHADER_TYPES];
+
/** Vertex buffers for fetch shaders */
struct r600_vertexbuf_state vertex_buffer_state;
/** Vertex buffers for compute shaders */
void *sb_context;
struct r600_isa *isa;
+ float sample_positions[4 * 16];
};
static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
struct r600_atom *atom,
bool dirty)
{
- unsigned long mask;
- unsigned int w;
-
- atom->dirty = dirty;
+ uint64_t mask;
assert(atom->id != 0);
- w = atom->id / R600_DIRTY_ATOM_WORD_BITS;
- mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS);
+ assert(atom->id < sizeof(mask) * 8);
+ mask = 1ull << atom->id;
if (dirty)
- rctx->dirty_atoms[w] |= mask;
+ rctx->dirty_atoms |= mask;
else
- rctx->dirty_atoms[w] &= ~mask;
+ rctx->dirty_atoms &= ~mask;
}
static inline void r600_mark_atom_dirty(struct r600_context *rctx,
r600_set_atom_dirty(rctx, atom, true);
}
-static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx,
- unsigned int id)
-{
-#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ)
- unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS;
- unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS;
- unsigned long bits, mask = (1ul << bit) - 1;
-
- for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) {
- bits = rctx->dirty_atoms[w] & ~mask;
- if (bits == 0)
- continue;
- return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits);
- }
-
- return R600_NUM_ATOMS;
-#else
- for (; id < R600_NUM_ATOMS; id++) {
- bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] &
- (1ul << (id % R600_DIRTY_ATOM_WORD_BITS)));
- assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty));
- if (dirty)
- break;
- }
-
- return id;
-#endif
-}
-
void r600_trace_emit(struct r600_context *rctx);
static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw);
void r600_release_command_buffer(struct r600_command_buffer *cb);
-static inline void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_compute_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
- r600_write_context_reg_seq(cs, reg, num);
+ radeon_set_context_reg_seq(cs, reg, num);
/* Set the compute bit on the packet header */
cs->buf[cs->cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE;
}
-static inline void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CTL_CONST_OFFSET);
assert(cs->cdw+2+num <= cs->max_dw);
cs->buf[cs->cdw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
}
-static inline void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_compute_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
- r600_write_compute_context_reg_seq(cs, reg, 1);
+ radeon_compute_set_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static inline void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
+static inline void radeon_set_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
{
if (flag & RADEON_CP_PACKET3_COMPUTE_MODE) {
- r600_write_compute_context_reg(cs, reg, value);
+ radeon_compute_set_context_reg(cs, reg, value);
} else {
- r600_write_context_reg(cs, reg, value);
+ radeon_set_context_reg(cs, reg, value);
}
}
-static inline void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void radeon_set_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
- r600_write_ctl_const_seq(cs, reg, 1);
+ radeon_set_ctl_const_seq(cs, reg, 1);
radeon_emit(cs, value);
}
These 8xx t-slot only opcodes become vector ops, with all four
slots expecting the arguments on sources a and b. Result is
broadcast to all channels.
-MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
+MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64
These 8xx t-slot only opcodes become vector ops in the z, y, and
x slots.
EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
The compiler must issue the source argument to slots z, y, and x
*/
+#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
union r600_shader_key key);
for (i = 0; i < so->num_outputs; i++) {
unsigned mask = ((1 << so->output[i].num_components) - 1) <<
so->output[i].start_component;
- fprintf(stderr, " %i: MEM_STREAM0_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
- i, so->output[i].output_buffer,
+ fprintf(stderr, " %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
+ i,
+ so->output[i].stream,
+ so->output[i].output_buffer,
so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
so->output[i].register_index,
mask & 1 ? "x" : "",
bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
- unsigned export_shader = key.vs.as_es;
+ unsigned export_shader;
shader->shader.bc.isa = rctx->isa;
}
/* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */
use_sb &= !shader->shader.uses_index_registers;
+ /* disable SB for shaders using doubles */
+ use_sb &= !shader->shader.uses_doubles;
/* Check if the bytecode has already been built. When using the llvm
* backend, r600_shader_from_tgsi() will take care of building the
}
break;
case TGSI_PROCESSOR_VERTEX:
+ export_shader = key.vs.as_es;
if (rctx->b.chip_class >= EVERGREEN) {
if (export_shader)
evergreen_update_es_state(ctx, shader);
int gs_out_ring_offset;
int gs_next_vertex;
struct r600_shader *gs_for_vs;
- int gs_export_gpr_treg;
+ int gs_export_gpr_tregs[4];
+ const struct pipe_stream_output_info *gs_stream_output_info;
unsigned enabled_stream_buffers_mask;
};
int (*process)(struct r600_shader_ctx *ctx);
};
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind);
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
int j;
- if (i->Instruction.NumDstRegs > 1) {
+ if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
return -EINVAL;
}
memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
vtx.op = FETCH_OP_VFETCH;
- vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER;
+ vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
if (sample_id == NULL) {
vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
return 0;
}
-static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so)
+static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so,
+ int stream, unsigned *stream_item_size)
{
unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
+ unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
int i, j, r;
/* Sanity checking. */
/* Initialize locations where the outputs are stored. */
for (i = 0; i < so->num_outputs; i++) {
- so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
+ so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
+ start_comp[i] = so->output[i].start_component;
/* Lower outputs with dst_offset < start_component.
*
* We can only output 4D vectors with a write mask, e.g. we can
if (r)
return r;
}
- so->output[i].start_component = 0;
+ start_comp[i] = 0;
so_gpr[i] = tmp;
}
}
for (i = 0; i < so->num_outputs; i++) {
struct r600_bytecode_output output;
+ if (stream != -1 && stream != so->output[i].output_buffer)
+ continue;
+
memset(&output, 0, sizeof(struct r600_bytecode_output));
output.gpr = so_gpr[i];
- output.elem_size = so->output[i].num_components;
- output.array_base = so->output[i].dst_offset - so->output[i].start_component;
+ output.elem_size = so->output[i].num_components - 1;
+ if (output.elem_size == 2)
+ output.elem_size = 3; // 3 not supported, write 4 with junk at end
+ output.array_base = so->output[i].dst_offset - start_comp[i];
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
output.burst_count = 1;
/* array_size is an upper limit for the burst_count
* with MEM_STREAM instructions */
output.array_size = 0xFFF;
- output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component;
-
- ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer);
+ output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i];
if (ctx->bc->chip_class >= EVERGREEN) {
switch (so->output[i].output_buffer) {
output.op = CF_OP_MEM_STREAM0_BUF3;
break;
}
+ output.op += so->output[i].stream * 4;
+ assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
+ ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4;
} else {
switch (so->output[i].output_buffer) {
case 0:
output.op = CF_OP_MEM_STREAM3;
break;
}
+ ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer;
}
r = r600_bytecode_add_output(ctx->bc, &output);
if (r)
struct r600_bytecode_output output;
struct r600_bytecode_cf *cf_jump, *cf_pop,
*last_exp_pos = NULL, *last_exp_param = NULL;
- int i, next_clip_pos = 61, next_param = 0;
+ int i, j, next_clip_pos = 61, next_param = 0;
+ int ring;
cshader = calloc(1, sizeof(struct r600_pipe_shader));
if (!cshader)
ctx.bc->isa = rctx->isa;
+ cf_jump = NULL;
+ memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes));
+
/* R0.x = R0.x & 0x3fffffff */
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP2_AND_INT;
alu.last = 1;
r600_bytecode_add_alu(ctx.bc, &alu);
- /* PRED_SETE_INT __, R0.y, 0 */
- memset(&alu, 0, sizeof(alu));
- alu.op = ALU_OP2_PRED_SETE_INT;
- alu.src[0].chan = 1;
- alu.src[1].sel = V_SQ_ALU_SRC_0;
- alu.execute_mask = 1;
- alu.update_pred = 1;
- alu.last = 1;
- r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
-
- r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
- cf_jump = ctx.bc->cf_last;
-
/* fetch vertex data from GSVS ring */
for (i = 0; i < ocnt; ++i) {
struct r600_shader_io *out = &ctx.shader->output[i];
+
out->gpr = i + 1;
out->ring_offset = i * 16;
vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
vtx.offset = out->ring_offset;
vtx.dst_gpr = out->gpr;
+ vtx.src_gpr = 0;
vtx.dst_sel_x = 0;
vtx.dst_sel_y = 1;
vtx.dst_sel_z = 2;
r600_bytecode_add_vtx(ctx.bc, &vtx);
}
+ ctx.temp_reg = i + 1;
+ for (ring = 3; ring >= 0; --ring) {
+ bool enabled = false;
+ for (i = 0; i < so->num_outputs; i++) {
+ if (so->output[i].stream == ring) {
+ enabled = true;
+ break;
+ }
+ }
+ if (ring != 0 && !enabled) {
+ cshader->shader.ring_item_sizes[ring] = 0;
+ continue;
+ }
+
+ if (cf_jump) {
+ // Patch up jump label
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
+ cf_pop = ctx.bc->cf_last;
+
+ cf_jump->cf_addr = cf_pop->id + 2;
+ cf_jump->pop_count = 1;
+ cf_pop->cf_addr = cf_pop->id + 2;
+ cf_pop->pop_count = 1;
+ }
+
+ /* PRED_SETE_INT __, R0.y, ring */
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP2_PRED_SETE_INT;
+ alu.src[0].chan = 1;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = ring;
+ alu.execute_mask = 1;
+ alu.update_pred = 1;
+ alu.last = 1;
+ r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
- /* XXX handle clipvertex, streamout? */
- emit_streamout(&ctx, so);
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
+ cf_jump = ctx.bc->cf_last;
+
+ if (enabled)
+ emit_streamout(&ctx, so, ring, &cshader->shader.ring_item_sizes[ring]);
+ cshader->shader.ring_item_sizes[ring] = ocnt * 16;
+ }
/* export vertex data */
/* XXX factor out common code with r600_shader_from_tgsi ? */
for (i = 0; i < ocnt; ++i) {
struct r600_shader_io *out = &ctx.shader->output[i];
-
+ bool instream0 = true;
if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
continue;
+ for (j = 0; j < so->num_outputs; j++) {
+ if (so->output[j].register_index == i) {
+ if (so->output[j].stream == 0)
+ break;
+ if (so->output[j].stream > 0)
+ instream0 = false;
+ }
+ }
+ if (!instream0)
+ continue;
memset(&output, 0, sizeof(output));
output.gpr = out->gpr;
output.elem_size = 3;
}
gs->gs_copy_shader = cshader;
+ cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
ctx.bc->nstack = 1;
- cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
- cshader->shader.ring_item_size = ocnt * 16;
-
return r600_bytecode_build(ctx.bc);
}
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind)
{
struct r600_bytecode_output output;
int i, k, ring_offset;
+ int effective_stream = stream == -1 ? 0 : stream;
+ int idx = 0;
for (i = 0; i < ctx->shader->noutput; i++) {
if (ctx->gs_for_vs) {
if (ring_offset == -1)
continue;
- } else
- ring_offset = i * 16;
+ } else {
+ ring_offset = idx * 16;
+ idx++;
+ }
+ if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION)
+ continue;
/* next_ring_offset after parsing input decls contains total size of
* single vertex data, gs_next_vertex - current vertex index */
if (!ind)
ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;
- /* get a temp and add the ring offset to the next vertex base in the shader */
memset(&output, 0, sizeof(struct r600_bytecode_output));
output.gpr = ctx->shader->output[i].gpr;
output.elem_size = 3;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
else
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
- output.op = CF_OP_MEM_RING;
+ switch (stream) {
+ default:
+ case 0:
+ output.op = CF_OP_MEM_RING; break;
+ case 1:
+ output.op = CF_OP_MEM_RING1; break;
+ case 2:
+ output.op = CF_OP_MEM_RING2; break;
+ case 3:
+ output.op = CF_OP_MEM_RING3; break;
+ }
if (ind) {
output.array_base = ring_offset >> 2; /* in dwords */
output.array_size = 0xfff;
- output.index_gpr = ctx->gs_export_gpr_treg;
+ output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream];
} else
output.array_base = ring_offset >> 2; /* in dwords */
r600_bytecode_add_output(ctx->bc, &output);
}
if (ind) {
+ /* get a temp and add the ring offset to the next vertex base in the shader */
struct r600_bytecode_alu alu;
int r;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_ADD_INT;
- alu.src[0].sel = ctx->gs_export_gpr_treg;
+ alu.src[0].sel = ctx->gs_export_gpr_tregs[effective_stream];
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].value = ctx->gs_out_ring_offset >> 4;
- alu.dst.sel = ctx->gs_export_gpr_treg;
+ alu.dst.sel = ctx->gs_export_gpr_tregs[effective_stream];
alu.dst.write = 1;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
ctx.shader = shader;
ctx.native_integers = true;
- shader->vs_as_gs_a = key.vs.as_gs_a;
- shader->vs_as_es = key.vs.as_es;
r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
rscreen->has_compressed_msaa_texturing);
ctx.tokens = tokens;
tgsi_scan_shader(tokens, &ctx.info);
shader->indirect_files = ctx.info.indirect_files;
+
+ shader->uses_doubles = ctx.info.uses_doubles;
+
indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
tgsi_parse_init(&ctx.parse, tokens);
ctx.type = ctx.info.processor;
shader->processor_type = ctx.type;
ctx.bc->type = shader->processor_type;
- ring_outputs = key.vs.as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
+ if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+ shader->vs_as_gs_a = key.vs.as_gs_a;
+ shader->vs_as_es = key.vs.as_es;
+ }
+
+ ring_outputs = shader->vs_as_es || ctx.type == TGSI_PROCESSOR_GEOMETRY;
- if (key.vs.as_es) {
+ if (shader->vs_as_es) {
ctx.gs_for_vs = &rctx->gs_shader->current->shader;
} else {
ctx.gs_for_vs = NULL;
ctx.next_ring_offset = 0;
ctx.gs_out_ring_offset = 0;
ctx.gs_next_vertex = 0;
+ ctx.gs_stream_output_info = &so;
shader->uses_index_registers = false;
ctx.face_gpr = -1;
shader->nr_ps_color_exports = 0;
shader->nr_ps_max_color_exports = 0;
- shader->two_side = key.ps.color_two_side;
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
+ shader->two_side = key.ps.color_two_side;
/* register allocations */
/* Values [0,127] correspond to GPR[0..127].
ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2;
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
- ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 3;
- ctx.temp_reg = ctx.bc->ar_reg + 4;
+ ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3;
+ ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4;
+ ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5;
+ ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6;
+ ctx.temp_reg = ctx.bc->ar_reg + 7;
} else {
ctx.temp_reg = ctx.bc->ar_reg + 3;
}
ctx.nliterals = 0;
ctx.literals = NULL;
- shader->fs_write_all = FALSE;
- if (ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
- shader->fs_write_all = TRUE;
- shader->vs_position_window_space = FALSE;
- if (ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION])
- shader->vs_position_window_space = TRUE;
+ shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
+ shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
if (shader->vs_as_gs_a)
vs_add_primid_output(&ctx, key.vs.prim_id_out);
}
}
- shader->ring_item_size = ctx.next_ring_offset;
+ shader->ring_item_sizes[0] = ctx.next_ring_offset;
+ shader->ring_item_sizes[1] = 0;
+ shader->ring_item_sizes[2] = 0;
+ shader->ring_item_sizes[3] = 0;
/* Process two side if needed */
if (shader->two_side && ctx.colors_used) {
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
struct r600_bytecode_alu alu;
int r;
-
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_MOV;
- alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[0].value = 0;
- alu.dst.sel = ctx.gs_export_gpr_treg;
- alu.dst.write = 1;
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx.bc, &alu);
- if (r)
- return r;
+ for (j = 0; j < 4; j++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = 0;
+ alu.dst.sel = ctx.gs_export_gpr_tregs[j];
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx.bc, &alu);
+ if (r)
+ return r;
+ }
}
if (shader->two_side && ctx.colors_used) {
if ((r = process_twoside_color_inputs(&ctx)))
alu.src[0].chan = j;
alu.src[1].sel = 512 + i;
- alu.src[1].kc_bank = R600_UCP_CONST_BUFFER;
+ alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
alu.src[1].chan = j;
alu.dst.sel = clipdist_temp[oreg];
/* Add stream outputs. */
if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX &&
so.num_outputs && !use_llvm)
- emit_streamout(&ctx, &so);
+ emit_streamout(&ctx, &so, -1, NULL);
pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
convert_edgeflag_to_int(&ctx);
if (ring_outputs) {
- if (key.vs.as_es)
- emit_gs_ring_writes(&ctx, FALSE);
+ if (shader->vs_as_es) {
+ ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx);
+ ctx.gs_export_gpr_tregs[1] = -1;
+ ctx.gs_export_gpr_tregs[2] = -1;
+ ctx.gs_export_gpr_tregs[3] = -1;
+
+ emit_gs_ring_writes(&ctx, &so, -1, FALSE);
+ }
} else {
/* Export output */
next_clip_base = shader->vs_out_misc_write ? 62 : 61;
return lasti;
}
+
+
+static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap)
+{ /* Emit ctx->inst_info->op once per enabled destination channel of the
+   * current TGSI instruction (the 64-bit variant, going by the name).
+   *
+   * singledest: a one-bit write mask is widened to its channel pair
+   *             (0x1/0x2 -> 0x3, 0x4/0x8 -> 0xc) and the destination
+   *             write is disabled on channels 1 and 3.
+   * swap:       source operands 0 and 1 are exchanged.
+   *
+   * Returns 0, or the first non-zero r600_bytecode_add_alu() result.
+   */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ unsigned write_mask = inst->Dst[0].Register.WriteMask;
+ struct r600_bytecode_alu alu;
+ int i, j, r, lasti = tgsi_last_instruction(write_mask); /* lasti is recomputed below once the mask is final */
+ int use_tmp = 0; /* 0: write dst directly; otherwise 1 + the temp channel the result is copied from */
+
+ if (singledest) {
+ switch (write_mask) { /* masks other than the four single bits are left unchanged */
+ case 0x1:
+ write_mask = 0x3;
+ break;
+ case 0x2:
+ use_tmp = 1;
+ write_mask = 0x3;
+ break;
+ case 0x4:
+ write_mask = 0xc;
+ break;
+ case 0x8:
+ write_mask = 0xc;
+ use_tmp = 3;
+ break;
+ }
+ }
+
+ lasti = tgsi_last_instruction(write_mask);
+ for (i = 0; i <= lasti; i++) {
+
+ if (!(write_mask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ if (singledest) {
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (use_tmp) { /* redirect the result into ctx->temp_reg, same channel */
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ }
+ if (i == 1 || i == 3) /* overrides the write enable set just above */
+ alu.dst.write = 0;
+ } else
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+ alu.op = ctx->inst_info->op;
+ if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) {
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i); /* DABS reads source channel i unchanged */
+ } else if (!swap) {
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); /* fp64_switch() is defined outside this view; presumably remaps the channel within its pair -- confirm */
+ }
+ } else { /* swap: operand 1 feeds src[0] and operand 0 feeds src[1] */
+ r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i));
+ r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i));
+ }
+
+ /* handle some special cases */
+ if (i == 1 || i == 3) { /* source modifiers are applied on channels 1 and 3 only */
+ switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) {
+ case TGSI_OPCODE_SUB:
+ r600_bytecode_src_toggle_neg(&alu.src[1]);
+ break;
+ case TGSI_OPCODE_DABS:
+ r600_bytecode_src_set_abs(&alu.src[0]);
+ break;
+ default:
+ break;
+ }
+ }
+ if (i == lasti) {
+ alu.last = 1;
+ }
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ if (use_tmp) {
+ write_mask = inst->Dst[0].Register.WriteMask; /* back to the instruction's original, un-widened mask */
+
+ /* move result from temp to dst */
+ for (i = 0; i <= lasti; i++) {
+ if (!(write_mask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = use_tmp - 1; /* temp channel 0 for mask 0x2, channel 2 for mask 0x8 */
+ alu.last = (i == lasti);
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ }
+ return 0;
+}
+
+static int tgsi_op2_64(struct r600_shader_ctx *ctx)
+{ /* Check the destination write mask, then forward to
+   * tgsi_op2_64_params() with singledest and swap both false.
+   * Returns -1 when the mask is rejected, otherwise the callee's result.
+   */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ unsigned write_mask = inst->Dst[0].Register.WriteMask;
+ /* confirm writemasking */
+ if ((write_mask & 0x3) != 0x3 && /* rejected only when neither pair (bits 0-1, bits 2-3) is fully set */
+ (write_mask & 0xc) != 0xc) {
+ fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask);
+ return -1;
+ }
+ return tgsi_op2_64_params(ctx, false, false);
+}
+
+static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx)
+{ /* Forward to tgsi_op2_64_params() with singledest = true, swap = false. */
+ return tgsi_op2_64_params(ctx, true, false);
+}
+
+static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx)
+{ /* Forward to tgsi_op2_64_params() with singledest = true, swap = true. */
+ return tgsi_op2_64_params(ctx, true, true);
+}
+
+static int tgsi_op3_64(struct r600_shader_ctx *ctx)
+{ /* Emit ctx->inst_info->op as a three-source ALU op (is_op3 set) in all
+   * four channels, closing the group on channel 3.
+   * Returns 0, or the first non-zero r600_bytecode_add_alu() result.
+   */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int i, j, r;
+ int lasti = 3; /* always iterate over all four channels, whatever the write mask */
+ int tmp = r600_get_temp(ctx); /* destination register for channels not in the write mask */
+
+ for (i = 0; i < lasti + 1; i++) {
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 : 1); /* source channel 1 for slots 0-2, channel 0 for slot 3 */
+ }
+
+ if (inst->Dst[0].Register.WriteMask & (1 << i))
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ else
+ alu.dst.sel = tmp; /* dst.write stays 0 from the memset on this path */
+
+ alu.dst.chan = i;
+ alu.is_op3 = 1;
+ if (i == lasti) {
+ alu.last = 1;
+ }
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
}
+static int tgsi_dneg(struct r600_shader_ctx *ctx)
+{ /* Emit one MOV per enabled destination channel, copying the same
+   * channel of source 0; the negate modifier is toggled on channels 1
+   * and 3 only.
+   * Returns 0, or the first non-zero r600_bytecode_add_alu() result.
+   */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int i, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+ for (i = 0; i < lasti + 1; i++) {
+
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+
+ if (i == 1 || i == 3) /* presumably the dwords carrying the sign bit of each double -- confirm */
+ r600_bytecode_src_toggle_neg(&alu.src[0]);
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+ if (i == lasti) {
+ alu.last = 1;
+ }
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+
+}
+
+static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
+{ /* Emit ctx->inst_info->op in all four channels into ctx->temp_reg,
+   * then copy temp channels 2 and 3 into a channel pair of Dst[0] and
+   * temp channel 1 into the first enabled channel of Dst[1].
+   * Returns 0, or the first non-zero r600_bytecode_add_alu() result.
+   */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ unsigned write_mask = inst->Dst[0].Register.WriteMask;
+ int i, j, r;
+ int firsti = write_mask == 0xc ? 2 : 0; /* first Dst[0] channel to fill: 2 only when the mask is exactly 0xc */
+
+ for (i = 0; i <= 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); /* fp64_switch() is defined outside this view -- confirm its channel mapping */
+ }
+
+ if (i == 3)
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ /* MOV first two channels to writemask dst0 */
+ for (i = 0; i <= 1; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].chan = i + 2; /* reads temp channels 2 and 3 */
+ alu.src[0].sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1; /* write only if that channel is in the mask */
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i <= 3; i++) {
+ if (inst->Dst[1].Register.WriteMask & (1 << i)) {
+ /* MOV third channels to writemask dst1 */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].chan = 1; /* the source is temp channel 1 */
+ alu.src[0].sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst);
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ break; /* only the lowest enabled Dst[1] channel is written */
+ }
+ }
+ return 0;
+}
+
+
+static int egcm_int_to_double(struct r600_shader_ctx *ctx)
+{ /* Two-stage lowering for TGSI I2D / U2D: first apply
+   * ctx->inst_info->op to source channels into ctx->temp_reg, then emit
+   * ALU_OP1_FLT32_TO_FLT64 per destination channel.
+   * Returns 0, or the first non-zero r600_bytecode_add_alu() result.
+   */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int i, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+ assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D ||
+ inst->Instruction.Opcode == TGSI_OPCODE_U2D);
+
+ for (i = 0; i <= (lasti+1)/2; i++) { /* NOTE(review): inclusive bound gives (lasti+1)/2 + 1 passes, one more temp channel than the loop below reads -- confirm */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ alu.last = 1; /* each conversion closes its own instruction group */
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i <= lasti; i++) { /* no write-mask test here: every channel up to lasti is emitted */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_FLT32_TO_FLT64;
+
+ alu.src[0].chan = i/2;
+ if (i%2 == 0)
+ alu.src[0].sel = ctx->temp_reg; /* even channels read the converted value, temp channel i/2 */
+ else {
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; /* odd channels take a literal zero source */
+ alu.src[0].value = 0x0;
+ }
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.last = i == lasti;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int egcm_double_to_int(struct r600_shader_ctx *ctx)
+{ /* Two-stage lowering for TGSI D2I / D2U: first emit
+   * ALU_OP1_FLT64_TO_FLT32 into ctx->temp_reg, then apply
+   * ctx->inst_info->op to the temp values and write the destination.
+   * Returns 0, or the first non-zero r600_bytecode_add_alu() result.
+   */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int i, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+ assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I ||
+ inst->Instruction.Opcode == TGSI_OPCODE_D2U);
+
+ for (i = 0; i <= lasti; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_FLT64_TO_FLT32;
+
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], fp64_switch(i)); /* fp64_switch() is defined outside this view -- confirm its channel mapping */
+ alu.dst.chan = i;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = i%2 == 0; /* only even temp channels are actually written */
+ alu.last = i == lasti;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i <= (lasti+1)/2; i++) { /* NOTE(review): if lasti can be 3 this reaches i == 2, i.e. source channel 4 below -- confirm */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+
+ alu.src[0].chan = i*2;
+ alu.src[0].sel = ctx->temp_reg;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); /* NOTE(review): destination channel is the constant 0 on every pass -- confirm intended */
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
+{ /* Emit a single-source op (ctx->inst_info->op) in three ALU slots
+   * with results in ctx->temp_reg, then MOV temp channels 0/1 into the
+   * enabled destination channels.
+   * Returns 0, or the first non-zero r600_bytecode_add_alu() result.
+   */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int i, r;
+ struct r600_bytecode_alu alu;
+ int last_slot = 3; /* number of slots the op is issued in (i = 0..2) */
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+ int t1 = ctx->temp_reg;
+
+ /* these have to write the result to X/Y by the looks of it */
+ for (i = 0 ; i < last_slot; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+
+ /* should only be one src regs */
+ assert (inst->Instruction.NumSrcRegs == 1);
+
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); /* both ALU operands come from TGSI source 0: channel 1 ... */
+ r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); /* ... and channel 0 */
+
+ /* RSQ should take the absolute value of src */
+ if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
+ ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) {
+ r600_bytecode_src_set_abs(&alu.src[1]);
+ }
+ alu.dst.sel = t1;
+ alu.dst.chan = i;
+ alu.dst.write = (i == 0 || i == 1); /* slot 2 is issued but its result is not written */
+
+ if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1) /* off Cayman every slot closes its own group */
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (i = 0 ; i <= lasti; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = t1;
+ alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1; /* even dst channels take temp channel 0, odd take temp channel 1 */
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.write = 1;
+ if (i == lasti)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
return 0;
}
+
+/*
+ * Emit a double-precision multiply-style op (the opcode in ctx->inst_info->op).
+ *
+ * For each 64-bit destination pair k (k = 0 for channels 0-1, k = 1 for
+ * channels 2-3) that has at least one channel enabled in the write mask, the
+ * op is issued four times, once per channel of ctx->temp_reg. The first three
+ * issues read source index k*2+1 and the fourth reads index k*2; only the
+ * fourth sets alu.last. Afterwards each enabled destination channel i is
+ * filled by a MOV from temp channel i.
+ *
+ * NOTE(review): both k passes write the same four temp channels, and the copy
+ * loop only runs after both passes. If the write mask enables both pairs, the
+ * second pass overwrites the first pass's result before it is copied out.
+ * Confirm that callers only ever use a single pair per instruction.
+ *
+ * Returns 0 on success, or the first non-zero value returned by
+ * r600_bytecode_add_alu().
+ */
+static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int i, j, k, r;
+ struct r600_bytecode_alu alu;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+ int t1 = ctx->temp_reg;
+
+ for (k = 0; k < 2; k++) {
+ /* skip a pair when neither of its two channels is written */
+ if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
+ continue;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+ /* slots 0-2 read index k*2+1 of every source, slot 3 reads k*2 */
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
+ }
+ alu.dst.sel = t1;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ }
+
+ /* Copy temp channel i into each destination channel i enabled in the mask. */
+ for (i = 0; i <= lasti; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = t1;
+ alu.src[0].chan = i;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.write = 1;
+ if (i == lasti)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
/*
* r600 - trunc to -PI..PI range
* r700 - normalize by dividing by 2PI
alu.src[0].sel = vtx.dst_gpr;
alu.src[0].chan = i;
- alu.src[1].sel = 512 + (id * 2);
+ alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL;
+ alu.src[1].sel += (id * 2);
alu.src[1].chan = i % 4;
alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
alu.src[0].sel = vtx.dst_gpr;
alu.src[0].chan = 3;
- alu.src[1].sel = 512 + (id * 2) + 1;
+ alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
alu.src[1].chan = 0;
alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
-
+ alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
if (ctx->bc->chip_class >= EVERGREEN) {
/* channel 0 or 2 of each word */
- alu.src[0].sel = 512 + (id / 2);
+ alu.src[0].sel += (id / 2);
alu.src[0].chan = (id % 2) * 2;
} else {
/* r600 we have them at channel 2 of the second dword */
- alu.src[0].sel = 512 + (id * 2) + 1;
+ alu.src[0].sel += (id * 2) + 1;
alu.src[0].chan = 1;
}
alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
/* Texture fetch instructions can only use gprs as source.
* Also they cannot negate the source or take the absolute value */
const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQS &&
tgsi_tex_src_requires_loading(ctx, 0)) ||
read_compressed_msaa || txf_add_offsets;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
if (ctx->bc->chip_class >= EVERGREEN) {
/* channel 1 or 3 of each word */
- alu.src[0].sel = 512 + (id / 2);
+ alu.src[0].sel += (id / 2);
alu.src[0].chan = ((id % 2) * 2) + 1;
} else {
/* r600 we have them at channel 2 of the second dword */
- alu.src[0].sel = 512 + (id * 2) + 1;
+ alu.src[0].sel += (id * 2) + 1;
alu.src[0].chan = 2;
}
alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
tex.dst_sel_z = 7;
tex.dst_sel_w = 7;
}
+ else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
+ tex.dst_sel_x = 3;
+ tex.dst_sel_y = 7;
+ tex.dst_sel_z = 7;
+ tex.dst_sel_w = 7;
+ }
else {
tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
}
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
tex.src_sel_x = 4;
tex.src_sel_y = 4;
tex.src_sel_z = 4;
+/*
+ * Emit the control-flow instruction in ctx->inst_info->op for a geometry
+ * shader. For CF_OP_EMIT_VERTEX the ring writes for the selected stream are
+ * emitted first. When the CF instruction is added successfully, the stream
+ * index is stored in its count field.
+ *
+ * Returns the result of r600_bytecode_add_cfinst().
+ */
static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ /* stream index: the literal selected by Src[0]'s register index and X swizzle */
+ int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX];
+ int r;
+
if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
- emit_gs_ring_writes(ctx, TRUE);
+ emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE);
- return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+ r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+ if (!r)
+ ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream
+ return r;
}
static int tgsi_umad(struct r600_shader_ctx *ctx)
[TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
[TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
- [104] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
[105] = { ALU_OP0_NOP, tgsi_unsupported},
[106] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
[TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
- [104] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
[105] = { ALU_OP0_NOP, tgsi_unsupported},
[106] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm},
[TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm},
[TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm},
+ [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
+ [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64},
+ [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
+ [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
+ [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+ [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
+ [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
+ [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
+ [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr},
+ [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr},
+ [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64},
+ [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64},
+ [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64},
+ [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp},
+ [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
+ [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
+ [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
+ [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
+ [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
[TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported},
};
[TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
[TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
- [104] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
[105] = { ALU_OP0_NOP, tgsi_unsupported},
[106] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm},
[TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm},
[TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm},
+ [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
+ [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64},
+ [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
+ [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
+ [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+ [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
+ [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
+ [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
+ [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr},
+ [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr},
+ [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64},
+ [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64},
+ [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64},
+ [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp},
+ [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
+ [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
+ [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
+ [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
+ [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
[TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported},
};
/* Temporarily workaround SB not handling CF_INDEX_[01] index registers */
boolean uses_index_registers;
- /* size in bytes of a data item in the ring (single vertex data) */
- unsigned ring_item_size;
+ /* Size in bytes of a data item in the ring(s) (single vertex data).
+ For stages with only one ring, items 1, 2 and 3 are set to 0. */
+ unsigned ring_item_sizes[4];
unsigned indirect_files;
unsigned max_arrays;
unsigned vs_as_gs_a;
unsigned ps_prim_id_input;
struct r600_shader_array * arrays;
+
+ boolean uses_doubles;
};
union r600_shader_key {
default:;
}
- r600_write_context_reg_seq(cs, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ radeon_set_context_reg_seq(cs, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
radeon_emit(cs, fui(offset_scale));
radeon_emit(cs, fui(offset_units));
radeon_emit(cs, fui(offset_scale));
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
struct pipe_clip_state *state = &rctx->clip_state.state;
- r600_write_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4);
+ radeon_set_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4);
radeon_emit_array(cs, (unsigned*)state, 6*4);
}
+/*
+ * Emit the viewport scissor registers for every scissor whose bit is set in
+ * rctx->scissor.dirty_mask, then clear the dirty mask and the atom's dword
+ * count. On R600 with the scissor disabled, a fixed (0,0)-(8192,8192)
+ * rectangle is written for each dirty slot instead of the stored state.
+ */
static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
- struct r600_scissor_state *rstate = (struct r600_scissor_state *)atom;
- struct pipe_scissor_state *state = &rstate->scissor;
- unsigned offset = rstate->idx * 4 * 2;
-
- if (rctx->b.chip_class != R600 || rctx->scissor[0].enable) {
- r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
- radeon_emit(cs, S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) |
- S_028240_WINDOW_OFFSET_DISABLE(1));
- radeon_emit(cs, S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy));
- } else {
- r600_write_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
- radeon_emit(cs, S_028240_TL_X(0) | S_028240_TL_Y(0) |
- S_028240_WINDOW_OFFSET_DISABLE(1));
- radeon_emit(cs, S_028244_BR_X(8192) | S_028244_BR_Y(8192));
+ struct r600_scissor_state *rstate = &rctx->scissor;
+ struct pipe_scissor_state *state;
+ bool do_disable_workaround = false;
+ uint32_t dirty_mask;
+ unsigned i, offset;
+ uint32_t tl, br;
+
+ /* R600 with the scissor disabled: precompute one fixed rectangle that is
+ * reused for every dirty slot below. */
+ if (rctx->b.chip_class == R600 && !rctx->scissor.enable) {
+ tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1);
+ br = S_028244_BR_X(8192) | S_028244_BR_Y(8192);
+ do_disable_workaround = true;
+ }
+
+ /* Walk a local copy of the mask; u_bit_scan() modifies its argument. */
+ dirty_mask = rstate->dirty_mask;
+ while (dirty_mask != 0)
+ {
+ i = u_bit_scan(&dirty_mask);
+ /* two registers (TL, BR) per scissor slot */
+ offset = i * 4 * 2;
+ radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 2);
+ /* without the workaround, tl/br are built from this slot's stored state */
+ if (!do_disable_workaround) {
+ state = &rstate->scissor[i];
+ tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) |
+ S_028240_WINDOW_OFFSET_DISABLE(1);
+ br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
+ }
+ radeon_emit(cs, tl);
+ radeon_emit(cs, br);
}
+ /* everything dirty has been emitted */
+ rstate->dirty_mask = 0;
+ rstate->atom.num_dw = 0;
}
static void r600_set_scissor_states(struct pipe_context *ctx,
const struct pipe_scissor_state *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
+ struct r600_scissor_state *rstate = &rctx->scissor;
int i;
- for (i = start_slot ; i < start_slot + num_scissors; i++) {
- rctx->scissor[i].scissor = state[i - start_slot];
- }
+ for (i = start_slot ; i < start_slot + num_scissors; i++)
+ rstate->scissor[i] = state[i - start_slot];
+ rstate->dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+ rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 4;
- if (rctx->b.chip_class == R600 && !rctx->scissor[0].enable)
+ if (rctx->b.chip_class == R600 && !rstate->enable)
return;
- for (i = start_slot ; i < start_slot + num_scissors; i++) {
- r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
- }
+ r600_mark_atom_dirty(rctx, &rstate->atom);
}
static struct r600_resource *r600_buffer_create_helper(struct r600_screen *rscreen,
/* CMASK. */
if (!rctx->dummy_cmask ||
- rctx->dummy_cmask->buf->size < cmask.size ||
+ rctx->dummy_cmask->b.b.width0 < cmask.size ||
rctx->dummy_cmask->buf->alignment % cmask.alignment != 0) {
struct pipe_transfer *transfer;
void *ptr;
/* FMASK. */
if (!rctx->dummy_fmask ||
- rctx->dummy_fmask->buf->size < fmask.size ||
+ rctx->dummy_fmask->b.b.width0 < fmask.size ||
rctx->dummy_fmask->buf->alignment % fmask.alignment != 0) {
pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, NULL);
rctx->dummy_fmask = r600_buffer_create_helper(rscreen, fmask.size, fmask.alignment);
nr_samples = 0;
break;
case 2:
- r600_write_config_reg(cs, R_008B40_PA_SC_AA_SAMPLE_LOCS_2S, sample_locs_2x[0]);
+ radeon_set_config_reg(cs, R_008B40_PA_SC_AA_SAMPLE_LOCS_2S, sample_locs_2x[0]);
max_dist = max_dist_2x;
break;
case 4:
- r600_write_config_reg(cs, R_008B44_PA_SC_AA_SAMPLE_LOCS_4S, sample_locs_4x[0]);
+ radeon_set_config_reg(cs, R_008B44_PA_SC_AA_SAMPLE_LOCS_4S, sample_locs_4x[0]);
max_dist = max_dist_4x;
break;
case 8:
- r600_write_config_reg_seq(cs, R_008B48_PA_SC_AA_SAMPLE_LOCS_8S_WD0, 2);
+ radeon_set_config_reg_seq(cs, R_008B48_PA_SC_AA_SAMPLE_LOCS_8S_WD0, 2);
radeon_emit(cs, sample_locs_8x[0]); /* R_008B48_PA_SC_AA_SAMPLE_LOCS_8S_WD0 */
radeon_emit(cs, sample_locs_8x[1]); /* R_008B4C_PA_SC_AA_SAMPLE_LOCS_8S_WD1 */
max_dist = max_dist_8x;
} else {
switch (nr_samples) {
default:
- r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
+ radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
radeon_emit(cs, 0); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */
radeon_emit(cs, 0); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */
nr_samples = 0;
break;
case 2:
- r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
+ radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
radeon_emit(cs, sample_locs_2x[0]); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */
radeon_emit(cs, sample_locs_2x[1]); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */
max_dist = max_dist_2x;
break;
case 4:
- r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
+ radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
radeon_emit(cs, sample_locs_4x[0]); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */
radeon_emit(cs, sample_locs_4x[1]); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */
max_dist = max_dist_4x;
break;
case 8:
- r600_write_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
+ radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 2);
radeon_emit(cs, sample_locs_8x[0]); /* R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX */
radeon_emit(cs, sample_locs_8x[1]); /* R_028C20_PA_SC_AA_SAMPLE_LOCS_8D_WD1_MCTX */
max_dist = max_dist_8x;
}
if (nr_samples > 1) {
- r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
+ radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, S_028C00_LAST_PIXEL(1) |
S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
} else {
- r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
+ radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
}
unsigned i, sbu = 0;
/* Colorbuffers. */
- r600_write_context_reg_seq(cs, R_0280A0_CB_COLOR0_INFO, 8);
+ radeon_set_context_reg_seq(cs, R_0280A0_CB_COLOR0_INFO, 8);
for (i = 0; i < nr_cbufs; i++) {
radeon_emit(cs, cb[i] ? cb[i]->cb_color_info : 0);
}
continue;
/* COLOR_BASE */
- r600_write_context_reg(cs, R_028040_CB_COLOR0_BASE + i*4, cb[i]->cb_color_base);
+ radeon_set_context_reg(cs, R_028040_CB_COLOR0_BASE + i*4, cb[i]->cb_color_base);
- reloc = r600_context_bo_reloc(&rctx->b,
+ reloc = radeon_add_to_buffer_list(&rctx->b,
&rctx->b.rings.gfx,
(struct r600_resource*)cb[i]->base.texture,
RADEON_USAGE_READWRITE,
radeon_emit(cs, reloc);
/* FMASK */
- r600_write_context_reg(cs, R_0280E0_CB_COLOR0_FRAG + i*4, cb[i]->cb_color_fmask);
+ radeon_set_context_reg(cs, R_0280E0_CB_COLOR0_FRAG + i*4, cb[i]->cb_color_fmask);
- reloc = r600_context_bo_reloc(&rctx->b,
+ reloc = radeon_add_to_buffer_list(&rctx->b,
&rctx->b.rings.gfx,
cb[i]->cb_buffer_fmask,
RADEON_USAGE_READWRITE,
radeon_emit(cs, reloc);
/* CMASK */
- r600_write_context_reg(cs, R_0280C0_CB_COLOR0_TILE + i*4, cb[i]->cb_color_cmask);
+ radeon_set_context_reg(cs, R_0280C0_CB_COLOR0_TILE + i*4, cb[i]->cb_color_cmask);
- reloc = r600_context_bo_reloc(&rctx->b,
+ reloc = radeon_add_to_buffer_list(&rctx->b,
&rctx->b.rings.gfx,
cb[i]->cb_buffer_cmask,
RADEON_USAGE_READWRITE,
radeon_emit(cs, reloc);
}
- r600_write_context_reg_seq(cs, R_028060_CB_COLOR0_SIZE, nr_cbufs);
+ radeon_set_context_reg_seq(cs, R_028060_CB_COLOR0_SIZE, nr_cbufs);
for (i = 0; i < nr_cbufs; i++) {
radeon_emit(cs, cb[i] ? cb[i]->cb_color_size : 0);
}
- r600_write_context_reg_seq(cs, R_028080_CB_COLOR0_VIEW, nr_cbufs);
+ radeon_set_context_reg_seq(cs, R_028080_CB_COLOR0_VIEW, nr_cbufs);
for (i = 0; i < nr_cbufs; i++) {
radeon_emit(cs, cb[i] ? cb[i]->cb_color_view : 0);
}
- r600_write_context_reg_seq(cs, R_028100_CB_COLOR0_MASK, nr_cbufs);
+ radeon_set_context_reg_seq(cs, R_028100_CB_COLOR0_MASK, nr_cbufs);
for (i = 0; i < nr_cbufs; i++) {
radeon_emit(cs, cb[i] ? cb[i]->cb_color_mask : 0);
}
/* Zbuffer. */
if (state->zsbuf) {
struct r600_surface *surf = (struct r600_surface*)state->zsbuf;
- unsigned reloc = r600_context_bo_reloc(&rctx->b,
+ unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
&rctx->b.rings.gfx,
(struct r600_resource*)state->zsbuf->texture,
RADEON_USAGE_READWRITE,
RADEON_PRIO_DEPTH_BUFFER_MSAA :
RADEON_PRIO_DEPTH_BUFFER);
- r600_write_context_reg(cs, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+ radeon_set_context_reg(cs, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
surf->pa_su_poly_offset_db_fmt_cntl);
- r600_write_context_reg_seq(cs, R_028000_DB_DEPTH_SIZE, 2);
+ radeon_set_context_reg_seq(cs, R_028000_DB_DEPTH_SIZE, 2);
radeon_emit(cs, surf->db_depth_size); /* R_028000_DB_DEPTH_SIZE */
radeon_emit(cs, surf->db_depth_view); /* R_028004_DB_DEPTH_VIEW */
- r600_write_context_reg_seq(cs, R_02800C_DB_DEPTH_BASE, 2);
+ radeon_set_context_reg_seq(cs, R_02800C_DB_DEPTH_BASE, 2);
radeon_emit(cs, surf->db_depth_base); /* R_02800C_DB_DEPTH_BASE */
radeon_emit(cs, surf->db_depth_info); /* R_028010_DB_DEPTH_INFO */
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, reloc);
- r600_write_context_reg(cs, R_028D34_DB_PREFETCH_LIMIT, surf->db_prefetch_limit);
+ radeon_set_context_reg(cs, R_028D34_DB_PREFETCH_LIMIT, surf->db_prefetch_limit);
sbu |= SURFACE_BASE_UPDATE_DEPTH;
} else if (rctx->screen->b.info.drm_minor >= 18) {
/* DRM 2.6.18 allows the INVALID format to disable depth/stencil.
* Older kernels are out of luck. */
- r600_write_context_reg(cs, R_028010_DB_DEPTH_INFO, S_028010_FORMAT(V_028010_DEPTH_INVALID));
+ radeon_set_context_reg(cs, R_028010_DB_DEPTH_INFO, S_028010_FORMAT(V_028010_DEPTH_INVALID));
}
/* SURFACE_BASE_UPDATE */
}
/* Framebuffer dimensions. */
- r600_write_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
+ radeon_set_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
radeon_emit(cs, S_028240_TL_X(0) | S_028240_TL_Y(0) |
S_028240_WINDOW_OFFSET_DISABLE(1)); /* R_028204_PA_SC_WINDOW_SCISSOR_TL */
radeon_emit(cs, S_028244_BR_X(state->width) |
S_028244_BR_Y(state->height)); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */
if (rctx->framebuffer.is_msaa_resolve) {
- r600_write_context_reg(cs, R_0287A0_CB_SHADER_CONTROL, 1);
+ radeon_set_context_reg(cs, R_0287A0_CB_SHADER_CONTROL, 1);
} else {
/* Always enable the first colorbuffer in CB_SHADER_CONTROL. This
* will assure that the alpha-test will work even if there is
* no colorbuffer bound. */
- r600_write_context_reg(cs, R_0287A0_CB_SHADER_CONTROL,
+ radeon_set_context_reg(cs, R_0287A0_CB_SHADER_CONTROL,
(1ull << MAX2(nr_cbufs, 1)) - 1);
}
struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) {
- r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
+ radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
if (rctx->b.chip_class == R600) {
radeon_emit(cs, 0xff); /* R_028238_CB_TARGET_MASK */
radeon_emit(cs, 0xff); /* R_02823C_CB_SHADER_MASK */
radeon_emit(cs, 0xf); /* R_028238_CB_TARGET_MASK */
radeon_emit(cs, 0xf); /* R_02823C_CB_SHADER_MASK */
}
- r600_write_context_reg(cs, R_028808_CB_COLOR_CONTROL, a->cb_color_control);
+ radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, a->cb_color_control);
} else {
unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
unsigned multiwrite = a->multiwrite && a->nr_cbufs > 1;
- r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
+ radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */
/* Always enable the first color output to make sure alpha-test works even without one. */
radeon_emit(cs, 0xf | (multiwrite ? fb_colormask : ps_colormask)); /* R_02823C_CB_SHADER_MASK */
- r600_write_context_reg(cs, R_028808_CB_COLOR_CONTROL,
+ radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL,
a->cb_color_control |
S_028808_MULTIWRITE_ENABLE(multiwrite));
}
struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
unsigned reloc_idx;
- r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
- r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
- r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
- reloc_idx = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
+ radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
+ radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
+ radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
+ reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] = reloc_idx;
} else {
- r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 0);
+ radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 0);
}
}
db_render_override |= S_028D10_MAX_TILES_IN_DTT(6);
}
- r600_write_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
+ radeon_set_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
radeon_emit(cs, db_render_control); /* R_028D0C_DB_RENDER_CONTROL */
radeon_emit(cs, db_render_override); /* R_028D10_DB_RENDER_OVERRIDE */
- r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
+ radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
}
static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
struct r600_config_state *a = (struct r600_config_state*)atom;
- r600_write_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1);
- r600_write_config_reg(cs, R_008C08_SQ_GPR_RESOURCE_MGMT_2, a->sq_gpr_resource_mgmt_2);
+ radeon_set_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1);
+ radeon_set_config_reg(cs, R_008C08_SQ_GPR_RESOURCE_MGMT_2, a->sq_gpr_resource_mgmt_2);
}
static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
offset = vb->buffer_offset;
- /* fetch resources start at index 320 */
+ /* fetch resources start at index 320 (OFFSET_FS) */
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
- radeon_emit(cs, (320 + buffer_index) * 7);
+ radeon_emit(cs, (R600_FETCH_CONSTANTS_OFFSET_FS + buffer_index) * 7);
radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
- radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+ radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
radeon_emit(cs, /* RESOURCEi_WORD2 */
S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
S_038008_STRIDE(vb->stride));
radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
}
}
offset = cb->buffer_offset;
if (!gs_ring_buffer) {
- r600_write_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
+ radeon_set_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
ALIGN_DIVUP(cb->buffer_size >> 4, 16));
- r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);
+ radeon_set_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);
}
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
radeon_emit(cs, (buffer_id_base + buffer_index) * 7);
radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
- radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+ radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
radeon_emit(cs, /* RESOURCEi_WORD2 */
S_038008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
S_038008_STRIDE(gs_ring_buffer ? 4 : 16));
radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
dirty_mask &= ~(1 << buffer_index);
+/* Emit the vertex-shader constant buffer state. R600_FETCH_CONSTANTS_OFFSET_VS
+ * replaces the literal 160 as the third argument (presumably the fetch
+ * resource id base -- confirm against r600_emit_constant_buffers()). */
static void r600_emit_vs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
- r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX], 160,
+ r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX],
+ R600_FETCH_CONSTANTS_OFFSET_VS,
R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
R_028980_ALU_CONST_CACHE_VS_0);
}
+/* Emit the geometry-shader constant buffer state. R600_FETCH_CONSTANTS_OFFSET_GS
+ * replaces the literal 336 as the third argument (presumably the fetch
+ * resource id base -- confirm against r600_emit_constant_buffers()). */
static void r600_emit_gs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
- r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY], 336,
+ r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY],
+ R600_FETCH_CONSTANTS_OFFSET_GS,
R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0,
R_0289C0_ALU_CONST_CACHE_GS_0);
}
+/* Emit the fragment-shader constant buffer state. R600_FETCH_CONSTANTS_OFFSET_PS
+ * replaces the literal 0 as the third argument (presumably the fetch
+ * resource id base -- confirm against r600_emit_constant_buffers()). */
static void r600_emit_ps_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
- r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT], 0,
+ r600_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT],
+ R600_FETCH_CONSTANTS_OFFSET_PS,
R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
R_028940_ALU_CONST_CACHE_PS_0);
}
radeon_emit(cs, (resource_id_base + resource_index) * 7);
radeon_emit_array(cs, rview->tex_resource_words, 7);
- reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
+ reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
RADEON_USAGE_READ,
rview->tex_resource->b.b.nr_samples > 1 ?
RADEON_PRIO_SHADER_TEXTURE_MSAA :
state->dirty_mask = 0;
}
-/* Resource IDs:
- * PS: 0 .. +160
- * VS: 160 .. +160
- * FS: 320 .. +16
- * GS: 336 .. +160
- */
+/* Emit the vertex-shader sampler views, passing
+ * R600_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS as the third
+ * argument in place of the literal 160 + R600_MAX_CONST_BUFFERS. */
static void r600_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
- r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, 160 + R600_MAX_CONST_BUFFERS);
+ r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, R600_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS);
}
+/* Emit the geometry-shader sampler views, passing
+ * R600_FETCH_CONSTANTS_OFFSET_GS + R600_MAX_CONST_BUFFERS as the third
+ * argument in place of the literal 336 + R600_MAX_CONST_BUFFERS. */
static void r600_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
- r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, 336 + R600_MAX_CONST_BUFFERS);
+ r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, R600_FETCH_CONSTANTS_OFFSET_GS + R600_MAX_CONST_BUFFERS);
}
+/* Emit the fragment-shader sampler views, passing
+ * R600_FETCH_CONSTANTS_OFFSET_PS + R600_MAX_CONST_BUFFERS as the third
+ * argument in place of the bare R600_MAX_CONST_BUFFERS. */
static void r600_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
- r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, R600_MAX_CONST_BUFFERS);
+ r600_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, R600_FETCH_CONSTANTS_OFFSET_PS + R600_MAX_CONST_BUFFERS);
}
static void r600_emit_sampler_states(struct r600_context *rctx,
offset = border_color_reg;
offset += i * 16;
- r600_write_config_reg_seq(cs, offset, 4);
+ radeon_set_config_reg_seq(cs, offset, 4);
radeon_emit_array(cs, rstate->border_color.ui, 4);
}
}
if (!rctx->seamless_cube_map.enabled) {
tmp |= S_009508_DISABLE_CUBE_WRAP(1);
}
- r600_write_config_reg(cs, R_009508_TA_CNTL_AUX, tmp);
+ radeon_set_config_reg(cs, R_009508_TA_CNTL_AUX, tmp);
}
static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
struct r600_sample_mask *s = (struct r600_sample_mask*)a;
uint8_t mask = s->sample_mask;
- r600_write_context_reg(rctx->b.rings.gfx.cs, R_028C48_PA_SC_AA_MASK,
+ radeon_set_context_reg(rctx->b.rings.gfx.cs, R_028C48_PA_SC_AA_MASK,
mask | (mask << 8) | (mask << 16) | (mask << 24));
}
struct r600_cso_state *state = (struct r600_cso_state*)a;
struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
- r600_write_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
+ radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
}
primid = 1;
}
- r600_write_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
- r600_write_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
+ radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
+ radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
}
static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
struct r600_resource *rbuffer;
- r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+ radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
if (state->enable) {
rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
- r600_write_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 0);
+ radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 0);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READWRITE,
RADEON_PRIO_SHADER_RESOURCE_RW));
- r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
+ radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
state->esgs_ring.buffer_size >> 8);
rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
- r600_write_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 0);
+ radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 0);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READWRITE,
RADEON_PRIO_SHADER_RESOURCE_RW));
- r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
+ radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
state->gsvs_ring.buffer_size >> 8);
} else {
- r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
- r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
+ radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
+ radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
}
- r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+ radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
}
/* always privilege vs stage so that at worst we have the
* pixel stage producing wrong output (not the vertex
* stage) */
- new_num_ps_gprs = max_gprs - ((new_num_vs_gprs - new_num_es_gprs - new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
+ new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
new_num_vs_gprs = num_vs_gprs;
new_num_gs_gprs = num_gs_gprs;
new_num_es_gprs = num_es_gprs;
struct r600_shader *rshader = &shader->shader;
struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
unsigned gsvs_itemsize =
- (cp_shader->ring_item_size * shader->selector->gs_max_out_vertices) >> 2;
+ (cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2;
r600_init_command_buffer(cb, 64);
r600_conv_prim_to_gs_out(shader->selector->gs_output_prim));
r600_store_context_reg(cb, R_0288C8_SQ_GS_VERT_ITEMSIZE,
- cp_shader->ring_item_size >> 2);
+ cp_shader->ring_item_sizes[0] >> 2);
r600_store_context_reg(cb, R_0288A8_SQ_ESGS_RING_ITEMSIZE,
- (rshader->ring_item_size) >> 2);
+ (rshader->ring_item_sizes[0]) >> 2);
r600_store_context_reg(cb, R_0288AC_SQ_GSVS_RING_ITEMSIZE,
gsvs_itemsize);
cheight = cheight > copy_height ? copy_height : cheight;
size = (cheight * pitch) / 4;
/* emit reloc before writing cs so that cs is always in consistent state */
- r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ,
RADEON_PRIO_MIN);
- r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE,
+ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size);
cs->buf[cs->cdw++] = base >> 8;
void r600_init_state_functions(struct r600_context *rctx)
{
- unsigned id = 4;
- int i;
+ unsigned id = 1;
/* !!!
* To avoid GPU lockup registers must be emited in a specific order
r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, r600_emit_polygon_offset, 6);
r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
- for (i = 0;i < R600_MAX_VIEWPORTS; i++) {
- r600_init_atom(rctx, &rctx->scissor[i].atom, id++, r600_emit_scissor_state, 4);
- r600_init_atom(rctx, &rctx->viewport[i].atom, id++, r600_emit_viewport_state, 8);
- rctx->scissor[i].idx = i;
- rctx->viewport[i].idx = i;
- }
+ r600_init_atom(rctx, &rctx->scissor.atom, id++, r600_emit_scissor_state, 0);
+ r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 0);
r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3);
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
assert(rctx->atoms[id] == NULL);
rctx->atoms[id] = atom;
atom->id = id;
- atom->dirty = false;
}
void r600_init_atom(struct r600_context *rctx,
alpha_ref &= ~0x1FFF;
}
- r600_write_context_reg(cs, R_028410_SX_ALPHA_TEST_CONTROL,
+ radeon_set_context_reg(cs, R_028410_SX_ALPHA_TEST_CONTROL,
a->sx_alpha_test_control |
S_028410_ALPHA_TEST_BYPASS(a->bypass));
- r600_write_context_reg(cs, R_028438_SX_ALPHA_REF, alpha_ref);
+ radeon_set_context_reg(cs, R_028438_SX_ALPHA_REF, alpha_ref);
}
static void r600_texture_barrier(struct pipe_context *ctx)
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
struct pipe_blend_color *state = &rctx->blend_color.state;
- r600_write_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
+ radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
radeon_emit(cs, fui(state->color[0])); /* R_028414_CB_BLEND_RED */
radeon_emit(cs, fui(state->color[1])); /* R_028418_CB_BLEND_GREEN */
radeon_emit(cs, fui(state->color[2])); /* R_02841C_CB_BLEND_BLUE */
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
struct r600_vgt_state *a = (struct r600_vgt_state *)atom;
- r600_write_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
- r600_write_context_reg_seq(cs, R_028408_VGT_INDX_OFFSET, 2);
+ radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
+ radeon_set_context_reg_seq(cs, R_028408_VGT_INDX_OFFSET, 2);
radeon_emit(cs, a->vgt_indx_offset); /* R_028408_VGT_INDX_OFFSET */
radeon_emit(cs, a->vgt_multi_prim_ib_reset_indx); /* R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX */
if (a->last_draw_was_indirect) {
a->last_draw_was_indirect = false;
- r600_write_ctl_const(cs, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
+ radeon_set_ctl_const(cs, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
}
}
const struct pipe_clip_state *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- struct pipe_constant_buffer cb;
rctx->clip_state.state = *state;
r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
-
- cb.buffer = NULL;
- cb.user_buffer = state->ucp;
- cb.buffer_offset = 0;
- cb.buffer_size = 4*4*8;
- ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, R600_UCP_CONST_BUFFER, &cb);
- pipe_resource_reference(&cb.buffer, NULL);
+ rctx->driver_consts[PIPE_SHADER_VERTEX].vs_ucp_dirty = true;
}
static void r600_set_stencil_ref(struct pipe_context *ctx,
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom;
- r600_write_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
+ radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
radeon_emit(cs, /* R_028430_DB_STENCILREFMASK */
S_028430_STENCILREF(a->state.ref_value[0]) |
S_028430_STENCILMASK(a->state.valuemask[0]) |
/* Workaround for a missing scissor enable on r600. */
if (rctx->b.chip_class == R600 &&
- rs->scissor_enable != rctx->scissor[0].enable) {
- rctx->scissor[0].enable = rs->scissor_enable;
- r600_mark_atom_dirty(rctx, &rctx->scissor[0].atom);
+ rs->scissor_enable != rctx->scissor.enable) {
+ rctx->scissor.enable = rs->scissor_enable;
+ rctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+ rctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4;
+ r600_mark_atom_dirty(rctx, &rctx->scissor.atom);
}
/* Re-emit PA_SC_LINE_STIPPLE. */
const struct pipe_viewport_state *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
+ struct r600_viewport_state *rstate = &rctx->viewport;
int i;
- for (i = start_slot; i < start_slot + num_viewports; i++) {
- rctx->viewport[i].state = state[i - start_slot];
- r600_mark_atom_dirty(rctx, &rctx->viewport[i].atom);
- }
+ for (i = start_slot; i < start_slot + num_viewports; i++)
+ rstate->state[i] = state[i - start_slot];
+ rstate->dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
+ rstate->atom.num_dw = util_bitcount(rstate->dirty_mask) * 8;
+ r600_mark_atom_dirty(rctx, &rctx->viewport.atom);
}
void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
- struct r600_viewport_state *rstate = (struct r600_viewport_state *)atom;
- struct pipe_viewport_state *state = &rstate->state;
- int offset = rstate->idx * 6 * 4;
-
- r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, 6);
- radeon_emit(cs, fui(state->scale[0])); /* R_02843C_PA_CL_VPORT_XSCALE_0 */
- radeon_emit(cs, fui(state->translate[0])); /* R_028440_PA_CL_VPORT_XOFFSET_0 */
- radeon_emit(cs, fui(state->scale[1])); /* R_028444_PA_CL_VPORT_YSCALE_0 */
- radeon_emit(cs, fui(state->translate[1])); /* R_028448_PA_CL_VPORT_YOFFSET_0 */
- radeon_emit(cs, fui(state->scale[2])); /* R_02844C_PA_CL_VPORT_ZSCALE_0 */
- radeon_emit(cs, fui(state->translate[2])); /* R_028450_PA_CL_VPORT_ZOFFSET_0 */
+ struct r600_viewport_state *rstate = &rctx->viewport; /* one state object now holds every viewport slot; the atom argument is not consulted */
+ struct pipe_viewport_state *state;
+ uint32_t dirty_mask;
+ unsigned i, offset;
+
+ dirty_mask = rstate->dirty_mask; /* iterate over a local copy; the stored mask is reset after the loop */
+ while (dirty_mask != 0) {
+ i = u_bit_scan(&dirty_mask); /* index of the next dirty slot; presumably also clears that bit in the copy - confirm against u_bit_scan */
+ offset = i * 6 * 4; /* six 4-byte registers per viewport slot */
+ radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, 6);
+ state = &rstate->state[i];
+ radeon_emit(cs, fui(state->scale[0])); /* R_02843C_PA_CL_VPORT_XSCALE_0 */
+ radeon_emit(cs, fui(state->translate[0])); /* R_028440_PA_CL_VPORT_XOFFSET_0 */
+ radeon_emit(cs, fui(state->scale[1])); /* R_028444_PA_CL_VPORT_YSCALE_0 */
+ radeon_emit(cs, fui(state->translate[1])); /* R_028448_PA_CL_VPORT_YOFFSET_0 */
+ radeon_emit(cs, fui(state->scale[2])); /* R_02844C_PA_CL_VPORT_ZSCALE_0 */
+ radeon_emit(cs, fui(state->translate[2])); /* R_028450_PA_CL_VPORT_ZOFFSET_0 */
+ }
+ rstate->dirty_mask = 0; /* every slot that was flagged has been emitted above */
+ rstate->atom.num_dw = 0; /* nothing left to emit until slots are flagged dirty again */
}
/* Compute the key for the hw shader variant */
r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom);
}
+/**
+ * Re-bind the driver constants of every shader stage that has a dirty flag
+ * set (vs_ucp_dirty, ps_sample_pos_dirty or texture_const_dirty).
+ *
+ * The data is bound as a user buffer to the R600_BUFFER_INFO_CONST_BUFFER
+ * slot of the stage. It starts with an R600_UCP_SIZE-byte block: the user
+ * clip planes for the vertex stage, the sample positions for the fragment
+ * stage. When the stage owns an allocated constants array (alloc_size != 0)
+ * that block is copied to the start of the array and the whole array is
+ * bound; otherwise the context's own clip-plane / sample-position storage
+ * is bound directly.
+ *
+ * All three dirty flags of a processed stage are cleared.
+ */
+static void r600_update_driver_const_buffers(struct r600_context *rctx)
+{
+	int sh, size;
+	void *ptr;
+	struct pipe_constant_buffer cb;
+
+	for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
+		struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh];
+
+		if (!info->vs_ucp_dirty &&
+		    !info->texture_const_dirty &&
+		    !info->ps_sample_pos_dirty)
+			continue;
+
+		ptr = info->constants;
+		size = info->alloc_size;
+		if (info->vs_ucp_dirty) {
+			assert(sh == PIPE_SHADER_VERTEX);
+			if (!size) {
+				/* No constants array for this stage: bind the clip planes in place. */
+				ptr = rctx->clip_state.state.ucp;
+				size = R600_UCP_SIZE;
+			} else {
+				memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
+			}
+			info->vs_ucp_dirty = false;
+		}
+
+		if (info->ps_sample_pos_dirty) {
+			assert(sh == PIPE_SHADER_FRAGMENT);
+			if (!size) {
+				/* No constants array for this stage: bind the sample positions in place. */
+				ptr = rctx->sample_positions;
+				size = R600_UCP_SIZE;
+			} else {
+				memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
+			}
+			info->ps_sample_pos_dirty = false;
+		}
+
+		if (info->texture_const_dirty) {
+			/* The texture constants live in the allocated array, so it must exist. */
+			assert (ptr);
+			assert (size);
+			/* Refresh the leading block as well before the array is bound. */
+			if (sh == PIPE_SHADER_VERTEX)
+				memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
+			if (sh == PIPE_SHADER_FRAGMENT)
+				memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
+		}
+		info->texture_const_dirty = false;
+
+		cb.buffer = NULL;
+		cb.user_buffer = ptr;
+		cb.buffer_offset = 0;
+		cb.buffer_size = size;
+		rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, &cb);
+		pipe_resource_reference(&cb.buffer, NULL);
+	}
+}
+
+/**
+ * Prepare the driver constants array of stage @shader_type for @array_size
+ * bytes of caller data placed behind the leading R600_UCP_SIZE-byte block.
+ *
+ * The array is grown when it is too small (it is never shrunk), the caller's
+ * part is zeroed and the stage is flagged via texture_const_dirty.
+ *
+ * @base_offset receives R600_UCP_SIZE, the byte offset of the caller's part.
+ * Returns the start of the whole array. The realloc() result is not checked.
+ */
+static void *r600_alloc_buf_consts(struct r600_context *rctx, int shader_type,
+				   int array_size, uint32_t *base_offset)
+{
+	struct r600_shader_driver_constants_info *consts = &rctx->driver_consts[shader_type];
+
+	*base_offset = R600_UCP_SIZE;
+	consts->texture_const_dirty = true;
+
+	if (array_size + R600_UCP_SIZE > consts->alloc_size) {
+		consts->alloc_size = array_size + R600_UCP_SIZE;
+		consts->constants = realloc(consts->constants, array_size + R600_UCP_SIZE);
+	}
+
+	/* Skip the leading block; the / 4 assumes 4-byte array elements - TODO confirm. */
+	memset(consts->constants + (R600_UCP_SIZE / 4), 0, array_size);
+
+	return consts->constants;
+}
/*
* On r600/700 hw we don't have vertex fetch swizzle, though TBO
* doesn't require full swizzles it does need masking and setting alpha
struct r600_textures_info *samplers = &rctx->samplers[shader_type];
int bits;
uint32_t array_size;
- struct pipe_constant_buffer cb;
int i, j;
-
+ uint32_t *constants;
+ uint32_t base_offset;
if (!samplers->views.dirty_buffer_constants)
return;
bits = util_last_bit(samplers->views.enabled_mask);
array_size = bits * 8 * sizeof(uint32_t) * 4;
- samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
- memset(samplers->buffer_constants, 0, array_size);
+
+ constants = r600_alloc_buf_consts(rctx, shader_type, array_size, &base_offset);
+
for (i = 0; i < bits; i++) {
if (samplers->views.enabled_mask & (1 << i)) {
- int offset = i * 8;
+ int offset = (base_offset / 4) + i * 8;
const struct util_format_description *desc;
desc = util_format_description(samplers->views.views[i]->base.format);
for (j = 0; j < 4; j++)
if (j < desc->nr_channels)
- samplers->buffer_constants[offset+j] = 0xffffffff;
+ constants[offset+j] = 0xffffffff;
else
- samplers->buffer_constants[offset+j] = 0x0;
+ constants[offset+j] = 0x0;
if (desc->nr_channels < 4) {
if (desc->channel[0].pure_integer)
- samplers->buffer_constants[offset+4] = 1;
+ constants[offset+4] = 1;
else
- samplers->buffer_constants[offset+4] = fui(1.0);
+ constants[offset+4] = fui(1.0);
} else
- samplers->buffer_constants[offset + 4] = 0;
+ constants[offset + 4] = 0;
- samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
- samplers->buffer_constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
+ constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+ constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
}
}
- cb.buffer = NULL;
- cb.user_buffer = samplers->buffer_constants;
- cb.buffer_offset = 0;
- cb.buffer_size = array_size;
- rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
- pipe_resource_reference(&cb.buffer, NULL);
}
/* On evergreen we store two values
struct r600_textures_info *samplers = &rctx->samplers[shader_type];
int bits;
uint32_t array_size;
- struct pipe_constant_buffer cb;
int i;
-
+ uint32_t *constants;
+ uint32_t base_offset;
if (!samplers->views.dirty_buffer_constants)
return;
bits = util_last_bit(samplers->views.enabled_mask);
array_size = bits * 2 * sizeof(uint32_t) * 4;
- samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
- memset(samplers->buffer_constants, 0, array_size);
+
+ constants = r600_alloc_buf_consts(rctx, shader_type, array_size,
+ &base_offset);
+
for (i = 0; i < bits; i++) {
if (samplers->views.enabled_mask & (1 << i)) {
- uint32_t offset = i * 2;
- samplers->buffer_constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
- samplers->buffer_constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
+ uint32_t offset = (base_offset / 4) + i * 2;
+ constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+ constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
}
}
-
- cb.buffer = NULL;
- cb.user_buffer = samplers->buffer_constants;
- cb.buffer_offset = 0;
- cb.buffer_size = array_size;
- rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
- pipe_resource_reference(&cb.buffer, NULL);
}
/* set sample xy locations as array of fragment shader constants */
void r600_set_sample_locations_constant_buffer(struct r600_context *rctx)
{
- struct pipe_constant_buffer constbuf = {0};
- float values[4*16] = {0.0f};
int i;
struct pipe_context *ctx = &rctx->b.b;
- assert(rctx->framebuffer.nr_samples <= Elements(values)/4);
+ assert(rctx->framebuffer.nr_samples < R600_UCP_SIZE); /* NOTE(review): compares a sample count with a constant that is used as a byte size elsewhere - confirm intent */
+ assert(rctx->framebuffer.nr_samples <= Elements(rctx->sample_positions)/4); /* four entries are written per sample below */
+
+ memset(rctx->sample_positions, 0, 4 * 4 * 16); /* NOTE(review): 4 * 4 * 16 presumably equals sizeof(rctx->sample_positions) - confirm */
for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
- ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &values[4*i]);
+ ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &rctx->sample_positions[4*i]); /* results now go to context storage instead of a local array */
/* Also fill in center-zeroed positions used for interpolateAtSample */
- values[4*i + 2] = values[4*i + 0] - 0.5f;
- values[4*i + 3] = values[4*i + 1] - 0.5f;
+ rctx->sample_positions[4*i + 2] = rctx->sample_positions[4*i + 0] - 0.5f;
+ rctx->sample_positions[4*i + 3] = rctx->sample_positions[4*i + 1] - 0.5f;
}
- constbuf.user_buffer = values;
- constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4;
- ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
- R600_SAMPLE_POSITIONS_CONST_BUFFER, &constbuf);
- pipe_resource_reference(&constbuf.buffer, NULL);
+ rctx->driver_consts[PIPE_SHADER_FRAGMENT].ps_sample_pos_dirty = true; /* only flags the fragment-stage driver constants; no constant buffer is bound here any more */
}
static void update_shader_atom(struct pipe_context *ctx,
}
}
+ r600_update_driver_const_buffers(rctx);
+
if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
if (!r600_adjust_gprs(rctx)) {
/* discard rendering */
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
struct r600_clip_misc_state *state = &rctx->clip_misc_state;
- r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
+ radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
state->pa_cl_clip_cntl |
(state->clip_dist_write ? 0 : state->clip_plane_enable & 0x3F) |
S_028810_CLIP_DISABLE(state->clip_disable));
- r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
+ radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
state->pa_cl_vs_out_cntl |
(state->clip_plane_enable & state->clip_dist_write));
}
struct r600_context *rctx = (struct r600_context *)ctx;
struct pipe_draw_info info = *dinfo;
struct pipe_index_buffer ib = {};
- unsigned i;
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ uint64_t mask;
if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
return;
r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
r600_flush_emit(rctx);
- i = r600_next_dirty_atom(rctx, 0);
- while (i < R600_NUM_ATOMS) {
- r600_emit_atom(rctx, rctx->atoms[i]);
- i = r600_next_dirty_atom(rctx, i + 1);
+ mask = rctx->dirty_atoms;
+ while (mask != 0) {
+ r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]);
}
if (rctx->b.chip_class == CAYMAN) {
rctx->b.streamout.prims_gen_query_enabled)
partial_vs_wave = true;
- r600_write_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM,
+ radeon_set_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM,
S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1));
info.mode == R600_PRIM_RECTANGLE_LIST) {
su_sc_mode_cntl &= C_028814_CULL_FRONT;
}
- r600_write_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, su_sc_mode_cntl);
+ radeon_set_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, su_sc_mode_cntl);
}
/* Update start instance. */
if (!info.indirect && rctx->last_start_instance != info.start_instance) {
- r600_write_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance);
+ radeon_set_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance);
rctx->last_start_instance = info.start_instance;
}
info.mode == PIPE_PRIM_LINE_LOOP)
ls_mask = 2;
- r600_write_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
+ radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
S_028A0C_AUTO_RESET_CNTL(ls_mask) |
(rctx->rasterizer ? rctx->rasterizer->pa_sc_line_stipple : 0));
- r600_write_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE,
+ radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE,
r600_conv_pipe_prim(info.mode));
rctx->last_primitive_type = info.mode;
cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+ cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
(struct r600_resource*)info.indirect,
RADEON_USAGE_READ, RADEON_PRIO_MIN);
}
cs->buf[cs->cdw++] = info.count;
cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+ cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
(struct r600_resource*)ib.buffer,
RADEON_USAGE_READ, RADEON_PRIO_MIN);
}
cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+ cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
(struct r600_resource*)ib.buffer,
RADEON_USAGE_READ, RADEON_PRIO_MIN);
struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;
uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset;
- r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
+ radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
cs->buf[cs->cdw++] = PKT3(PKT3_COPY_DW, 4, 0);
cs->buf[cs->cdw++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG;
cs->buf[cs->cdw++] = 0; /* unused */
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+ cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
t->buf_filled_size, RADEON_USAGE_READ,
RADEON_PRIO_MIN);
}
r600_emit_command_buffer(cs, &shader->command_buffer);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->bo,
+ radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->bo,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
}
uint32_t reloc;
va = rscreen->b.trace_bo->gpu_address;
- reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo,
+ reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo,
RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
radeon_emit(cs, va & 0xFFFFFFFFUL);
#define DMA_PACKET_CONSTANT_FILL 0xd /* 7xx only */
#define DMA_PACKET_NOP 0xf
+
+/* Resource IDs:
+ * PS: 0 .. +160
+ * VS: 160 .. +160
+ * FS: 320 .. +16
+ * GS: 336 .. +160
+ */
+#define R600_FETCH_CONSTANTS_OFFSET_PS 0
+#define R600_FETCH_CONSTANTS_OFFSET_VS 160
+#define R600_FETCH_CONSTANTS_OFFSET_FS 320
+#define R600_FETCH_CONSTANTS_OFFSET_GS 336
#endif
if (n.bc.pop_count)
s << " POP:" << n.bc.pop_count;
+
+ if (n.bc.count && (n.bc.op_ptr->flags & CF_EMIT))
+ s << " STREAM" << n.bc.count;
}
if (!n.bc.barrier)
for (unsigned k = 0; k < 3; ++k)
if (n.bc.offset[k])
s << " O" << chans[k] << ":" << n.bc.offset[k];
+ if (ctx.is_egcm() && n.bc.resource_index_mode)
+ s << " RIM:SQ_CF_INDEX_" << n.bc.resource_index_mode;
+ if (ctx.is_egcm() && n.bc.sampler_index_mode)
+ s << " SID:SQ_CF_INDEX_" << n.bc.sampler_index_mode;
}
sblog << s.str() << "\n";
mask |= (1 << chan);
}
- assert(reg >= 0 && mask);
-
if (reg >= 0)
update_ngpr(reg);
c->bc.end_of_program = eop;
} else if (flags & CF_EMIT) {
- c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;
+ /* quick peephole */
+ cf_node *prev = static_cast<cf_node *>(c->prev);
+ if (c->bc.op == CF_OP_CUT_VERTEX &&
+ prev && prev->is_valid() &&
+ prev->bc.op == CF_OP_EMIT_VERTEX &&
+ c->bc.count == prev->bc.count) {
+ prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX);
+ prev->bc.end_of_program = c->bc.end_of_program;
+ c->remove();
+ }
+ else {
+ c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;
- c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
- c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+ c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+ c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+ }
}
}
{
switch (nr_samples) {
case 2:
- r600_write_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
- r600_write_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
- r600_write_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
- r600_write_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
+ radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
+ radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
+ radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
+ radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
break;
case 4:
- r600_write_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
- r600_write_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
- r600_write_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
- r600_write_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
+ radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
+ radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
+ radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
+ radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
break;
case 8:
- r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
+ radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
radeon_emit(cs, cm_sample_locs_8x[0]);
radeon_emit(cs, cm_sample_locs_8x[4]);
radeon_emit(cs, 0);
radeon_emit(cs, cm_sample_locs_8x[7]);
break;
case 16:
- r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
+ radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
radeon_emit(cs, cm_sample_locs_16x[0]);
radeon_emit(cs, cm_sample_locs_16x[4]);
radeon_emit(cs, cm_sample_locs_16x[8]);
unsigned log_ps_iter_samples =
util_logbase2(util_next_power_of_two(ps_iter_samples));
- r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+ radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, S_028BDC_LAST_PIXEL(1) |
S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
if (nr_samples > 1) {
- r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
+ radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
- r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
} else if (overrast_samples > 1) {
- r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
+ radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
- r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
+ radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
}
} else {
- r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+ radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, S_028BDC_LAST_PIXEL(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
- r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
+ radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
- r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
+ radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
}
}
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
return r600_buffer_get_transfer(ctx, resource, level, usage, box,
ptransfer, data, staging, offset);
- } else {
- return NULL; /* error, shouldn't occur though */
}
+ } else {
+ /* At this point, the buffer is always idle (we checked it above). */
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
- /* At this point, the buffer is always idle (we checked it above). */
- usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
/* Using a staging buffer in GTT for larger reads is much faster. */
else if ((usage & PIPE_TRANSFER_READ) &&
ptransfer, data, NULL, 0);
}
-static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
- struct pipe_transfer *transfer)
+static void r600_buffer_do_flush_region(struct pipe_context *ctx,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box) /* box: range to flush; box->x is used as the offset into the destination buffer */
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct r600_resource *rbuffer = r600_resource(transfer->resource);
if (rtransfer->staging) {
- if (rtransfer->transfer.usage & PIPE_TRANSFER_WRITE) {
- struct pipe_resource *dst, *src;
- unsigned soffset, doffset, size;
- struct pipe_box box;
+ struct pipe_resource *dst, *src;
+ unsigned soffset;
+ struct pipe_box dma_box;
- dst = transfer->resource;
- src = &rtransfer->staging->b.b;
- size = transfer->box.width;
- doffset = transfer->box.x;
- soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;
+ dst = transfer->resource;
+ src = &rtransfer->staging->b.b;
+ soffset = rtransfer->offset + box->x % R600_MAP_BUFFER_ALIGNMENT; /* source offset of the range inside the staging resource */
- u_box_1d(soffset, size, &box);
+ u_box_1d(soffset, box->width, &dma_box); /* dma_box describes the source range; the destination offset is passed separately below */
- /* Copy the staging buffer into the original one. */
- rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
- }
- pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
+ /* Copy the staging buffer into the original one. */
+ rctx->dma_copy(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box);
}
- if (transfer->usage & PIPE_TRANSFER_WRITE) {
- util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
- transfer->box.x + transfer->box.width);
+ util_range_add(&rbuffer->valid_buffer_range, box->x,
+ box->x + box->width); /* runs for staged and direct mappings alike; the usage check is left to the callers */
+}
+
+/**
+ * transfer_flush_region hook for buffers.
+ *
+ * @rel_box is rebased onto transfer->box.x before being handed to
+ * r600_buffer_do_flush_region(). Transfers whose usage has neither
+ * PIPE_TRANSFER_WRITE nor PIPE_TRANSFER_FLUSH_EXPLICIT set are left alone.
+ */
+static void r600_buffer_flush_region(struct pipe_context *ctx,
+				     struct pipe_transfer *transfer,
+				     const struct pipe_box *rel_box)
+{
+	const unsigned flushable = PIPE_TRANSFER_WRITE |
+				   PIPE_TRANSFER_FLUSH_EXPLICIT;
+
+	if ((transfer->usage & flushable) != 0) {
+		struct pipe_box abs_box;
+
+		u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &abs_box);
+		r600_buffer_do_flush_region(ctx, transfer, &abs_box);
}
+}
+
+/**
+ * transfer_unmap hook for buffers.
+ *
+ * A write mapping without PIPE_TRANSFER_FLUSH_EXPLICIT is flushed here over
+ * its whole box; with explicit flushing nothing is flushed at unmap time.
+ * The staging reference, if any, is dropped and the transfer is returned to
+ * the context's transfer pool.
+ */
+static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
+				       struct pipe_transfer *transfer)
+{
+	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+	const unsigned usage = transfer->usage;
+
+	if ((usage & PIPE_TRANSFER_WRITE) &&
+	    !(usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+		r600_buffer_do_flush_region(ctx, transfer, &transfer->box);
+	}
+
+	if (rtransfer->staging != NULL) {
+		pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
+	}
+
util_slab_free(&rctx->pool_transfers, transfer);
}
NULL, /* get_handle */
r600_buffer_destroy, /* resource_destroy */
r600_buffer_transfer_map, /* transfer_map */
- NULL, /* transfer_flush_region */
+ r600_buffer_flush_region, /* transfer_flush_region */
r600_buffer_transfer_unmap, /* transfer_unmap */
NULL /* transfer_inline_write */
};
#include "r600_pipe_common.h"
#include "r600d_common.h"
-static inline unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
- struct r600_ring *ring,
- struct r600_resource *rbo,
- enum radeon_bo_usage usage,
- enum radeon_bo_priority priority)
+/**
+ * Add a buffer to the buffer list for the given command stream (CS).
+ *
+ * All buffers used by a CS must be added to the list. This tells the kernel
+ * driver which buffers are used by GPU commands. Other buffers can
+ * be swapped out (not accessible) during execution.
+ *
+ * The buffer list becomes empty after every context flush and must be
+ * rebuilt.
+ */
+static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rctx,
+ struct r600_ring *ring,
+ struct r600_resource *rbo,
+ enum radeon_bo_usage usage,
+ enum radeon_bo_priority priority)
{
assert(usage);
{
struct radeon_winsys_cs *cs = ring->cs;
bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address;
- unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage, priority);
+ unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
if (!has_vm) {
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
}
}
-static inline void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg < R600_CONTEXT_REG_OFFSET);
assert(cs->cdw+2+num <= cs->max_dw);
radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
}
-static inline void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+/* Set a single config register: calls radeon_set_config_reg_seq() with
+ * num = 1, then emits 'value' into the command stream. */
+static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
- r600_write_config_reg_seq(cs, reg, 1);
+ radeon_set_config_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static inline void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CONTEXT_REG_OFFSET);
assert(cs->cdw+2+num <= cs->max_dw);
radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
}
-static inline void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+/* Set a single context register: calls radeon_set_context_reg_seq() with
+ * num = 1, then emits 'value' into the command stream. */
+static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
- r600_write_context_reg_seq(cs, reg, 1);
+ radeon_set_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static inline void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->cdw+2+num <= cs->max_dw);
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
}
-static inline void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+/* Set a single SH register: calls radeon_set_sh_reg_seq() with num = 1,
+ * then emits 'value' into the command stream. */
+static inline void radeon_set_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
- si_write_sh_reg_seq(cs, reg, 1);
+ radeon_set_sh_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static inline void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw+2+num <= cs->max_dw);
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
-static inline void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+/* Set a single uconfig register: calls radeon_set_uconfig_reg_seq() with
+ * num = 1, then emits 'value' into the command stream. */
+static inline void radeon_set_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
- cik_write_uconfig_reg_seq(cs, reg, 1);
+ radeon_set_uconfig_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
* I guess the 4th one is derived from the first 3.
* The vertex specification should match u_blitter's vertex element state. */
u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, &offset, &buf, (void**)&vb);
+ if (!buf)
+ return;
+
vb[0] = x1;
vb[1] = y1;
vb[2] = depth;
rctx->max_db = 4;
rctx->b.transfer_map = u_transfer_map_vtbl;
- rctx->b.transfer_flush_region = u_default_transfer_flush_region;
+ rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
rctx->b.transfer_inline_write = u_default_transfer_inline_write;
rctx->b.memory_barrier = r600_memory_barrier;
case CHIP_KAVERI: return "kaveri";
case CHIP_HAWAII: return "hawaii";
case CHIP_MULLINS:
-#if HAVE_LLVM >= 0x0305
return "mullins";
-#else
- return "kabini";
-#endif
case CHIP_TONGA: return "tonga";
case CHIP_ICELAND: return "iceland";
case CHIP_CARRIZO: return "carrizo";
{"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"GPU-load", R600_QUERY_GPU_LOAD, {100}},
- {"temperature", R600_QUERY_GPU_TEMPERATURE, {100}},
+ {"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
{"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
{"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
};
struct r600_atom {
void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
unsigned num_dw;
- unsigned short id; /* used by r600 only */
- bool dirty;
+ unsigned short id;
};
struct r600_so_target {
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
- unsigned *buffers,
+ unsigned *buffers, unsigned *dirty_cbufs,
const union pipe_color_union *color);
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
void r600_init_context_texture_functions(struct r600_common_context *rctx);
}
if (rctx->chip_class >= CIK) {
- cik_write_uconfig_reg(cs, reg_strmout_cntl, 0);
+ radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
} else {
- r600_write_config_reg(cs, reg_strmout_cntl, 0);
+ radeon_set_config_reg(cs, reg_strmout_cntl, 0);
}
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
/* SI binds streamout buffers as shader resources.
* VGT only counts primitives and tells the shader
* through SGPRs what to do. */
- r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
+ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
radeon_emit(cs, (t[i]->b.buffer_offset +
t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
- r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
+ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
radeon_emit(cs, (t[i]->b.buffer_offset +
t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
* primitives emitted) may be enabled even if there is not
* buffer bound. This ensures that the primitives-emitted query
* won't increment. */
- r600_write_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+ radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
t[i]->buf_filled_size_valid = true;
}
S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
}
- r600_write_context_reg(rctx->rings.gfx.cs, strmout_buffer_reg, strmout_buffer_val);
- r600_write_context_reg(rctx->rings.gfx.cs, strmout_config_reg, strmout_config_val);
+ radeon_set_context_reg(rctx->rings.gfx.cs, strmout_buffer_reg, strmout_buffer_val);
+ radeon_set_context_reg(rctx->rings.gfx.cs, strmout_config_reg, strmout_config_val);
}
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
if (usage & PIPE_TRANSFER_READ) {
struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
+ if (!temp) {
+ R600_ERR("failed to create a temporary depth texture\n");
+ FREE(trans);
+ return NULL;
+ }
r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
NULL, /* get_handle */
r600_texture_destroy, /* resource_destroy */
r600_texture_transfer_map, /* transfer_map */
- NULL, /* transfer_flush_region */
+ u_default_transfer_flush_region, /* transfer_flush_region */
r600_texture_transfer_unmap, /* transfer_unmap */
NULL /* transfer_inline_write */
};
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
- unsigned *buffers,
+ unsigned *buffers, unsigned *dirty_cbufs,
const union pipe_color_union *color)
{
int i;
tex->cmask.offset, tex->cmask.size, 0, true);
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+ if (dirty_cbufs)
+ *dirty_cbufs |= 1 << i;
rctx->set_atom_dirty(rctx, fb_state, true);
*buffers &= ~clear_bit;
}
return target;
}
-#if HAVE_LLVM >= 0x0305
-
static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
{
if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) {
}
}
-#endif
-
/**
* Compile an LLVM module to machine code.
*
/* Setup Diagnostic Handler*/
llvm_ctx = LLVMGetModuleContext(M);
-#if HAVE_LLVM >= 0x0305
LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &rval);
-#endif
rval = 0;
/* Compile IR*/
bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
- bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name =
- HAVE_LLVM >= 0x0305 ? "llvm.AMDGPU.rsq.clamped.f32" : "llvm.AMDGPU.rsq";
+ bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name = "llvm.AMDGPU.rsq.clamped.f32";
bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
{
int i;
+ if (!enc->pic.rate_ctrl.frame_rate_num)
+ return;
+
RVCE_BEGIN(0x04000009); // vui
RVCE_CS(0x00000000); //aspectRatioInfoPresentFlag
RVCE_CS(0x00000000); //aspectRatioInfo.aspectRatioIdc
boolean has_uvd;
uint32_t vce_fw_version;
boolean has_userptr;
+ bool gfx_ib_pad_with_type2;
uint32_t r300_num_gb_pipes;
uint32_t r300_num_z_pipes;
LOCAL_PATH := $(call my-dir)
-# get C_SOURCES
+# get C_SOURCES and GENERATED_SOURCES
include $(LOCAL_PATH)/Makefile.sources
include $(CLEAR_VARS)
LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
LOCAL_MODULE := libmesa_pipe_radeonsi
+# generate sources
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+intermediates := $(call local-generated-sources-dir)
+LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, $(GENERATED_SOURCES))
+
+$(LOCAL_GENERATED_SOURCES): PRIVATE_PYTHON := $(MESA_PYTHON2)
+$(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ > $@
+
+$(intermediates)/sid_tables.h: $(intermediates)/%.h: $(LOCAL_PATH)/%.py $(LOCAL_PATH)/sid.h
+ $(transform-generated-source)
+
include $(GALLIUM_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
noinst_LTLIBRARIES = libradeonsi.la
-libradeonsi_la_SOURCES = $(C_SOURCES)
+libradeonsi_la_SOURCES = $(C_SOURCES) $(GENERATED_SOURCES)
sid_tables.h: $(srcdir)/sid_tables.py $(srcdir)/sid.h
$(AM_V_GEN) $(PYTHON2) $(srcdir)/sid_tables.py $(srcdir)/sid.h > $@
C_SOURCES := \
cik_sdma.c \
si_blit.c \
- si_commands.c \
si_compute.c \
si_cp_dma.c \
si_debug.c \
si_descriptors.c \
sid.h \
- sid_tables.h \
si_dma.c \
si_hw_context.c \
si_pipe.c \
si_state_shaders.c \
si_state.h \
si_uvd.c
+
+GENERATED_SOURCES := \
+ sid_tables.h
ncopy = (size + CIK_SDMA_COPY_MAX_SIZE - 1) / CIK_SDMA_COPY_MAX_SIZE;
r600_need_dma_space(&ctx->b, ncopy * 7);
- r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
RADEON_PRIO_MIN);
- r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
for (i = 0; i < ncopy; i++) {
ncopy = (copy_height + cheight - 1) / cheight;
r600_need_dma_space(&ctx->b, ncopy * 12);
- r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
RADEON_USAGE_READ, RADEON_PRIO_MIN);
- r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
copy_height = size * 4 / pitch;
util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
- util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref);
+ util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader);
util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader);
util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
- if (sctx->queued.named.sample_mask) {
- util_blitter_save_sample_mask(sctx->blitter,
- sctx->queued.named.sample_mask->sample_mask);
- }
- if (sctx->queued.named.viewport[0]) {
- util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
- }
- if (sctx->queued.named.scissor[0]) {
- util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor);
- }
+ util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
+ util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
+ util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
(struct pipe_stream_output_target**)sctx->b.streamout.targets);
zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
if (buffers & PIPE_CLEAR_COLOR) {
- evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom,
- &buffers, color);
+ evergreen_do_fast_color_clear(&sctx->b, fb,
+ &sctx->framebuffer.atom, &buffers,
+ &sctx->framebuffer.dirty_cbufs,
+ color);
if (!buffers)
return; /* all buffers have been fast cleared */
}
}
zstex->depth_clear_value = depth;
+ sctx->framebuffer.dirty_zsbuf = true;
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
sctx->db_depth_clear = true;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
+++ /dev/null
-/*
- * Copyright 2012 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Christian König <christian.koenig@amd.com>
- */
-
-#include "sid.h"
-#include "si_pipe.h"
-
-void si_cmd_context_control(struct si_pm4_state *pm4)
-{
- si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
- si_pm4_cmd_add(pm4, 0x80000000);
- si_pm4_cmd_add(pm4, 0x80000000);
- si_pm4_cmd_end(pm4, false);
-}
#include "sid.h"
#define MAX_GLOBAL_BUFFERS 20
-#if HAVE_LLVM < 0x0305
-#define NUM_USER_SGPRS 2
-#else
+
/* XXX: Even though we don't pass the scratch buffer via user sgprs any more
* LLVM still expects that we specify 4 USER_SGPRS so it can remain compatible
* with older mesa. */
#define NUM_USER_SGPRS 4
-#endif
struct si_compute {
struct si_context *ctx;
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_FLUSH_WITH_INV_L2 |
SI_CONTEXT_FLAG_COMPUTE;
- si_emit_cache_flush(&sctx->b, NULL);
+ si_emit_cache_flush(sctx, NULL);
pm4->compute_pkt = true;
shader->scratch_bytes_per_wave *
num_waves_for_scratch);
- si_pm4_add_bo(pm4, shader->scratch_bo,
- RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW);
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ shader->scratch_bo,
+ RADEON_USAGE_READWRITE,
+ RADEON_PRIO_SHADER_RESOURCE_RW);
scratch_buffer_va = shader->scratch_bo->gpu_address;
}
kernel_args_va = input_buffer->gpu_address;
kernel_args_va += kernel_args_offset;
- si_pm4_add_bo(pm4, input_buffer, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_DATA);
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, input_buffer,
+ RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
if (!buffer) {
continue;
}
- si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffer,
+ RADEON_USAGE_READWRITE,
+ RADEON_PRIO_SHADER_RESOURCE_RW);
}
/* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
#if HAVE_LLVM >= 0x0306
shader_va += pc;
#endif
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->bo,
+ RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
SI_CONTEXT_INV_ICACHE |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_FLAG_COMPUTE;
- si_emit_cache_flush(&sctx->b, NULL);
+ si_emit_cache_flush(sctx, NULL);
}
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
unsigned dma_flags = tc_l2_flag;
- si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0),
- FALSE);
+ si_need_cs_space(sctx);
/* This must be done after need_cs_space. */
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)dst, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
/* Flush the caches for the first copy only.
* Also wait for the previous CP DMA operations. */
if (sctx->b.flags) {
- si_emit_cache_flush(&sctx->b, NULL);
+ si_emit_cache_flush(sctx, NULL);
dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
}
unsigned sync_flags = tc_l2_flag;
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
- si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE);
+ si_need_cs_space(sctx);
/* Flush the caches for the first copy only. Also wait for old CP DMA packets to complete. */
if (sctx->b.flags) {
- si_emit_cache_flush(&sctx->b, NULL);
+ si_emit_cache_flush(sctx, NULL);
sync_flags |= SI_CP_DMA_RAW_WAIT;
}
}
/* This must be done after r600_need_cs_space. */
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
RADEON_USAGE_READ, RADEON_PRIO_MIN);
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
desc->list_dirty = false;
if (!rview->resource)
continue;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rview->resource, RADEON_USAGE_READ,
si_get_resource_ro_priority(rview->resource));
}
if (!views->desc.buffer)
return;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
}
(struct si_sampler_view*)view;
if (rview->resource)
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rview->resource, RADEON_USAGE_READ,
si_get_resource_ro_priority(rview->resource));
{
if (!states->desc.buffer)
return;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
}
-void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
- unsigned start, unsigned count, void **states)
+static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
+ unsigned start, unsigned count, void **states)
{
+ struct si_context *sctx = (struct si_context *)ctx;
struct si_sampler_states *samplers = &sctx->samplers[shader].states;
struct si_sampler_state **sstates = (struct si_sampler_state**)states;
int i;
+ if (!count || shader >= SI_NUM_SHADERS)
+ return;
+
if (start == 0)
samplers->saved_states[0] = states[0];
if (start == 1)
while (mask) {
int i = u_bit_scan64(&mask);
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)buffers->buffers[i],
buffers->shader_usage, buffers->priority);
}
if (!buffers->desc.buffer)
return;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
buffers->desc.buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_SHADER_DATA);
}
if (!sctx->vertex_buffer[vb].buffer)
continue;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)sctx->vertex_buffer[vb].buffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
}
if (!desc->buffer)
return;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA);
}
if (!desc->buffer)
return false;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA);
desc[3] = sctx->vertex_elements->rsrc_word3[i];
if (!bound[ve->vertex_buffer_index]) {
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)vb->buffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
bound[ve->vertex_buffer_index] = true;
u_upload_alloc(sctx->b.uploader, 0, size, const_offset,
(struct pipe_resource**)rbuffer, &tmp);
- util_memcpy_cpu_to_le32(tmp, ptr, size);
+ if (rbuffer)
+ util_memcpy_cpu_to_le32(tmp, ptr, size);
}
static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint slot,
si_upload_const_buffer(sctx,
(struct r600_resource**)&buffer, input->user_buffer,
input->buffer_size, &buffer_offset);
+ if (!buffer) {
+ /* Just unbind on failure. */
+ si_set_constant_buffer(ctx, shader, slot, NULL);
+ return;
+ }
va = r600_resource(buffer)->gpu_address + buffer_offset;
} else {
pipe_resource_reference(&buffer, input->buffer);
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
buffers->buffers[slot] = buffer;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)buffer,
buffers->shader_usage, buffers->priority);
buffers->desc.enabled_mask |= 1llu << slot;
S_008F0C_ADD_TID_ENABLE(add_tid);
pipe_resource_reference(&buffers->buffers[slot], buffer);
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)buffer,
buffers->shader_usage, buffers->priority);
buffers->desc.enabled_mask |= 1llu << slot;
/* Set the resource. */
pipe_resource_reference(&buffers->buffers[bufidx],
buffer);
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)buffer,
buffers->shader_usage, buffers->priority);
buffers->desc.enabled_mask |= 1llu << bufidx;
old_va, buf);
buffers->desc.list_dirty = true;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rbuffer, buffers->shader_usage,
buffers->priority);
old_va, buf);
buffers->desc.list_dirty = true;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rbuffer, buffers->shader_usage,
buffers->priority);
}
old_va, buf);
views->desc.list_dirty = true;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rbuffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_BUFFER_RO);
}
desc->pointer_dirty = keep_dirty;
}
-static void si_emit_shader_userdata(struct si_context *sctx,
- struct r600_atom *atom)
+void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
{
unsigned i;
uint32_t *sh_base = sctx->shader_userdata.sh_base;
4, SI_NUM_VERTEX_BUFFERS);
/* Set pipe_context functions. */
+ sctx->b.b.bind_sampler_states = si_bind_sampler_states;
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
sctx->b.b.set_sampler_views = si_set_sampler_views;
sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
sctx->b.invalidate_buffer = si_invalidate_buffer;
/* Shader user data. */
- sctx->atoms.s.shader_userdata = &sctx->shader_userdata.atom;
- sctx->shader_userdata.atom.emit = (void*)si_emit_shader_userdata;
-
- /* Upper bound, 4 pointers per shader, +1 for vertex buffers, +2 for the VS copy shader. */
- sctx->shader_userdata.atom.num_dw = (SI_NUM_SHADERS * 4 + 1 + 2) * 4;
+ si_init_atom(sctx, &sctx->shader_userdata.atom, &sctx->atoms.s.shader_userdata,
+ si_emit_shader_userdata);
/* Set default and immutable mappings. */
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
r600_need_dma_space(&ctx->b, ncopy * 5);
- r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
RADEON_PRIO_MIN);
- r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
for (i = 0; i < ncopy; i++) {
ncopy = (size / SI_DMA_COPY_MAX_SIZE_DW) + !!(size % SI_DMA_COPY_MAX_SIZE_DW);
r600_need_dma_space(&ctx->b, ncopy * 9);
- r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
RADEON_USAGE_READ, RADEON_PRIO_MIN);
- r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
for (i = 0; i < ncopy; i++) {
#include "si_pipe.h"
/* initialize */
-void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
- boolean count_draw_in)
+void si_need_cs_space(struct si_context *ctx)
{
struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
- int i;
-
- /* If the CS is sufficiently large, don't count the space needed
- * and just flush if there is less than 8096 dwords left. */
- if (cs->max_dw >= 24 * 1024) {
- if (cs->cdw > cs->max_dw - 8 * 1024)
- ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
- return;
- }
/* There are two memory usage counters in the winsys for all buffers
* that have been added (cs_add_reloc) and two counters in the pipe
* driver for those that haven't been added yet.
- * */
- if (!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs, ctx->b.vram, ctx->b.gtt)) {
+ */
+ if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs,
+ ctx->b.vram, ctx->b.gtt))) {
ctx->b.gtt = 0;
ctx->b.vram = 0;
ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
ctx->b.gtt = 0;
ctx->b.vram = 0;
- /* The number of dwords we already used in the CS so far. */
- num_dw += cs->cdw;
-
- if (count_draw_in) {
- for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
- if (ctx->atoms.array[i]->dirty) {
- num_dw += ctx->atoms.array[i]->num_dw;
- }
- }
-
- /* The number of dwords all the dirty states would take. */
- num_dw += si_pm4_dirty_dw(ctx);
-
- /* The upper-bound of how much a draw command would take. */
- num_dw += SI_MAX_DRAW_CS_DWORDS;
- }
-
- /* Count in queries_suspend. */
- num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
- ctx->b.num_cs_dw_timer_queries_suspend;
-
- /* Count in streamout_end at the end of CS. */
- if (ctx->b.streamout.begin_emitted) {
- num_dw += ctx->b.streamout.num_dw_for_end;
- }
-
- /* Count in render_condition(NULL) at the end of CS. */
- if (ctx->b.predicate_drawing) {
- num_dw += 3;
- }
-
- /* Count in framebuffer cache flushes at the end of CS. */
- num_dw += ctx->atoms.s.cache_flush->num_dw;
-
- if (ctx->screen->b.trace_bo)
- num_dw += SI_TRACE_CS_DWORDS * 2;
-
- /* Flush if there's not enough space. */
- if (num_dw > cs->max_dw) {
+ /* If the CS is sufficiently large, don't count the space needed
+ * and just flush if there is not enough space left.
+ */
+ if (unlikely(cs->cdw > cs->max_dw - 2048))
ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
- }
}
void si_context_gfx_flush(void *context, unsigned flags,
SI_CONTEXT_INV_TC_L2 |
/* this is probably not needed anymore */
SI_CONTEXT_PS_PARTIAL_FLUSH;
- si_emit_cache_flush(&ctx->b, NULL);
+ si_emit_cache_flush(ctx, NULL);
/* force to keep tiling flags */
flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
/* The CS initialization should be emitted before everything else. */
si_pm4_emit(ctx, ctx->init_config);
- si_mark_atom_dirty(ctx, &ctx->clip_regs);
+ ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
+ ctx->framebuffer.dirty_zsbuf = true;
si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+
+ si_mark_atom_dirty(ctx, &ctx->clip_regs);
+ si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
si_mark_atom_dirty(ctx, &ctx->msaa_config);
+ si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
+ si_mark_atom_dirty(ctx, &ctx->cb_target_mask);
+ si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
si_mark_atom_dirty(ctx, &ctx->db_render_state);
+ si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
+ si_mark_atom_dirty(ctx, &ctx->spi_map);
si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
si_all_descriptors_begin_new_cs(ctx);
+ ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+ ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+ si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+ si_mark_atom_dirty(ctx, &ctx->viewports.atom);
+
r600_postflush_resume_features(&ctx->b);
ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;
pipe_resource_reference(&sctx->gsvs_ring, NULL);
pipe_resource_reference(&sctx->tf_ring, NULL);
pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
- r600_resource_reference(&sctx->border_color_table, NULL);
+ r600_resource_reference(&sctx->border_color_buffer, NULL);
+ free(sctx->border_color_table);
r600_resource_reference(&sctx->scratch_buffer, NULL);
sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
si_pm4_free_state(sctx, sctx->init_config, ~0);
- si_pm4_delete_state(sctx, gs_rings, sctx->gs_rings);
- si_pm4_delete_state(sctx, tf_ring, sctx->tf_state);
for (i = 0; i < Elements(sctx->vgt_shader_config); i++)
si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
if (sctx->blitter)
util_blitter_destroy(sctx->blitter);
- si_pm4_cleanup(sctx);
-
r600_common_context_cleanup(&sctx->b);
#if HAVE_LLVM >= 0x0306
sscreen->b.trace_bo->cs_buf : NULL);
sctx->b.rings.gfx.flush = si_context_gfx_flush;
- si_init_all_descriptors(sctx);
-
- /* Initialize cache_flush. */
- sctx->cache_flush = si_atom_cache_flush;
- sctx->atoms.s.cache_flush = &sctx->cache_flush;
-
- sctx->msaa_sample_locs = si_atom_msaa_sample_locs;
- sctx->atoms.s.msaa_sample_locs = &sctx->msaa_sample_locs;
+ /* Border colors. */
+ sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
+ sizeof(*sctx->border_color_table));
+ if (!sctx->border_color_table)
+ goto fail;
- sctx->msaa_config = si_atom_msaa_config;
- sctx->atoms.s.msaa_config = &sctx->msaa_config;
+ sctx->border_color_buffer = (struct r600_resource*)
+ pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT,
+ SI_MAX_BORDER_COLORS *
+ sizeof(*sctx->border_color_table));
+ if (!sctx->border_color_buffer)
+ goto fail;
- sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom;
- sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom;
+ sctx->border_color_map =
+ ws->buffer_map(sctx->border_color_buffer->cs_buf,
+ NULL, PIPE_TRANSFER_WRITE);
+ if (!sctx->border_color_map)
+ goto fail;
+ si_init_all_descriptors(sctx);
si_init_state_functions(sctx);
si_init_shader_functions(sctx);
goto fail;
sctx->blitter->draw_rectangle = r600_draw_rectangle;
+ sctx->sample_mask.sample_mask = 0xffff;
+
/* these must be last */
si_begin_new_cs(sctx);
r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */
if (sctx->b.chip_class == CIK) {
sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
PIPE_USAGE_DEFAULT, 16);
+ if (!sctx->null_const_buf.buffer)
+ goto fail;
sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
r600_target = radeon_llvm_get_r600_target(triple);
sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
r600_get_llvm_processor_name(sscreen->b.family),
- sctx->b.chip_class >= VI ?
- "+DumpCode" :
- "+DumpCode,+vgpr-spilling",
+ "+DumpCode,+vgpr-spilling",
LLVMCodeGenLevelDefault,
LLVMRelocDefault,
LLVMCodeModelDefault);
return &sctx->b.b;
fail:
+ fprintf(stderr, "radeonsi: Failed to create a context.\n");
si_destroy_context(&sctx->b.b);
return NULL;
}
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
+ case PIPE_CAP_TEXTURE_GATHER_SM5:
+ case PIPE_CAP_TGSI_TXQS:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return 4;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
- case PIPE_CAP_TEXTURE_QUERY_LOD:
- case PIPE_CAP_TEXTURE_GATHER_SM5:
- return HAVE_LLVM >= 0x0305;
- case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
- return HAVE_LLVM >= 0x0305 ? 4 : 0;
-
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
return 8;
case PIPE_CAP_MAX_VIEWPORTS:
- return 16;
+ return SI_MAX_VIEWPORTS;
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIMESTAMP:
#define SI_RESTART_INDEX_UNKNOWN INT_MIN
#define SI_NUM_SMOOTH_AA_SAMPLES 8
-#define SI_TRACE_CS_DWORDS 7
-
-#define SI_MAX_DRAW_CS_DWORDS \
- (/*scratch:*/ 3 + /*derived prim state:*/ 3 + \
- /*draw regs:*/ 18 + /*draw packets:*/ 31 +\
- /*derived tess state:*/ 19)
-
/* Instruction cache. */
#define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
/* Cache used by scalar memory (SMEM) instructions. They also use TC
#define SI_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000)
#define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff)
+#define SI_MAX_VIEWPORTS 16
+#define SI_MAX_BORDER_COLORS 4096
+
struct si_compute;
struct si_screen {
struct r600_common_screen b;
};
+struct si_blend_color {
+ struct r600_atom atom;
+ struct pipe_blend_color state;
+};
+
struct si_sampler_view {
struct pipe_sampler_view base;
struct list_head list;
struct si_sampler_state {
uint32_t val[4];
- uint32_t border_color[4];
};
struct si_cs_shader_state {
unsigned cb0_is_integer;
unsigned compressed_cb_mask;
unsigned export_16bpc;
+ unsigned dirty_cbufs;
+ bool dirty_zsbuf;
};
-#define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0]))
+struct si_clip_state {
+ struct r600_atom atom;
+ struct pipe_clip_state state;
+};
+
+struct si_sample_mask {
+ struct r600_atom atom;
+ uint16_t sample_mask;
+};
+
+struct si_scissors {
+ struct r600_atom atom;
+ unsigned dirty_mask;
+ struct pipe_scissor_state states[SI_MAX_VIEWPORTS];
+};
+
+struct si_viewports {
+ struct r600_atom atom;
+ unsigned dirty_mask;
+ struct pipe_viewport_state states[SI_MAX_VIEWPORTS];
+};
struct si_context {
struct r600_common_context b;
void *custom_blend_fastclear;
void *pstipple_sampler_state;
struct si_screen *screen;
- struct si_pm4_state *init_config;
struct pipe_fence_handle *last_gfx_fence;
struct si_shader_selector *fixed_func_tcs_shader;
+ LLVMTargetMachineRef tm;
- union {
- struct {
- /* The order matters. */
- struct r600_atom *cache_flush;
- struct r600_atom *streamout_begin;
- struct r600_atom *streamout_enable; /* must be after streamout_begin */
- struct r600_atom *framebuffer;
- struct r600_atom *msaa_sample_locs;
- struct r600_atom *db_render_state;
- struct r600_atom *msaa_config;
- struct r600_atom *clip_regs;
- struct r600_atom *shader_userdata;
- } s;
- struct r600_atom *array[0];
- } atoms;
+ /* Atoms (direct states). */
+ union si_state_atoms atoms;
+ unsigned dirty_atoms; /* mask */
+ /* PM4 states (precomputed immutable states) */
+ union si_state queued;
+ union si_state emitted;
+ /* Atom declarations. */
+ struct r600_atom cache_flush;
struct si_framebuffer framebuffer;
- struct si_vertex_element *vertex_elements;
- /* for saving when using blitter */
- struct pipe_stencil_ref stencil_ref;
+ struct r600_atom msaa_sample_locs;
+ struct r600_atom db_render_state;
+ struct r600_atom msaa_config;
+ struct si_sample_mask sample_mask;
+ struct r600_atom cb_target_mask;
+ struct si_blend_color blend_color;
+ struct r600_atom clip_regs;
+ struct si_clip_state clip_state;
+ struct si_shader_data shader_userdata;
+ struct si_scissors scissors;
+ struct si_viewports viewports;
+ struct si_stencil_ref stencil_ref;
+ struct r600_atom spi_map;
+
+ /* Precomputed states. */
+ struct si_pm4_state *init_config;
+ struct si_pm4_state *vgt_shader_config[4];
+ /* With rasterizer discard, there doesn't have to be a pixel shader.
+ * In that case, we bind this one: */
+ void *dummy_pixel_shader;
+
/* shaders */
struct si_shader_selector *ps_shader;
struct si_shader_selector *gs_shader;
struct si_shader_selector *tcs_shader;
struct si_shader_selector *tes_shader;
struct si_cs_shader_state cs_shader_state;
- struct si_shader_data shader_userdata;
+
/* shader information */
+ struct si_vertex_element *vertex_elements;
unsigned sprite_coord_enable;
bool flatshade;
+
+ /* shader descriptors */
struct si_descriptors vertex_buffers;
struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
struct si_buffer_resources rw_buffers[SI_NUM_SHADERS];
struct si_textures_info samplers[SI_NUM_SHADERS];
- struct r600_resource *scratch_buffer;
- struct r600_resource *border_color_table;
- unsigned border_color_offset;
- struct r600_atom clip_regs;
- struct r600_atom msaa_sample_locs;
- struct r600_atom msaa_config;
- int ps_iter_samples;
- bool smoothing_enabled;
+ /* other shader resources */
+ struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
+ struct pipe_resource *esgs_ring;
+ struct pipe_resource *gsvs_ring;
+ struct pipe_resource *tf_ring;
+ union pipe_color_union *border_color_table; /* in CPU memory, any endian */
+ struct r600_resource *border_color_buffer;
+ union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */
+ unsigned border_color_count;
/* Vertex and index buffers. */
- bool vertex_buffers_dirty;
- struct pipe_index_buffer index_buffer;
- struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS];
-
- /* With rasterizer discard, there doesn't have to be a pixel shader.
- * In that case, we bind this one: */
- void *dummy_pixel_shader;
- struct r600_atom cache_flush;
- struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
-
- /* VGT states. */
- struct si_pm4_state *vgt_shader_config[4];
- struct si_pm4_state *gs_rings;
- struct pipe_resource *esgs_ring;
- struct pipe_resource *gsvs_ring;
- struct si_pm4_state *tf_state;
- struct pipe_resource *tf_ring;
+ bool vertex_buffers_dirty;
+ struct pipe_index_buffer index_buffer;
+ struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS];
- LLVMTargetMachineRef tm;
-
- /* SI state handling */
- union si_state queued;
- union si_state emitted;
+ /* MSAA config state. */
+ int ps_iter_samples;
+ bool smoothing_enabled;
/* DB render state. */
- struct r600_atom db_render_state;
bool dbcb_depth_copy_enabled;
bool dbcb_stencil_copy_enabled;
unsigned dbcb_copy_sample;
int last_rast_prim;
unsigned last_sc_line_stipple;
int current_rast_prim; /* primitive type after TES, GS */
+ unsigned last_gsvs_itemsize;
/* Scratch buffer */
+ struct r600_resource *scratch_buffer;
boolean emit_scratch_reloc;
unsigned scratch_waves;
unsigned spi_tmpring_size;
void si_context_gfx_flush(void *context, unsigned flags,
struct pipe_fence_handle **fence);
void si_begin_new_cs(struct si_context *ctx);
-void si_need_cs_space(struct si_context *ctx, unsigned num_dw, boolean count_draw_in);
+void si_need_cs_space(struct si_context *ctx);
/* si_compute.c */
void si_init_compute_functions(struct si_context *sctx);
si_set_atom_dirty(struct si_context *sctx,
struct r600_atom *atom, bool dirty)
{
- atom->dirty = dirty;
+ unsigned bit = 1 << (atom->id - 1);
+
+ if (dirty)
+ sctx->dirty_atoms |= bit;
+ else
+ sctx->dirty_atoms &= ~bit;
}
static inline void
{
for (int i = 0; i < state->nbo; ++i)
r600_resource_reference(&state->bo[i], NULL);
+ r600_resource_reference(&state->indirect_buffer, NULL);
FREE(state);
}
si_pm4_free_state_simple(state);
}
-unsigned si_pm4_dirty_dw(struct si_context *sctx)
-{
- unsigned count = 0;
-
- for (int i = 0; i < NUMBER_OF_STATES; ++i) {
- struct si_pm4_state *state = sctx->queued.array[i];
-
- if (!state || sctx->emitted.array[i] == state)
- continue;
-
- count += state->ndw;
- }
-
- return count;
-}
-
void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+
for (int i = 0; i < state->nbo; ++i) {
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, state->bo[i],
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, state->bo[i],
state->bo_usage[i], state->bo_priority[i]);
}
- memcpy(&cs->buf[cs->cdw], state->pm4, state->ndw * 4);
+ if (!state->indirect_buffer) {
+ radeon_emit_array(cs, state->pm4, state->ndw);
+ } else {
+ struct r600_resource *ib = state->indirect_buffer;
- for (int i = 0; i < state->nrelocs; ++i) {
- cs->buf[cs->cdw + state->relocs[i]] += cs->cdw << 2;
- }
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib,
+ RADEON_USAGE_READ, RADEON_PRIO_MIN);
- cs->cdw += state->ndw;
+ radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+ radeon_emit(cs, ib->gpu_address);
+ radeon_emit(cs, (ib->gpu_address >> 32) & 0xffff);
+ radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
+ }
}
void si_pm4_emit_dirty(struct si_context *sctx)
memset(&sctx->emitted, 0, sizeof(sctx->emitted));
}
-void si_pm4_cleanup(struct si_context *sctx)
+void si_pm4_upload_indirect_buffer(struct si_context *sctx,
+ struct si_pm4_state *state)
{
- for (int i = 0; i < NUMBER_OF_STATES; ++i) {
- si_pm4_free_state(sctx, sctx->queued.array[i], i);
+ struct pipe_screen *screen = sctx->b.b.screen;
+ unsigned aligned_ndw = align(state->ndw, 8);
+
+ /* only supported on CIK and later */
+ if (sctx->b.chip_class < CIK)
+ return;
+
+ assert(state->ndw);
+ assert(aligned_ndw <= SI_PM4_MAX_DW);
+
+ r600_resource_reference(&state->indirect_buffer, NULL);
+ state->indirect_buffer = (struct r600_resource*)
+ pipe_buffer_create(screen, PIPE_BIND_CUSTOM,
+ PIPE_USAGE_DEFAULT, aligned_ndw * 4);
+ if (!state->indirect_buffer)
+ return;
+
+ /* Pad the IB to 8 DWs to meet CP fetch alignment requirements. */
+ if (sctx->screen->b.info.gfx_ib_pad_with_type2) {
+ for (int i = state->ndw; i < aligned_ndw; i++)
+ state->pm4[i] = 0x80000000; /* type2 nop packet */
+ } else {
+ for (int i = state->ndw; i < aligned_ndw; i++)
+ state->pm4[i] = 0xffff1000; /* type3 nop packet */
}
+
+ pipe_buffer_write(&sctx->b.b, &state->indirect_buffer->b.b,
+ 0, aligned_ndw *4, state->pm4);
}
#include "radeon/radeon_winsys.h"
-#define SI_PM4_MAX_DW 256
-#define SI_PM4_MAX_BO 32
-#define SI_PM4_MAX_RELOCS 4
+#define SI_PM4_MAX_DW 160
+#define SI_PM4_MAX_BO 1
// forward defines
struct si_context;
struct si_pm4_state
{
+ /* optional indirect buffer */
+ struct r600_resource *indirect_buffer;
+
/* PKT3_SET_*_REG handling */
unsigned last_opcode;
unsigned last_reg;
enum radeon_bo_usage bo_usage[SI_PM4_MAX_BO];
enum radeon_bo_priority bo_priority[SI_PM4_MAX_BO];
- /* relocs for shader data */
- unsigned nrelocs;
- unsigned relocs[SI_PM4_MAX_RELOCS];
-
bool compute_pkt;
};
struct r600_resource *bo,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority);
+void si_pm4_upload_indirect_buffer(struct si_context *sctx,
+ struct si_pm4_state *state);
void si_pm4_free_state_simple(struct si_pm4_state *state);
void si_pm4_free_state(struct si_context *sctx,
struct si_pm4_state *state,
unsigned idx);
-unsigned si_pm4_dirty_dw(struct si_context *sctx);
void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state);
void si_pm4_emit_dirty(struct si_context *sctx);
void si_pm4_reset_emitted(struct si_context *sctx);
-void si_pm4_cleanup(struct si_context *sctx);
#endif
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data);
-static bool tgsi_is_shadow_sampler(unsigned target)
+static bool tgsi_is_array_sampler(unsigned target)
{
- return target == TGSI_TEXTURE_SHADOW1D ||
+ return target == TGSI_TEXTURE_1D_ARRAY ||
target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
- target == TGSI_TEXTURE_SHADOW2D ||
+ target == TGSI_TEXTURE_2D_ARRAY ||
target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
- target == TGSI_TEXTURE_SHADOWCUBE ||
+ target == TGSI_TEXTURE_CUBE_ARRAY ||
target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
- target == TGSI_TEXTURE_SHADOWRECT;
+ target == TGSI_TEXTURE_2D_ARRAY_MSAA;
+}
+
+static void set_tex_fetch_args(struct gallivm_state *gallivm,
+ struct lp_build_emit_data *emit_data,
+ unsigned opcode, unsigned target,
+ LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
+ LLVMValueRef *param, unsigned count,
+ unsigned dmask)
+{
+ unsigned num_args;
+ unsigned is_rect = target == TGSI_TEXTURE_RECT;
+ LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+
+ /* Pad to power of two vector */
+ while (count < util_next_power_of_two(count))
+ param[count++] = LLVMGetUndef(i32);
+
+ /* Texture coordinates. */
+ if (count > 1)
+ emit_data->args[0] = lp_build_gather_values(gallivm, param, count);
+ else
+ emit_data->args[0] = param[0];
+
+ /* Resource. */
+ emit_data->args[1] = res_ptr;
+ num_args = 2;
+
+ if (opcode == TGSI_OPCODE_TXF || opcode == TGSI_OPCODE_TXQ)
+ emit_data->dst_type = LLVMVectorType(i32, 4);
+ else {
+ emit_data->dst_type = LLVMVectorType(
+ LLVMFloatTypeInContext(gallivm->context), 4);
+
+ emit_data->args[num_args++] = samp_ptr;
+ }
+
+ emit_data->args[num_args++] = lp_build_const_int32(gallivm, dmask);
+ emit_data->args[num_args++] = lp_build_const_int32(gallivm, is_rect); /* unorm */
+ emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* r128 */
+ emit_data->args[num_args++] = lp_build_const_int32(gallivm,
+ tgsi_is_array_sampler(target)); /* da */
+ emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* glc */
+ emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* slc */
+ emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* tfe */
+ emit_data->args[num_args++] = lp_build_const_int32(gallivm, 0); /* lwe */
+
+ emit_data->arg_count = num_args;
}
static const struct lp_build_tgsi_action tex_action;
-static void tex_fetch_args(
+static void tex_fetch_ptrs(
struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
+ struct lp_build_emit_data * emit_data,
+ LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
{
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
const struct tgsi_full_instruction * inst = emit_data->inst;
- unsigned opcode = inst->Instruction.Opcode;
unsigned target = inst->Texture.Texture;
- LLVMValueRef coords[5], derivs[6];
- LLVMValueRef address[16];
- int ref_pos;
- unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
- unsigned count = 0;
- unsigned chan;
unsigned sampler_src;
unsigned sampler_index;
- unsigned num_deriv_channels = 0;
- bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false;
- LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
- res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
- res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index);
+ *res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+ *res_ptr = build_indexed_load_const(si_shader_ctx, *res_ptr, ind_index);
- samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
- samp_ptr = build_indexed_load_const(si_shader_ctx, samp_ptr, ind_index);
+ *samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
+ *samp_ptr = build_indexed_load_const(si_shader_ctx, *samp_ptr, ind_index);
if (target == TGSI_TEXTURE_2D_MSAA ||
target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
ind_index = LLVMBuildAdd(gallivm->builder, ind_index,
lp_build_const_int32(gallivm,
SI_FMASK_TEX_OFFSET), "");
- fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
- fmask_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index);
+ *fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+ *fmask_ptr = build_indexed_load_const(si_shader_ctx, *fmask_ptr, ind_index);
}
} else {
- res_ptr = si_shader_ctx->resources[sampler_index];
- samp_ptr = si_shader_ctx->samplers[sampler_index];
- fmask_ptr = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
+ *res_ptr = si_shader_ctx->resources[sampler_index];
+ *samp_ptr = si_shader_ctx->samplers[sampler_index];
+ *fmask_ptr = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
+ }
+}
+
+static void tex_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ unsigned opcode = inst->Instruction.Opcode;
+ unsigned target = inst->Texture.Texture;
+ LLVMValueRef coords[5], derivs[6];
+ LLVMValueRef address[16];
+ int ref_pos;
+ unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
+ unsigned count = 0;
+ unsigned chan;
+ unsigned num_deriv_channels = 0;
+ bool has_offset = inst->Texture.NumOffsets > 0;
+ LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
+ LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+ unsigned dmask = 0xf;
+
+ tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
+
+ if (opcode == TGSI_OPCODE_TXQ) {
+ if (target == TGSI_TEXTURE_BUFFER) {
+ LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
+
+ /* Read the size from the buffer descriptor directly. */
+ LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
+ LLVMValueRef size = LLVMBuildExtractElement(builder, res,
+ lp_build_const_int32(gallivm, 6), "");
+
+ if (si_shader_ctx->screen->b.chip_class >= VI) {
+ /* On VI, the descriptor contains the size in bytes,
+ * but TXQ must return the size in elements.
+ * The stride is always non-zero for resources using TXQ.
+ */
+ LLVMValueRef stride =
+ LLVMBuildExtractElement(builder, res,
+ lp_build_const_int32(gallivm, 5), "");
+ stride = LLVMBuildLShr(builder, stride,
+ lp_build_const_int32(gallivm, 16), "");
+ stride = LLVMBuildAnd(builder, stride,
+ lp_build_const_int32(gallivm, 0x3FFF), "");
+
+ size = LLVMBuildUDiv(builder, size, stride, "");
+ }
+
+ emit_data->args[0] = size;
+ return;
+ }
+
+ /* Textures - set the mip level. */
+ address[count++] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
+
+ set_tex_fetch_args(gallivm, emit_data, opcode, target, res_ptr,
+ NULL, address, count, 0xf);
+ return;
}
if (target == TGSI_TEXTURE_BUFFER) {
address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
/* Pack depth comparison value */
- if (tgsi_is_shadow_sampler(target) && opcode != TGSI_OPCODE_LODQ) {
+ if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
} else {
for (chan = 0; chan < count; chan++ ) {
address[chan] = LLVMBuildBitCast(gallivm->builder,
- address[chan],
- LLVMInt32TypeInContext(gallivm->context),
- "");
+ address[chan], i32, "");
}
/* Adjust the sample index according to FMASK.
}
txf_address[3] = bld_base->uint_bld.zero;
- /* Pad to a power-of-two size. */
- while (txf_count < util_next_power_of_two(txf_count))
- txf_address[txf_count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-
/* Read FMASK using TXF. */
inst.Instruction.Opcode = TGSI_OPCODE_TXF;
- inst.Texture.Texture = target == TGSI_TEXTURE_2D_MSAA ? TGSI_TEXTURE_2D : TGSI_TEXTURE_2D_ARRAY;
+ inst.Texture.Texture = target;
txf_emit_data.inst = &inst;
txf_emit_data.chan = 0;
- txf_emit_data.dst_type = LLVMVectorType(
- LLVMInt32TypeInContext(gallivm->context), 4);
- txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count);
- txf_emit_data.args[1] = fmask_ptr;
- txf_emit_data.args[2] = lp_build_const_int32(gallivm, inst.Texture.Texture);
- txf_emit_data.arg_count = 3;
-
+ set_tex_fetch_args(gallivm, &txf_emit_data, TGSI_OPCODE_TXF,
+ target, fmask_ptr, NULL,
+ txf_address, txf_count, 0xf);
build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data);
/* Initialize some constants. */
final_sample, address[sample_chan], "");
}
- /* Resource */
- emit_data->args[1] = res_ptr;
-
if (opcode == TGSI_OPCODE_TXF) {
/* add tex offsets */
if (inst->Texture.NumOffsets) {
/* texture offsets do not apply to other texture targets */
}
}
+ }
- emit_data->args[2] = lp_build_const_int32(gallivm, target);
- emit_data->arg_count = 3;
+ if (opcode == TGSI_OPCODE_TG4) {
+ unsigned gather_comp = 0;
- emit_data->dst_type = LLVMVectorType(
- LLVMInt32TypeInContext(gallivm->context),
- 4);
- } else if (opcode == TGSI_OPCODE_TG4 ||
- opcode == TGSI_OPCODE_LODQ ||
- has_offset) {
- unsigned is_array = target == TGSI_TEXTURE_1D_ARRAY ||
- target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
- target == TGSI_TEXTURE_2D_ARRAY ||
- target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
- target == TGSI_TEXTURE_CUBE_ARRAY ||
- target == TGSI_TEXTURE_SHADOWCUBE_ARRAY;
- unsigned is_rect = target == TGSI_TEXTURE_RECT;
- unsigned dmask = 0xf;
-
- if (opcode == TGSI_OPCODE_TG4) {
- unsigned gather_comp = 0;
-
- /* DMASK was repurposed for GATHER4. 4 components are always
- * returned and DMASK works like a swizzle - it selects
- * the component to fetch. The only valid DMASK values are
- * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
- * (red,red,red,red) etc.) The ISA document doesn't mention
- * this.
- */
+ /* DMASK was repurposed for GATHER4. 4 components are always
+ * returned and DMASK works like a swizzle - it selects
+ * the component to fetch. The only valid DMASK values are
+ * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
+ * (red,red,red,red) etc.) The ISA document doesn't mention
+ * this.
+ */
- /* Get the component index from src1.x for Gather4. */
- if (!tgsi_is_shadow_sampler(target)) {
- LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
- LLVMValueRef comp_imm;
- struct tgsi_src_register src1 = inst->Src[1].Register;
+ /* Get the component index from src1.x for Gather4. */
+ if (!tgsi_is_shadow_target(target)) {
+ LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
+ LLVMValueRef comp_imm;
+ struct tgsi_src_register src1 = inst->Src[1].Register;
- assert(src1.File == TGSI_FILE_IMMEDIATE);
+ assert(src1.File == TGSI_FILE_IMMEDIATE);
- comp_imm = imms[src1.Index][src1.SwizzleX];
- gather_comp = LLVMConstIntGetZExtValue(comp_imm);
- gather_comp = CLAMP(gather_comp, 0, 3);
- }
-
- dmask = 1 << gather_comp;
+ comp_imm = imms[src1.Index][src1.SwizzleX];
+ gather_comp = LLVMConstIntGetZExtValue(comp_imm);
+ gather_comp = CLAMP(gather_comp, 0, 3);
}
- emit_data->args[2] = samp_ptr;
- emit_data->args[3] = lp_build_const_int32(gallivm, dmask);
- emit_data->args[4] = lp_build_const_int32(gallivm, is_rect); /* unorm */
- emit_data->args[5] = lp_build_const_int32(gallivm, 0); /* r128 */
- emit_data->args[6] = lp_build_const_int32(gallivm, is_array); /* da */
- emit_data->args[7] = lp_build_const_int32(gallivm, 0); /* glc */
- emit_data->args[8] = lp_build_const_int32(gallivm, 0); /* slc */
- emit_data->args[9] = lp_build_const_int32(gallivm, 0); /* tfe */
- emit_data->args[10] = lp_build_const_int32(gallivm, 0); /* lwe */
-
- emit_data->arg_count = 11;
-
- emit_data->dst_type = LLVMVectorType(
- LLVMFloatTypeInContext(gallivm->context),
- 4);
- } else {
- emit_data->args[2] = samp_ptr;
- emit_data->args[3] = lp_build_const_int32(gallivm, target);
- emit_data->arg_count = 4;
-
- emit_data->dst_type = LLVMVectorType(
- LLVMFloatTypeInContext(gallivm->context),
- 4);
+ dmask = 1 << gather_comp;
}
- /* The fetch opcode has been converted to a 2D array fetch.
- * This simplifies the LLVM backend. */
- if (target == TGSI_TEXTURE_CUBE_ARRAY)
- target = TGSI_TEXTURE_2D_ARRAY;
- else if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
- target = TGSI_TEXTURE_SHADOW2D_ARRAY;
-
- /* Pad to power of two vector */
- while (count < util_next_power_of_two(count))
- address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-
- emit_data->args[0] = lp_build_gather_values(gallivm, address, count);
+ set_tex_fetch_args(gallivm, emit_data, opcode, target, res_ptr,
+ samp_ptr, address, count, dmask);
}
static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
unsigned opcode = emit_data->inst->Instruction.Opcode;
unsigned target = emit_data->inst->Texture.Texture;
char intr_name[127];
- bool has_offset = HAVE_LLVM >= 0x0305 ?
- emit_data->inst->Texture.NumOffsets > 0 : false;
+ bool has_offset = emit_data->inst->Texture.NumOffsets > 0;
+ bool is_shadow = tgsi_is_shadow_target(target);
+ char type[64];
+ const char *name = "llvm.SI.image.sample";
+ const char *infix = "";
+
+ if (opcode == TGSI_OPCODE_TXQ && target == TGSI_TEXTURE_BUFFER) {
+ /* Just return the buffer size. */
+ emit_data->output[emit_data->chan] = emit_data->args[0];
+ return;
+ }
if (target == TGSI_TEXTURE_BUFFER) {
emit_data->output[emit_data->chan] = lp_build_intrinsic(
return;
}
- if (opcode == TGSI_OPCODE_TG4 ||
- opcode == TGSI_OPCODE_LODQ ||
- (opcode != TGSI_OPCODE_TXF && has_offset)) {
- bool is_shadow = tgsi_is_shadow_sampler(target);
- const char *name = "llvm.SI.image.sample";
- const char *infix = "";
-
- switch (opcode) {
- case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TEX2:
- case TGSI_OPCODE_TXP:
- break;
- case TGSI_OPCODE_TXB:
- case TGSI_OPCODE_TXB2:
- infix = ".b";
- break;
- case TGSI_OPCODE_TXL:
- case TGSI_OPCODE_TXL2:
- infix = ".l";
- break;
- case TGSI_OPCODE_TXD:
- infix = ".d";
- break;
- case TGSI_OPCODE_TG4:
- name = "llvm.SI.gather4";
- break;
- case TGSI_OPCODE_LODQ:
- name = "llvm.SI.getlod";
- is_shadow = false;
- has_offset = false;
- break;
- default:
- assert(0);
- return;
- }
+ switch (opcode) {
+ case TGSI_OPCODE_TXF:
+ name = target == TGSI_TEXTURE_2D_MSAA ||
+ target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
+ "llvm.SI.image.load" :
+ "llvm.SI.image.load.mip";
+ is_shadow = false;
+ has_offset = false;
+ break;
+ case TGSI_OPCODE_TXQ:
+ name = "llvm.SI.getresinfo";
+ is_shadow = false;
+ has_offset = false;
+ break;
+ case TGSI_OPCODE_LODQ:
+ name = "llvm.SI.getlod";
+ is_shadow = false;
+ has_offset = false;
+ break;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TEX2:
+ case TGSI_OPCODE_TXP:
+ break;
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXB2:
+ infix = ".b";
+ break;
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXL2:
+ infix = ".l";
+ break;
+ case TGSI_OPCODE_TXD:
+ infix = ".d";
+ break;
+ case TGSI_OPCODE_TG4:
+ name = "llvm.SI.gather4";
+ break;
+ default:
+ assert(0);
+ return;
+ }
- /* Add the type and suffixes .c, .o if needed. */
- sprintf(intr_name, "%s%s%s%s.v%ui32", name,
- is_shadow ? ".c" : "", infix, has_offset ? ".o" : "",
+ if (LLVMGetTypeKind(LLVMTypeOf(emit_data->args[0])) == LLVMVectorTypeKind)
+ sprintf(type, ".v%ui32",
LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
+ else
+ strcpy(type, ".i32");
- emit_data->output[emit_data->chan] = lp_build_intrinsic(
- base->gallivm->builder, intr_name, emit_data->dst_type,
- emit_data->args, emit_data->arg_count,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
- } else {
- LLVMTypeRef i8, v16i8, v32i8;
- const char *name;
-
- switch (opcode) {
- case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TEX2:
- case TGSI_OPCODE_TXP:
- name = "llvm.SI.sample";
- break;
- case TGSI_OPCODE_TXB:
- case TGSI_OPCODE_TXB2:
- name = "llvm.SI.sampleb";
- break;
- case TGSI_OPCODE_TXD:
- name = "llvm.SI.sampled";
- break;
- case TGSI_OPCODE_TXF:
- name = "llvm.SI.imageload";
- break;
- case TGSI_OPCODE_TXL:
- case TGSI_OPCODE_TXL2:
- name = "llvm.SI.samplel";
- break;
- default:
- assert(0);
- return;
- }
+ /* Add the type and suffixes .c, .o if needed. */
+ sprintf(intr_name, "%s%s%s%s%s",
+ name, is_shadow ? ".c" : "", infix,
+ has_offset ? ".o" : "", type);
- i8 = LLVMInt8TypeInContext(base->gallivm->context);
- v16i8 = LLVMVectorType(i8, 16);
- v32i8 = LLVMVectorType(i8, 32);
+ emit_data->output[emit_data->chan] = lp_build_intrinsic(
+ base->gallivm->builder, intr_name, emit_data->dst_type,
+ emit_data->args, emit_data->arg_count,
+ LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
- emit_data->args[1] = LLVMBuildBitCast(base->gallivm->builder,
- emit_data->args[1], v32i8, "");
- if (opcode != TGSI_OPCODE_TXF) {
- emit_data->args[2] = LLVMBuildBitCast(base->gallivm->builder,
- emit_data->args[2], v16i8, "");
- }
+ /* Divide the number of layers by 6 to get the number of cubes. */
+ if (opcode == TGSI_OPCODE_TXQ &&
+ (target == TGSI_TEXTURE_CUBE_ARRAY ||
+ target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)) {
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
+ LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);
- sprintf(intr_name, "%s.v%ui32", name,
- LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
+ LLVMValueRef v4 = emit_data->output[emit_data->chan];
+ LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
+ z = LLVMBuildSDiv(builder, z, six, "");
- emit_data->output[emit_data->chan] = lp_build_intrinsic(
- base->gallivm->builder, intr_name, emit_data->dst_type,
- emit_data->args, emit_data->arg_count,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+ emit_data->output[emit_data->chan] =
+ LLVMBuildInsertElement(builder, v4, z, two, "");
}
}
-static void txq_fetch_args(
+static void si_llvm_emit_txqs(
+ const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
- struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
- const struct tgsi_full_instruction *inst = emit_data->inst;
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
- unsigned target = inst->Texture.Texture;
- LLVMValueRef res_ptr;
-
- if (inst->Src[1].Register.Indirect) {
- const struct tgsi_full_src_register *reg = &inst->Src[1];
- LLVMValueRef ind_index;
-
- ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
-
- res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
- res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr,
- ind_index);
- } else
- res_ptr = si_shader_ctx->resources[inst->Src[1].Register.Index];
-
- if (target == TGSI_TEXTURE_BUFFER) {
- LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
- LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
-
- /* Read the size from the buffer descriptor directly. */
- LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
- LLVMValueRef size = LLVMBuildExtractElement(builder, res,
- lp_build_const_int32(gallivm, 6), "");
-
- if (si_shader_ctx->screen->b.chip_class >= VI) {
- /* On VI, the descriptor contains the size in bytes,
- * but TXQ must return the size in elements.
- * The stride is always non-zero for resources using TXQ.
- */
- LLVMValueRef stride =
- LLVMBuildExtractElement(builder, res,
- lp_build_const_int32(gallivm, 5), "");
- stride = LLVMBuildLShr(builder, stride,
- lp_build_const_int32(gallivm, 16), "");
- stride = LLVMBuildAnd(builder, stride,
- lp_build_const_int32(gallivm, 0x3FFF), "");
-
- size = LLVMBuildUDiv(builder, size, stride, "");
- }
-
- emit_data->args[0] = size;
- return;
- }
-
- /* Mip level */
- emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
-
- /* Resource */
- emit_data->args[1] = res_ptr;
-
- /* Texture target */
- if (target == TGSI_TEXTURE_CUBE_ARRAY ||
- target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
- target = TGSI_TEXTURE_2D_ARRAY;
-
- emit_data->args[2] = lp_build_const_int32(bld_base->base.gallivm,
- target);
-
- emit_data->arg_count = 3;
-
- emit_data->dst_type = LLVMVectorType(
- LLVMInt32TypeInContext(bld_base->base.gallivm->context),
- 4);
-}
-
-static void build_txq_intrinsic(const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- unsigned target = emit_data->inst->Texture.Texture;
+ LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+ LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
+ LLVMValueRef res, samples;
+ LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
- if (target == TGSI_TEXTURE_BUFFER) {
- /* Just return the buffer size. */
- emit_data->output[emit_data->chan] = emit_data->args[0];
- return;
- }
+ tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
- build_tgsi_intrinsic_nomem(action, bld_base, emit_data);
- /* Divide the number of layers by 6 to get the number of cubes. */
- if (target == TGSI_TEXTURE_CUBE_ARRAY ||
- target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
- LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);
+ /* Read the samples from the descriptor directly. */
+ res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
+ samples = LLVMBuildExtractElement(
+ builder, res,
+ lp_build_const_int32(gallivm, 3), "");
+ samples = LLVMBuildLShr(builder, samples,
+ lp_build_const_int32(gallivm, 16), "");
+ samples = LLVMBuildAnd(builder, samples,
+ lp_build_const_int32(gallivm, 0xf), "");
+ samples = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1),
+ samples, "");
- LLVMValueRef v4 = emit_data->output[emit_data->chan];
- LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
- z = LLVMBuildSDiv(builder, z, six, "");
-
- emit_data->output[emit_data->chan] =
- LLVMBuildInsertElement(builder, v4, z, two, "");
- }
+ emit_data->output[emit_data->chan] = samples;
}
/*
.emit = build_tex_intrinsic,
};
-static const struct lp_build_tgsi_action txq_action = {
- .fetch_args = txq_fetch_args,
- .emit = build_txq_intrinsic,
- .intr_name = "llvm.SI.resinfo"
-};
-
static const struct lp_build_tgsi_action interp_action = {
.fetch_args = interp_fetch_args,
.emit = build_interp_intrinsic,
{
const struct radeon_shader_binary *binary = &shader->binary;
unsigned i;
+ int r;
bool dump = r600_can_dump_shader(&sscreen->b,
shader->selector ? shader->selector->tokens : NULL);
si_shader_binary_read_config(sscreen, shader, 0);
- si_shader_binary_upload(sscreen, shader);
+ r = si_shader_binary_upload(sscreen, shader);
+ if (r)
+ return r;
if (dump) {
if (!(sscreen->b.debug_flags & DBG_NO_ASM)) {
bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
- bld_base->op_actions[TGSI_OPCODE_TXQ] = txq_action;
+ bld_base->op_actions[TGSI_OPCODE_TXQ] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
+ bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
unsigned gs_output_prim;
unsigned gs_max_out_vertices;
unsigned gs_num_invocations;
+ unsigned gsvs_itemsize;
/* masks of "get_unique_index" bits */
uint64_t inputs_read;
uint64_t outputs_written;
uint32_t patch_outputs_written;
+ uint32_t ps_colors_written;
};
/* Valid shader configurations:
return &sctx->gs_shader->info;
else if (sctx->tes_shader)
return &sctx->tes_shader->info;
- else
+ else if (sctx->vs_shader)
return &sctx->vs_shader->info;
+ else
+ return NULL;
}
static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
#include "sid.h"
#include "radeon/r600_cs.h"
+#include "util/u_dual_blend.h"
#include "util/u_format.h"
#include "util/u_format_s3tc.h"
#include "util/u_memory.h"
#include "util/u_pstipple.h"
-static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
- void (*emit_func)(struct si_context *ctx, struct r600_atom *state),
- unsigned num_dw)
+/* Initialize an external atom (owned by ../radeon). */
+static void
+si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
+ struct r600_atom **list_elem)
+{
+ atom->id = list_elem - sctx->atoms.array + 1;
+ *list_elem = atom;
+}
+
+/* Initialize an atom owned by radeonsi. */
+void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
+ struct r600_atom **list_elem,
+ void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
{
atom->emit = (void*)emit_func;
- atom->num_dw = num_dw;
- atom->dirty = false;
+ atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
*list_elem = atom;
}
* - The COLOR1 format isn't INVALID because of possible dual-source blending,
* so COLOR1 is enabled pretty much all the time.
* So CB_TARGET_MASK is the only register that can disable COLOR1.
+ *
+ * Another reason is to avoid a hang with dual source blending.
*/
-static void si_update_fb_blend_state(struct si_context *sctx)
+static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *atom)
{
- struct si_pm4_state *pm4;
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
struct si_state_blend *blend = sctx->queued.named.blend;
uint32_t mask = 0, i;
- if (blend == NULL)
- return;
-
- pm4 = CALLOC_STRUCT(si_pm4_state);
- if (pm4 == NULL)
- return;
-
for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
if (sctx->framebuffer.state.cbufs[i])
mask |= 0xf << (4*i);
- mask &= blend->cb_target_mask;
- si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
- si_pm4_set_state(sctx, fb_blend, pm4);
+ /* The blend state is optional here; without one, every bound
+ * colorbuffer keeps all four channels enabled.
+ */
+ if (blend)
+ mask &= blend->cb_target_mask;
+
+ /* Avoid a hang that happens when dual source blending is enabled
+ * but there are not enough color outputs. This is undefined behavior,
+ * so disable color writes completely.
+ *
+ * Reproducible with Unigine Heaven 4.0 and drirc missing.
+ *
+ * blend is treated as optional just above, so it must be checked
+ * again before reading dual_src_blend; the pixel shader pointer is
+ * checked as well before it is dereferenced.
+ */
+ if (blend && blend->dual_src_blend &&
+ sctx->ps_shader &&
+ (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
+ mask = 0;
+
+ radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
}
/*
return NULL;
blend->alpha_to_one = state->alpha_to_one;
+ blend->dual_src_blend = util_blend_state_is_dual(state, 0);
if (state->logicop_enable) {
color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
{
struct si_context *sctx = (struct si_context *)ctx;
si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
- si_update_fb_blend_state(sctx);
+ si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
}
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
const struct pipe_blend_color *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
- if (pm4 == NULL)
- return;
+ if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0)
+ return;
+
+ sctx->blend_color.state = *state;
+ si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
+}
- si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0]));
- si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1]));
- si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2]));
- si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3]));
+static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- si_pm4_set_state(sctx, blend_color, pm4);
+ radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
+ radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
}
/*
const struct pipe_clip_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
struct pipe_constant_buffer cb;
- if (pm4 == NULL)
+ if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
return;
- for (int i = 0; i < 6; i++) {
- si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16,
- fui(state->ucp[i][0]));
- si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16,
- fui(state->ucp[i][1]));
- si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16,
- fui(state->ucp[i][2]));
- si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16,
- fui(state->ucp[i][3]));
- }
+ sctx->clip_state.state = *state;
+ si_mark_atom_dirty(sctx, &sctx->clip_state.atom);
cb.buffer = NULL;
cb.user_buffer = state->ucp;
cb.buffer_size = 4*4*8;
ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb);
pipe_resource_reference(&cb.buffer, NULL);
+}
+
+static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- si_pm4_set_state(sctx, clip, pm4);
+ radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
+ radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
}
#define SIX_BITS 0x3F
unsigned clipdist_mask =
info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
- r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
+ radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) |
(sctx->queued.named.rasterizer->clip_plane_enable &
clipdist_mask));
- r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
+ radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
sctx->queued.named.rasterizer->pa_cl_clip_cntl |
(clipdist_mask ? 0 :
sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) |
const struct pipe_scissor_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_state_scissor *scissor;
- struct si_pm4_state *pm4;
int i;
- for (i = start_slot; i < start_slot + num_scissors; i++) {
- int idx = i - start_slot;
- int offset = i * 4 * 2;
+ for (i = 0; i < num_scissors; i++)
+ sctx->scissors.states[start_slot + i] = state[i];
- scissor = CALLOC_STRUCT(si_state_scissor);
- if (scissor == NULL)
- return;
- pm4 = &scissor->pm4;
- scissor->scissor = state[idx];
- si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset,
- S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) |
- S_028250_WINDOW_OFFSET_DISABLE(1));
- si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset,
- S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy));
- si_pm4_set_state(sctx, scissor[i], scissor);
+ sctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+ si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+}
+
+/* Emit the scissor rectangles whose bits are set in scissors.dirty_mask. */
+static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct pipe_scissor_state *states = sctx->scissors.states;
+ unsigned mask = sctx->scissors.dirty_mask;
+
+ /* The simple case: Only 1 viewport is active.
+ *
+ * si_get_vs_info() returns NULL when no vertex-stage shader is bound;
+ * treat that like a shader that doesn't write the viewport index
+ * instead of dereferencing NULL.
+ */
+ if (mask & 1 &&
+ (!si_get_vs_info(sctx) ||
+ !si_get_vs_info(sctx)->writes_viewport_index)) {
+ radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
+ radeon_emit(cs, S_028250_TL_X(states[0].minx) |
+ S_028250_TL_Y(states[0].miny) |
+ S_028250_WINDOW_OFFSET_DISABLE(1));
+ radeon_emit(cs, S_028254_BR_X(states[0].maxx) |
+ S_028254_BR_Y(states[0].maxy));
+ sctx->scissors.dirty_mask &= ~1; /* clear one bit */
+ return;
}
+
+ /* Emit each run of consecutive dirty slots as one register sequence. */
+ while (mask) {
+ int start, count, i;
+
+ u_bit_scan_consecutive_range(&mask, &start, &count);
+
+ radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
+ start * 4 * 2, count * 2);
+ for (i = start; i < start+count; i++) {
+ radeon_emit(cs, S_028250_TL_X(states[i].minx) |
+ S_028250_TL_Y(states[i].miny) |
+ S_028250_WINDOW_OFFSET_DISABLE(1));
+ radeon_emit(cs, S_028254_BR_X(states[i].maxx) |
+ S_028254_BR_Y(states[i].maxy));
+ }
+ }
+ sctx->scissors.dirty_mask = 0;
}
static void si_set_viewport_states(struct pipe_context *ctx,
const struct pipe_viewport_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_state_viewport *viewport;
- struct si_pm4_state *pm4;
int i;
- for (i = start_slot; i < start_slot + num_viewports; i++) {
- int idx = i - start_slot;
- int offset = i * 4 * 6;
+ for (i = 0; i < num_viewports; i++)
+ sctx->viewports.states[start_slot + i] = state[i];
- viewport = CALLOC_STRUCT(si_state_viewport);
- if (!viewport)
- return;
- pm4 = &viewport->pm4;
+ sctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
+ si_mark_atom_dirty(sctx, &sctx->viewports.atom);
+}
+
+/* Emit the viewport transforms whose bits are set in viewports.dirty_mask. */
+static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct pipe_viewport_state *states = sctx->viewports.states;
+ unsigned mask = sctx->viewports.dirty_mask;
+
+ /* The simple case: Only 1 viewport is active.
+ *
+ * si_get_vs_info() returns NULL when no vertex-stage shader is bound;
+ * treat that like a shader that doesn't write the viewport index
+ * instead of dereferencing NULL.
+ */
+ if (mask & 1 &&
+ (!si_get_vs_info(sctx) ||
+ !si_get_vs_info(sctx)->writes_viewport_index)) {
+ radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
+ radeon_emit(cs, fui(states[0].scale[0]));
+ radeon_emit(cs, fui(states[0].translate[0]));
+ radeon_emit(cs, fui(states[0].scale[1]));
+ radeon_emit(cs, fui(states[0].translate[1]));
+ radeon_emit(cs, fui(states[0].scale[2]));
+ radeon_emit(cs, fui(states[0].translate[2]));
+ sctx->viewports.dirty_mask &= ~1; /* clear one bit */
+ return;
+ }
+
+ /* Emit each run of consecutive dirty slots as one register sequence. */
+ while (mask) {
+ int start, count, i;
- viewport->viewport = state[idx];
- si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE + offset, fui(state[idx].scale[0]));
- si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET + offset, fui(state[idx].translate[0]));
- si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE + offset, fui(state[idx].scale[1]));
- si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET + offset, fui(state[idx].translate[1]));
- si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE + offset, fui(state[idx].scale[2]));
- si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET + offset, fui(state[idx].translate[2]));
+ u_bit_scan_consecutive_range(&mask, &start, &count);
- si_pm4_set_state(sctx, viewport[i], viewport);
+ radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
+ start * 4 * 6, count * 6);
+ for (i = start; i < start+count; i++) {
+ radeon_emit(cs, fui(states[i].scale[0]));
+ radeon_emit(cs, fui(states[i].translate[0]));
+ radeon_emit(cs, fui(states[i].scale[1]));
+ radeon_emit(cs, fui(states[i].translate[1]));
+ radeon_emit(cs, fui(states[i].scale[2]));
+ radeon_emit(cs, fui(states[i].translate[2]));
+ }
}
+ sctx->viewports.dirty_mask = 0;
}
/*
* inferred state between framebuffer and rasterizer
*/
-static void si_update_fb_rs_state(struct si_context *sctx)
+static void si_update_poly_offset_state(struct si_context *sctx)
{
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
- struct si_pm4_state *pm4;
- float offset_units;
- if (!rs || !sctx->framebuffer.state.zsbuf)
+ if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
return;
- offset_units = sctx->queued.named.rasterizer->offset_units;
switch (sctx->framebuffer.state.zsbuf->texture->format) {
- case PIPE_FORMAT_S8_UINT_Z24_UNORM:
- case PIPE_FORMAT_X8Z24_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- offset_units *= 2.0f;
+ case PIPE_FORMAT_Z16_UNORM:
+ si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
+ break;
+ default: /* 24-bit */
+ si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
break;
case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- offset_units *= 1.0f;
- break;
- case PIPE_FORMAT_Z16_UNORM:
- offset_units *= 4.0f;
+ si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
break;
- default:
- return;
}
-
- pm4 = CALLOC_STRUCT(si_pm4_state);
-
- if (pm4 == NULL)
- return;
-
- /* FIXME some of those reg can be computed with cso */
- si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
- fui(sctx->queued.named.rasterizer->offset_scale));
- si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));
- si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
- fui(sctx->queued.named.rasterizer->offset_scale));
- si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));
-
- si_pm4_set_state(sctx, fb_rs, pm4);
}
/*
{
struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
struct si_pm4_state *pm4 = &rs->pm4;
- unsigned tmp;
+ unsigned tmp, i;
float psize_min, psize_max;
if (rs == NULL) {
rs->poly_stipple_enable = state->poly_stipple_enable;
rs->line_smooth = state->line_smooth;
rs->poly_smooth = state->poly_smooth;
+ rs->uses_poly_offset = state->offset_point || state->offset_line ||
+ state->offset_tri;
rs->flatshade = state->flatshade;
rs->sprite_coord_enable = state->sprite_coord_enable;
S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
- /* offset */
- rs->offset_units = state->offset_units;
- rs->offset_scale = state->offset_scale * 16.0f;
-
si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
S_0286D4_FLAT_SHADE_ENA(1) |
S_0286D4_PNT_SPRITE_ENA(1) |
state->fill_back != PIPE_POLYGON_MODE_FILL) |
S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
+
+ /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
+ for (i = 0; i < 3; i++) {
+ struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
+ float offset_units = state->offset_units;
+ float offset_scale = state->offset_scale * 16.0f;
+
+ switch (i) {
+ case 0: /* 16-bit zbuffer */
+ offset_units *= 4.0f;
+ break;
+ case 1: /* 24-bit zbuffer */
+ offset_units *= 2.0f;
+ break;
+ case 2: /* 32-bit zbuffer */
+ offset_units *= 1.0f;
+ break;
+ }
+
+ si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
+ fui(offset_scale));
+ si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
+ fui(offset_units));
+ si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
+ fui(offset_scale));
+ si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
+ fui(offset_units));
+ }
+
return rs;
}
si_mark_atom_dirty(sctx, &sctx->db_render_state);
si_pm4_bind_state(sctx, rasterizer, rs);
- si_update_fb_rs_state(sctx);
+ si_update_poly_offset_state(sctx);
si_mark_atom_dirty(sctx, &sctx->clip_regs);
}
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
+
+ if (sctx->queued.named.rasterizer == state)
+ si_pm4_bind_state(sctx, poly_offset, NULL);
si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
}
/*
* infeered state between dsa and stencil ref
*/
-static void si_update_dsa_stencil_ref(struct si_context *sctx)
+static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
{
- struct si_pm4_state *pm4;
- struct pipe_stencil_ref *ref = &sctx->stencil_ref;
- struct si_state_dsa *dsa = sctx->queued.named.dsa;
-
- if (!dsa)
- return;
-
- pm4 = CALLOC_STRUCT(si_pm4_state);
- if (pm4 == NULL)
- return;
-
- si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK,
- S_028430_STENCILTESTVAL(ref->ref_value[0]) |
- S_028430_STENCILMASK(dsa->valuemask[0]) |
- S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
- S_028430_STENCILOPVAL(1));
- si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF,
- S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
- S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
- S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
- S_028434_STENCILOPVAL_BF(1));
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
+ struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
- si_pm4_set_state(sctx, dsa_stencil_ref, pm4);
+ radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
+ radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
+ S_028430_STENCILMASK(dsa->valuemask[0]) |
+ S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
+ S_028430_STENCILOPVAL(1));
+ radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
+ S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
+ S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
+ S_028434_STENCILOPVAL_BF(1));
}
-static void si_set_pipe_stencil_ref(struct pipe_context *ctx,
- const struct pipe_stencil_ref *state)
+static void si_set_stencil_ref(struct pipe_context *ctx,
+ const struct pipe_stencil_ref *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- sctx->stencil_ref = *state;
- si_update_dsa_stencil_ref(sctx);
+
+ if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
+ return;
+
+ sctx->stencil_ref.state = *state;
+ si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
}
return NULL;
}
- dsa->valuemask[0] = state->stencil[0].valuemask;
- dsa->valuemask[1] = state->stencil[1].valuemask;
- dsa->writemask[0] = state->stencil[0].writemask;
- dsa->writemask[1] = state->stencil[1].writemask;
+ dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
+ dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
+ dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
+ dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
return;
si_pm4_bind_state(sctx, dsa, dsa);
- si_update_dsa_stencil_ref(sctx);
+
+ if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
+ sizeof(struct si_dsa_stencil_ref_part)) != 0) {
+ sctx->stencil_ref.dsa_part = dsa->stencil_ref;
+ si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
+ }
}
static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned db_shader_control;
- r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
+ radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
/* DB_RENDER_CONTROL */
if (sctx->dbcb_depth_copy_enabled ||
/* DB_RENDER_OVERRIDE2 */
if (sctx->db_depth_disable_expclear) {
- r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
+ radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
} else {
- r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
+ radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
}
db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
- r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
+ radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
db_shader_control);
}
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+ /* Take the maximum of the old and new count. If the new count is lower,
+ * dirtying is needed to disable the unbound colorbuffers.
+ */
+ sctx->framebuffer.dirty_cbufs |=
+ (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
+ sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
+
util_copy_framebuffer_state(&sctx->framebuffer.state, state);
sctx->framebuffer.export_16bpc = 0;
r600_context_add_resource_size(ctx, surf->base.texture);
}
- si_update_fb_rs_state(sctx);
- si_update_fb_blend_state(sctx);
-
- sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3;
- sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
- sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
- sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
+ si_update_poly_offset_state(sctx);
+ si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
if (sctx->framebuffer.nr_samples != old_nr_samples) {
/* Colorbuffers. */
for (i = 0; i < nr_cbufs; i++) {
+ if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
+ continue;
+
cb = (struct r600_surface*)state->cbufs[i];
if (!cb) {
- r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
+ radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
S_028C70_FORMAT(V_028C70_COLOR_INVALID));
continue;
}
tex = (struct r600_texture *)cb->base.texture;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
&tex->resource, RADEON_USAGE_READWRITE,
tex->surface.nsamples > 1 ?
RADEON_PRIO_COLOR_BUFFER_MSAA :
RADEON_PRIO_COLOR_BUFFER);
if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
tex->cmask_buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_COLOR_META);
}
- r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
+ radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
sctx->b.chip_class >= VI ? 14 : 13);
radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */
radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */
radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */
}
/* set CB_COLOR1_INFO for possible dual-src blending */
- if (i == 1 && state->cbufs[0]) {
- r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
+ if (i == 1 && state->cbufs[0] &&
+ sctx->framebuffer.dirty_cbufs & (1 << 0)) {
+ radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
cb->cb_color_info | tex->cb_color_info);
i++;
}
- for (; i < 8 ; i++) {
- r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
- }
+ for (; i < 8 ; i++)
+ if (sctx->framebuffer.dirty_cbufs & (1 << i))
+ radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
/* ZS buffer. */
- if (state->zsbuf) {
+ if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
&rtex->resource, RADEON_USAGE_READWRITE,
zb->base.texture->nr_samples > 1 ?
RADEON_PRIO_DEPTH_BUFFER_MSAA :
RADEON_PRIO_DEPTH_BUFFER);
if (zb->db_htile_data_base) {
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rtex->htile_buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_DEPTH_META);
}
- r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
- r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
+ radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
+ radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
- r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
+ radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
radeon_emit(cs, zb->db_depth_info); /* R_02803C_DB_DEPTH_INFO */
radeon_emit(cs, zb->db_z_info | /* R_028040_DB_Z_INFO */
S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */
radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */
- r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
- r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
- r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+ radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
+ radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
+ radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
zb->pa_su_poly_offset_db_fmt_cntl);
- } else {
- r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
+ } else if (sctx->framebuffer.dirty_zsbuf) {
+ radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
}
/* Framebuffer dimensions. */
/* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
- r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
+ radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
+
+ sctx->framebuffer.dirty_cbufs = 0;
+ sctx->framebuffer.dirty_zsbuf = false;
}
-static void si_emit_msaa_sample_locs(struct r600_common_context *rctx,
+static void si_emit_msaa_sample_locs(struct si_context *sctx,
struct r600_atom *atom)
{
- struct si_context *sctx = (struct si_context *)rctx;
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
unsigned nr_samples = sctx->framebuffer.nr_samples;
SI_NUM_SMOOTH_AA_SAMPLES);
}
-const struct r600_atom si_atom_msaa_sample_locs = { si_emit_msaa_sample_locs, 18 }; /* number of CS dwords */
-
-static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom)
+static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
{
- struct si_context *sctx = (struct si_context *)rctx;
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0);
}
-const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */
static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
{
static void *si_create_sampler_state(struct pipe_context *ctx,
const struct pipe_sampler_state *state)
{
+ struct si_context *sctx = (struct si_context *)ctx;
struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
- unsigned border_color_type;
+ unsigned border_color_type, border_color_index = 0;
if (rstate == NULL) {
return NULL;
}
- if (sampler_state_needs_border_color(state))
- border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
- else
+ if (!sampler_state_needs_border_color(state))
+ border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+ else if (state->border_color.f[0] == 0 &&
+ state->border_color.f[1] == 0 &&
+ state->border_color.f[2] == 0 &&
+ state->border_color.f[3] == 0)
border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+ else if (state->border_color.f[0] == 0 &&
+ state->border_color.f[1] == 0 &&
+ state->border_color.f[2] == 0 &&
+ state->border_color.f[3] == 1)
+ border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
+ else if (state->border_color.f[0] == 1 &&
+ state->border_color.f[1] == 1 &&
+ state->border_color.f[2] == 1 &&
+ state->border_color.f[3] == 1)
+ border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
+ else {
+ int i;
+
+ border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
+
+ /* Check if the border has been uploaded already. */
+ for (i = 0; i < sctx->border_color_count; i++)
+ if (memcmp(&sctx->border_color_table[i], &state->border_color,
+ sizeof(state->border_color)) == 0)
+ break;
+
+ if (i >= SI_MAX_BORDER_COLORS) {
+ /* Getting 4096 unique border colors is very unlikely. */
+ fprintf(stderr, "radeonsi: The border color table is full. "
+ "Any new border colors will be just black. "
+ "Please file a bug.\n");
+ border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+ } else {
+ if (i == sctx->border_color_count) {
+ /* Upload a new border color. */
+ memcpy(&sctx->border_color_table[i], &state->border_color,
+ sizeof(state->border_color));
+ util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
+ &state->border_color,
+ sizeof(state->border_color));
+ sctx->border_color_count++;
+ }
+
+ border_color_index = i;
+ }
+ }
rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
- rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
-
- if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
- memcpy(rstate->border_color, state->border_color.ui,
- sizeof(rstate->border_color));
- }
-
+ rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
+ S_008F3C_BORDER_COLOR_TYPE(border_color_type);
return rstate;
}
-/* Upload border colors and update the pointers in resource descriptors.
- * There can only be 4096 border colors per context.
- *
- * XXX: This is broken if the buffer gets reallocated.
- */
-static void si_set_border_colors(struct si_context *sctx, unsigned count,
- void **states)
-{
- struct si_sampler_state **rstates = (struct si_sampler_state **)states;
- uint32_t *border_color_table = NULL;
- int i, j;
-
- for (i = 0; i < count; i++) {
- if (rstates[i] &&
- G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
- V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
- if (!sctx->border_color_table ||
- ((sctx->border_color_offset + count - i) &
- C_008F3C_BORDER_COLOR_PTR)) {
- r600_resource_reference(&sctx->border_color_table, NULL);
- sctx->border_color_offset = 0;
-
- sctx->border_color_table =
- si_resource_create_custom(&sctx->screen->b.b,
- PIPE_USAGE_DYNAMIC,
- 4096 * 4 * 4);
- }
-
- if (!border_color_table) {
- border_color_table =
- sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf,
- sctx->b.rings.gfx.cs,
- PIPE_TRANSFER_WRITE |
- PIPE_TRANSFER_UNSYNCHRONIZED);
- }
-
- for (j = 0; j < 4; j++) {
- border_color_table[4 * sctx->border_color_offset + j] =
- util_le32_to_cpu(rstates[i]->border_color[j]);
- }
-
- rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
- rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++);
- }
- }
-
- if (border_color_table) {
- struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-
- uint64_t va_offset = sctx->border_color_table->gpu_address;
-
- si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
- if (sctx->b.chip_class >= CIK)
- si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
- si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_DATA);
- si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
- }
-}
-
-static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
- unsigned start, unsigned count,
- void **states)
+static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
{
struct si_context *sctx = (struct si_context *)ctx;
- if (!count || shader >= SI_NUM_SHADERS)
+ if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
return;
- si_set_border_colors(sctx, count, states);
- si_set_sampler_descriptors(sctx, shader, start, count, states);
+ sctx->sample_mask.sample_mask = sample_mask;
+ si_mark_atom_dirty(sctx, &sctx->sample_mask.atom);
}
-static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
+static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask);
- struct si_pm4_state *pm4 = &state->pm4;
- uint16_t mask = sample_mask;
-
- if (state == NULL)
- return;
-
- state->sample_mask = mask;
- si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16));
- si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16));
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ unsigned mask = sctx->sample_mask.sample_mask;
- si_pm4_set_state(sctx, sample_mask, state);
+ radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
+ radeon_emit(cs, mask | (mask << 16));
+ radeon_emit(cs, mask | (mask << 16));
}
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
int i;
- assert(count < PIPE_MAX_ATTRIBS);
+ assert(count < SI_MAX_ATTRIBS);
if (!v)
return NULL;
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
bool include_draw_vbo)
{
- si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo);
+ si_need_cs_space((struct si_context*)ctx);
}
static void si_init_config(struct si_context *sctx);
void si_init_state_functions(struct si_context *sctx)
{
- si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
- si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
- si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
+ si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
+ si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
+
+ si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush);
+ si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
+ si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
+ si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
+ si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
+ si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
+ si_init_atom(sctx, &sctx->cb_target_mask, &sctx->atoms.s.cb_target_mask, si_emit_cb_target_mask);
+ si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
+ si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
+ si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
+ si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors);
+ si_init_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports);
+ si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
sctx->b.b.create_blend_state = si_create_blend_state;
sctx->b.b.bind_blend_state = si_bind_blend_state;
sctx->b.b.set_clip_state = si_set_clip_state;
sctx->b.b.set_scissor_states = si_set_scissor_states;
sctx->b.b.set_viewport_states = si_set_viewport_states;
- sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref;
+ sctx->b.b.set_stencil_ref = si_set_stencil_ref;
sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
sctx->b.b.get_sample_position = cayman_get_sample_position;
sctx->b.b.create_sampler_state = si_create_sampler_state;
- sctx->b.b.bind_sampler_states = si_bind_sampler_states;
sctx->b.b.delete_sampler_state = si_delete_sampler_state;
sctx->b.b.create_sampler_view = si_create_sampler_view;
unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
unsigned raster_config, raster_config_1;
+ uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+ int i;
if (pm4 == NULL)
return;
- si_cmd_context_control(pm4);
+ si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
+ si_pm4_cmd_add(pm4, 0x80000000);
+ si_pm4_cmd_add(pm4, 0x80000000);
+ si_pm4_cmd_end(pm4, false);
si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
- si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0);
si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
+ for (i = 0; i < 16; i++) {
+ si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
+ si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
+ }
+
switch (sctx->screen->b.family) {
case CHIP_TAHITI:
case CHIP_PITCAIRN:
si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
- si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0);
- si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, fui(1.0));
si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
}
+ si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
+ if (sctx->b.chip_class >= CIK)
+ si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
+ si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
+ RADEON_PRIO_SHADER_DATA);
+
+ si_pm4_upload_indirect_buffer(sctx, pm4);
sctx->init_config = pm4;
}
#include "radeon/r600_pipe_common.h"
#define SI_NUM_SHADERS (PIPE_SHADER_TESS_EVAL+1)
+#define SI_MAX_ATTRIBS 16
struct si_screen;
struct si_shader;
struct si_pm4_state pm4;
uint32_t cb_target_mask;
bool alpha_to_one;
-};
-
-struct si_state_sample_mask {
- struct si_pm4_state pm4;
- uint16_t sample_mask;
-};
-
-struct si_state_scissor {
- struct si_pm4_state pm4;
- struct pipe_scissor_state scissor;
-};
-
-struct si_state_viewport {
- struct si_pm4_state pm4;
- struct pipe_viewport_state viewport;
+ bool dual_src_blend;
};
struct si_state_rasterizer {
struct si_pm4_state pm4;
+ /* poly offset states for 16-bit, 24-bit, and 32-bit zbuffers */
+ struct si_pm4_state pm4_poly_offset[3];
bool flatshade;
bool two_side;
bool multisample_enable;
unsigned pa_sc_line_stipple;
unsigned pa_cl_clip_cntl;
unsigned clip_plane_enable;
- float offset_units;
- float offset_scale;
bool poly_stipple_enable;
bool line_smooth;
bool poly_smooth;
+ bool uses_poly_offset;
};
-struct si_state_dsa {
- struct si_pm4_state pm4;
- unsigned alpha_func;
+struct si_dsa_stencil_ref_part {
uint8_t valuemask[2];
uint8_t writemask[2];
};
+struct si_state_dsa {
+ struct si_pm4_state pm4;
+ unsigned alpha_func;
+ struct si_dsa_stencil_ref_part stencil_ref;
+};
+
+struct si_stencil_ref {
+ struct r600_atom atom;
+ struct pipe_stencil_ref state;
+ struct si_dsa_stencil_ref_part dsa_part;
+};
+
struct si_vertex_element
{
unsigned count;
- uint32_t rsrc_word3[PIPE_MAX_ATTRIBS];
- uint32_t format_size[PIPE_MAX_ATTRIBS];
- struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS];
+ uint32_t rsrc_word3[SI_MAX_ATTRIBS];
+ uint32_t format_size[SI_MAX_ATTRIBS];
+ struct pipe_vertex_element elements[SI_MAX_ATTRIBS];
};
union si_state {
struct {
struct si_state_blend *blend;
- struct si_pm4_state *blend_color;
- struct si_pm4_state *clip;
- struct si_state_sample_mask *sample_mask;
- struct si_state_scissor *scissor[16];
- struct si_state_viewport *viewport[16];
struct si_state_rasterizer *rasterizer;
struct si_state_dsa *dsa;
- struct si_pm4_state *fb_rs;
- struct si_pm4_state *fb_blend;
- struct si_pm4_state *dsa_stencil_ref;
- struct si_pm4_state *ta_bordercolor_base;
+ struct si_pm4_state *poly_offset;
struct si_pm4_state *ls;
struct si_pm4_state *hs;
struct si_pm4_state *es;
struct si_pm4_state *gs;
- struct si_pm4_state *gs_rings;
- struct si_pm4_state *tf_ring;
struct si_pm4_state *vgt_shader_config;
struct si_pm4_state *vs;
struct si_pm4_state *ps;
- struct si_pm4_state *spi;
} named;
struct si_pm4_state *array[0];
};
+union si_state_atoms {
+ struct {
+ /* The order matters.
+ *
+ * Each named member below overlays one slot of array[], in
+ * declaration order. The state-emission loop walks the
+ * sctx->dirty_atoms bitmask and calls emit on atoms.array[bit],
+ * so a member's position here is also its bit index in that mask.
+ * NOTE(review): presumably lower bit indices are emitted first;
+ * confirm against u_bit_scan(). */
+ struct r600_atom *cache_flush;
+ struct r600_atom *streamout_begin;
+ struct r600_atom *streamout_enable; /* must be after streamout_begin */
+ struct r600_atom *framebuffer;
+ struct r600_atom *msaa_sample_locs;
+ struct r600_atom *db_render_state;
+ struct r600_atom *msaa_config;
+ struct r600_atom *sample_mask;
+ struct r600_atom *cb_target_mask;
+ struct r600_atom *blend_color;
+ struct r600_atom *clip_regs;
+ struct r600_atom *clip_state;
+ struct r600_atom *shader_userdata;
+ struct r600_atom *scissors;
+ struct r600_atom *viewports;
+ struct r600_atom *stencil_ref;
+ struct r600_atom *spi_map;
+ } s;
+ struct r600_atom *array[0];
+};
+
+#define SI_NUM_ATOMS (sizeof(union si_state_atoms)/sizeof(struct r600_atom*))
+
struct si_shader_data {
struct r600_atom atom;
uint32_t sh_base[SI_NUM_SHADERS];
#define SI_SO_BUF_OFFSET SI_NUM_RING_BUFFERS
#define SI_NUM_RW_BUFFERS (SI_SO_BUF_OFFSET + 4)
-#define SI_NUM_VERTEX_BUFFERS 16
+#define SI_NUM_VERTEX_BUFFERS SI_MAX_ATTRIBS
/* This represents descriptors in memory, such as buffer resources,
si_pm4_block_idx(member)); \
} while(0)
-#define si_pm4_set_state(sctx, member, value) \
- do { \
- if ((sctx)->queued.named.member != (value)) { \
- si_pm4_free_state(sctx, \
- (struct si_pm4_state *)(sctx)->queued.named.member, \
- si_pm4_block_idx(member)); \
- (sctx)->queued.named.member = (value); \
- } \
- } while(0)
-
/* si_descriptors.c */
-void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
- unsigned start, unsigned count, void **states);
void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
struct pipe_resource *buffer,
unsigned stride, unsigned num_records,
void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
const uint8_t *ptr, unsigned size, uint32_t *const_offset);
void si_shader_change_notify(struct si_context *sctx);
+void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom);
/* si_state.c */
struct si_shader_selector;
+void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
+ struct r600_atom **list_elem,
+ void (*emit_func)(struct si_context *ctx, struct r600_atom *state));
boolean si_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned force_level);
/* si_state_shader.c */
-void si_update_shaders(struct si_context *sctx);
+bool si_update_shaders(struct si_context *sctx);
void si_init_shader_functions(struct si_context *sctx);
/* si_state_draw.c */
-extern const struct r600_atom si_atom_cache_flush;
-extern const struct r600_atom si_atom_msaa_sample_locs;
-extern const struct r600_atom si_atom_msaa_config;
-void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *atom);
+void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom);
void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo);
void si_trace_emit(struct si_context *sctx);
-/* si_commands.c */
-void si_cmd_context_control(struct si_pm4_state *pm4);
-
#endif
/* Due to a hw bug, RSRC2_LS must be written twice with another
* LS register written in between. */
if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
- si_write_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
- si_write_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+ radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
+ radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
radeon_emit(cs, ls->current->ls_rsrc1);
radeon_emit(cs, ls_rsrc2);
((perpatch_output_offset / 16) << 16);
/* Set them for LS. */
- si_write_sh_reg(cs,
+ radeon_set_sh_reg(cs,
R_00B530_SPI_SHADER_USER_DATA_LS_0 + SI_SGPR_LS_OUT_LAYOUT * 4,
tcs_in_layout);
/* Set them for TCS. */
- si_write_sh_reg_seq(cs,
+ radeon_set_sh_reg_seq(cs,
R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OUT_OFFSETS * 4, 3);
radeon_emit(cs, tcs_out_offsets);
radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
radeon_emit(cs, tcs_in_layout);
/* Set them for TES. */
- si_write_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OUT_OFFSETS * 4, 2);
+ radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OUT_OFFSETS * 4, 2);
radeon_emit(cs, tcs_out_offsets);
radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26));
}
if (!sctx->emit_scratch_reloc)
return;
- r600_write_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
+ radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
sctx->spi_tmpring_size);
if (sctx->scratch_buffer) {
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
sctx->scratch_buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_SHADER_RESOURCE_RW);
rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)
return;
- r600_write_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
+ radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
rs->pa_sc_line_stipple |
S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 :
rast_prim == PIPE_PRIM_LINE_STRIP ? 2 : 0));
radeon_emit(cs, ia_multi_vgt_param); /* IA_MULTI_VGT_PARAM */
radeon_emit(cs, ls_hs_config); /* VGT_LS_HS_CONFIG */
} else {
- r600_write_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
- r600_write_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
- r600_write_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
+ radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
+ radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
+ radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
}
sctx->last_prim = prim;
sctx->last_multi_vgt_param = ia_multi_vgt_param;
}
if (gs_out_prim != sctx->last_gs_out_prim) {
- r600_write_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out_prim);
+ radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out_prim);
sctx->last_gs_out_prim = gs_out_prim;
}
/* Primitive restart. */
if (info->primitive_restart != sctx->last_primitive_restart_en) {
- r600_write_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info->primitive_restart);
+ radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info->primitive_restart);
sctx->last_primitive_restart_en = info->primitive_restart;
if (info->primitive_restart &&
(info->restart_index != sctx->last_restart_index ||
sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN)) {
- r600_write_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
+ radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
info->restart_index);
sctx->last_restart_index = info->restart_index;
}
uint64_t va = t->buf_filled_size->gpu_address +
t->buf_filled_size_offset;
- r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
+ radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
t->stride_in_dw);
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
radeon_emit(cs, 0); /* unused */
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
t->buf_filled_size, RADEON_USAGE_READ,
RADEON_PRIO_MIN);
}
sctx->last_base_vertex == SI_BASE_VERTEX_UNKNOWN ||
info->start_instance != sctx->last_start_instance ||
sh_base_reg != sctx->last_sh_base_reg) {
- si_write_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 2);
+ radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 2);
radeon_emit(cs, base_vertex);
radeon_emit(cs, info->start_instance);
} else {
si_invalidate_draw_sh_constants(sctx);
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource *)info->indirect,
RADEON_USAGE_READ, RADEON_PRIO_MIN);
}
ib->index_size;
uint64_t index_va = r600_resource(ib->buffer)->gpu_address + ib->offset;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource *)ib->buffer,
RADEON_USAGE_READ, RADEON_PRIO_MIN);
#define BOTH_ICACHE_KCACHE (SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_KCACHE)
-void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *atom)
+void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
{
+ struct r600_common_context *sctx = &si_ctx->b;
struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
uint32_t cp_coher_cntl = 0;
uint32_t compute =
sctx->flags = 0;
}
-const struct r600_atom si_atom_cache_flush = { si_emit_cache_flush, 24 }; /* number of CS dwords */
-
static void si_get_draw_start_count(struct si_context *sctx,
const struct pipe_draw_info *info,
unsigned *start, unsigned *count)
{
struct si_context *sctx = (struct si_context *)ctx;
struct pipe_index_buffer ib = {};
- unsigned i;
+ unsigned mask;
if (!info->count && !info->indirect &&
(info->indexed || !info->count_from_stream_output))
else
sctx->current_rast_prim = info->mode;
- si_update_shaders(sctx);
- if (!si_upload_shader_descriptors(sctx))
+ if (!si_update_shaders(sctx) ||
+ !si_upload_shader_descriptors(sctx))
return;
if (info->indexed) {
u_upload_alloc(sctx->b.uploader, start_offset, count * 2,
&out_offset, &out_buffer, &ptr);
+ if (!out_buffer) {
+ pipe_resource_reference(&ib.buffer, NULL);
+ return;
+ }
util_shorten_ubyte_elts_to_userptr(&sctx->b.b, &ib, 0,
ib.offset + start_offset,
u_upload_data(sctx->b.uploader, start_offset, count * ib.index_size,
(char*)ib.user_buffer + start_offset,
&ib.offset, &ib.buffer);
+ if (!ib.buffer)
+ return;
/* info->start will be added by the drawing code */
ib.offset -= start_offset;
}
if (sctx->b.flags)
si_mark_atom_dirty(sctx, sctx->atoms.s.cache_flush);
- si_need_cs_space(sctx, 0, TRUE);
+ si_need_cs_space(sctx);
/* Emit states. */
- for (i = 0; i < SI_NUM_ATOMS(sctx); i++) {
- if (sctx->atoms.array[i]->dirty) {
- sctx->atoms.array[i]->emit(&sctx->b, sctx->atoms.array[i]);
- sctx->atoms.array[i]->dirty = false;
- }
+ mask = sctx->dirty_atoms;
+ while (mask) {
+ struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
+
+ atom->emit(&sctx->b, atom);
}
+ sctx->dirty_atoms = 0;
si_pm4_emit_dirty(sctx);
si_emit_scratch_reloc(sctx);
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
sctx->trace_id++;
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
#include "si_pipe.h"
#include "si_shader.h"
#include "sid.h"
+#include "radeon/r600_cs.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
int i;
+ if (!sel)
+ return NULL;
+
sel->type = pipe_shader_type;
sel->tokens = tgsi_dup_tokens(state->tokens);
+ if (!sel->tokens) {
+ FREE(sel);
+ return NULL;
+ }
+
sel->so = state->stream_output;
tgsi_scan_shader(state->tokens, &sel->info);
p_atomic_inc(&sscreen->b.num_shaders_created);
sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
sel->gs_num_invocations =
sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
+ sel->gsvs_itemsize = sel->info.num_outputs * 16 *
+ sel->gs_max_out_vertices;
for (i = 0; i < sel->info.num_inputs; i++) {
unsigned name = sel->info.input_semantic_name[i];
}
}
break;
+ case PIPE_SHADER_FRAGMENT:
+ for (i = 0; i < sel->info.num_outputs; i++) {
+ unsigned name = sel->info.output_semantic_name[i];
+ unsigned index = sel->info.output_semantic_index[i];
+
+ if (name == TGSI_SEMANTIC_COLOR)
+ sel->ps_colors_written |= 1 << index;
+ }
+ break;
}
if (sscreen->b.debug_flags & DBG_PRECOMPILE)
- si_shader_select(ctx, sel);
+ if (si_shader_select(ctx, sel)) {
+ fprintf(stderr, "radeonsi: can't create a shader\n");
+ tgsi_free_tokens(sel->tokens);
+ FREE(sel);
+ return NULL;
+ }
return sel;
}
return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_EVAL);
}
+/**
+ * Normally, we only emit 1 viewport and 1 scissor if no shader is using
+ * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
+ * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
+ * called to emit the rest.
+ *
+ * This does nothing when si_get_vs_info() returns NULL or when the returned
+ * shader info does not have writes_viewport_index set. Otherwise the
+ * scissors atom and the viewports atom are each marked dirty, but only when
+ * the corresponding dirty_mask is non-zero.
+ *
+ * \param sctx  context whose scissors/viewports atoms may be marked dirty
+ */
+static void si_update_viewports_and_scissors(struct si_context *sctx)
+{
+ struct tgsi_shader_info *info = si_get_vs_info(sctx);
+
+ if (!info || !info->writes_viewport_index)
+ return;
+
+ if (sctx->scissors.dirty_mask)
+ si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+ if (sctx->viewports.dirty_mask)
+ si_mark_atom_dirty(sctx, &sctx->viewports.atom);
+}
+
static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
sctx->vs_shader = sel;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
+ si_update_viewports_and_scissors(sctx);
}
static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
if (enable_changed)
si_shader_change_notify(sctx);
+ si_update_viewports_and_scissors(sctx);
}
static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
si_shader_change_notify(sctx);
sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
}
+ si_update_viewports_and_scissors(sctx);
}
static void si_make_dummy_ps(struct si_context *sctx)
}
sctx->ps_shader = sel;
+ si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
}
static void si_delete_shader_selector(struct pipe_context *ctx,
si_delete_shader_selector(ctx, sel);
}
-static void si_update_spi_map(struct si_context *sctx)
+static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
struct si_shader *ps = sctx->ps_shader->current;
struct si_shader *vs = si_get_vs_state(sctx);
struct tgsi_shader_info *psinfo = &ps->selector->info;
struct tgsi_shader_info *vsinfo = &vs->selector->info;
- struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
- unsigned i, j, tmp;
+ unsigned i, j, tmp, num_written = 0;
+
+ if (!ps->nparam)
+ return;
+
+ radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam);
for (i = 0; i < psinfo->num_inputs; i++) {
unsigned name = psinfo->input_semantic_name[i];
tmp = S_028644_OFFSET(0x20);
}
- si_pm4_set_reg(pm4,
- R_028644_SPI_PS_INPUT_CNTL_0 + param_offset * 4,
- tmp);
+ assert(param_offset == num_written);
+ radeon_emit(cs, tmp);
+ num_written++;
if (name == TGSI_SEMANTIC_COLOR &&
ps->key.ps.color_two_side) {
goto bcolor;
}
}
-
- si_pm4_set_state(sctx, spi, pm4);
+ assert(ps->nparam == num_written);
}
/* Initialize state related to ESGS / GSVS ring buffers */
unsigned esgs_ring_size = 128 * 1024;
unsigned gsvs_ring_size = 60 * 1024 * 1024;
- assert(!sctx->gs_rings);
- sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);
+ assert(!sctx->esgs_ring && !sctx->gsvs_ring);
sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_DEFAULT, esgs_ring_size);
+ if (!sctx->esgs_ring)
+ return;
sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_DEFAULT, gsvs_ring_size);
+ if (!sctx->gsvs_ring) {
+ pipe_resource_reference(&sctx->esgs_ring, NULL);
+ return;
+ }
+ /* Append these registers to the init config state. */
if (sctx->b.chip_class >= CIK) {
if (sctx->b.chip_class >= VI) {
/* The maximum sizes are 63.999 MB on VI, because
assert(esgs_ring_size / 256 < (1 << 18));
assert(gsvs_ring_size / 256 < (1 << 18));
}
- si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE,
+ si_pm4_set_reg(sctx->init_config, R_030900_VGT_ESGS_RING_SIZE,
esgs_ring_size / 256);
- si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE,
+ si_pm4_set_reg(sctx->init_config, R_030904_VGT_GSVS_RING_SIZE,
gsvs_ring_size / 256);
} else {
- si_pm4_set_reg(sctx->gs_rings, R_0088C8_VGT_ESGS_RING_SIZE,
+ si_pm4_set_reg(sctx->init_config, R_0088C8_VGT_ESGS_RING_SIZE,
esgs_ring_size / 256);
- si_pm4_set_reg(sctx->gs_rings, R_0088CC_VGT_GSVS_RING_SIZE,
+ si_pm4_set_reg(sctx->init_config, R_0088CC_VGT_GSVS_RING_SIZE,
gsvs_ring_size / 256);
}
+ /* Flush the context to re-emit the init_config state.
+ * This is done only once in the lifetime of a context.
+ */
+ si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
+ sctx->b.initial_gfx_cs_size = 0; /* force flush */
+ si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
+
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
sctx->esgs_ring, 0, esgs_ring_size,
true, true, 4, 64, 0);
static void si_update_gs_rings(struct si_context *sctx)
{
- unsigned gs_vert_itemsize = sctx->gs_shader->info.num_outputs * 16;
- unsigned gs_max_vert_out = sctx->gs_shader->gs_max_out_vertices;
- unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+ unsigned gsvs_itemsize = sctx->gs_shader->gsvs_itemsize;
uint64_t offset;
+ if (gsvs_itemsize == sctx->last_gsvs_itemsize)
+ return;
+
+ sctx->last_gsvs_itemsize = gsvs_itemsize;
+
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
sctx->gsvs_ring, gsvs_itemsize,
64, true, true, 4, 16, 0);
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_3,
sctx->gsvs_ring, gsvs_itemsize,
64, true, true, 4, 16, offset);
-
}
+
/**
- * @returns 1 if \p sel has been updated to use a new scratch buffer and 0
- * otherwise.
+ * @returns 1 if \p sel has been updated to use a new scratch buffer
+ * 0 if not
+ * < 0 if there was a failure
*/
-static unsigned si_update_scratch_buffer(struct si_context *sctx,
+static int si_update_scratch_buffer(struct si_context *sctx,
struct si_shader_selector *sel)
{
struct si_shader *shader;
uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
+ int r;
if (!sel)
return 0;
si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
/* Replace the shader bo with a new bo that has the relocs applied. */
- si_shader_binary_upload(sctx->screen, shader);
+ r = si_shader_binary_upload(sctx->screen, shader);
+ if (r)
+ return r;
/* Update the shader state to use the new shader bo. */
si_shader_init_pm4_state(shader);
return bytes;
}
-static void si_update_spi_tmpring_size(struct si_context *sctx)
+static bool si_update_spi_tmpring_size(struct si_context *sctx)
{
unsigned current_scratch_buffer_size =
si_get_current_scratch_buffer_size(sctx);
si_get_max_scratch_bytes_per_wave(sctx);
unsigned scratch_needed_size = scratch_bytes_per_wave *
sctx->scratch_waves;
+ int r;
if (scratch_needed_size > 0) {
sctx->scratch_buffer =
si_resource_create_custom(&sctx->screen->b.b,
PIPE_USAGE_DEFAULT, scratch_needed_size);
+ if (!sctx->scratch_buffer)
+ return false;
+ sctx->emit_scratch_reloc = true;
}
/* Update the shaders, so they are using the latest scratch. The
* last used, so we still need to try to update them, even if
* they require scratch buffers smaller than the current size.
*/
- if (si_update_scratch_buffer(sctx, sctx->ps_shader))
+ r = si_update_scratch_buffer(sctx, sctx->ps_shader);
+ if (r < 0)
+ return false;
+ if (r == 1)
si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
- if (si_update_scratch_buffer(sctx, sctx->gs_shader))
+
+ r = si_update_scratch_buffer(sctx, sctx->gs_shader);
+ if (r < 0)
+ return false;
+ if (r == 1)
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
- if (si_update_scratch_buffer(sctx, sctx->tcs_shader))
+
+ r = si_update_scratch_buffer(sctx, sctx->tcs_shader);
+ if (r < 0)
+ return false;
+ if (r == 1)
si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
/* VS can be bound as LS, ES, or VS. */
if (sctx->tes_shader) {
- if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+ r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+ if (r < 0)
+ return false;
+ if (r == 1)
si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
} else if (sctx->gs_shader) {
- if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+ r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+ if (r < 0)
+ return false;
+ if (r == 1)
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
} else {
- if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+ r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+ if (r < 0)
+ return false;
+ if (r == 1)
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
}
/* TES can be bound as ES or VS. */
if (sctx->gs_shader) {
- if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+ r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+ if (r < 0)
+ return false;
+ if (r == 1)
si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
} else {
- if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+ r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+ if (r < 0)
+ return false;
+ if (r == 1)
si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
}
}
sctx->spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
+ return true;
}
static void si_init_tess_factor_ring(struct si_context *sctx)
{
- assert(!sctx->tf_state);
- sctx->tf_state = CALLOC_STRUCT(si_pm4_state);
+ assert(!sctx->tf_ring);
sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_DEFAULT,
32768 * sctx->screen->b.info.max_se);
- sctx->b.clear_buffer(&sctx->b.b, sctx->tf_ring, 0,
- sctx->tf_ring->width0, fui(0), false);
+ if (!sctx->tf_ring)
+ return;
+
assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
+ /* Append these registers to the init config state. */
if (sctx->b.chip_class >= CIK) {
- si_pm4_set_reg(sctx->tf_state, R_030938_VGT_TF_RING_SIZE,
+ si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
S_030938_SIZE(sctx->tf_ring->width0 / 4));
- si_pm4_set_reg(sctx->tf_state, R_030940_VGT_TF_MEMORY_BASE,
+ si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
r600_resource(sctx->tf_ring)->gpu_address >> 8);
} else {
- si_pm4_set_reg(sctx->tf_state, R_008988_VGT_TF_RING_SIZE,
+ si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE,
S_008988_SIZE(sctx->tf_ring->width0 / 4));
- si_pm4_set_reg(sctx->tf_state, R_0089B8_VGT_TF_MEMORY_BASE,
+ si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE,
r600_resource(sctx->tf_ring)->gpu_address >> 8);
}
- si_pm4_add_bo(sctx->tf_state, r600_resource(sctx->tf_ring),
- RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
- si_pm4_bind_state(sctx, tf_ring, sctx->tf_state);
+
+ /* Flush the context to re-emit the init_config state.
+ * This is done only once in the lifetime of a context.
+ */
+ si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
+ sctx->b.initial_gfx_cs_size = 0; /* force flush */
+ si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_TESS_CTRL,
SI_RING_TESS_FACTOR, sctx->tf_ring, 0,
sctx->tf_ring->width0, false, false, 0, 0, 0);
-
- sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
}
/**
sctx->fixed_func_tcs_shader =
ureg_create_shader_and_destroy(ureg, &sctx->b.b);
- assert(sctx->fixed_func_tcs_shader);
}
static void si_update_vgt_shader_config(struct si_context *sctx)
sctx->b.streamout.stride_in_dw = shader->so.stride;
}
-void si_update_shaders(struct si_context *sctx)
+bool si_update_shaders(struct si_context *sctx)
{
struct pipe_context *ctx = (struct pipe_context*)sctx;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ int r;
/* Update stages before GS. */
if (sctx->tes_shader) {
- if (!sctx->tf_state)
+ if (!sctx->tf_ring) {
si_init_tess_factor_ring(sctx);
+ if (!sctx->tf_ring)
+ return false;
+ }
/* VS as LS */
- si_shader_select(ctx, sctx->vs_shader);
+ r = si_shader_select(ctx, sctx->vs_shader);
+ if (r)
+ return false;
si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
if (sctx->tcs_shader) {
- si_shader_select(ctx, sctx->tcs_shader);
+ r = si_shader_select(ctx, sctx->tcs_shader);
+ if (r)
+ return false;
si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
} else {
- if (!sctx->fixed_func_tcs_shader)
+ if (!sctx->fixed_func_tcs_shader) {
si_generate_fixed_func_tcs(sctx);
- si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+ if (!sctx->fixed_func_tcs_shader)
+ return false;
+ }
+
+ r = si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+ if (r)
+ return false;
si_pm4_bind_state(sctx, hs,
sctx->fixed_func_tcs_shader->current->pm4);
}
- si_shader_select(ctx, sctx->tes_shader);
+ r = si_shader_select(ctx, sctx->tes_shader);
+ if (r)
+ return false;
+
if (sctx->gs_shader) {
/* TES as ES */
si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
}
} else if (sctx->gs_shader) {
/* VS as ES */
- si_shader_select(ctx, sctx->vs_shader);
+ r = si_shader_select(ctx, sctx->vs_shader);
+ if (r)
+ return false;
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
} else {
/* VS as VS */
- si_shader_select(ctx, sctx->vs_shader);
+ r = si_shader_select(ctx, sctx->vs_shader);
+ if (r)
+ return false;
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
si_update_so(sctx, sctx->vs_shader);
}
/* Update GS. */
if (sctx->gs_shader) {
- si_shader_select(ctx, sctx->gs_shader);
+ r = si_shader_select(ctx, sctx->gs_shader);
+ if (r)
+ return false;
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
si_update_so(sctx, sctx->gs_shader);
- if (!sctx->gs_rings)
+ if (!sctx->gsvs_ring) {
si_init_gs_rings(sctx);
-
- if (sctx->emitted.named.gs_rings != sctx->gs_rings)
- sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
- si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);
+ if (!sctx->gsvs_ring)
+ return false;
+ }
si_update_gs_rings(sctx);
} else {
- si_pm4_bind_state(sctx, gs_rings, NULL);
si_pm4_bind_state(sctx, gs, NULL);
si_pm4_bind_state(sctx, es, NULL);
}
si_update_vgt_shader_config(sctx);
- si_shader_select(ctx, sctx->ps_shader);
-
- if (!sctx->ps_shader->current) {
- struct si_shader_selector *sel;
-
- /* use a dummy shader if compiling the shader (variant) failed */
- si_make_dummy_ps(sctx);
- sel = sctx->dummy_pixel_shader;
- si_shader_select(ctx, sel);
- sctx->ps_shader->current = sel->current;
- }
-
+ r = si_shader_select(ctx, sctx->ps_shader);
+ if (r)
+ return false;
si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
sctx->flatshade != rs->flatshade) {
sctx->sprite_coord_enable = rs->sprite_coord_enable;
sctx->flatshade = rs->flatshade;
- si_update_spi_map(sctx);
+ si_mark_atom_dirty(sctx, &sctx->spi_map);
}
- if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
- si_pm4_state_changed(sctx, gs)) {
- si_update_spi_tmpring_size(sctx);
+ if (si_pm4_state_changed(sctx, ls) ||
+ si_pm4_state_changed(sctx, hs) ||
+ si_pm4_state_changed(sctx, es) ||
+ si_pm4_state_changed(sctx, gs) ||
+ si_pm4_state_changed(sctx, vs) ||
+ si_pm4_state_changed(sctx, ps)) {
+ if (!si_update_spi_tmpring_size(sctx))
+ return false;
}
if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
if (sctx->b.chip_class == SI)
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
+ return true;
}
void si_init_shader_functions(struct si_context *sctx)
{
+ si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
+
sctx->b.b.create_vs_state = si_create_vs_state;
sctx->b.b.create_tcs_state = si_create_tcs_state;
sctx->b.b.create_tes_state = si_create_tes_state;
#define PKT3_DRAW_INDEX_IMMD 0x2E /* not on CIK */
#define PKT3_NUM_INSTANCES 0x2F
#define PKT3_DRAW_INDEX_MULTI_AUTO 0x30
-#define PKT3_INDIRECT_BUFFER 0x32
+#define PKT3_INDIRECT_BUFFER_SI 0x32 /* not on CIK */
#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
#define PKT3_DRAW_INDEX_OFFSET_2 0x35
#define PKT3_DRAW_PREAMBLE 0x36 /* new on CIK, required on GFX7.2 and later */
#define PKT3_WAIT_REG_MEM 0x3C
#define WAIT_REG_MEM_EQUAL 3
#define PKT3_MEM_WRITE 0x3D /* not on CIK */
+#define PKT3_INDIRECT_BUFFER_CIK 0x3F /* new on CIK */
#define PKT3_COPY_DATA 0x40
#define COPY_DATA_SRC_SEL(x) ((x) & 0xf)
#define COPY_DATA_REG 0
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return 4;
case PIPE_CAP_TEXTURE_GATHER_SM5:
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
return 1;
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
- case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
return 0;
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
return 0;
}
/* should only get here on unhandled cases */
{
/* s limited to [0,1) */
/* i limited to [0,size-1] */
- int i = util_ifloor(s * size);
+ const int i = util_ifloor(s * size);
*icoord = repeat(i + offset, size);
}
wrap_linear_repeat(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- float u = s * size - 0.5F;
+ const float u = s * size - 0.5F;
*icoord0 = repeat(util_ifloor(u) + offset, size);
*icoord1 = repeat(*icoord0 + 1, size);
*w = frac(u);
wrap_linear_clamp(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- float u = CLAMP(s * size + offset, 0.0F, (float)size);
+ const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
- u = u - 0.5f;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
*w = frac(u);
wrap_linear_clamp_to_edge(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- float u = CLAMP(s * size + offset, 0.0F, (float)size);
- u = u - 0.5f;
+ const float u = CLAMP(s * size + offset, 0.0F, (float)size) - 0.5f;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
if (*icoord0 < 0)
{
const float min = -0.5F;
const float max = (float)size + 0.5F;
- float u = CLAMP(s * size + offset, min, max);
- u = u - 0.5f;
+ const float u = CLAMP(s * size + offset, min, max) - 0.5f;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
*w = frac(u);
{
const float min = -0.5F;
const float max = size + 0.5F;
- float u = fabsf(s * size + offset);
- if (u <= min)
- u = min;
- else if (u >= max)
- u = max;
- u -= 0.5F;
+ const float t = fabsf(s * size + offset);
+ const float u = CLAMP(t, min, max) - 0.5F;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
*w = frac(u);
static void
wrap_nearest_unorm_clamp(float s, unsigned size, int offset, int *icoord)
{
- int i = util_ifloor(s);
+ const int i = util_ifloor(s);
*icoord = CLAMP(i + offset, 0, (int) size-1);
}
int *icoord0, int *icoord1, float *w)
{
/* Not exactly what the spec says, but it matches NVIDIA output */
- float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
+ const float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
*w = frac(u);
wrap_linear_unorm_clamp_to_border(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F);
- u -= 0.5F;
+ const float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F) - 0.5F;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
if (*icoord1 > (int) size - 1)
wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
int *icoord0, int *icoord1, float *w)
{
- float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F);
- u -= 0.5F;
+ const float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F) - 0.5F;
*icoord0 = util_ifloor(u);
*icoord1 = *icoord0 + 1;
if (*icoord1 > (int) size - 1)
static inline int
coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
{
- int c = util_ifloor(coord + 0.5F);
+ const int c = util_ifloor(coord + 0.5F);
return CLAMP(c, (int)first_layer, (int)last_layer);
}
const float p[TGSI_QUAD_SIZE])
{
const struct pipe_resource *texture = sview->base.texture;
- float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
- float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
- float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
+ const float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
+ const float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
+ const float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
return util_fast_log2(rho);
}
const float p[TGSI_QUAD_SIZE])
{
const struct pipe_resource *texture = sview->base.texture;
- float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
- float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
- float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
- float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
- float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
- float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
- float rho = MAX2(maxx, maxy);
+ const float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
+ const float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
+ const float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
+ const float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
+ const float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
+ const float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
+ const float rho = MAX2(maxx, maxy);
return util_fast_log2(rho);
}
const float p[TGSI_QUAD_SIZE])
{
const struct pipe_resource *texture = sview->base.texture;
- float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
- float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
- float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
- float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
- float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
- float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]);
- float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
- float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
- float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, sview->base.u.tex.first_level);
- float rho;
-
- rho = MAX2(maxx, maxy);
- rho = MAX2(rho, maxz);
+ const float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
+ const float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
+ const float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
+ const float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
+ const float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
+ const float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]);
+ const float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
+ const float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
+ const float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, sview->base.u.tex.first_level);
+ const float rho = MAX3(maxx, maxy, maxz);
return util_fast_log2(rho);
}
union tex_tile_address addr, int x, int y)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- unsigned level = addr.bits.level;
+ const unsigned level = addr.bits.level;
if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
y < 0 || y >= (int) u_minify(texture->height0, level)) {
union tex_tile_address addr, int x, int y, int z)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- unsigned level = addr.bits.level;
+ const unsigned level = addr.bits.level;
if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
y < 0 || y >= (int) u_minify(texture->height0, level) ||
union tex_tile_address addr, int x, int y)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- unsigned level = addr.bits.level;
+ const unsigned level = addr.bits.level;
if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
return sp_samp->base.border_color.f;
union tex_tile_address addr, int x, int y, int layer)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- unsigned level = addr.bits.level;
+ const unsigned level = addr.bits.level;
assert(layer < (int) texture->array_size);
assert(layer >= 0);
float *corner, int layer, unsigned face)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- unsigned level = addr.bits.level;
+ const unsigned level = addr.bits.level;
int new_x, new_y, max_x;
max_x = (int) u_minify(texture->width0, level);
union tex_tile_address addr, int x, int y, int layer)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- unsigned level = addr.bits.level;
+ const unsigned level = addr.bits.level;
assert(layer < (int) texture->array_size);
assert(layer >= 0);
/* Some image-filter fastpaths:
*/
static inline void
-img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_2d_linear_repeat_POT(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
- unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
- unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
- int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
- int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
+ const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+ const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
+ const int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
+ const int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
union tex_tile_address addr;
int c;
- float u = (args->s * xpot - 0.5F) + args->offset[0];
- float v = (args->t * ypot - 0.5F) + args->offset[1];
+ const float u = (args->s * xpot - 0.5F) + args->offset[0];
+ const float v = (args->t * ypot - 0.5F) + args->offset[1];
- int uflr = util_ifloor(u);
- int vflr = util_ifloor(v);
+ const int uflr = util_ifloor(u);
+ const int vflr = util_ifloor(v);
- float xw = u - (float)uflr;
- float yw = v - (float)vflr;
+ const float xw = u - (float)uflr;
+ const float yw = v - (float)vflr;
- int x0 = uflr & (xpot - 1);
- int y0 = vflr & (ypot - 1);
+ const int x0 = uflr & (xpot - 1);
+ const int y0 = vflr & (ypot - 1);
const float *tx[4];
get_texel_quad_2d_no_border_single_tile(sp_sview, addr, x0, y0, tx);
}
else {
- unsigned x1 = (x0 + 1) & (xpot - 1);
- unsigned y1 = (y0 + 1) & (ypot - 1);
+ const unsigned x1 = (x0 + 1) & (xpot - 1);
+ const unsigned y1 = (y0 + 1) & (ypot - 1);
get_texel_quad_2d_no_border(sp_sview, addr, x0, y0, x1, y1, tx);
}
static inline void
-img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_2d_nearest_repeat_POT(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float rgba[TGSI_QUAD_SIZE])
{
- unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
- unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
+ const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+ const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
const float *out;
union tex_tile_address addr;
int c;
- float u = args->s * xpot + args->offset[0];
- float v = args->t * ypot + args->offset[1];
+ const float u = args->s * xpot + args->offset[0];
+ const float v = args->t * ypot + args->offset[1];
- int uflr = util_ifloor(u);
- int vflr = util_ifloor(v);
+ const int uflr = util_ifloor(u);
+ const int vflr = util_ifloor(v);
- int x0 = uflr & (xpot - 1);
- int y0 = vflr & (ypot - 1);
+ const int x0 = uflr & (xpot - 1);
+ const int y0 = vflr & (ypot - 1);
addr.value = 0;
addr.bits.level = args->level;
static inline void
-img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_2d_nearest_clamp_POT(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float rgba[TGSI_QUAD_SIZE])
{
- unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
- unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
+ const unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+ const unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
union tex_tile_address addr;
int c;
- float u = args->s * xpot + args->offset[0];
- float v = args->t * ypot + args->offset[1];
+ const float u = args->s * xpot + args->offset[0];
+ const float v = args->t * ypot + args->offset[1];
int x0, y0;
const float *out;
static void
-img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_1d_nearest(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float rgba[TGSI_QUAD_SIZE])
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width;
+ const int width = u_minify(texture->width0, args->level);
int x;
union tex_tile_address addr;
const float *out;
int c;
- width = u_minify(texture->width0, args->level);
-
assert(width > 0);
addr.value = 0;
static void
-img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_1d_array_nearest(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width;
- int x, layer;
+ const int width = u_minify(texture->width0, args->level);
+ const int layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer);
+ int x;
union tex_tile_address addr;
const float *out;
int c;
- width = u_minify(texture->width0, args->level);
-
assert(width > 0);
addr.value = 0;
addr.bits.level = args->level;
sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
- layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
- sp_sview->base.u.tex.last_layer);
out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
static void
-img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_2d_nearest(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width, height;
+ const int width = u_minify(texture->width0, args->level);
+ const int height = u_minify(texture->height0, args->level);
int x, y;
union tex_tile_address addr;
const float *out;
int c;
- width = u_minify(texture->width0, args->level);
- height = u_minify(texture->height0, args->level);
-
assert(width > 0);
assert(height > 0);
static void
-img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_2d_array_nearest(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width, height;
- int x, y, layer;
+ const int width = u_minify(texture->width0, args->level);
+ const int height = u_minify(texture->height0, args->level);
+ const int layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer);
+ int x, y;
union tex_tile_address addr;
const float *out;
int c;
- width = u_minify(texture->width0, args->level);
- height = u_minify(texture->height0, args->level);
-
assert(width > 0);
assert(height > 0);
sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
- layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
- sp_sview->base.u.tex.last_layer);
out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
static void
-img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_cube_nearest(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width, height;
- int x, y, layerface;
+ const int width = u_minify(texture->width0, args->level);
+ const int height = u_minify(texture->height0, args->level);
+ const int layerface = args->face_id + sp_sview->base.u.tex.first_layer;
+ int x, y;
union tex_tile_address addr;
const float *out;
int c;
- width = u_minify(texture->width0, args->level);
- height = u_minify(texture->height0, args->level);
-
assert(width > 0);
assert(height > 0);
sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
}
- layerface = args->face_id + sp_sview->base.u.tex.first_layer;
out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
rgba[TGSI_NUM_CHANNELS*c] = out[c];
}
static void
-img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_cube_array_nearest(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width, height;
- int x, y, layerface;
+ const int width = u_minify(texture->width0, args->level);
+ const int height = u_minify(texture->height0, args->level);
+ const int layerface =
+ coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer - 5) + args->face_id;
+ int x, y;
union tex_tile_address addr;
const float *out;
int c;
- width = u_minify(texture->width0, args->level);
- height = u_minify(texture->height0, args->level);
-
assert(width > 0);
assert(height > 0);
sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
- layerface = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
- sp_sview->base.u.tex.first_layer,
- sp_sview->base.u.tex.last_layer - 5) + args->face_id;
out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
}
static void
-img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_3d_nearest(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width, height, depth;
+ const int width = u_minify(texture->width0, args->level);
+ const int height = u_minify(texture->height0, args->level);
+ const int depth = u_minify(texture->depth0, args->level);
int x, y, z;
union tex_tile_address addr;
const float *out;
int c;
- width = u_minify(texture->width0, args->level);
- height = u_minify(texture->height0, args->level);
- depth = u_minify(texture->depth0, args->level);
-
assert(width > 0);
assert(height > 0);
assert(depth > 0);
static void
-img_filter_1d_linear(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_1d_linear(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width;
+ const int width = u_minify(texture->width0, args->level);
int x0, x1;
float xw; /* weights */
union tex_tile_address addr;
const float *tx0, *tx1;
int c;
- width = u_minify(texture->width0, args->level);
-
assert(width > 0);
addr.value = 0;
static void
-img_filter_1d_array_linear(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_1d_array_linear(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width;
- int x0, x1, layer;
+ const int width = u_minify(texture->width0, args->level);
+ const int layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer);
+ int x0, x1;
float xw; /* weights */
union tex_tile_address addr;
const float *tx0, *tx1;
int c;
- width = u_minify(texture->width0, args->level);
-
assert(width > 0);
addr.value = 0;
addr.bits.level = args->level;
sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
- layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
- sp_sview->base.u.tex.last_layer);
tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer);
tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, layer);
static void
-img_filter_2d_linear(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_2d_linear(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width, height;
+ const int width = u_minify(texture->width0, args->level);
+ const int height = u_minify(texture->height0, args->level);
int x0, y0, x1, y1;
float xw, yw; /* weights */
union tex_tile_address addr;
const float *tx[4];
int c;
- width = u_minify(texture->width0, args->level);
- height = u_minify(texture->height0, args->level);
-
assert(width > 0);
assert(height > 0);
static void
-img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_2d_array_linear(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width, height;
- int x0, y0, x1, y1, layer;
+ const int width = u_minify(texture->width0, args->level);
+ const int height = u_minify(texture->height0, args->level);
+ const int layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer);
+ int x0, y0, x1, y1;
float xw, yw; /* weights */
union tex_tile_address addr;
const float *tx[4];
int c;
- width = u_minify(texture->width0, args->level);
- height = u_minify(texture->height0, args->level);
-
assert(width > 0);
assert(height > 0);
sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
- layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
- sp_sview->base.u.tex.last_layer);
tx[0] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
tx[1] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
static void
-img_filter_cube_linear(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_cube_linear(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width, height;
- int x0, y0, x1, y1, layer;
+ const int width = u_minify(texture->width0, args->level);
+ const int height = u_minify(texture->height0, args->level);
+ const int layer = sp_sview->base.u.tex.first_layer;
+ int x0, y0, x1, y1;
float xw, yw; /* weights */
union tex_tile_address addr;
const float *tx[4];
corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
int c;
- width = u_minify(texture->width0, args->level);
- height = u_minify(texture->height0, args->level);
-
assert(width > 0);
assert(height > 0);
sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
}
- layer = sp_sview->base.u.tex.first_layer;
-
if (sp_samp->base.seamless_cube_map) {
tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
static void
-img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_cube_array_linear(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width, height;
- int x0, y0, x1, y1, layer;
+ const int width = u_minify(texture->width0, args->level);
+ const int height = u_minify(texture->height0, args->level);
+ const int layer =
+ coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer - 5);
+ int x0, y0, x1, y1;
float xw, yw; /* weights */
union tex_tile_address addr;
const float *tx[4];
corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
int c;
- width = u_minify(texture->width0, args->level);
- height = u_minify(texture->height0, args->level);
-
assert(width > 0);
assert(height > 0);
sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
}
- layer = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
- sp_sview->base.u.tex.first_layer,
- sp_sview->base.u.tex.last_layer - 5);
-
if (sp_samp->base.seamless_cube_map) {
tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
}
static void
-img_filter_3d_linear(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_3d_linear(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba)
{
const struct pipe_resource *texture = sp_sview->base.texture;
- int width, height, depth;
+ const int width = u_minify(texture->width0, args->level);
+ const int height = u_minify(texture->height0, args->level);
+ const int depth = u_minify(texture->depth0, args->level);
int x0, x1, y0, y1, z0, z1;
float xw, yw, zw; /* interpolation weights */
union tex_tile_address addr;
const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
int c;
- width = u_minify(texture->width0, args->level);
- height = u_minify(texture->height0, args->level);
- depth = u_minify(texture->depth0, args->level);
-
addr.value = 0;
addr.bits.level = args->level;
const float lod_in[TGSI_QUAD_SIZE],
float lod[TGSI_QUAD_SIZE])
{
- float min_lod = sampler->min_lod;
- float max_lod = sampler->max_lod;
+ const float min_lod = sampler->min_lod;
+ const float max_lod = sampler->max_lod;
uint i;
switch (control) {
- case tgsi_sampler_lod_none:
- case tgsi_sampler_lod_zero:
+ case TGSI_SAMPLER_LOD_NONE:
+ case TGSI_SAMPLER_LOD_ZERO:
/* XXX FIXME */
- case tgsi_sampler_derivs_explicit:
+ case TGSI_SAMPLER_DERIVS_EXPLICIT:
lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(biased_lambda, min_lod, max_lod);
break;
- case tgsi_sampler_lod_bias:
+ case TGSI_SAMPLER_LOD_BIAS:
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
lod[i] = biased_lambda + lod_in[i];
lod[i] = CLAMP(lod[i], min_lod, max_lod);
}
break;
- case tgsi_sampler_lod_explicit:
+ case TGSI_SAMPLER_LOD_EXPLICIT:
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
}
}
-/* Calculate level of detail for every fragment.
+/* Calculate level of detail for every fragment. The computed value is not
+ * clamped to lod_min and lod_max.
+ * Per control mode: LOD_NONE and DERIVS_EXPLICIT broadcast
+ * compute_lambda() + lod_bias to all four entries; LOD_BIAS adds lod_in[i]
+ * on top of that; LOD_EXPLICIT uses lod_in[i] + lod_bias; LOD_ZERO and
+ * GATHER use lod_bias alone. Any other control value hits assert(0).
+ * \param control selects which of the modes above is used.
* \param lod_in per-fragment lod_bias or explicit_lod.
* \param lod results per-fragment lod.
*/
static inline void
-compute_lambda_lod(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
- const float s[TGSI_QUAD_SIZE],
- const float t[TGSI_QUAD_SIZE],
- const float p[TGSI_QUAD_SIZE],
- const float lod_in[TGSI_QUAD_SIZE],
- enum tgsi_sampler_control control,
- float lod[TGSI_QUAD_SIZE])
+compute_lambda_lod_unclamped(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float lod_in[TGSI_QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float lod[TGSI_QUAD_SIZE])
{
const struct pipe_sampler_state *sampler = &sp_samp->base;
- float lod_bias = sampler->lod_bias;
- float min_lod = sampler->min_lod;
- float max_lod = sampler->max_lod;
+ const float lod_bias = sampler->lod_bias;
float lambda;
uint i;
switch (control) {
- case tgsi_sampler_lod_none:
+ case TGSI_SAMPLER_LOD_NONE:
/* XXX FIXME */
- case tgsi_sampler_derivs_explicit:
+ case TGSI_SAMPLER_DERIVS_EXPLICIT:
lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
- lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod);
+ lod[0] = lod[1] = lod[2] = lod[3] = lambda; /* same value for the whole quad, left unclamped */
break;
- case tgsi_sampler_lod_bias:
+ case TGSI_SAMPLER_LOD_BIAS:
lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
lod[i] = lambda + lod_in[i];
- lod[i] = CLAMP(lod[i], min_lod, max_lod);
}
break;
- case tgsi_sampler_lod_explicit:
+ case TGSI_SAMPLER_LOD_EXPLICIT:
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
- lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
+ lod[i] = lod_in[i] + lod_bias; /* unlike the removed line, the sampler bias is now added to explicit lods */
}
break;
- case tgsi_sampler_lod_zero:
- case tgsi_sampler_gather:
- /* this is all static state in the sampler really need clamp here? */
- lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
+ case TGSI_SAMPLER_LOD_ZERO:
+ case TGSI_SAMPLER_GATHER:
+ lod[0] = lod[1] = lod[2] = lod[3] = lod_bias; /* lod_in is not consulted here */
break;
default:
assert(0);
}
}
+/* Calculate level of detail for every fragment.
+ * Wrapper around compute_lambda_lod_unclamped(): the unclamped results are
+ * written to lod[] first, then each of the TGSI_QUAD_SIZE entries is clamped
+ * in place to the sampler's [min_lod, max_lod].
+ * \param s, t, p texture coordinates, forwarded unchanged.
+ * \param control forwarded unchanged; selects how lod_in is interpreted.
+ * \param lod_in per-fragment lod_bias or explicit_lod.
+ * \param lod results per-fragment lod.
+ */
+static inline void
+compute_lambda_lod(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float lod_in[TGSI_QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float lod[TGSI_QUAD_SIZE])
+{
+ const struct pipe_sampler_state *sampler = &sp_samp->base;
+ const float min_lod = sampler->min_lod;
+ const float max_lod = sampler->max_lod;
+ int i;
+
+ compute_lambda_lod_unclamped(sp_sview, sp_samp,
+ s, t, p, lod_in, control, lod);
+ for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+ lod[i] = CLAMP(lod[i], min_lod, max_lod); /* clamp to the sampler's lod range only, not to mip levels */
+ }
+}
+/* Returns the gather component selector: reads the bits of lod_in[0] as an
+ * unsigned int and keeps the two low bits, giving a value in 0..3.
+ * NOTE(review): the float-to-unsigned pointer cast is a type pun; copying
+ * the bits with memcpy would avoid strict-aliasing concerns - confirm
+ * whether the build relies on -fno-strict-aliasing before changing it.
+ */
static inline unsigned
get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
{
return (*(unsigned int *)lod_in) & 0x3;
}
+/**
+ * Clamps given lod to both lod limits and mip level limits. Clamping to the
+ * latter limits is done so that lod is relative to the first (base) level.
+ *
+ * \param lod per-fragment lod values; only read.
+ * \param clamped receives one clamped value per fragment. The value stays a
+ * float, so any fractional part that survives the clamps is kept.
+ */
static void
-mip_filter_linear(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+clamp_lod(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
+ const float lod[TGSI_QUAD_SIZE],
+ float clamped[TGSI_QUAD_SIZE])
+{
+ const float min_lod = sp_samp->base.min_lod;
+ const float max_lod = sp_samp->base.max_lod;
+ const float min_level = sp_sview->base.u.tex.first_level;
+ const float max_level = sp_sview->base.u.tex.last_level;
+ int i;
+
+ for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+ float cl = lod[i];
+
+ cl = CLAMP(cl, min_lod, max_lod); /* first: the sampler's lod limits */
+ cl = CLAMP(cl, 0, max_level - min_level); /* then: 0 .. (last_level - first_level) of the view */
+ clamped[i] = cl;
+ }
+}
+
+/**
+ * Get mip level relative to base level for linear mip filter
+ *
+ * Forwards to clamp_lod() and does nothing else, so level[] receives the
+ * clamped lod values without any rounding.
+ */
+static void
+mip_rel_level_linear(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
+ const float lod[TGSI_QUAD_SIZE],
+ float level[TGSI_QUAD_SIZE])
+{
+ clamp_lod(sp_sview, sp_samp, lod, level);
+}
+
+static void
+mip_filter_linear(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
img_filter_func min_filter,
img_filter_func mag_filter,
const float s[TGSI_QUAD_SIZE],
compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
args.offset = filt_args->offset;
- args.gather_only = filt_args->control == tgsi_sampler_gather;
+ args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
args.gather_comp = get_gather_component(lod_in);
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
- int level0 = psview->u.tex.first_level + (int)lod[j];
+ const int level0 = psview->u.tex.first_level + (int)lod[j];
args.s = s[j];
args.t = t[j];
args.p = p[j];
- args.face_id = sp_sview->faces[j];
+ args.face_id = filt_args->faces[j];
if (lod[j] < 0.0) {
args.level = psview->u.tex.first_level;
/**
+ * Get mip level relative to base level for nearest mip filter
+ *
+ * Clamps through clamp_lod(), then rounds every entry by adding 0.5 and
+ * converting to int; the result is stored back as a float holding a whole
+ * number. For non-negative input this differs from the form in the TODO
+ * below only when level[j] + 0.5 is already a whole number (this code then
+ * picks the higher level, the TODO form the lower one).
+ */
+static void
+mip_rel_level_nearest(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
+ const float lod[TGSI_QUAD_SIZE],
+ float level[TGSI_QUAD_SIZE])
+{
+ int j;
+
+ clamp_lod(sp_sview, sp_samp, lod, level);
+ for (j = 0; j < TGSI_QUAD_SIZE; j++)
+ /* TODO: It should rather be:
+ * level[j] = ceil(level[j] + 0.5F) - 1.0F;
+ */
+ level[j] = (int)(level[j] + 0.5F); /* sole statement of the unbraced for loop above */
+}
+
+/**
* Compute nearest mipmap level from texcoords.
* Then sample the texture level for four elements of a quad.
* \param c0 the LOD bias factors, or absolute LODs (depending on control)
*/
static void
-mip_filter_nearest(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+mip_filter_nearest(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
img_filter_func min_filter,
img_filter_func mag_filter,
const float s[TGSI_QUAD_SIZE],
struct img_filter_args args;
args.offset = filt_args->offset;
- args.gather_only = filt_args->control == tgsi_sampler_gather;
+ args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
args.gather_comp = get_gather_component(lod_in);
compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
args.s = s[j];
args.t = t[j];
args.p = p[j];
- args.face_id = sp_sview->faces[j];
+ args.face_id = filt_args->faces[j];
if (lod[j] < 0.0) {
args.level = psview->u.tex.first_level;
mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
} else {
- int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
+ const int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
args.level = MIN2(level, (int)psview->u.tex.last_level);
min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
}
}
+/**
+ * Get mip level relative to base level for none mip filter
+ *
+ * sp_sview, sp_samp and lod are not read: every entry of level[] is set to
+ * 0, i.e. the base level.
+ */
+static void
+mip_rel_level_none(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
+ const float lod[TGSI_QUAD_SIZE],
+ float level[TGSI_QUAD_SIZE])
+{
+ int j;
+
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ level[j] = 0;
+ }
+}
+
static void
-mip_filter_none(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+mip_filter_none(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
img_filter_func min_filter,
img_filter_func mag_filter,
const float s[TGSI_QUAD_SIZE],
args.level = sp_sview->base.u.tex.first_level;
args.offset = filt_args->offset;
- args.gather_only = filt_args->control == tgsi_sampler_gather;
+ args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
args.s = s[j];
args.t = t[j];
args.p = p[j];
- args.face_id = sp_sview->faces[j];
+ args.face_id = filt_args->faces[j];
if (lod[j] < 0.0) {
mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
}
}
+/**
+ * Get mip level relative to base level for none mip filter
+ * (the variant paired with mip_filter_none_no_filter_select).
+ *
+ * Forwards all arguments to mip_rel_level_none().
+ */
static void
-mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+mip_rel_level_none_no_filter_select(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
+ const float lod[TGSI_QUAD_SIZE],
+ float level[TGSI_QUAD_SIZE])
+{
+ mip_rel_level_none(sp_sview, sp_samp, lod, level);
+}
+
+static void
+mip_filter_none_no_filter_select(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
img_filter_func min_filter,
img_filter_func mag_filter,
const float s[TGSI_QUAD_SIZE],
struct img_filter_args args;
args.level = sp_sview->base.u.tex.first_level;
args.offset = filt_args->offset;
- args.gather_only = filt_args->control == tgsi_sampler_gather;
+ args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
args.s = s[j];
args.t = t[j];
args.p = p[j];
- args.face_id = sp_sview->faces[j];
+ args.face_id = filt_args->faces[j];
mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
}
}
/* For anisotropic filtering */
#define WEIGHT_LUT_SIZE 1024
-static float *weightLut = NULL;
+static const float *weightLut = NULL;
/**
* Creates the look-up table used to speed-up EWA sampling
{
unsigned i;
if (!weightLut) {
- weightLut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
+ float *lut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
- float alpha = 2;
- float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
- float weight = (float) exp(-alpha * r2);
- weightLut[i] = weight;
+ const float alpha = 2;
+ const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
+ const float weight = (float) exp(-alpha * r2);
+ lut[i] = weight;
}
+ weightLut = lut;
}
}
* "Fundamentals of Texture Mapping and Image Warping" (1989)
*/
static void
-img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
img_filter_func min_filter,
img_filter_func mag_filter,
const float s[TGSI_QUAD_SIZE],
const float t[TGSI_QUAD_SIZE],
const float p[TGSI_QUAD_SIZE],
+ const uint faces[TGSI_QUAD_SIZE],
unsigned level,
const float dudx, const float dvdx,
const float dudy, const float dvdy,
const struct pipe_resource *texture = sp_sview->base.texture;
// ??? Won't the image filters blow up if level is negative?
- unsigned level0 = level > 0 ? level : 0;
- float scaling = 1.0f / (1 << level0);
- int width = u_minify(texture->width0, level0);
- int height = u_minify(texture->height0, level0);
+ const unsigned level0 = level > 0 ? level : 0;
+ const float scaling = 1.0f / (1 << level0);
+ const int width = u_minify(texture->width0, level0);
+ const int height = u_minify(texture->height0, level0);
struct img_filter_args args;
- float ux = dudx * scaling;
- float vx = dvdx * scaling;
- float uy = dudy * scaling;
- float vy = dvdy * scaling;
+ const float ux = dudx * scaling;
+ const float vx = dvdx * scaling;
+ const float uy = dudy * scaling;
+ const float vy = dvdy * scaling;
/* compute ellipse coefficients to bound the region:
* A*x*x + B*x*y + C*y*y = F.
/* assert(F > 0.0); */
/* Compute the ellipse's (u,v) bounding box in texture space */
- float d = -B*B+4.0f*C*A;
- float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox with */
- float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
+ const float d = -B*B+4.0f*C*A;
+ const float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox with */
+ const float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
float s_buffer[TGSI_QUAD_SIZE];
float t_buffer[TGSI_QUAD_SIZE];
float weight_buffer[TGSI_QUAD_SIZE];
- unsigned buffer_next;
int j;
- float den; /* = 0.0F; */
- float ddq;
- float U; /* = u0 - tex_u; */
- int v;
-
- /* Scale ellipse formula to directly index the Filter Lookup Table.
- * i.e. scale so that F = WEIGHT_LUT_SIZE-1
- */
- double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
- A *= formScale;
- B *= formScale;
- C *= formScale;
- /* F *= formScale; */ /* no need to scale F as we don't use it below here */
/* For each quad, the du and dx values are the same and so the ellipse is
* also the same. Note that texel/image access can only be performed using
* using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
* full, then the pixel values are read from the image.
*/
- ddq = 2 * A;
+ const float ddq = 2 * A;
+
+ /* Scale ellipse formula to directly index the Filter Lookup Table.
+ * i.e. scale so that F = WEIGHT_LUT_SIZE-1
+ */
+ const double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
+ A *= formScale;
+ B *= formScale;
+ C *= formScale;
+ /* F *= formScale; */ /* no need to scale F as we don't use it below here */
args.level = level;
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
* and incrementally update the value of Ax^2+Bxy*Cy^2; when this
* value, q, is less than F, we're inside the ellipse
*/
- float tex_u = -0.5F + s[j] * texture->width0 * scaling;
- float tex_v = -0.5F + t[j] * texture->height0 * scaling;
+ const float tex_u = -0.5F + s[j] * texture->width0 * scaling;
+ const float tex_v = -0.5F + t[j] * texture->height0 * scaling;
- int u0 = (int) floorf(tex_u - box_u);
- int u1 = (int) ceilf(tex_u + box_u);
- int v0 = (int) floorf(tex_v - box_v);
- int v1 = (int) ceilf(tex_v + box_v);
+ const int u0 = (int) floorf(tex_u - box_u);
+ const int u1 = (int) ceilf(tex_u + box_u);
+ const int v0 = (int) floorf(tex_v - box_v);
+ const int v1 = (int) ceilf(tex_v + box_v);
+ const float U = u0 - tex_u;
float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
- buffer_next = 0;
- den = 0;
- args.face_id = sp_sview->faces[j];
+ unsigned buffer_next = 0;
+ float den = 0;
+ int v;
+ args.face_id = faces[j];
- U = u0 - tex_u;
for (v = v0; v <= v1; ++v) {
- float V = v - tex_v;
+ const float V = v - tex_v;
float dq = A * (2 * U + 1) + B * V;
float q = (C * V + B * U) * V + A * U * U;
* should not happen, though
*/
const int qClamped = q >= 0.0F ? q : 0;
- float weight = weightLut[qClamped];
+ const float weight = weightLut[qClamped];
weight_buffer[buffer_next] = weight;
s_buffer[buffer_next] = u / ((float) width);
/**
+ * Get mip level relative to base level for linear mip filter
+ * (the variant paired with mip_filter_linear_aniso).
+ *
+ * Forwards all arguments to mip_rel_level_linear().
+ */
+static void
+mip_rel_level_linear_aniso(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
+ const float lod[TGSI_QUAD_SIZE],
+ float level[TGSI_QUAD_SIZE])
+{
+ mip_rel_level_linear(sp_sview, sp_samp, lod, level);
+}
+
+/**
* Sample 2D texture using an anisotropic filter.
*/
static void
-mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+mip_filter_linear_aniso(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
img_filter_func min_filter,
img_filter_func mag_filter,
const float s[TGSI_QUAD_SIZE],
float lambda;
float lod[TGSI_QUAD_SIZE];
- float s_to_u = u_minify(texture->width0, psview->u.tex.first_level);
- float t_to_v = u_minify(texture->height0, psview->u.tex.first_level);
- float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
- float dudy = (s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
- float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
- float dvdy = (t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
+ const float s_to_u = u_minify(texture->width0, psview->u.tex.first_level);
+ const float t_to_v = u_minify(texture->height0, psview->u.tex.first_level);
+ const float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
+ const float dudy = (s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
+ const float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
+ const float dvdy = (t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
struct img_filter_args args;
- if (filt_args->control == tgsi_sampler_lod_bias ||
- filt_args->control == tgsi_sampler_lod_none ||
+ if (filt_args->control == TGSI_SAMPLER_LOD_BIAS ||
+ filt_args->control == TGSI_SAMPLER_LOD_NONE ||
/* XXX FIXME */
- filt_args->control == tgsi_sampler_derivs_explicit) {
+ filt_args->control == TGSI_SAMPLER_DERIVS_EXPLICIT) {
/* note: instead of working with Px and Py, we will use the
* squared length instead, to avoid sqrt.
*/
- float Px2 = dudx * dudx + dvdx * dvdx;
- float Py2 = dudy * dudy + dvdy * dvdy;
+ const float Px2 = dudx * dudx + dvdx * dvdx;
+ const float Py2 = dudy * dudy + dvdy * dvdy;
float Pmax2;
float Pmin2;
compute_lod(&sp_samp->base, filt_args->control, lambda, lod_in, lod);
}
else {
- assert(filt_args->control == tgsi_sampler_lod_explicit ||
- filt_args->control == tgsi_sampler_lod_zero);
+ assert(filt_args->control == TGSI_SAMPLER_LOD_EXPLICIT ||
+ filt_args->control == TGSI_SAMPLER_LOD_ZERO);
compute_lod(&sp_samp->base, filt_args->control, sp_samp->base.lod_bias, lod_in, lod);
}
args.t = t[j];
args.p = p[j];
args.level = psview->u.tex.last_level;
- args.face_id = sp_sview->faces[j];
+ args.face_id = filt_args->faces[j];
min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
}
}
* seem to be worth the extra running time.
*/
img_filter_2d_ewa(sp_sview, sp_samp, min_filter, mag_filter,
- s, t, p, level0,
+ s, t, p, filt_args->faces, level0,
dudx, dvdx, dudy, dvdy, rgba);
}
}
}
+/**
+ * Get mip level relative to base level for linear mip filter
+ * (the variant paired with mip_filter_linear_2d_linear_repeat_POT).
+ *
+ * Forwards all arguments to mip_rel_level_linear().
+ */
+static void
+mip_rel_level_linear_2d_linear_repeat_POT(
+ const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
+ const float lod[TGSI_QUAD_SIZE],
+ float level[TGSI_QUAD_SIZE])
+{
+ mip_rel_level_linear(sp_sview, sp_samp, lod, level);
+}
/**
* Specialized version of mip_filter_linear with hard-wired calls to
*/
static void
mip_filter_linear_2d_linear_repeat_POT(
- struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+ const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
img_filter_func min_filter,
img_filter_func mag_filter,
const float s[TGSI_QUAD_SIZE],
compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
- int level0 = psview->u.tex.first_level + (int)lod[j];
+ const int level0 = psview->u.tex.first_level + (int)lod[j];
struct img_filter_args args;
/* Catches both negative and large values of level0:
*/
args.s = s[j];
args.t = t[j];
args.p = p[j];
- args.face_id = sp_sview->faces[j];
+ args.face_id = filt_args->faces[j];
args.offset = filt_args->offset;
- args.gather_only = filt_args->control == tgsi_sampler_gather;
+ args.gather_only = filt_args->control == TGSI_SAMPLER_GATHER;
if ((unsigned)level0 >= psview->u.tex.last_level) {
if (level0 < 0)
args.level = psview->u.tex.first_level;
}
else {
- float levelBlend = frac(lod[j]);
+ const float levelBlend = frac(lod[j]);
float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
int c;
}
}
+static const struct sp_filter_funcs funcs_linear = { /* positional init; struct sp_filter_funcs is declared elsewhere - TODO confirm member order */
+ mip_rel_level_linear, /* helper that yields the mip level relative to the base level */
+ mip_filter_linear /* mip filter; sample_mip invokes this kind of entry as funcs->filter */
+};
+
+static const struct sp_filter_funcs funcs_nearest = { /* selected by get_filters() for TGSI_SAMPLER_GATHER */
+ mip_rel_level_nearest,
+ mip_filter_nearest
+};
+
+static const struct sp_filter_funcs funcs_none = { /* same layout: relative-level helper, then mip filter */
+ mip_rel_level_none,
+ mip_filter_none
+};
+
+static const struct sp_filter_funcs funcs_none_no_filter_select = { /* same layout: relative-level helper, then mip filter */
+ mip_rel_level_none_no_filter_select,
+ mip_filter_none_no_filter_select
+};
+
+static const struct sp_filter_funcs funcs_linear_aniso = { /* same layout: relative-level helper, then mip filter */
+ mip_rel_level_linear_aniso,
+ mip_filter_linear_aniso
+};
+
+static const struct sp_filter_funcs funcs_linear_2d_linear_repeat_POT = { /* selected by get_filters() for the pot2d fast path */
+ mip_rel_level_linear_2d_linear_repeat_POT,
+ mip_filter_linear_2d_linear_repeat_POT
+};
/**
* Do shadow/depth comparisons.
*/
static void
-sample_compare(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+sample_compare(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const float s[TGSI_QUAD_SIZE],
const float t[TGSI_QUAD_SIZE],
const float p[TGSI_QUAD_SIZE],
int j, v;
int k[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
float pc[4];
- const struct util_format_description *format_desc;
- unsigned chan_type;
- bool is_gather = (control == tgsi_sampler_gather);
+ const struct util_format_description *format_desc =
+ util_format_description(sp_sview->base.format);
+ /* not entirely sure we couldn't end up with non-valid swizzle here */
+ const unsigned chan_type =
+ format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
+ format_desc->channel[format_desc->swizzle[0]].type :
+ UTIL_FORMAT_TYPE_FLOAT;
+ const bool is_gather = (control == TGSI_SAMPLER_GATHER);
/**
* Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
pc[3] = p[3];
}
- format_desc = util_format_description(sp_sview->base.format);
- /* not entirely sure we couldn't end up with non-valid swizzle here */
- chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
- format_desc->channel[format_desc->swizzle[0]].type :
- UTIL_FORMAT_TYPE_FLOAT;
if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
/*
* clamping is a result of conversion to texture format, hence
}
}
+/**
+ * Get mip filter funcs, and optionally both img min filter and img mag
+ * filter. Note that both img filter function pointers must be either non-NULL
+ * or NULL.
+ *
+ * \param control TGSI_SAMPLER_GATHER selects funcs_nearest; otherwise the
+ * pot2d fast path or the sampler's own filter_funcs is used.
+ * \param funcs out: chosen mip filter function table; must not be NULL.
+ * \param min out, may be NULL: img min filter.
+ * \param mag out, may be NULL: img mag filter.
+ *
+ * Only the generic (last) branch writes *mag. The gather branch writes *min
+ * only, and the pot2d branch writes neither, so callers must pre-initialise
+ * both pointers.
+ * NOTE(review): for gather, *mag keeps the caller's initial value (NULL in
+ * sample_mip) while mip_filter_nearest calls mag_filter when lod < 0 -
+ * confirm that combination cannot occur.
+ */
+static void
+get_filters(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
+ const enum tgsi_sampler_control control,
+ const struct sp_filter_funcs **funcs,
+ img_filter_func *min,
+ img_filter_func *mag)
+{
+ assert(funcs);
+ if (control == TGSI_SAMPLER_GATHER) {
+ *funcs = &funcs_nearest;
+ if (min) {
+ *min = get_img_filter(sp_sview, &sp_samp->base,
+ PIPE_TEX_FILTER_LINEAR, true); /* last argument is true only on this gather path */
+ }
+ } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
+ *funcs = &funcs_linear_2d_linear_repeat_POT;
+ } else {
+ *funcs = sp_samp->filter_funcs;
+ if (min) {
+ assert(mag);
+ *min = get_img_filter(sp_sview, &sp_samp->base,
+ sp_samp->min_img_filter, false);
+ if (sp_samp->min_mag_equal) {
+ *mag = *min; /* reuse the min filter instead of a second lookup */
+ } else {
+ *mag = get_img_filter(sp_sview, &sp_samp->base,
+ sp_samp->base.mag_img_filter, false);
+ }
+ }
+ }
+}
static void
-sample_mip(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+sample_mip(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const float s[TGSI_QUAD_SIZE],
const float t[TGSI_QUAD_SIZE],
const float p[TGSI_QUAD_SIZE],
const struct filter_args *filt_args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
- mip_filter_func mip_filter;
+ const struct sp_filter_funcs *funcs = NULL;
img_filter_func min_img_filter = NULL;
img_filter_func mag_img_filter = NULL;
- if (filt_args->control == tgsi_sampler_gather) {
- mip_filter = mip_filter_nearest;
- min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true);
- } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
- mip_filter = mip_filter_linear_2d_linear_repeat_POT;
- }
- else {
- mip_filter = sp_samp->mip_filter;
- min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false);
- if (sp_samp->min_mag_equal) {
- mag_img_filter = min_img_filter;
- }
- else {
- mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false);
- }
- }
+ get_filters(sp_sview, sp_samp, filt_args->control,
+ &funcs, &min_img_filter, &mag_img_filter);
- mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
- s, t, p, c0, lod, filt_args, rgba);
+ funcs->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
+ s, t, p, c0, lod, filt_args, rgba);
if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
- sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba);
+ sample_compare(sp_sview, sp_samp, s, t, p, c0,
+ lod, filt_args->control, rgba);
}
- if (sp_sview->need_swizzle && filt_args->control != tgsi_sampler_gather) {
+ if (sp_sview->need_swizzle && filt_args->control != TGSI_SAMPLER_GATHER) {
float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
memcpy(rgba_temp, rgba, sizeof(rgba_temp));
do_swizzling(&sp_sview->base, rgba_temp, rgba);
/**
- * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
- * Put face info into the sampler faces[] array.
+ * This function uses cube texture coordinates to choose a face of a cube and
+ * computes the 2D cube face coordinates. Puts face info into the
+ * caller-supplied faces[] array.
*/
static void
-sample_cube(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
- const float s[TGSI_QUAD_SIZE],
- const float t[TGSI_QUAD_SIZE],
- const float p[TGSI_QUAD_SIZE],
- const float c0[TGSI_QUAD_SIZE],
- const float c1[TGSI_QUAD_SIZE],
- const struct filter_args *filt_args,
- float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+convert_cube(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float c0[TGSI_QUAD_SIZE],
+ float ssss[TGSI_QUAD_SIZE],
+ float tttt[TGSI_QUAD_SIZE],
+ float pppp[TGSI_QUAD_SIZE],
+ uint faces[TGSI_QUAD_SIZE])
{
unsigned j;
- float ssss[4], tttt[4];
-
- /* Not actually used, but the intermediate steps that do the
- * dereferencing don't know it.
- */
- static float pppp[4] = { 0, 0, 0, 0 };
pppp[0] = c0[0];
pppp[1] = c0[1];
const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
if (arx >= ary && arx >= arz) {
- float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
- uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
+ const float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
+ const uint face = (rx >= 0.0F) ?
+ PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
const float ima = -0.5F / fabsf(s[j]);
ssss[j] = sign * p[j] * ima + 0.5F;
tttt[j] = t[j] * ima + 0.5F;
- sp_sview->faces[j] = face;
+ faces[j] = face;
}
}
else if (ary >= arx && ary >= arz) {
- float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
- uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
+ const float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
+ const uint face = (ry >= 0.0F) ?
+ PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
const float ima = -0.5F / fabsf(t[j]);
ssss[j] = -s[j] * ima + 0.5F;
tttt[j] = sign * -p[j] * ima + 0.5F;
- sp_sview->faces[j] = face;
+ faces[j] = face;
}
}
else {
- float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
- uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
+ const float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
+ const uint face = (rz >= 0.0F) ?
+ PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
const float ima = -0.5F / fabsf(p[j]);
ssss[j] = sign * -s[j] * ima + 0.5F;
tttt[j] = t[j] * ima + 0.5F;
- sp_sview->faces[j] = face;
+ faces[j] = face;
}
}
}
-
- sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, filt_args, rgba);
}
static void
-sp_get_dims(struct sp_sampler_view *sp_sview, int level,
+sp_get_dims(const struct sp_sampler_view *sp_sview,
+ int level,
int dims[4])
{
const struct pipe_sampler_view *view = &sp_sview->base;
* coords to the texture image size.
*/
static void
-sp_get_texels(struct sp_sampler_view *sp_sview,
+sp_get_texels(const struct sp_sampler_view *sp_sview,
const int v_i[TGSI_QUAD_SIZE],
const int v_j[TGSI_QUAD_SIZE],
const int v_k[TGSI_QUAD_SIZE],
const struct pipe_resource *texture = sp_sview->base.texture;
int j, c;
const float *tx;
- int width, height, depth;
-
- addr.value = 0;
/* TODO write a better test for LOD */
- addr.bits.level = sp_sview->base.target == PIPE_BUFFER ? 0 :
- CLAMP(lod[0] + sp_sview->base.u.tex.first_level,
- sp_sview->base.u.tex.first_level,
- sp_sview->base.u.tex.last_level);
+ const unsigned level =
+ sp_sview->base.target == PIPE_BUFFER ? 0 :
+ CLAMP(lod[0] + sp_sview->base.u.tex.first_level,
+ sp_sview->base.u.tex.first_level,
+ sp_sview->base.u.tex.last_level);
+ const int width = u_minify(texture->width0, level);
+ const int height = u_minify(texture->height0, level);
+ const int depth = u_minify(texture->depth0, level);
- width = u_minify(texture->width0, addr.bits.level);
- height = u_minify(texture->height0, addr.bits.level);
- depth = u_minify(texture->depth0, addr.bits.level);
+ addr.value = 0;
+ addr.bits.level = level;
switch (sp_sview->base.target) {
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
- int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+ const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
tx = get_texel_2d_no_border(sp_sview, addr, x, 0);
for (c = 0; c < 4; c++) {
rgba[c][j] = tx[c];
break;
case PIPE_TEXTURE_1D_ARRAY:
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
- int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
- int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer,
- sp_sview->base.u.tex.last_layer);
+ const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+ const int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer);
tx = get_texel_2d_no_border(sp_sview, addr, x, y);
for (c = 0; c < 4; c++) {
rgba[c][j] = tx[c];
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
- int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
- int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
+ const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+ const int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
tx = get_texel_2d_no_border(sp_sview, addr, x, y);
for (c = 0; c < 4; c++) {
rgba[c][j] = tx[c];
break;
case PIPE_TEXTURE_2D_ARRAY:
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
- int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
- int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
- int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer,
- sp_sview->base.u.tex.last_layer);
+ const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+ const int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
+ const int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer,
+ sp_sview->base.u.tex.last_layer);
tx = get_texel_3d_no_border(sp_sview, addr, x, y, layer);
for (c = 0; c < 4; c++) {
rgba[c][j] = tx[c];
switch (sampler->min_mip_filter) {
case PIPE_TEX_MIPFILTER_NONE:
if (sampler->min_img_filter == sampler->mag_img_filter)
- samp->mip_filter = mip_filter_none_no_filter_select;
+ samp->filter_funcs = &funcs_none_no_filter_select;
else
- samp->mip_filter = mip_filter_none;
+ samp->filter_funcs = &funcs_none;
break;
case PIPE_TEX_MIPFILTER_NEAREST:
- samp->mip_filter = mip_filter_nearest;
+ samp->filter_funcs = &funcs_nearest;
break;
case PIPE_TEX_MIPFILTER_LINEAR:
sampler->max_anisotropy <= 1) {
samp->min_mag_equal_repeat_linear = TRUE;
}
- samp->mip_filter = mip_filter_linear;
+ samp->filter_funcs = &funcs_linear;
/* Anisotropic filtering extension. */
if (sampler->max_anisotropy > 1) {
- samp->mip_filter = mip_filter_linear_aniso;
+ samp->filter_funcs = &funcs_linear_aniso;
/* Override min_img_filter:
* min_img_filter needs to be set to NEAREST since we need to access
const struct pipe_sampler_view *templ)
{
struct sp_sampler_view *sview = CALLOC_STRUCT(sp_sampler_view);
- struct softpipe_resource *spr = (struct softpipe_resource *)resource;
+ const struct softpipe_resource *spr = (struct softpipe_resource *)resource;
if (sview) {
struct pipe_sampler_view *view = &sview->base;
sview->need_swizzle = TRUE;
}
- if (view->target == PIPE_TEXTURE_CUBE ||
- view->target == PIPE_TEXTURE_CUBE_ARRAY)
- sview->get_samples = sample_cube;
- else {
- sview->get_samples = sample_mip;
- }
+ sview->need_cube_convert = (view->target == PIPE_TEXTURE_CUBE ||
+ view->target == PIPE_TEXTURE_CUBE_ARRAY);
sview->pot2d = spr->pot &&
(view->target == PIPE_TEXTURE_2D ||
view->target == PIPE_TEXTURE_RECT);
}
+/**
+ * Cast a const tgsi_sampler pointer to a const sp_tgsi_sampler pointer,
+ * keeping the const qualifier on the pointee.
+ */
+static inline const struct sp_tgsi_sampler *
+sp_tgsi_sampler_cast_c(const struct tgsi_sampler *sampler)
+{
+ return (const struct sp_tgsi_sampler *)sampler;
+}
+
+
static void
sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
const unsigned sview_index,
int level, int dims[4])
{
- struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+ const struct sp_tgsi_sampler *sp_samp =
+ sp_tgsi_sampler_cast_c(tgsi_sampler);
assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
/* always have a view here but texture is NULL if no sampler view was set. */
enum tgsi_sampler_control control,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
- struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+ const struct sp_tgsi_sampler *sp_tgsi_samp =
+ sp_tgsi_sampler_cast_c(tgsi_sampler);
+ const struct sp_sampler_view *sp_sview;
+ const struct sp_sampler *sp_samp;
struct filter_args filt_args;
+
assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
assert(sampler_index < PIPE_MAX_SAMPLERS);
- assert(sp_samp->sp_sampler[sampler_index]);
+ assert(sp_tgsi_samp->sp_sampler[sampler_index]);
+
+ sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
+ sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
/* always have a view here but texture is NULL if no sampler view was set. */
- if (!sp_samp->sp_sview[sview_index].base.texture) {
+ if (!sp_sview->base.texture) {
int i, j;
for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
filt_args.control = control;
filt_args.offset = offset;
- sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
- sp_samp->sp_sampler[sampler_index],
- s, t, p, c0, lod, &filt_args, rgba);
+
+ if (sp_sview->need_cube_convert) {
+ float cs[TGSI_QUAD_SIZE];
+ float ct[TGSI_QUAD_SIZE];
+ float cp[TGSI_QUAD_SIZE];
+ uint faces[TGSI_QUAD_SIZE];
+
+ convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, faces);
+
+ filt_args.faces = faces;
+ sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba);
+ } else {
+ static const uint zero_faces[TGSI_QUAD_SIZE] = {0, 0, 0, 0};
+
+ filt_args.faces = zero_faces;
+ sample_mip(sp_sview, sp_samp, s, t, p, c0, lod, &filt_args, rgba);
+ }
}
+/**
+ * Installed as the tgsi_sampler query_lod hook.  For each element of the
+ * quad, writes the value produced by compute_lambda_lod_unclamped() to
+ * \p lod and the value produced by the selected filter's relative_level()
+ * callback to \p mipmap.
+ *
+ * \param sview_index    index into sp_sview[]
+ * \param sampler_index  index into sp_sampler[]
+ * \param s, t, p        texture coordinates for the quad
+ * \param c0             only forwarded to convert_cube() for cube views
+ * \param control        forwarded to the LOD computation and get_filters()
+ * \param mipmap         output, one value per quad element
+ * \param lod            output, one value per quad element
+ *
+ * Both outputs are set to 0.0 when the view has no texture.
+ */
+static void
+sp_tgsi_query_lod(const struct tgsi_sampler *tgsi_sampler,
+ const unsigned sview_index,
+ const unsigned sampler_index,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float c0[TGSI_QUAD_SIZE],
+ const enum tgsi_sampler_control control,
+ float mipmap[TGSI_QUAD_SIZE],
+ float lod[TGSI_QUAD_SIZE])
+{
+ /* all-zero array handed to compute_lambda_lod_unclamped() below */
+ static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 };
+
+ const struct sp_tgsi_sampler *sp_tgsi_samp =
+ sp_tgsi_sampler_cast_c(tgsi_sampler);
+ const struct sp_sampler_view *sp_sview;
+ const struct sp_sampler *sp_samp;
+ const struct sp_filter_funcs *funcs;
+ int i;
+
+ assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+ assert(sampler_index < PIPE_MAX_SAMPLERS);
+ assert(sp_tgsi_samp->sp_sampler[sampler_index]);
+
+ sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
+ sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
+ /* always have a view here but texture is NULL if no sampler view was
+ * set. */
+ if (!sp_sview->base.texture) {
+ for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+ mipmap[i] = 0.0f;
+ lod[i] = 0.0f;
+ }
+ return;
+ }
+
+ if (sp_sview->need_cube_convert) {
+ /* Cube views: run the coordinates through convert_cube() first.  The
+ * face array it fills in is not used afterwards in this function. */
+ float cs[TGSI_QUAD_SIZE];
+ float ct[TGSI_QUAD_SIZE];
+ float cp[TGSI_QUAD_SIZE];
+ uint unused_faces[TGSI_QUAD_SIZE];
+
+ convert_cube(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp, unused_faces);
+ compute_lambda_lod_unclamped(sp_sview, sp_samp,
+ cs, ct, cp, lod_in, control, lod);
+ } else {
+ compute_lambda_lod_unclamped(sp_sview, sp_samp,
+ s, t, p, lod_in, control, lod);
+ }
+
+ /* Only the filter function table is requested; the two remaining
+ * outputs of get_filters() are passed as NULL. */
+ get_filters(sp_sview, sp_samp, control, &funcs, NULL, NULL);
+ funcs->relative_level(sp_sview, sp_samp, lod, mipmap);
+}
static void
sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
- struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+ const struct sp_tgsi_sampler *sp_samp =
+ sp_tgsi_sampler_cast_c(tgsi_sampler);
assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
/* always have a view here but texture is NULL if no sampler view was set. */
samp->base.get_dims = sp_tgsi_get_dims;
samp->base.get_samples = sp_tgsi_get_samples;
samp->base.get_texel = sp_tgsi_get_texel;
+ samp->base.query_lod = sp_tgsi_query_lod;
return samp;
}
-
int gather_comp;
};
-typedef void (*img_filter_func)(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+typedef void (*img_filter_func)(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
const struct img_filter_args *args,
float *rgba);
struct filter_args {
enum tgsi_sampler_control control;
const int8_t *offset;
+ /* One face per quad element: sp_tgsi_get_samples() points this at the
+ * faces written by convert_cube(), or at an all-zero array otherwise. */
+ const uint *faces;
};
-typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
+typedef void (*mip_filter_func)(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
img_filter_func min_filter,
img_filter_func mag_filter,
const float s[TGSI_QUAD_SIZE],
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
-typedef void (*filter_func)(struct sp_sampler_view *sp_sview,
- struct sp_sampler *sp_samp,
- const float s[TGSI_QUAD_SIZE],
- const float t[TGSI_QUAD_SIZE],
- const float p[TGSI_QUAD_SIZE],
- const float c0[TGSI_QUAD_SIZE],
- const float lod[TGSI_QUAD_SIZE],
- const struct filter_args *args,
- float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
-
+/**
+ * Callback type used for sp_filter_funcs::relative_level: receives a
+ * read-only per-quad lod array and a per-quad level array to write to.
+ */
+typedef void (*mip_level_func)(const struct sp_sampler_view *sp_sview,
+ const struct sp_sampler *sp_samp,
+ const float lod[TGSI_QUAD_SIZE],
+ float level[TGSI_QUAD_SIZE]);
typedef void (*fetch_func)(struct sp_sampler_view *sp_sview,
const int i[TGSI_QUAD_SIZE],
boolean need_swizzle;
boolean pot2d;
-
- filter_func get_samples;
-
- /* this is just abusing the sampler_view object as local storage */
- unsigned faces[TGSI_QUAD_SIZE];
+ boolean need_cube_convert;
/* these are different per shader type */
struct softpipe_tex_tile_cache *cache;
};
+/**
+ * Table of callbacks chosen according to the sampler's min_mip_filter
+ * mode; sp_sampler::filter_funcs points at one of these tables.
+ */
+struct sp_filter_funcs {
+ mip_level_func relative_level; /* called by sp_tgsi_query_lod() */
+ mip_filter_func filter; /* the mip filter function */
+};
struct sp_sampler {
struct pipe_sampler_state base;
wrap_linear_func linear_texcoord_t;
wrap_linear_func linear_texcoord_p;
- mip_filter_func mip_filter;
+ const struct sp_filter_funcs *filter_funcs;
};
C_SOURCES := \
svga_cmd.c \
svga_cmd.h \
+ svga_cmd_vgpu10.c \
svga_context.c \
svga_context.h \
svga_debug.h \
svga_format.c \
svga_format.h \
svga_hw_reg.h \
+ svga_link.c \
+ svga_link.h \
svga_pipe_blend.c \
svga_pipe_blit.c \
svga_pipe_clear.c \
svga_pipe_draw.c \
svga_pipe_flush.c \
svga_pipe_fs.c \
+ svga_pipe_gs.c \
svga_pipe_misc.c \
svga_pipe_query.c \
svga_pipe_rasterizer.c \
svga_pipe_sampler.c \
+ svga_pipe_streamout.c \
svga_pipe_vertex.c \
svga_pipe_vs.c \
svga_public.h \
svga_shader.c \
svga_shader.h \
svga_state.c \
+ svga_state.h \
svga_state_constants.c \
svga_state_framebuffer.c \
svga_state_fs.c \
- svga_state.h \
+ svga_state_gs.c \
svga_state_need_swtnl.c \
svga_state_rss.c \
+ svga_state_sampler.c \
+ svga_state_tgsi_transform.c \
svga_state_tss.c \
svga_state_vdecl.c \
svga_state_vs.c \
+ svga_streamout.h \
svga_surface.c \
svga_surface.h \
svga_swtnl_backend.c \
svga_tgsi_emit.h \
svga_tgsi.h \
svga_tgsi_insn.c \
+ svga_tgsi_vgpu10.c \
svga_winsys.h \
\
svgadump/svga_dump.c \
include/svga3d_caps.h \
include/svga3d_cmd.h \
include/svga3d_devcaps.h \
+ include/svga3d_dx.h \
include/svga3d_limits.h \
include/svga3d_reg.h \
include/svga3d_shaderdefs.h \
include/svga_overlay.h \
include/svga_reg.h \
include/svga_types.h \
+ include/VGPU10ShaderTokens.h \
include/vmware_pack_begin.h \
include/vmware_pack_end.h
--- /dev/null
+/**********************************************************
+ * Copyright 2007-2015 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * VGPU10ShaderTokens.h --
+ *
+ * VGPU10 shader token definitions.
+ *
+ */
+
+#ifndef VGPU10SHADERTOKENS_H
+#define VGPU10SHADERTOKENS_H
+
+/*
+ * Shader limits.
+ *
+ * The negative texel fetch offset is parenthesized so that the macro
+ * expands as a single operand wherever it is used.
+ */
+#define VGPU10_MAX_VS_INPUTS 16
+#define VGPU10_MAX_VS_OUTPUTS 16
+#define VGPU10_MAX_GS_INPUTS 16
+#define VGPU10_MAX_GS_OUTPUTS 32
+#define VGPU10_MAX_FS_INPUTS 32
+#define VGPU10_MAX_FS_OUTPUTS 8
+#define VGPU10_MAX_TEMPS 4096
+#define VGPU10_MAX_CONSTANT_BUFFERS 14
+#define VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT 4096
+#define VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT 4096
+#define VGPU10_MAX_SAMPLERS 16
+#define VGPU10_MAX_RESOURCES 128
+#define VGPU10_MIN_TEXEL_FETCH_OFFSET (-8)
+#define VGPU10_MAX_TEXEL_FETCH_OFFSET 7
+
+/* Values for the programType field of VGPU10ProgramToken. */
+typedef enum {
+ VGPU10_PIXEL_SHADER = 0,
+ VGPU10_VERTEX_SHADER = 1,
+ VGPU10_GEOMETRY_SHADER = 2
+} VGPU10_PROGRAM_TYPE;
+
+/*
+ * Program token: version and program type packed into bit-fields that
+ * total 32 bits (4 + 4 + 8 unnamed + 16), overlaid with a uint32 'value'
+ * for access to the token as a whole.
+ */
+typedef union {
+ struct {
+ unsigned int minorVersion : 4;
+ unsigned int majorVersion : 4;
+ unsigned int : 8;
+ unsigned int programType : 16; /* VGPU10_PROGRAM_TYPE */
+ };
+ uint32 value;
+} VGPU10ProgramToken;
+
+
+/*
+ * Values for the opcodeType field of VGPU10OpcodeToken0.
+ *
+ * Every opcode has an explicit value; VGPU10_NUM_OPCODES has none and so
+ * takes the value following the last opcode (108).
+ */
+typedef enum {
+ VGPU10_OPCODE_ADD = 0,
+ VGPU10_OPCODE_AND = 1,
+ VGPU10_OPCODE_BREAK = 2,
+ VGPU10_OPCODE_BREAKC = 3,
+ VGPU10_OPCODE_CALL = 4,
+ VGPU10_OPCODE_CALLC = 5,
+ VGPU10_OPCODE_CASE = 6,
+ VGPU10_OPCODE_CONTINUE = 7,
+ VGPU10_OPCODE_CONTINUEC = 8,
+ VGPU10_OPCODE_CUT = 9,
+ VGPU10_OPCODE_DEFAULT = 10,
+ VGPU10_OPCODE_DERIV_RTX = 11,
+ VGPU10_OPCODE_DERIV_RTY = 12,
+ VGPU10_OPCODE_DISCARD = 13,
+ VGPU10_OPCODE_DIV = 14,
+ VGPU10_OPCODE_DP2 = 15,
+ VGPU10_OPCODE_DP3 = 16,
+ VGPU10_OPCODE_DP4 = 17,
+ VGPU10_OPCODE_ELSE = 18,
+ VGPU10_OPCODE_EMIT = 19,
+ VGPU10_OPCODE_EMITTHENCUT = 20,
+ VGPU10_OPCODE_ENDIF = 21,
+ VGPU10_OPCODE_ENDLOOP = 22,
+ VGPU10_OPCODE_ENDSWITCH = 23,
+ VGPU10_OPCODE_EQ = 24,
+ VGPU10_OPCODE_EXP = 25,
+ VGPU10_OPCODE_FRC = 26,
+ VGPU10_OPCODE_FTOI = 27,
+ VGPU10_OPCODE_FTOU = 28,
+ VGPU10_OPCODE_GE = 29,
+ VGPU10_OPCODE_IADD = 30,
+ VGPU10_OPCODE_IF = 31,
+ VGPU10_OPCODE_IEQ = 32,
+ VGPU10_OPCODE_IGE = 33,
+ VGPU10_OPCODE_ILT = 34,
+ VGPU10_OPCODE_IMAD = 35,
+ VGPU10_OPCODE_IMAX = 36,
+ VGPU10_OPCODE_IMIN = 37,
+ VGPU10_OPCODE_IMUL = 38,
+ VGPU10_OPCODE_INE = 39,
+ VGPU10_OPCODE_INEG = 40,
+ VGPU10_OPCODE_ISHL = 41,
+ VGPU10_OPCODE_ISHR = 42,
+ VGPU10_OPCODE_ITOF = 43,
+ VGPU10_OPCODE_LABEL = 44,
+ VGPU10_OPCODE_LD = 45,
+ VGPU10_OPCODE_LD_MS = 46,
+ VGPU10_OPCODE_LOG = 47,
+ VGPU10_OPCODE_LOOP = 48,
+ VGPU10_OPCODE_LT = 49,
+ VGPU10_OPCODE_MAD = 50,
+ VGPU10_OPCODE_MIN = 51,
+ VGPU10_OPCODE_MAX = 52,
+ VGPU10_OPCODE_CUSTOMDATA = 53,
+ VGPU10_OPCODE_MOV = 54,
+ VGPU10_OPCODE_MOVC = 55,
+ VGPU10_OPCODE_MUL = 56,
+ VGPU10_OPCODE_NE = 57,
+ VGPU10_OPCODE_NOP = 58,
+ VGPU10_OPCODE_NOT = 59,
+ VGPU10_OPCODE_OR = 60,
+ VGPU10_OPCODE_RESINFO = 61,
+ VGPU10_OPCODE_RET = 62,
+ VGPU10_OPCODE_RETC = 63,
+ VGPU10_OPCODE_ROUND_NE = 64,
+ VGPU10_OPCODE_ROUND_NI = 65,
+ VGPU10_OPCODE_ROUND_PI = 66,
+ VGPU10_OPCODE_ROUND_Z = 67,
+ VGPU10_OPCODE_RSQ = 68,
+ VGPU10_OPCODE_SAMPLE = 69,
+ VGPU10_OPCODE_SAMPLE_C = 70,
+ VGPU10_OPCODE_SAMPLE_C_LZ = 71,
+ VGPU10_OPCODE_SAMPLE_L = 72,
+ VGPU10_OPCODE_SAMPLE_D = 73,
+ VGPU10_OPCODE_SAMPLE_B = 74,
+ VGPU10_OPCODE_SQRT = 75,
+ VGPU10_OPCODE_SWITCH = 76,
+ VGPU10_OPCODE_SINCOS = 77,
+ VGPU10_OPCODE_UDIV = 78,
+ VGPU10_OPCODE_ULT = 79,
+ VGPU10_OPCODE_UGE = 80,
+ VGPU10_OPCODE_UMUL = 81,
+ VGPU10_OPCODE_UMAD = 82,
+ VGPU10_OPCODE_UMAX = 83,
+ VGPU10_OPCODE_UMIN = 84,
+ VGPU10_OPCODE_USHR = 85,
+ VGPU10_OPCODE_UTOF = 86,
+ VGPU10_OPCODE_XOR = 87,
+ VGPU10_OPCODE_DCL_RESOURCE = 88,
+ VGPU10_OPCODE_DCL_CONSTANT_BUFFER = 89,
+ VGPU10_OPCODE_DCL_SAMPLER = 90,
+ VGPU10_OPCODE_DCL_INDEX_RANGE = 91,
+ VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY = 92,
+ VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE = 93,
+ VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT = 94,
+ VGPU10_OPCODE_DCL_INPUT = 95,
+ VGPU10_OPCODE_DCL_INPUT_SGV = 96,
+ VGPU10_OPCODE_DCL_INPUT_SIV = 97,
+ VGPU10_OPCODE_DCL_INPUT_PS = 98,
+ VGPU10_OPCODE_DCL_INPUT_PS_SGV = 99,
+ VGPU10_OPCODE_DCL_INPUT_PS_SIV = 100,
+ VGPU10_OPCODE_DCL_OUTPUT = 101,
+ VGPU10_OPCODE_DCL_OUTPUT_SGV = 102,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV = 103,
+ VGPU10_OPCODE_DCL_TEMPS = 104,
+ VGPU10_OPCODE_DCL_INDEXABLE_TEMP = 105,
+ VGPU10_OPCODE_DCL_GLOBAL_FLAGS = 106,
+ VGPU10_OPCODE_IDIV = 107,
+ VGPU10_NUM_OPCODES /* Should be the last entry. */
+} VGPU10_OPCODE_TYPE;
+
+/* Values for the interpolationMode field of VGPU10OpcodeToken0. */
+typedef enum {
+ VGPU10_INTERPOLATION_UNDEFINED = 0,
+ VGPU10_INTERPOLATION_CONSTANT = 1,
+ VGPU10_INTERPOLATION_LINEAR = 2,
+ VGPU10_INTERPOLATION_LINEAR_CENTROID = 3,
+ VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE = 4,
+ VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5,
+ VGPU10_INTERPOLATION_LINEAR_SAMPLE = 6, /* DX10.1 */
+ VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7 /* DX10.1 */
+} VGPU10_INTERPOLATION_MODE;
+
+/* Values for the resourceDimension field of VGPU10OpcodeToken0. */
+typedef enum {
+ VGPU10_RESOURCE_DIMENSION_UNKNOWN = 0,
+ VGPU10_RESOURCE_DIMENSION_BUFFER = 1,
+ VGPU10_RESOURCE_DIMENSION_TEXTURE1D = 2,
+ VGPU10_RESOURCE_DIMENSION_TEXTURE2D = 3,
+ VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS = 4,
+ VGPU10_RESOURCE_DIMENSION_TEXTURE3D = 5,
+ VGPU10_RESOURCE_DIMENSION_TEXTURECUBE = 6,
+ VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY = 7,
+ VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY = 8,
+ VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9,
+ VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10
+} VGPU10_RESOURCE_DIMENSION;
+
+/* Values for the samplerMode field of VGPU10OpcodeToken0. */
+typedef enum {
+ VGPU10_SAMPLER_MODE_DEFAULT = 0,
+ VGPU10_SAMPLER_MODE_COMPARISON = 1,
+ VGPU10_SAMPLER_MODE_MONO = 2
+} VGPU10_SAMPLER_MODE;
+
+/* Values for the 1-bit testBoolean field of VGPU10OpcodeToken0. */
+typedef enum {
+ VGPU10_INSTRUCTION_TEST_ZERO = 0,
+ VGPU10_INSTRUCTION_TEST_NONZERO = 1
+} VGPU10_INSTRUCTION_TEST_BOOLEAN;
+
+/* Values for the 1-bit accessPattern field of VGPU10OpcodeToken0. */
+typedef enum {
+ VGPU10_CB_IMMEDIATE_INDEXED = 0,
+ VGPU10_CB_DYNAMIC_INDEXED = 1
+} VGPU10_CB_ACCESS_PATTERN;
+
+/*
+ * Values for the primitive field of VGPU10OpcodeToken0.
+ * Note that the numbering is not contiguous: 4 and 5 are not defined.
+ */
+typedef enum {
+ VGPU10_PRIMITIVE_UNDEFINED = 0,
+ VGPU10_PRIMITIVE_POINT = 1,
+ VGPU10_PRIMITIVE_LINE = 2,
+ VGPU10_PRIMITIVE_TRIANGLE = 3,
+ VGPU10_PRIMITIVE_LINE_ADJ = 6,
+ VGPU10_PRIMITIVE_TRIANGLE_ADJ = 7
+} VGPU10_PRIMITIVE;
+
+/*
+ * Values for the primitiveTopology field of VGPU10OpcodeToken0.
+ * Note that the numbering is not contiguous: 6 through 9 are not defined.
+ */
+typedef enum {
+ VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED = 0,
+ VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST = 1,
+ VGPU10_PRIMITIVE_TOPOLOGY_LINELIST = 2,
+ VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP = 3,
+ VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4,
+ VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5,
+ VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10,
+ VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11,
+ VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12,
+ VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13
+} VGPU10_PRIMITIVE_TOPOLOGY;
+
+/* Values for the customDataClass field of VGPU10OpcodeToken0. */
+typedef enum {
+ VGPU10_CUSTOMDATA_COMMENT = 0,
+ VGPU10_CUSTOMDATA_DEBUGINFO = 1,
+ VGPU10_CUSTOMDATA_OPAQUE = 2,
+ VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER = 3
+} VGPU10_CUSTOMDATA_CLASS;
+
+/* Values for the resinfoReturnType field of VGPU10OpcodeToken0. */
+typedef enum {
+ VGPU10_RESINFO_RETURN_FLOAT = 0,
+ VGPU10_RESINFO_RETURN_RCPFLOAT = 1,
+ VGPU10_RESINFO_RETURN_UINT = 2
+} VGPU10_RESINFO_RETURN_TYPE;
+
+/*
+ * First opcode token, overlaid with a uint32 'value'.
+ *
+ * The first anonymous struct lays out the common fields; its bit-fields
+ * total 32 bits (11 + 4 + 3 + 1 + 5 + 7 + 1).  Each of the following
+ * anonymous structs is an alternative view that starts with an unnamed
+ * 11-bit field (the same width as opcodeType) followed by a field that
+ * is named after one particular use of the token.
+ */
+typedef union {
+ struct {
+ unsigned int opcodeType : 11; /* VGPU10_OPCODE_TYPE */
+ unsigned int interpolationMode : 4; /* VGPU10_INTERPOLATION_MODE */
+ unsigned int : 3;
+ unsigned int testBoolean : 1; /* VGPU10_INSTRUCTION_TEST_BOOLEAN */
+ unsigned int : 5;
+ unsigned int instructionLength : 7;
+ unsigned int extended : 1;
+ };
+ struct {
+ unsigned int : 11;
+ unsigned int resourceDimension : 5; /* VGPU10_RESOURCE_DIMENSION */
+ };
+ struct {
+ unsigned int : 11;
+ unsigned int samplerMode : 4; /* VGPU10_SAMPLER_MODE */
+ };
+ struct {
+ unsigned int : 11;
+ unsigned int accessPattern : 1; /* VGPU10_CB_ACCESS_PATTERN */
+ };
+ struct {
+ unsigned int : 11;
+ unsigned int primitive : 6; /* VGPU10_PRIMITIVE */
+ };
+ struct {
+ unsigned int : 11;
+ unsigned int primitiveTopology : 6; /* VGPU10_PRIMITIVE_TOPOLOGY */
+ };
+ struct {
+ unsigned int : 11;
+ unsigned int customDataClass : 21; /* VGPU10_CUSTOMDATA_CLASS */
+ };
+ struct {
+ unsigned int : 11;
+ unsigned int resinfoReturnType : 2; /* VGPU10_RESINFO_RETURN_TYPE */
+ unsigned int saturate : 1;
+ };
+ struct {
+ unsigned int : 11;
+ unsigned int refactoringAllowed : 1;
+ };
+ uint32 value;
+} VGPU10OpcodeToken0;
+
+
+/*
+ * Values for the opcodeType field of VGPU10OpcodeToken1.
+ * SAMPLE_CONTROLS has no explicit value and therefore equals 1.
+ */
+typedef enum {
+ VGPU10_EXTENDED_OPCODE_EMPTY = 0,
+ VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS
+} VGPU10_EXTENDED_OPCODE_TYPE;
+
+/*
+ * Extended opcode token, overlaid with a uint32 'value'.  The bit-fields
+ * total 32 bits (6 + 3 + 4 + 4 + 4 + 10 + 1).  The three offset fields are
+ * declared unsigned but hold 4-bit two's complement values, so a reader
+ * has to sign-extend them.
+ */
+typedef union {
+ struct {
+ unsigned int opcodeType : 6; /* VGPU10_EXTENDED_OPCODE_TYPE */
+ unsigned int : 3;
+ unsigned int offsetU : 4; /* Two's complement. */
+ unsigned int offsetV : 4; /* Two's complement. */
+ unsigned int offsetW : 4; /* Two's complement. */
+ unsigned int : 10;
+ unsigned int extended : 1;
+ };
+ uint32 value;
+} VGPU10OpcodeToken1;
+
+
+/* Values for the numComponents field of VGPU10OperandToken0. */
+typedef enum {
+ VGPU10_OPERAND_0_COMPONENT = 0,
+ VGPU10_OPERAND_1_COMPONENT = 1,
+ VGPU10_OPERAND_4_COMPONENT = 2,
+ VGPU10_OPERAND_N_COMPONENT = 3 /* Unused for now. */
+} VGPU10_OPERAND_NUM_COMPONENTS;
+
+/* Values for the selectionMode field of VGPU10OperandToken0. */
+typedef enum {
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE = 0,
+ VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE = 1,
+ VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE = 2
+} VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE;
+
+/*
+ * Component write-mask bits: one bit each for X, Y, Z and W, followed by
+ * every two-, three- and four-component combination built by OR-ing them.
+ * The four bits fit the 4-bit mask field of VGPU10OperandToken0.
+ */
+#define VGPU10_OPERAND_4_COMPONENT_MASK_X 0x1
+#define VGPU10_OPERAND_4_COMPONENT_MASK_Y 0x2
+#define VGPU10_OPERAND_4_COMPONENT_MASK_Z 0x4
+#define VGPU10_OPERAND_4_COMPONENT_MASK_W 0x8
+
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XY (VGPU10_OPERAND_4_COMPONENT_MASK_X | VGPU10_OPERAND_4_COMPONENT_MASK_Y)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XZ (VGPU10_OPERAND_4_COMPONENT_MASK_X | VGPU10_OPERAND_4_COMPONENT_MASK_Z)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XW (VGPU10_OPERAND_4_COMPONENT_MASK_X | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_YZ (VGPU10_OPERAND_4_COMPONENT_MASK_Y | VGPU10_OPERAND_4_COMPONENT_MASK_Z)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_YW (VGPU10_OPERAND_4_COMPONENT_MASK_Y | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_ZW (VGPU10_OPERAND_4_COMPONENT_MASK_Z | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XYZ (VGPU10_OPERAND_4_COMPONENT_MASK_XY | VGPU10_OPERAND_4_COMPONENT_MASK_Z)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XYW (VGPU10_OPERAND_4_COMPONENT_MASK_XY | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XZW (VGPU10_OPERAND_4_COMPONENT_MASK_XZ | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_YZW (VGPU10_OPERAND_4_COMPONENT_MASK_YZ | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_XYZW (VGPU10_OPERAND_4_COMPONENT_MASK_XYZ | VGPU10_OPERAND_4_COMPONENT_MASK_W)
+#define VGPU10_OPERAND_4_COMPONENT_MASK_ALL VGPU10_OPERAND_4_COMPONENT_MASK_XYZW
+
+#define VGPU10_REGISTER_INDEX_FROM_SEMANTIC 0xffffffff
+
+/*
+ * Component selectors; used by the swizzleX/Y/Z/W and selectMask fields
+ * of VGPU10OperandToken0.
+ */
+typedef enum {
+ VGPU10_COMPONENT_X = 0,
+ VGPU10_COMPONENT_Y = 1,
+ VGPU10_COMPONENT_Z = 2,
+ VGPU10_COMPONENT_W = 3
+} VGPU10_COMPONENT_NAME;
+
+/* Values for the operandType field of VGPU10OperandToken0. */
+typedef enum {
+ VGPU10_OPERAND_TYPE_TEMP = 0,
+ VGPU10_OPERAND_TYPE_INPUT = 1,
+ VGPU10_OPERAND_TYPE_OUTPUT = 2,
+ VGPU10_OPERAND_TYPE_INDEXABLE_TEMP = 3,
+ VGPU10_OPERAND_TYPE_IMMEDIATE32 = 4,
+ VGPU10_OPERAND_TYPE_IMMEDIATE64 = 5,
+ VGPU10_OPERAND_TYPE_SAMPLER = 6,
+ VGPU10_OPERAND_TYPE_RESOURCE = 7,
+ VGPU10_OPERAND_TYPE_CONSTANT_BUFFER = 8,
+ VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER = 9,
+ VGPU10_OPERAND_TYPE_LABEL = 10,
+ VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID = 11,
+ VGPU10_OPERAND_TYPE_OUTPUT_DEPTH = 12,
+ VGPU10_OPERAND_TYPE_NULL = 13,
+ VGPU10_OPERAND_TYPE_RASTERIZER = 14, /* DX10.1 */
+ VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15 /* DX10.1 */
+} VGPU10_OPERAND_TYPE;
+
+/* Values for the indexDimension field of VGPU10OperandToken0. */
+typedef enum {
+ VGPU10_OPERAND_INDEX_0D = 0,
+ VGPU10_OPERAND_INDEX_1D = 1,
+ VGPU10_OPERAND_INDEX_2D = 2,
+ VGPU10_OPERAND_INDEX_3D = 3
+} VGPU10_OPERAND_INDEX_DIMENSION;
+
+/*
+ * Values for the index0Representation and index1Representation fields
+ * of VGPU10OperandToken0.
+ */
+typedef enum {
+ VGPU10_OPERAND_INDEX_IMMEDIATE32 = 0,
+ VGPU10_OPERAND_INDEX_IMMEDIATE64 = 1,
+ VGPU10_OPERAND_INDEX_RELATIVE = 2,
+ VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3,
+ VGPU10_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4
+} VGPU10_OPERAND_INDEX_REPRESENTATION;
+
+/*
+ * First operand token, overlaid with a uint32 'value'.
+ *
+ * The first anonymous struct lays out the common fields; its bit-fields
+ * total 32 bits (2 + 2 + 4 + 4 + 8 + 2 + 3 + 3 + 3 + 1).  The two
+ * following anonymous structs are alternative views that start with an
+ * unnamed 4-bit field (the combined width of numComponents and
+ * selectionMode) and then name the swizzle or single-component selector
+ * instead of the mask.
+ */
+typedef union {
+ struct {
+ unsigned int numComponents : 2; /* VGPU10_OPERAND_NUM_COMPONENTS */
+ unsigned int selectionMode : 2; /* VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE */
+ unsigned int mask : 4; /* VGPU10_OPERAND_4_COMPONENT_MASK_* */
+ unsigned int : 4;
+ unsigned int operandType : 8; /* VGPU10_OPERAND_TYPE */
+ unsigned int indexDimension : 2; /* VGPU10_OPERAND_INDEX_DIMENSION */
+ unsigned int index0Representation : 3; /* VGPU10_OPERAND_INDEX_REPRESENTATION */
+ unsigned int index1Representation : 3; /* VGPU10_OPERAND_INDEX_REPRESENTATION */
+ unsigned int : 3;
+ unsigned int extended : 1;
+ };
+ struct {
+ unsigned int : 4;
+ unsigned int swizzleX : 2; /* VGPU10_COMPONENT_NAME */
+ unsigned int swizzleY : 2; /* VGPU10_COMPONENT_NAME */
+ unsigned int swizzleZ : 2; /* VGPU10_COMPONENT_NAME */
+ unsigned int swizzleW : 2; /* VGPU10_COMPONENT_NAME */
+ };
+ struct {
+ unsigned int : 4;
+ unsigned int selectMask : 2; /* VGPU10_COMPONENT_NAME */
+ };
+ uint32 value;
+} VGPU10OperandToken0;
+
+
+/* Values for the extendedOperandType field of VGPU10OperandToken1. */
+typedef enum {
+ VGPU10_EXTENDED_OPERAND_EMPTY = 0,
+ VGPU10_EXTENDED_OPERAND_MODIFIER = 1
+} VGPU10_EXTENDED_OPERAND_TYPE;
+
+/* Values for the operandModifier field of VGPU10OperandToken1. */
+typedef enum {
+ VGPU10_OPERAND_MODIFIER_NONE = 0,
+ VGPU10_OPERAND_MODIFIER_NEG = 1,
+ VGPU10_OPERAND_MODIFIER_ABS = 2,
+ VGPU10_OPERAND_MODIFIER_ABSNEG = 3
+} VGPU10_OPERAND_MODIFIER;
+
+/*
+ * Extended operand token, overlaid with a uint32 'value'.  The bit-fields
+ * total 32 bits (6 + 8 + 17 unnamed + 1).
+ */
+typedef union {
+ struct {
+ unsigned int extendedOperandType : 6; /* VGPU10_EXTENDED_OPERAND_TYPE */
+ unsigned int operandModifier : 8; /* VGPU10_OPERAND_MODIFIER */
+ unsigned int : 17;
+ unsigned int extended : 1;
+ };
+ uint32 value;
+} VGPU10OperandToken1;
+
+
+/*
+ * Values for the component0..component3 fields of
+ * VGPU10ResourceReturnTypeToken.  Numbering starts at 1; no enumerator
+ * is defined for 0.
+ */
+typedef enum {
+ VGPU10_RETURN_TYPE_UNORM = 1,
+ VGPU10_RETURN_TYPE_SNORM = 2,
+ VGPU10_RETURN_TYPE_SINT = 3,
+ VGPU10_RETURN_TYPE_UINT = 4,
+ VGPU10_RETURN_TYPE_FLOAT = 5,
+ VGPU10_RETURN_TYPE_MIXED = 6
+} VGPU10_RESOURCE_RETURN_TYPE;
+
+/*
+ * Resource return type token, overlaid with a uint32 'value'.  One 4-bit
+ * return type per component; only 16 of the 32 bits are named.
+ */
+typedef union {
+ struct {
+ unsigned int component0 : 4; /* VGPU10_RESOURCE_RETURN_TYPE */
+ unsigned int component1 : 4; /* VGPU10_RESOURCE_RETURN_TYPE */
+ unsigned int component2 : 4; /* VGPU10_RESOURCE_RETURN_TYPE */
+ unsigned int component3 : 4; /* VGPU10_RESOURCE_RETURN_TYPE */
+ };
+ uint32 value;
+} VGPU10ResourceReturnTypeToken;
+
+
+/* Values for the name field of VGPU10NameToken. */
+typedef enum {
+ VGPU10_NAME_UNDEFINED = 0,
+ VGPU10_NAME_POSITION = 1,
+ VGPU10_NAME_CLIP_DISTANCE = 2,
+ VGPU10_NAME_CULL_DISTANCE = 3,
+ VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX = 4,
+ VGPU10_NAME_VIEWPORT_ARRAY_INDEX = 5,
+ VGPU10_NAME_VERTEX_ID = 6,
+ VGPU10_NAME_PRIMITIVE_ID = 7,
+ VGPU10_NAME_INSTANCE_ID = 8,
+ VGPU10_NAME_IS_FRONT_FACE = 9,
+ VGPU10_NAME_SAMPLE_INDEX = 10
+} VGPU10_SYSTEM_NAME;
+
+/*
+ * Name token, overlaid with a uint32 'value'.  Only the 16-bit name
+ * field is declared; the remaining bits have no named field.
+ */
+typedef union {
+ struct {
+ unsigned int name : 16; /* VGPU10_SYSTEM_NAME */
+ };
+ uint32 value;
+} VGPU10NameToken;
+
+#endif
/**********************************************************
- * Copyright 2007-2014 VMware, Inc. All rights reserved.
+ * Copyright 2007-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
typedef uint32 SVGA3dCapPair[2];
-#endif // _SVGA3D_CAPS_H_
+#endif
/**********************************************************
- * Copyright 1998-2014 VMware, Inc. All rights reserved.
+ * Copyright 1998-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* and up.
*/
-#define SVGA_3D_CMD_LEGACY_BASE 1000
-#define SVGA_3D_CMD_BASE 1040
-
-#define SVGA_3D_CMD_SURFACE_DEFINE 1040
-#define SVGA_3D_CMD_SURFACE_DESTROY 1041
-#define SVGA_3D_CMD_SURFACE_COPY 1042
-#define SVGA_3D_CMD_SURFACE_STRETCHBLT 1043
-#define SVGA_3D_CMD_SURFACE_DMA 1044
-#define SVGA_3D_CMD_CONTEXT_DEFINE 1045
-#define SVGA_3D_CMD_CONTEXT_DESTROY 1046
-#define SVGA_3D_CMD_SETTRANSFORM 1047
-#define SVGA_3D_CMD_SETZRANGE 1048
-#define SVGA_3D_CMD_SETRENDERSTATE 1049
-#define SVGA_3D_CMD_SETRENDERTARGET 1050
-#define SVGA_3D_CMD_SETTEXTURESTATE 1051
-#define SVGA_3D_CMD_SETMATERIAL 1052
-#define SVGA_3D_CMD_SETLIGHTDATA 1053
-#define SVGA_3D_CMD_SETLIGHTENABLED 1054
-#define SVGA_3D_CMD_SETVIEWPORT 1055
-#define SVGA_3D_CMD_SETCLIPPLANE 1056
-#define SVGA_3D_CMD_CLEAR 1057
-#define SVGA_3D_CMD_PRESENT 1058
-#define SVGA_3D_CMD_SHADER_DEFINE 1059
-#define SVGA_3D_CMD_SHADER_DESTROY 1060
-#define SVGA_3D_CMD_SET_SHADER 1061
-#define SVGA_3D_CMD_SET_SHADER_CONST 1062
-#define SVGA_3D_CMD_DRAW_PRIMITIVES 1063
-#define SVGA_3D_CMD_SETSCISSORRECT 1064
-#define SVGA_3D_CMD_BEGIN_QUERY 1065
-#define SVGA_3D_CMD_END_QUERY 1066
-#define SVGA_3D_CMD_WAIT_FOR_QUERY 1067
-#define SVGA_3D_CMD_PRESENT_READBACK 1068
-#define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN 1069
-#define SVGA_3D_CMD_SURFACE_DEFINE_V2 1070
-#define SVGA_3D_CMD_GENERATE_MIPMAPS 1071
-#define SVGA_3D_CMD_VIDEO_CREATE_DECODER 1072
-#define SVGA_3D_CMD_VIDEO_DESTROY_DECODER 1073
-#define SVGA_3D_CMD_VIDEO_CREATE_PROCESSOR 1074
-#define SVGA_3D_CMD_VIDEO_DESTROY_PROCESSOR 1075
-#define SVGA_3D_CMD_VIDEO_DECODE_START_FRAME 1076
-#define SVGA_3D_CMD_VIDEO_DECODE_RENDER 1077
-#define SVGA_3D_CMD_VIDEO_DECODE_END_FRAME 1078
-#define SVGA_3D_CMD_VIDEO_PROCESS_FRAME 1079
-#define SVGA_3D_CMD_ACTIVATE_SURFACE 1080
-#define SVGA_3D_CMD_DEACTIVATE_SURFACE 1081
-#define SVGA_3D_CMD_SCREEN_DMA 1082
-#define SVGA_3D_CMD_SET_UNITY_SURFACE_COOKIE 1083
-#define SVGA_3D_CMD_OPEN_CONTEXT_SURFACE 1084
-
-#define SVGA_3D_CMD_LOGICOPS_BITBLT 1085
-#define SVGA_3D_CMD_LOGICOPS_TRANSBLT 1086
-#define SVGA_3D_CMD_LOGICOPS_STRETCHBLT 1087
-#define SVGA_3D_CMD_LOGICOPS_COLORFILL 1088
-#define SVGA_3D_CMD_LOGICOPS_ALPHABLEND 1089
-#define SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND 1090
-
-#define SVGA_3D_CMD_SET_OTABLE_BASE 1091
-#define SVGA_3D_CMD_READBACK_OTABLE 1092
-
-#define SVGA_3D_CMD_DEFINE_GB_MOB 1093
-#define SVGA_3D_CMD_DESTROY_GB_MOB 1094
-#define SVGA_3D_CMD_REDEFINE_GB_MOB 1095
-#define SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING 1096
-
-#define SVGA_3D_CMD_DEFINE_GB_SURFACE 1097
-#define SVGA_3D_CMD_DESTROY_GB_SURFACE 1098
-#define SVGA_3D_CMD_BIND_GB_SURFACE 1099
-#define SVGA_3D_CMD_COND_BIND_GB_SURFACE 1100
-#define SVGA_3D_CMD_UPDATE_GB_IMAGE 1101
-#define SVGA_3D_CMD_UPDATE_GB_SURFACE 1102
-#define SVGA_3D_CMD_READBACK_GB_IMAGE 1103
-#define SVGA_3D_CMD_READBACK_GB_SURFACE 1104
-#define SVGA_3D_CMD_INVALIDATE_GB_IMAGE 1105
-#define SVGA_3D_CMD_INVALIDATE_GB_SURFACE 1106
-
-#define SVGA_3D_CMD_DEFINE_GB_CONTEXT 1107
-#define SVGA_3D_CMD_DESTROY_GB_CONTEXT 1108
-#define SVGA_3D_CMD_BIND_GB_CONTEXT 1109
-#define SVGA_3D_CMD_READBACK_GB_CONTEXT 1110
-#define SVGA_3D_CMD_INVALIDATE_GB_CONTEXT 1111
-
-#define SVGA_3D_CMD_DEFINE_GB_SHADER 1112
-#define SVGA_3D_CMD_DESTROY_GB_SHADER 1113
-#define SVGA_3D_CMD_BIND_GB_SHADER 1114
-
-#define SVGA_3D_CMD_BIND_SHADERCONSTS 1115
-
-#define SVGA_3D_CMD_BEGIN_GB_QUERY 1116
-#define SVGA_3D_CMD_END_GB_QUERY 1117
-#define SVGA_3D_CMD_WAIT_FOR_GB_QUERY 1118
-
-#define SVGA_3D_CMD_NOP 1119
-
-#define SVGA_3D_CMD_ENABLE_GART 1120
-#define SVGA_3D_CMD_DISABLE_GART 1121
-#define SVGA_3D_CMD_MAP_MOB_INTO_GART 1122
-#define SVGA_3D_CMD_UNMAP_GART_RANGE 1123
-
-#define SVGA_3D_CMD_DEFINE_GB_SCREENTARGET 1124
-#define SVGA_3D_CMD_DESTROY_GB_SCREENTARGET 1125
-#define SVGA_3D_CMD_BIND_GB_SCREENTARGET 1126
-#define SVGA_3D_CMD_UPDATE_GB_SCREENTARGET 1127
-
-#define SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL 1128
-#define SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL 1129
-
-#define SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE 1130
-
-#define SVGA_3D_CMD_GB_SCREEN_DMA 1131
-#define SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH 1132
-#define SVGA_3D_CMD_GB_MOB_FENCE 1133
-#define SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 1134
-#define SVGA_3D_CMD_DEFINE_GB_MOB64 1135
-#define SVGA_3D_CMD_REDEFINE_GB_MOB64 1136
-#define SVGA_3D_CMD_NOP_ERROR 1137
-
-#define SVGA_3D_CMD_RESERVED1 1138
-#define SVGA_3D_CMD_RESERVED2 1139
-#define SVGA_3D_CMD_RESERVED3 1140
-#define SVGA_3D_CMD_RESERVED4 1141
-#define SVGA_3D_CMD_RESERVED5 1142
-
-#define SVGA_3D_CMD_MAX 1203
-#define SVGA_3D_CMD_FUTURE_MAX 3000
+typedef enum { /* 3D command ids; every value is assigned explicitly */
+ SVGA_3D_CMD_LEGACY_BASE = 1000,
+ SVGA_3D_CMD_BASE = 1040, /* same value as SVGA_3D_CMD_SURFACE_DEFINE */
+
+ SVGA_3D_CMD_SURFACE_DEFINE = 1040,
+ SVGA_3D_CMD_SURFACE_DESTROY = 1041,
+ SVGA_3D_CMD_SURFACE_COPY = 1042,
+ SVGA_3D_CMD_SURFACE_STRETCHBLT = 1043,
+ SVGA_3D_CMD_SURFACE_DMA = 1044,
+ SVGA_3D_CMD_CONTEXT_DEFINE = 1045,
+ SVGA_3D_CMD_CONTEXT_DESTROY = 1046,
+ SVGA_3D_CMD_SETTRANSFORM = 1047,
+ SVGA_3D_CMD_SETZRANGE = 1048,
+ SVGA_3D_CMD_SETRENDERSTATE = 1049,
+ SVGA_3D_CMD_SETRENDERTARGET = 1050,
+ SVGA_3D_CMD_SETTEXTURESTATE = 1051,
+ SVGA_3D_CMD_SETMATERIAL = 1052,
+ SVGA_3D_CMD_SETLIGHTDATA = 1053,
+ SVGA_3D_CMD_SETLIGHTENABLED = 1054,
+ SVGA_3D_CMD_SETVIEWPORT = 1055,
+ SVGA_3D_CMD_SETCLIPPLANE = 1056,
+ SVGA_3D_CMD_CLEAR = 1057,
+ SVGA_3D_CMD_PRESENT = 1058,
+ SVGA_3D_CMD_SHADER_DEFINE = 1059,
+ SVGA_3D_CMD_SHADER_DESTROY = 1060,
+ SVGA_3D_CMD_SET_SHADER = 1061,
+ SVGA_3D_CMD_SET_SHADER_CONST = 1062,
+ SVGA_3D_CMD_DRAW_PRIMITIVES = 1063,
+ SVGA_3D_CMD_SETSCISSORRECT = 1064,
+ SVGA_3D_CMD_BEGIN_QUERY = 1065,
+ SVGA_3D_CMD_END_QUERY = 1066,
+ SVGA_3D_CMD_WAIT_FOR_QUERY = 1067,
+ SVGA_3D_CMD_PRESENT_READBACK = 1068,
+ SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN = 1069,
+ SVGA_3D_CMD_SURFACE_DEFINE_V2 = 1070,
+ SVGA_3D_CMD_GENERATE_MIPMAPS = 1071,
+ SVGA_3D_CMD_VIDEO_CREATE_DECODER = 1072,
+ SVGA_3D_CMD_VIDEO_DESTROY_DECODER = 1073,
+ SVGA_3D_CMD_VIDEO_CREATE_PROCESSOR = 1074,
+ SVGA_3D_CMD_VIDEO_DESTROY_PROCESSOR = 1075,
+ SVGA_3D_CMD_VIDEO_DECODE_START_FRAME = 1076,
+ SVGA_3D_CMD_VIDEO_DECODE_RENDER = 1077,
+ SVGA_3D_CMD_VIDEO_DECODE_END_FRAME = 1078,
+ SVGA_3D_CMD_VIDEO_PROCESS_FRAME = 1079,
+ SVGA_3D_CMD_ACTIVATE_SURFACE = 1080,
+ SVGA_3D_CMD_DEACTIVATE_SURFACE = 1081,
+ SVGA_3D_CMD_SCREEN_DMA = 1082,
+ SVGA_3D_CMD_DEAD1 = 1083, /* id of the removed SVGA_3D_CMD_SET_UNITY_SURFACE_COOKIE */
+ SVGA_3D_CMD_DEAD2 = 1084, /* id of the removed SVGA_3D_CMD_OPEN_CONTEXT_SURFACE */
+
+ SVGA_3D_CMD_LOGICOPS_BITBLT = 1085,
+ SVGA_3D_CMD_LOGICOPS_TRANSBLT = 1086,
+ SVGA_3D_CMD_LOGICOPS_STRETCHBLT = 1087,
+ SVGA_3D_CMD_LOGICOPS_COLORFILL = 1088,
+ SVGA_3D_CMD_LOGICOPS_ALPHABLEND = 1089,
+ SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND = 1090,
+
+ SVGA_3D_CMD_SET_OTABLE_BASE = 1091,
+ SVGA_3D_CMD_READBACK_OTABLE = 1092,
+
+ SVGA_3D_CMD_DEFINE_GB_MOB = 1093,
+ SVGA_3D_CMD_DESTROY_GB_MOB = 1094,
+ SVGA_3D_CMD_DEAD3 = 1095, /* id of the removed SVGA_3D_CMD_REDEFINE_GB_MOB */
+ SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING = 1096,
+
+ SVGA_3D_CMD_DEFINE_GB_SURFACE = 1097,
+ SVGA_3D_CMD_DESTROY_GB_SURFACE = 1098,
+ SVGA_3D_CMD_BIND_GB_SURFACE = 1099,
+ SVGA_3D_CMD_COND_BIND_GB_SURFACE = 1100,
+ SVGA_3D_CMD_UPDATE_GB_IMAGE = 1101,
+ SVGA_3D_CMD_UPDATE_GB_SURFACE = 1102,
+ SVGA_3D_CMD_READBACK_GB_IMAGE = 1103,
+ SVGA_3D_CMD_READBACK_GB_SURFACE = 1104,
+ SVGA_3D_CMD_INVALIDATE_GB_IMAGE = 1105,
+ SVGA_3D_CMD_INVALIDATE_GB_SURFACE = 1106,
+
+ SVGA_3D_CMD_DEFINE_GB_CONTEXT = 1107,
+ SVGA_3D_CMD_DESTROY_GB_CONTEXT = 1108,
+ SVGA_3D_CMD_BIND_GB_CONTEXT = 1109,
+ SVGA_3D_CMD_READBACK_GB_CONTEXT = 1110,
+ SVGA_3D_CMD_INVALIDATE_GB_CONTEXT = 1111,
+
+ SVGA_3D_CMD_DEFINE_GB_SHADER = 1112,
+ SVGA_3D_CMD_DESTROY_GB_SHADER = 1113,
+ SVGA_3D_CMD_BIND_GB_SHADER = 1114,
+
+ SVGA_3D_CMD_SET_OTABLE_BASE64 = 1115, /* reuses the id of the removed SVGA_3D_CMD_BIND_SHADERCONSTS */
+
+ SVGA_3D_CMD_BEGIN_GB_QUERY = 1116,
+ SVGA_3D_CMD_END_GB_QUERY = 1117,
+ SVGA_3D_CMD_WAIT_FOR_GB_QUERY = 1118,
+
+ SVGA_3D_CMD_NOP = 1119,
+
+ SVGA_3D_CMD_ENABLE_GART = 1120,
+ SVGA_3D_CMD_DISABLE_GART = 1121,
+ SVGA_3D_CMD_MAP_MOB_INTO_GART = 1122,
+ SVGA_3D_CMD_UNMAP_GART_RANGE = 1123,
+
+ SVGA_3D_CMD_DEFINE_GB_SCREENTARGET = 1124,
+ SVGA_3D_CMD_DESTROY_GB_SCREENTARGET = 1125,
+ SVGA_3D_CMD_BIND_GB_SCREENTARGET = 1126,
+ SVGA_3D_CMD_UPDATE_GB_SCREENTARGET = 1127,
+
+ SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL = 1128,
+ SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL = 1129,
+
+ SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE = 1130,
+
+ SVGA_3D_CMD_GB_SCREEN_DMA = 1131,
+ SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH = 1132,
+ SVGA_3D_CMD_GB_MOB_FENCE = 1133,
+ SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 = 1134,
+ SVGA_3D_CMD_DEFINE_GB_MOB64 = 1135,
+ SVGA_3D_CMD_REDEFINE_GB_MOB64 = 1136,
+ SVGA_3D_CMD_NOP_ERROR = 1137,
+
+ SVGA_3D_CMD_SET_VERTEX_STREAMS = 1138, /* ids 1138-1142 were SVGA_3D_CMD_RESERVED1..5 */
+ SVGA_3D_CMD_SET_VERTEX_DECLS = 1139,
+ SVGA_3D_CMD_SET_VERTEX_DIVISORS = 1140,
+ SVGA_3D_CMD_DRAW = 1141,
+ SVGA_3D_CMD_DRAW_INDEXED = 1142,
+
+ /*
+ * DX10 Commands
+ */
+ SVGA_3D_CMD_DX_MIN = 1143, /* same value as the first DX command below */
+ SVGA_3D_CMD_DX_DEFINE_CONTEXT = 1143,
+ SVGA_3D_CMD_DX_DESTROY_CONTEXT = 1144,
+ SVGA_3D_CMD_DX_BIND_CONTEXT = 1145,
+ SVGA_3D_CMD_DX_READBACK_CONTEXT = 1146,
+ SVGA_3D_CMD_DX_INVALIDATE_CONTEXT = 1147,
+ SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER = 1148,
+ SVGA_3D_CMD_DX_SET_SHADER_RESOURCES = 1149,
+ SVGA_3D_CMD_DX_SET_SHADER = 1150,
+ SVGA_3D_CMD_DX_SET_SAMPLERS = 1151,
+ SVGA_3D_CMD_DX_DRAW = 1152,
+ SVGA_3D_CMD_DX_DRAW_INDEXED = 1153,
+ SVGA_3D_CMD_DX_DRAW_INSTANCED = 1154,
+ SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED = 1155,
+ SVGA_3D_CMD_DX_DRAW_AUTO = 1156,
+ SVGA_3D_CMD_DX_SET_INPUT_LAYOUT = 1157,
+ SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS = 1158,
+ SVGA_3D_CMD_DX_SET_INDEX_BUFFER = 1159,
+ SVGA_3D_CMD_DX_SET_TOPOLOGY = 1160,
+ SVGA_3D_CMD_DX_SET_RENDERTARGETS = 1161,
+ SVGA_3D_CMD_DX_SET_BLEND_STATE = 1162,
+ SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE = 1163,
+ SVGA_3D_CMD_DX_SET_RASTERIZER_STATE = 1164,
+ SVGA_3D_CMD_DX_DEFINE_QUERY = 1165,
+ SVGA_3D_CMD_DX_DESTROY_QUERY = 1166,
+ SVGA_3D_CMD_DX_BIND_QUERY = 1167,
+ SVGA_3D_CMD_DX_SET_QUERY_OFFSET = 1168,
+ SVGA_3D_CMD_DX_BEGIN_QUERY = 1169,
+ SVGA_3D_CMD_DX_END_QUERY = 1170,
+ SVGA_3D_CMD_DX_READBACK_QUERY = 1171,
+ SVGA_3D_CMD_DX_SET_PREDICATION = 1172,
+ SVGA_3D_CMD_DX_SET_SOTARGETS = 1173,
+ SVGA_3D_CMD_DX_SET_VIEWPORTS = 1174,
+ SVGA_3D_CMD_DX_SET_SCISSORRECTS = 1175,
+ SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW = 1176,
+ SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW = 1177,
+ SVGA_3D_CMD_DX_PRED_COPY_REGION = 1178,
+ SVGA_3D_CMD_DX_PRED_COPY = 1179,
+ SVGA_3D_CMD_DX_STRETCHBLT = 1180,
+ SVGA_3D_CMD_DX_GENMIPS = 1181,
+ SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE = 1182,
+ SVGA_3D_CMD_DX_READBACK_SUBRESOURCE = 1183,
+ SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE = 1184,
+ SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW = 1185,
+ SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW = 1186,
+ SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW = 1187,
+ SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW = 1188,
+ SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW = 1189,
+ SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW = 1190,
+ SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT = 1191,
+ SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT = 1192,
+ SVGA_3D_CMD_DX_DEFINE_BLEND_STATE = 1193,
+ SVGA_3D_CMD_DX_DESTROY_BLEND_STATE = 1194,
+ SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE = 1195,
+ SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE = 1196,
+ SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE = 1197,
+ SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE = 1198,
+ SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE = 1199,
+ SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE = 1200,
+ SVGA_3D_CMD_DX_DEFINE_SHADER = 1201,
+ SVGA_3D_CMD_DX_DESTROY_SHADER = 1202,
+ SVGA_3D_CMD_DX_BIND_SHADER = 1203,
+ SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT = 1204,
+ SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT = 1205,
+ SVGA_3D_CMD_DX_SET_STREAMOUTPUT = 1206,
+ SVGA_3D_CMD_DX_SET_COTABLE = 1207,
+ SVGA_3D_CMD_DX_READBACK_COTABLE = 1208,
+ SVGA_3D_CMD_DX_BUFFER_COPY = 1209,
+ SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER = 1210,
+ SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK = 1211,
+ SVGA_3D_CMD_DX_MOVE_QUERY = 1212,
+ SVGA_3D_CMD_DX_BIND_ALL_QUERY = 1213,
+ SVGA_3D_CMD_DX_READBACK_ALL_QUERY = 1214,
+ SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER = 1215,
+ SVGA_3D_CMD_DX_MOB_FENCE_64 = 1216,
+ SVGA_3D_CMD_DX_BIND_ALL_SHADER = 1217,
+ SVGA_3D_CMD_DX_HINT = 1218,
+ SVGA_3D_CMD_DX_BUFFER_UPDATE = 1219,
+ SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET = 1220,
+ SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET = 1221,
+ SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET = 1222,
+
+ /*
+ * Reserve some IDs to be used for the DX11 shader types.
+ */
+ SVGA_3D_CMD_DX_RESERVED1 = 1223,
+ SVGA_3D_CMD_DX_RESERVED2 = 1224,
+ SVGA_3D_CMD_DX_RESERVED3 = 1225,
+
+ SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER = 1226,
+
+ SVGA_3D_CMD_DX_MAX = 1227, /* one past the last DX command id (1226) */
+ SVGA_3D_CMD_MAX = 1227, /* one past the last assigned command id */
+ SVGA_3D_CMD_FUTURE_MAX = 3000
+} SVGAFifo3dCmdId;
/*
* FIFO command format definitions:
#include "vmware_pack_end.h"
SVGA3dCmdHeader;
-typedef enum {
- SVGA3D_SURFACE_CUBEMAP = (1 << 0),
-
- /*
- * HINT flags are not enforced by the device but are useful for
- * performance.
- */
- SVGA3D_SURFACE_HINT_STATIC = (1 << 1),
- SVGA3D_SURFACE_HINT_DYNAMIC = (1 << 2),
- SVGA3D_SURFACE_HINT_INDEXBUFFER = (1 << 3),
- SVGA3D_SURFACE_HINT_VERTEXBUFFER = (1 << 4),
- SVGA3D_SURFACE_HINT_TEXTURE = (1 << 5),
- SVGA3D_SURFACE_HINT_RENDERTARGET = (1 << 6),
- SVGA3D_SURFACE_HINT_DEPTHSTENCIL = (1 << 7),
- SVGA3D_SURFACE_HINT_WRITEONLY = (1 << 8),
- SVGA3D_SURFACE_MASKABLE_ANTIALIAS = (1 << 9),
- SVGA3D_SURFACE_AUTOGENMIPMAPS = (1 << 10),
- SVGA3D_SURFACE_DECODE_RENDERTARGET = (1 << 11),
-
- /*
- * Is this surface using a base-level pitch for it's mob backing?
- *
- * This flag is not intended to be set by guest-drivers, but is instead
- * set by the device when the surface is bound to a mob with a specified
- * pitch.
- */
- SVGA3D_SURFACE_MOB_PITCH = (1 << 12),
-
- SVGA3D_SURFACE_INACTIVE = (1 << 13),
- SVGA3D_SURFACE_HINT_RT_LOCKABLE = (1 << 14),
- SVGA3D_SURFACE_VOLUME = (1 << 15),
-
- /*
- * Required to be set on a surface to bind it to a screen target.
- */
- SVGA3D_SURFACE_SCREENTARGET = (1 << 16),
-
- SVGA3D_SURFACE_RESERVED1 = (1 << 17),
- SVGA3D_SURFACE_1D = (1 << 18),
- SVGA3D_SURFACE_ARRAY = (1 << 19),
-
-} SVGA3dSurfaceFlags;
-
-#define SVGA3D_SURFACE_HB_DISALLOWED_MASK (SVGA3D_SURFACE_SCREENTARGET | \
- SVGA3D_SURFACE_MOB_PITCH | \
- SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \
- SVGA3D_SURFACE_BIND_STREAM_OUTPUT)
-
typedef
#include "vmware_pack_begin.h"
struct {
typedef
#include "vmware_pack_begin.h"
struct {
+ uint32 cid;
+
+ uint32 primitiveCount; /* How many primitives to render */
+ uint32 startVertexLocation; /* Which vertex do we start rendering at. */
+
+ uint8 primitiveType; /* SVGA3dPrimitiveType */
+ uint8 padding[3]; /* with primitiveType, fills four bytes */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDraw; /* SVGA_3D_CMD_DRAW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint32 cid;
+
+ uint8 primitiveType; /* SVGA3dPrimitiveType */
+
+ uint32 indexBufferSid; /* Valid index buffer sid. */
+ uint32 indexBufferOffset; /* Byte offset into the vertex buffer, almost */
+ /* always 0 for DX9 guests, non-zero for OpenGL */
+ /* guests. We can't represent non-multiple of */
+ /* stride offsets in D3D9Renderer...
+ * NOTE(review): the field is named indexBufferOffset but the text
+ * above says "vertex buffer" -- confirm which buffer is meant. */
+ uint8 indexBufferStride; /* Allowable values = 1, 2, or 4 */
+
+ int32 baseVertexLocation; /* Bias applied to the index when selecting a */
+ /* vertex from the streams, may be negative */
+
+ uint32 primitiveCount; /* How many primitives to render */
+ uint32 pad0; /* pad0 and pad1 together are six trailing pad bytes */
+ uint16 pad1;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDrawIndexed; /* SVGA_3D_CMD_DRAW_INDEXED */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ /*
+ * Describe a vertex array's data type, and define how it is to be
+ * used by the fixed function pipeline or the vertex shader. It
+ * isn't useful to have two VertexDecls with the same
+ * VertexArrayIdentity in one draw call.
+ *
+ * numElements of these follow an SVGA3dCmdSetVertexDecls header.
+ */
+ uint16 streamOffset;
+ uint8 stream; /* bit 7 is SVGA3D_VERTEX_ELEMENT_RESPECT_STREAM */
+ uint8 type; /* SVGA3dDeclType */
+ uint8 method; /* SVGA3dDeclMethod */
+ uint8 usage; /* SVGA3dDeclUsage */
+ uint8 usageIndex;
+ uint8 padding; /* eighth byte: the fields above occupy seven */
+
+}
+#include "vmware_pack_end.h"
+SVGA3dVertexElement;
+
+/*
+ * Should the vertex element respect the stream value? The high bit of the
+ * stream should be set to indicate that the stream should be respected. If
+ * the high bit is not set, the stream will be ignored and replaced by the index
+ * of the position of the currently considered vertex element.
+ *
+ * All guests should set this bit and correctly specify the stream going
+ * forward.
+ *
+ * The bit lives in the uint8 SVGA3dVertexElement.stream field, hence 1 << 7.
+ */
+#define SVGA3D_VERTEX_ELEMENT_RESPECT_STREAM (1 << 7)
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint32 cid;
+
+ uint32 numElements; /* count of SVGA3dVertexElement entries that follow */
+
+ /*
+ * Followed by numElements SVGA3dVertexElement structures.
+ *
+ * If numElements < SVGA3D_MAX_VERTEX_ARRAYS, the remaining elements
+ * are cleared and will not be used by following draws.
+ */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetVertexDecls; /* SVGA_3D_CMD_SET_VERTEX_DECLS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct { /* one entry of the list that follows SVGA3dCmdSetVertexStreams */
+ uint32 sid; /* surface id -- presumably the vertex buffer surface; confirm */
+ uint32 stride; /* 32-bit here; SVGAGBVertexStream stores a uint16 stride */
+ uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGA3dVertexStream;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint32 cid;
+
+ uint32 numStreams;
+ /*
+ * Followed by numStreams SVGA3dVertexStream structures.
+ *
+ * If numStreams < SVGA3D_MAX_VERTEX_ARRAYS, the remaining streams
+ * are cleared and will not be used by following draws.
+ */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetVertexStreams; /* SVGA_3D_CMD_SET_VERTEX_STREAMS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint32 cid;
+ uint32 numDivisors; /* NOTE(review): presumably followed by numDivisors
+ * SVGA3dVertexDivisor structures, mirroring the
+ * SetVertexDecls/SetVertexStreams commands --
+ * not stated here, confirm. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetVertexDivisors; /* SVGA_3D_CMD_SET_VERTEX_DIVISORS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
uint32 stage;
SVGA3dTextureStateName name;
union {
SVGA3dCmdScreenDMA; /* SVGA_3D_CMD_SCREEN_DMA */
/*
- * Set Unity Surface Cookie
- *
- * Associates the supplied cookie with the surface id for use with
- * Unity. This cookie is a hint from guest to host, there is no way
- * for the guest to readback the cookie and the host is free to drop
- * the cookie association at will. The default value for the cookie
- * on all surfaces is 0.
- */
-
-typedef
-#include "vmware_pack_begin.h"
-struct SVGA3dCmdSetUnitySurfaceCookie {
- uint32 sid;
- uint64 cookie;
-}
-#include "vmware_pack_end.h"
-SVGA3dCmdSetUnitySurfaceCookie; /* SVGA_3D_CMD_SET_UNITY_SURFACE_COOKIE */
-
-/*
- * Open a context-specific surface in a non-context-specific manner.
- */
-
-typedef
-#include "vmware_pack_begin.h"
-struct SVGA3dCmdOpenContextSurface {
- uint32 sid;
-}
-#include "vmware_pack_end.h"
-SVGA3dCmdOpenContextSurface; /* SVGA_3D_CMD_OPEN_CONTEXT_SURFACE */
-
-
-/*
* Logic ops
*/
uint32 gamma;
uint32 color;
uint32 color2;
- int alphaOffsetX;
- int alphaOffsetY;
+ int32 alphaOffsetX;
+ int32 alphaOffsetY;
/* Followed by variable number of SVGA3dBox structures */
}
#include "vmware_pack_end.h"
* Guest-backed objects definitions.
*/
+typedef
+#include "vmware_pack_begin.h"
+struct { /* same ptDepth/base/sizeInBytes fields as SVGA3dCmdDefineGBMob64, without mobid */
+ SVGAMobFormat ptDepth;
+ uint32 sizeInBytes;
+ PPN64 base;
+}
+#include "vmware_pack_end.h"
+SVGAOTableMobEntry;
+#define SVGA3D_OTABLE_MOB_ENTRY_SIZE (sizeof(SVGAOTableMobEntry)) /* byte size of one entry */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ SVGA3dSurfaceFormat format;
+ SVGA3dSurfaceFlags surfaceFlags;
+ uint32 numMipLevels;
+ uint32 multisampleCount;
+ SVGA3dTextureFilter autogenFilter;
+ SVGA3dSize size;
+ SVGAMobId mobid;
+ uint32 arraySize;
+ uint32 mobPitch;
+ uint32 pad[5]; /* NOTE(review): presumably rounds the entry up to a fixed
+ * size; depends on SVGA3dSize and the enum widths, which
+ * are declared elsewhere -- confirm. */
+}
+#include "vmware_pack_end.h"
+SVGAOTableSurfaceEntry;
+#define SVGA3D_OTABLE_SURFACE_ENTRY_SIZE (sizeof(SVGAOTableSurfaceEntry)) /* byte size of one entry */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint32 cid; /* context id -- presumably; "cid" is not expanded in this header */
+ SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGAOTableContextEntry;
+#define SVGA3D_OTABLE_CONTEXT_ENTRY_SIZE (sizeof(SVGAOTableContextEntry)) /* byte size of one entry */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ SVGA3dShaderType type;
+ uint32 sizeInBytes;
+ uint32 offsetInBytes; /* presumably a byte offset inside the mob named by mobid -- confirm */
+ SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGAOTableShaderEntry;
+#define SVGA3D_OTABLE_SHADER_ENTRY_SIZE (sizeof(SVGAOTableShaderEntry)) /* byte size of one entry */
+
#define SVGA_STFLAG_PRIMARY (1 << 0)
typedef uint32 SVGAScreenTargetFlags;
typedef
#include "vmware_pack_begin.h"
struct {
+ SVGA3dSurfaceImageId image;
+ uint32 width;
+ uint32 height;
+ int32 xRoot; /* signed, unlike width/height */
+ int32 yRoot;
+ SVGAScreenTargetFlags flags; /* SVGA_STFLAG_* bits */
+ uint32 dpi;
+ uint32 pad[7]; /* NOTE(review): presumably rounds the entry up to a fixed
+ * size; depends on SVGA3dSurfaceImageId, declared
+ * elsewhere -- confirm. */
+}
+#include "vmware_pack_end.h"
+SVGAOTableScreenTargetEntry;
+#define SVGA3D_OTABLE_SCREEN_TARGET_ENTRY_SIZE \
+ (sizeof(SVGAOTableScreenTargetEntry))
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
float value[4];
}
#include "vmware_pack_end.h"
#include "vmware_pack_end.h"
SVGA3dShaderConstBool;
+typedef
+#include "vmware_pack_begin.h"
+struct { /* vertex element as stored in SVGAGBContextData (decl1/decl2/decl3) */
+ uint16 streamOffset;
+ uint8 stream;
+ uint8 type;
+ uint8 methodUsage; /* NOTE(review): presumably packs the separate method and
+ * usage bytes of SVGA3dVertexElement into one byte;
+ * the bit split is not shown here -- confirm. */
+ uint8 usageIndex;
+}
+#include "vmware_pack_end.h"
+SVGAGBVertexElement;
+
+typedef
+#include "vmware_pack_begin.h"
+struct { /* vertex stream as stored in SVGAGBContextData.streams */
+ uint32 sid;
+ uint16 stride; /* 16-bit here; SVGA3dVertexStream.stride is a uint32 */
+ uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGAGBVertexStream;
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ SVGA3dRect viewport;
+ SVGA3dRect scissorRect;
+ SVGA3dZRange zRange;
+
+ SVGA3dSurfaceImageId renderTargets[SVGA3D_RT_MAX];
+ SVGAGBVertexElement decl1[4]; /* decl1, decl2 and decl3 hold 4 + 18 + 10 = 32
+ * vertex elements in total; presumably one
+ * logical array split across the struct --
+ * confirm. */
+
+ uint32 renderStates[SVGA3D_RS_MAX];
+ SVGAGBVertexElement decl2[18];
+ uint32 pad0[2];
+
+ struct {
+ SVGA3dFace face;
+ SVGA3dMaterial material;
+ } material;
+
+ float clipPlanes[SVGA3D_NUM_CLIPPLANES][4];
+ float matrices[SVGA3D_TRANSFORM_MAX][16];
+
+ SVGA3dBool lightEnabled[SVGA3D_NUM_LIGHTS];
+ SVGA3dLightData lightData[SVGA3D_NUM_LIGHTS];
+
+ /*
+ * Shaders currently bound
+ */
+ uint32 shaders[SVGA3D_NUM_SHADERTYPE_PREDX];
+ SVGAGBVertexElement decl3[10];
+ uint32 pad1[3];
+
+ uint32 occQueryActive;
+ uint32 occQueryValue;
+
+ /*
+ * Int/Bool Shader constants
+ */
+ SVGA3dShaderConstInt pShaderIValues[SVGA3D_CONSTINTREG_MAX];
+ SVGA3dShaderConstInt vShaderIValues[SVGA3D_CONSTINTREG_MAX];
+ uint16 pShaderBValues; /* presumably one bit per bool constant -- confirm */
+ uint16 vShaderBValues;
+
+
+ SVGAGBVertexStream streams[SVGA3D_MAX_VERTEX_ARRAYS];
+ SVGA3dVertexDivisor divisors[SVGA3D_MAX_VERTEX_ARRAYS];
+ uint32 numVertexDecls; /* presumably the number of valid decl1/2/3 entries -- confirm */
+ uint32 numVertexStreams; /* presumably the number of valid streams[] entries -- confirm */
+ uint32 numVertexDivisors; /* presumably the number of valid divisors[] entries -- confirm */
+ uint32 pad2[30];
+
+ /*
+ * Texture Stages
+ *
+ * SVGA3D_TS_INVALID through SVGA3D_TS_CONSTANT are in the
+ * textureStages array.
+ * SVGA3D_TS_COLOR_KEY is in tsColorKey.
+ */
+ uint32 tsColorKey[SVGA3D_NUM_TEXTURE_UNITS];
+ uint32 textureStages[SVGA3D_NUM_TEXTURE_UNITS][SVGA3D_TS_CONSTANT + 1];
+ uint32 tsColorKeyEnable[SVGA3D_NUM_TEXTURE_UNITS];
+
+ /*
+ * Float Shader constants.
+ */
+ SVGA3dShaderConstFloat pShaderFValues[SVGA3D_CONSTREG_MAX];
+ SVGA3dShaderConstFloat vShaderFValues[SVGA3D_CONSTREG_MAX];
+}
+#include "vmware_pack_end.h"
+SVGAGBContextData;
+#define SVGA3D_CONTEXT_DATA_SIZE (sizeof(SVGAGBContextData)) /* byte size of the whole context image */
+
+/*
+ * SVGA3dCmdSetOTableBase --
+ *
+ * This command allows the guest to specify the base PPN of the
+ * specified object table.
+ *
+ * SVGA3dCmdSetOTableBase64 has the same fields with a PPN64
+ * baseAddress.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ SVGAOTableType type;
+ PPN baseAddress;
+ uint32 sizeInBytes;
+ uint32 validSizeInBytes;
+ SVGAMobFormat ptDepth;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetOTableBase; /* SVGA_3D_CMD_SET_OTABLE_BASE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct { /* same fields as SVGA3dCmdSetOTableBase, except baseAddress is a PPN64 */
+ SVGAOTableType type;
+ PPN64 baseAddress;
+ uint32 sizeInBytes;
+ uint32 validSizeInBytes;
+ SVGAMobFormat ptDepth;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdSetOTableBase64; /* SVGA_3D_CMD_SET_OTABLE_BASE64 */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ SVGAOTableType type; /* presumably selects the object table to read back -- confirm */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdReadbackOTable; /* SVGA_3D_CMD_READBACK_OTABLE */
+
+/*
+ * Define a memory object (Mob) in the OTable.
+ *
+ * The base is a PPN here; SVGA3dCmdDefineGBMob64 takes a PPN64.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDefineGBMob {
+ SVGAMobId mobid;
+ SVGAMobFormat ptDepth;
+ PPN base;
+ uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDefineGBMob; /* SVGA_3D_CMD_DEFINE_GB_MOB */
+
+
+/*
+ * Destroys a memory object (Mob) in the OTable, identified by mobid.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDestroyGBMob {
+ SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDestroyGBMob; /* SVGA_3D_CMD_DESTROY_GB_MOB */
+
+
+/*
+ * Define a memory object (Mob) in the OTable with a PPN64 base.
+ *
+ * SVGA3dCmdRedefineGBMob64 uses the same field layout.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDefineGBMob64 {
+ SVGAMobId mobid;
+ SVGAMobFormat ptDepth;
+ PPN64 base;
+ uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDefineGBMob64; /* SVGA_3D_CMD_DEFINE_GB_MOB64 */
+
+/*
+ * Redefine a memory object (Mob) in the OTable with a PPN64 base.
+ *
+ * The field layout matches SVGA3dCmdDefineGBMob64.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdRedefineGBMob64 {
+ SVGAMobId mobid;
+ SVGAMobFormat ptDepth;
+ PPN64 base;
+ uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdRedefineGBMob64; /* SVGA_3D_CMD_REDEFINE_GB_MOB64 */
+
+/*
+ * Notification that the page tables have been modified.
+ *
+ * NOTE(review): presumably the page tables of the mob named by mobid;
+ * confirm.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdUpdateGBMobMapping {
+ SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdUpdateGBMobMapping; /* SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING */
+
/*
* Define a guest-backed surface.
*/
typedef
#include "vmware_pack_begin.h"
-struct{
+struct SVGA3dCmdCondBindGBSurface {
uint32 sid;
SVGAMobId testMobid;
SVGAMobId mobid;
#include "vmware_pack_end.h"
SVGA3dCmdDestroyGBShader; /* SVGA_3D_CMD_DESTROY_GB_SHADER */
-
-typedef
-#include "vmware_pack_begin.h"
-struct SVGA3dCmdBindGBShaderConsts {
- uint32 cid;
- SVGA3dShaderType shaderType;
- SVGA3dShaderConstType shaderConstType;
- uint32 sid;
-}
-#include "vmware_pack_end.h"
-SVGA3dCmdBindGBShaderConsts; /* SVGA_3D_CMD_BIND_SHADERCONSTS */
-
typedef
#include "vmware_pack_begin.h"
struct {
#include "vmware_pack_begin.h"
struct {
SVGAMobId mobid;
- uint32 fbOffset;
+ uint32 mustBeZero;
uint32 initialized;
}
#include "vmware_pack_end.h"
uint32 mobOffset;
}
#include "vmware_pack_end.h"
-SVGA3dCmdGBMobFence; /* SVGA_3D_CMD_GB_MOB_FENCE*/
+SVGA3dCmdGBMobFence; /* SVGA_3D_CMD_GB_MOB_FENCE */
-#endif // _SVGA3D_CMD_H_
+#endif /* _SVGA3D_CMD_H_ */
/**********************************************************
- * Copyright 1998-2014 VMware, Inc. All rights reserved.
+ * Copyright 1998-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
*/
SVGA3D_DEVCAP_TS_COLOR_KEY = 93, /* boolean */
+ /*
+ * Deprecated.
+ */
+ SVGA3D_DEVCAP_DEAD2 = 94,
+
+ /*
+ * Does the device support the DX commands?
+ */
+ SVGA3D_DEVCAP_DX = 95,
+
+ /*
+ * What is the maximum size of a texture array?
+ *
+ * (Even if this cap is zero, cubemaps are still allowed.)
+ */
+ SVGA3D_DEVCAP_MAX_TEXTURE_ARRAY_SIZE = 96,
+
+ /*
+ * What is the maximum number of vertex buffers that can
+ * be used in the DXContext inputAssembly?
+ */
+ SVGA3D_DEVCAP_DX_MAX_VERTEXBUFFERS = 97,
+
+ /*
+ * What is the maximum number of constant buffers
+ * that can be expected to work correctly with a
+ * DX context?
+ */
+ SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS = 98,
+
+ /*
+ * Does the device support provoking vertex control?
+ * If zero, the first vertex will always be the provoking vertex.
+ */
+ SVGA3D_DEVCAP_DX_PROVOKING_VERTEX = 99,
+
+ SVGA3D_DEVCAP_DXFMT_X8R8G8B8 = 100,
+ SVGA3D_DEVCAP_DXFMT_A8R8G8B8 = 101,
+ SVGA3D_DEVCAP_DXFMT_R5G6B5 = 102,
+ SVGA3D_DEVCAP_DXFMT_X1R5G5B5 = 103,
+ SVGA3D_DEVCAP_DXFMT_A1R5G5B5 = 104,
+ SVGA3D_DEVCAP_DXFMT_A4R4G4B4 = 105,
+ SVGA3D_DEVCAP_DXFMT_Z_D32 = 106,
+ SVGA3D_DEVCAP_DXFMT_Z_D16 = 107,
+ SVGA3D_DEVCAP_DXFMT_Z_D24S8 = 108,
+ SVGA3D_DEVCAP_DXFMT_Z_D15S1 = 109,
+ SVGA3D_DEVCAP_DXFMT_LUMINANCE8 = 110,
+ SVGA3D_DEVCAP_DXFMT_LUMINANCE4_ALPHA4 = 111,
+ SVGA3D_DEVCAP_DXFMT_LUMINANCE16 = 112,
+ SVGA3D_DEVCAP_DXFMT_LUMINANCE8_ALPHA8 = 113,
+ SVGA3D_DEVCAP_DXFMT_DXT1 = 114,
+ SVGA3D_DEVCAP_DXFMT_DXT2 = 115,
+ SVGA3D_DEVCAP_DXFMT_DXT3 = 116,
+ SVGA3D_DEVCAP_DXFMT_DXT4 = 117,
+ SVGA3D_DEVCAP_DXFMT_DXT5 = 118,
+ SVGA3D_DEVCAP_DXFMT_BUMPU8V8 = 119,
+ SVGA3D_DEVCAP_DXFMT_BUMPL6V5U5 = 120,
+ SVGA3D_DEVCAP_DXFMT_BUMPX8L8V8U8 = 121,
+ SVGA3D_DEVCAP_DXFMT_FORMAT_DEAD1 = 122,
+ SVGA3D_DEVCAP_DXFMT_ARGB_S10E5 = 123,
+ SVGA3D_DEVCAP_DXFMT_ARGB_S23E8 = 124,
+ SVGA3D_DEVCAP_DXFMT_A2R10G10B10 = 125,
+ SVGA3D_DEVCAP_DXFMT_V8U8 = 126,
+ SVGA3D_DEVCAP_DXFMT_Q8W8V8U8 = 127,
+ SVGA3D_DEVCAP_DXFMT_CxV8U8 = 128,
+ SVGA3D_DEVCAP_DXFMT_X8L8V8U8 = 129,
+ SVGA3D_DEVCAP_DXFMT_A2W10V10U10 = 130,
+ SVGA3D_DEVCAP_DXFMT_ALPHA8 = 131,
+ SVGA3D_DEVCAP_DXFMT_R_S10E5 = 132,
+ SVGA3D_DEVCAP_DXFMT_R_S23E8 = 133,
+ SVGA3D_DEVCAP_DXFMT_RG_S10E5 = 134,
+ SVGA3D_DEVCAP_DXFMT_RG_S23E8 = 135,
+ SVGA3D_DEVCAP_DXFMT_BUFFER = 136,
+ SVGA3D_DEVCAP_DXFMT_Z_D24X8 = 137,
+ SVGA3D_DEVCAP_DXFMT_V16U16 = 138,
+ SVGA3D_DEVCAP_DXFMT_G16R16 = 139,
+ SVGA3D_DEVCAP_DXFMT_A16B16G16R16 = 140,
+ SVGA3D_DEVCAP_DXFMT_UYVY = 141,
+ SVGA3D_DEVCAP_DXFMT_YUY2 = 142,
+ SVGA3D_DEVCAP_DXFMT_NV12 = 143,
+ SVGA3D_DEVCAP_DXFMT_AYUV = 144,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS = 145,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT = 146,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT = 147,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32_TYPELESS = 148,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32_FLOAT = 149,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32_UINT = 150,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32_SINT = 151,
+ SVGA3D_DEVCAP_DXFMT_R16G16B16A16_TYPELESS = 152,
+ SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UINT = 153,
+ SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SNORM = 154,
+ SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SINT = 155,
+ SVGA3D_DEVCAP_DXFMT_R32G32_TYPELESS = 156,
+ SVGA3D_DEVCAP_DXFMT_R32G32_UINT = 157,
+ SVGA3D_DEVCAP_DXFMT_R32G32_SINT = 158,
+ SVGA3D_DEVCAP_DXFMT_R32G8X24_TYPELESS = 159,
+ SVGA3D_DEVCAP_DXFMT_D32_FLOAT_S8X24_UINT = 160,
+ SVGA3D_DEVCAP_DXFMT_R32_FLOAT_X8X24_TYPELESS = 161,
+ SVGA3D_DEVCAP_DXFMT_X32_TYPELESS_G8X24_UINT = 162,
+ SVGA3D_DEVCAP_DXFMT_R10G10B10A2_TYPELESS = 163,
+ SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UINT = 164,
+ SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT = 165,
+ SVGA3D_DEVCAP_DXFMT_R8G8B8A8_TYPELESS = 166,
+ SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM = 167,
+ SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM_SRGB = 168,
+ SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UINT = 169,
+ SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SINT = 170,
+ SVGA3D_DEVCAP_DXFMT_R16G16_TYPELESS = 171,
+ SVGA3D_DEVCAP_DXFMT_R16G16_UINT = 172,
+ SVGA3D_DEVCAP_DXFMT_R16G16_SINT = 173,
+ SVGA3D_DEVCAP_DXFMT_R32_TYPELESS = 174,
+ SVGA3D_DEVCAP_DXFMT_D32_FLOAT = 175,
+ SVGA3D_DEVCAP_DXFMT_R32_UINT = 176,
+ SVGA3D_DEVCAP_DXFMT_R32_SINT = 177,
+ SVGA3D_DEVCAP_DXFMT_R24G8_TYPELESS = 178,
+ SVGA3D_DEVCAP_DXFMT_D24_UNORM_S8_UINT = 179,
+ SVGA3D_DEVCAP_DXFMT_R24_UNORM_X8_TYPELESS = 180,
+ SVGA3D_DEVCAP_DXFMT_X24_TYPELESS_G8_UINT = 181,
+ SVGA3D_DEVCAP_DXFMT_R8G8_TYPELESS = 182,
+ SVGA3D_DEVCAP_DXFMT_R8G8_UNORM = 183,
+ SVGA3D_DEVCAP_DXFMT_R8G8_UINT = 184,
+ SVGA3D_DEVCAP_DXFMT_R8G8_SINT = 185,
+ SVGA3D_DEVCAP_DXFMT_R16_TYPELESS = 186,
+ SVGA3D_DEVCAP_DXFMT_R16_UNORM = 187,
+ SVGA3D_DEVCAP_DXFMT_R16_UINT = 188,
+ SVGA3D_DEVCAP_DXFMT_R16_SNORM = 189,
+ SVGA3D_DEVCAP_DXFMT_R16_SINT = 190,
+ SVGA3D_DEVCAP_DXFMT_R8_TYPELESS = 191,
+ SVGA3D_DEVCAP_DXFMT_R8_UNORM = 192,
+ SVGA3D_DEVCAP_DXFMT_R8_UINT = 193,
+ SVGA3D_DEVCAP_DXFMT_R8_SNORM = 194,
+ SVGA3D_DEVCAP_DXFMT_R8_SINT = 195,
+ SVGA3D_DEVCAP_DXFMT_P8 = 196,
+ SVGA3D_DEVCAP_DXFMT_R9G9B9E5_SHAREDEXP = 197,
+ SVGA3D_DEVCAP_DXFMT_R8G8_B8G8_UNORM = 198,
+ SVGA3D_DEVCAP_DXFMT_G8R8_G8B8_UNORM = 199,
+ SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS = 200,
+ SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB = 201,
+ SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS = 202,
+ SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB = 203,
+ SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS = 204,
+ SVGA3D_DEVCAP_DXFMT_BC3_UNORM_SRGB = 205,
+ SVGA3D_DEVCAP_DXFMT_BC4_TYPELESS = 206,
+ SVGA3D_DEVCAP_DXFMT_ATI1 = 207,
+ SVGA3D_DEVCAP_DXFMT_BC4_SNORM = 208,
+ SVGA3D_DEVCAP_DXFMT_BC5_TYPELESS = 209,
+ SVGA3D_DEVCAP_DXFMT_ATI2 = 210,
+ SVGA3D_DEVCAP_DXFMT_BC5_SNORM = 211,
+ SVGA3D_DEVCAP_DXFMT_R10G10B10_XR_BIAS_A2_UNORM = 212,
+ SVGA3D_DEVCAP_DXFMT_B8G8R8A8_TYPELESS = 213,
+ SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM_SRGB = 214,
+ SVGA3D_DEVCAP_DXFMT_B8G8R8X8_TYPELESS = 215,
+ SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM_SRGB = 216,
+ SVGA3D_DEVCAP_DXFMT_Z_DF16 = 217,
+ SVGA3D_DEVCAP_DXFMT_Z_DF24 = 218,
+ SVGA3D_DEVCAP_DXFMT_Z_D24S8_INT = 219,
+ SVGA3D_DEVCAP_DXFMT_YV12 = 220,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32A32_FLOAT = 221,
+ SVGA3D_DEVCAP_DXFMT_R16G16B16A16_FLOAT = 222,
+ SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UNORM = 223,
+ SVGA3D_DEVCAP_DXFMT_R32G32_FLOAT = 224,
+ SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UNORM = 225,
+ SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SNORM = 226,
+ SVGA3D_DEVCAP_DXFMT_R16G16_FLOAT = 227,
+ SVGA3D_DEVCAP_DXFMT_R16G16_UNORM = 228,
+ SVGA3D_DEVCAP_DXFMT_R16G16_SNORM = 229,
+ SVGA3D_DEVCAP_DXFMT_R32_FLOAT = 230,
+ SVGA3D_DEVCAP_DXFMT_R8G8_SNORM = 231,
+ SVGA3D_DEVCAP_DXFMT_R16_FLOAT = 232,
+ SVGA3D_DEVCAP_DXFMT_D16_UNORM = 233,
+ SVGA3D_DEVCAP_DXFMT_A8_UNORM = 234,
+ SVGA3D_DEVCAP_DXFMT_BC1_UNORM = 235,
+ SVGA3D_DEVCAP_DXFMT_BC2_UNORM = 236,
+ SVGA3D_DEVCAP_DXFMT_BC3_UNORM = 237,
+ SVGA3D_DEVCAP_DXFMT_B5G6R5_UNORM = 238,
+ SVGA3D_DEVCAP_DXFMT_B5G5R5A1_UNORM = 239,
+ SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM = 240,
+ SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM = 241,
+ SVGA3D_DEVCAP_DXFMT_BC4_UNORM = 242,
+ SVGA3D_DEVCAP_DXFMT_BC5_UNORM = 243,
+
SVGA3D_DEVCAP_MAX /* This must be the last index. */
} SVGA3dDevCapIndex;
+/*
+ * Bit definitions for DXFMT devcaps
+ *
+ *
+ * SUPPORTED: Can the format be defined?
+ * SHADER_SAMPLE: Can the format be sampled from a shader?
+ * COLOR_RENDERTARGET: Can the format be a color render target?
+ * DEPTH_RENDERTARGET: Can the format be a depth render target?
+ * BLENDABLE: Is the format blendable?
+ * MIPS: Does the format support mip levels?
+ * ARRAY: Does the format support texture arrays?
+ * VOLUME: Does the format support having volume?
+ * DX_VERTEX_BUFFER: Presumably, can the format be used in a DX vertex
+ * buffer? (Inferred from the name only; confirm.)
+ * MULTISAMPLE_2: Does the format support 2x multisample?
+ * MULTISAMPLE_4: Does the format support 4x multisample?
+ * MULTISAMPLE_8: Does the format support 8x multisample?
+ * MAX: First bit above the capability bits defined here.
+ *
+ * NOTE(review): the MULTISAMPLE_* and MAX bits use the SVGADX_ prefix
+ * while the remaining bits use SVGA3D_.
+ */
+#define SVGA3D_DXFMT_SUPPORTED (1 << 0)
+#define SVGA3D_DXFMT_SHADER_SAMPLE (1 << 1)
+#define SVGA3D_DXFMT_COLOR_RENDERTARGET (1 << 2)
+#define SVGA3D_DXFMT_DEPTH_RENDERTARGET (1 << 3)
+#define SVGA3D_DXFMT_BLENDABLE (1 << 4)
+#define SVGA3D_DXFMT_MIPS (1 << 5)
+#define SVGA3D_DXFMT_ARRAY (1 << 6)
+#define SVGA3D_DXFMT_VOLUME (1 << 7)
+#define SVGA3D_DXFMT_DX_VERTEX_BUFFER (1 << 8)
+#define SVGADX_DXFMT_MULTISAMPLE_2 (1 << 9)
+#define SVGADX_DXFMT_MULTISAMPLE_4 (1 << 10)
+#define SVGADX_DXFMT_MULTISAMPLE_8 (1 << 11)
+#define SVGADX_DXFMT_MAX (1 << 12)
+
+/*
+ * Convenience mask for any multisample capability.
+ *
+ * The multisample bits imply both load and render capability.
+ */
+#define SVGA3D_DXFMT_MULTISAMPLE ( \
+ SVGADX_DXFMT_MULTISAMPLE_2 | \
+ SVGADX_DXFMT_MULTISAMPLE_4 | \
+ SVGADX_DXFMT_MULTISAMPLE_8 )
+
+
typedef union {
Bool b;
uint32 u;
float f;
} SVGA3dDevCapResult;
-#endif // _SVGA3D_DEVCAPS_H_
+#endif /* _SVGA3D_DEVCAPS_H_ */
--- /dev/null
+/**********************************************************
+ * Copyright 2007-2015 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_dx.h --
+ *
+ * SVGA 3d hardware definitions for DX10 support.
+ */
+
+#ifndef _SVGA3D_DX_H_
+#define _SVGA3D_DX_H_
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_USERLEVEL
+#define INCLUDE_ALLOW_VMCORE
+#include "includeCheck.h"
+
+#include "svga3d_limits.h"
+
+#define SVGA3D_INPUT_MIN 0 /* lowest value; equal to PER_VERTEX_DATA */
+#define SVGA3D_INPUT_PER_VERTEX_DATA 0
+#define SVGA3D_INPUT_PER_INSTANCE_DATA 1
+#define SVGA3D_INPUT_MAX 2 /* one past the highest value defined above */
+typedef uint32 SVGA3dInputClassification; /* type of SVGA3dInputElementDesc.inputSlotClass */
+
+#define SVGA3D_RESOURCE_TYPE_MIN 1 /* lowest value; equal to SVGA3D_RESOURCE_BUFFER */
+#define SVGA3D_RESOURCE_BUFFER 1
+#define SVGA3D_RESOURCE_TEXTURE1D 2
+#define SVGA3D_RESOURCE_TEXTURE2D 3
+#define SVGA3D_RESOURCE_TEXTURE3D 4
+#define SVGA3D_RESOURCE_TEXTURECUBE 5
+#define SVGA3D_RESOURCE_TYPE_DX10_MAX 6 /* one past TEXTURECUBE; same value as BUFFEREX below */
+#define SVGA3D_RESOURCE_BUFFEREX 6
+#define SVGA3D_RESOURCE_TYPE_MAX 7 /* one past the highest value defined above */
+typedef uint32 SVGA3dResourceType; /* type of the resourceDimension fields in the view structures */
+
+#define SVGA3D_DEPTH_WRITE_MASK_ZERO 0
+#define SVGA3D_DEPTH_WRITE_MASK_ALL 1
+typedef uint8 SVGA3dDepthWriteMask;
+
+#define SVGA3D_FILTER_MIP_LINEAR (1 << 0) /* bit flags; bits 1, 3 and 5 have no definition here */
+#define SVGA3D_FILTER_MAG_LINEAR (1 << 2)
+#define SVGA3D_FILTER_MIN_LINEAR (1 << 4)
+#define SVGA3D_FILTER_ANISOTROPIC (1 << 6)
+#define SVGA3D_FILTER_COMPARE (1 << 7)
+typedef uint32 SVGA3dFilter; /* type of the 'filter' field in the sampler structures */
+
+#define SVGA3D_CULL_INVALID 0 /* lies below SVGA3D_CULL_MIN */
+#define SVGA3D_CULL_MIN 1 /* lowest valid value; equal to SVGA3D_CULL_NONE */
+#define SVGA3D_CULL_NONE 1
+#define SVGA3D_CULL_FRONT 2
+#define SVGA3D_CULL_BACK 3
+#define SVGA3D_CULL_MAX 4 /* one past the highest value defined above */
+typedef uint8 SVGA3dCullMode; /* one byte; type of the cullMode fields in the rasterizer structures */
+
+#define SVGA3D_COMPARISON_INVALID 0 /* lies below SVGA3D_COMPARISON_MIN */
+#define SVGA3D_COMPARISON_MIN 1 /* lowest valid value; equal to SVGA3D_COMPARISON_NEVER */
+#define SVGA3D_COMPARISON_NEVER 1
+#define SVGA3D_COMPARISON_LESS 2
+#define SVGA3D_COMPARISON_EQUAL 3
+#define SVGA3D_COMPARISON_LESS_EQUAL 4
+#define SVGA3D_COMPARISON_GREATER 5
+#define SVGA3D_COMPARISON_NOT_EQUAL 6
+#define SVGA3D_COMPARISON_GREATER_EQUAL 7
+#define SVGA3D_COMPARISON_ALWAYS 8
+#define SVGA3D_COMPARISON_MAX 9 /* one past the highest value defined above */
+typedef uint8 SVGA3dComparisonFunc; /* one byte; used for depth, stencil and sampler comparison fields */
+
+#define SVGA3D_DX_MAX_VERTEXBUFFERS 32
+#define SVGA3D_DX_MAX_VERTEXINPUTREGISTERS 16
+#define SVGA3D_DX_MAX_SOTARGETS 4 /* sizes streamOutputStrideInBytes[] in SVGAOTableStreamOutputEntry */
+#define SVGA3D_DX_MAX_SRVIEWS 128
+#define SVGA3D_DX_MAX_CONSTBUFFERS 16
+#define SVGA3D_DX_MAX_SAMPLERS 16
+
+/*
+ * Id limits
+ *
+ * These are enumeration constants rather than static const objects so that
+ * including this header does not define a (possibly unused) variable in
+ * every translation unit, and so that the limits are integer constant
+ * expressions in C (usable as array bounds, case labels and in static
+ * assertions). Names and values are unchanged.
+ */
+enum {
+ SVGA3dBlendObjectCountPerContext = 4096,
+ SVGA3dDepthStencilObjectCountPerContext = 4096
+};
+
+typedef uint32 SVGA3dSurfaceId;
+typedef uint32 SVGA3dShaderResourceViewId;
+typedef uint32 SVGA3dRenderTargetViewId;
+typedef uint32 SVGA3dDepthStencilViewId;
+
+typedef uint32 SVGA3dShaderId;
+typedef uint32 SVGA3dElementLayoutId;
+typedef uint32 SVGA3dSamplerId;
+typedef uint32 SVGA3dBlendStateId;
+typedef uint32 SVGA3dDepthStencilStateId;
+typedef uint32 SVGA3dRasterizerStateId;
+typedef uint32 SVGA3dQueryId;
+typedef uint32 SVGA3dStreamOutputId;
+
+typedef union {
+ struct { /* anonymous struct: components are accessed directly as .r/.g/.b/.a */
+ float r;
+ float g;
+ float b;
+ float a;
+ };
+
+ float value[4]; /* array view sharing storage with the four named floats */
+} SVGA3dRGBAFloat;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint32 cid;
+ SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGAOTableDXContextEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineContext {
+ uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineContext; /* SVGA_3D_CMD_DX_DEFINE_CONTEXT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyContext {
+ uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyContext; /* SVGA_3D_CMD_DX_DESTROY_CONTEXT */
+
+/*
+ * Bind a DX context.
+ *
+ * validContents should be set to 0 for new contexts,
+ * and 1 if this is an old context which is getting paged
+ * back on to the device.
+ *
+ * For new contexts, it is recommended that the driver
+ * issue commands to initialize all interesting state
+ * prior to rendering.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindContext {
+ uint32 cid;
+ SVGAMobId mobid;
+ uint32 validContents;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindContext; /* SVGA_3D_CMD_DX_BIND_CONTEXT */
+
+/*
+ * Readback a DX context.
+ * (Request that the device flush the contents back into guest memory.)
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackContext {
+ uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackContext; /* SVGA_3D_CMD_DX_READBACK_CONTEXT */
+
+/*
+ * Invalidate a guest-backed context.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXInvalidateContext {
+ uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXInvalidateContext; /* SVGA_3D_CMD_DX_INVALIDATE_CONTEXT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dReplyFormatData {
+ uint32 formatSupport; /* presumably SVGA3D_DXFMT_* bits -- TODO confirm */
+ uint32 msaa2xQualityLevels:5; /* five 5-bit fields ... */
+ uint32 msaa4xQualityLevels:5;
+ uint32 msaa8xQualityLevels:5;
+ uint32 msaa16xQualityLevels:5;
+ uint32 msaa32xQualityLevels:5;
+ uint32 pad:7; /* ... plus 7 pad bits: 5 * 5 + 7 = 32 bits */
+}
+#include "vmware_pack_end.h"
+SVGA3dReplyFormatData;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetSingleConstantBuffer {
+ uint32 slot;
+ SVGA3dShaderType type;
+ SVGA3dSurfaceId sid;
+ uint32 offsetInBytes;
+ uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetSingleConstantBuffer;
+/* SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetShaderResources {
+ uint32 startView;
+ SVGA3dShaderType type;
+
+ /*
+ * Followed by a variable number of SVGA3dShaderResourceViewId's.
+ */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetShaderResources; /* SVGA_3D_CMD_DX_SET_SHADER_RESOURCES */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetShader {
+ SVGA3dShaderId shaderId;
+ SVGA3dShaderType type;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetShader; /* SVGA_3D_CMD_DX_SET_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetSamplers {
+ uint32 startSampler;
+ SVGA3dShaderType type;
+
+ /*
+ * Followed by a variable number of SVGA3dSamplerId's.
+ */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetSamplers; /* SVGA_3D_CMD_DX_SET_SAMPLERS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDraw {
+ uint32 vertexCount;
+ uint32 startVertexLocation;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDraw; /* SVGA_3D_CMD_DX_DRAW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDrawIndexed {
+ uint32 indexCount;
+ uint32 startIndexLocation;
+ int32 baseVertexLocation;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDrawIndexed; /* SVGA_3D_CMD_DX_DRAW_INDEXED */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDrawInstanced {
+ uint32 vertexCountPerInstance;
+ uint32 instanceCount;
+ uint32 startVertexLocation;
+ uint32 startInstanceLocation;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDrawInstanced; /* SVGA_3D_CMD_DX_DRAW_INSTANCED */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDrawIndexedInstanced {
+ uint32 indexCountPerInstance;
+ uint32 instanceCount;
+ uint32 startIndexLocation;
+ int32 baseVertexLocation;
+ uint32 startInstanceLocation;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDrawIndexedInstanced; /* SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDrawAuto {
+ uint32 pad0;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDrawAuto; /* SVGA_3D_CMD_DX_DRAW_AUTO */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetInputLayout {
+ SVGA3dElementLayoutId elementLayoutId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetInputLayout; /* SVGA_3D_CMD_DX_SET_INPUT_LAYOUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dVertexBuffer {
+ SVGA3dSurfaceId sid;
+ uint32 stride;
+ uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGA3dVertexBuffer;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetVertexBuffers {
+ uint32 startBuffer;
+ /* Followed by a variable number of SVGA3dVertexBuffer's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetVertexBuffers; /* SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetIndexBuffer {
+ SVGA3dSurfaceId sid;
+ SVGA3dSurfaceFormat format;
+ uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetIndexBuffer; /* SVGA_3D_CMD_DX_SET_INDEX_BUFFER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetTopology {
+ SVGA3dPrimitiveType topology;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetTopology; /* SVGA_3D_CMD_DX_SET_TOPOLOGY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetRenderTargets {
+ SVGA3dDepthStencilViewId depthStencilViewId;
+ /* Followed by a variable number of SVGA3dRenderTargetViewId's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetRenderTargets; /* SVGA_3D_CMD_DX_SET_RENDERTARGETS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetBlendState {
+ SVGA3dBlendStateId blendId;
+ float blendFactor[4];
+ uint32 sampleMask;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetBlendState; /* SVGA_3D_CMD_DX_SET_BLEND_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetDepthStencilState {
+ SVGA3dDepthStencilStateId depthStencilId;
+ uint32 stencilRef;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetDepthStencilState; /* SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetRasterizerState {
+ SVGA3dRasterizerStateId rasterizerId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetRasterizerState; /* SVGA_3D_CMD_DX_SET_RASTERIZER_STATE */
+
+#define SVGA3D_DXQUERY_FLAG_PREDICATEHINT (1 << 0)
+typedef uint32 SVGA3dDXQueryFlags;
+
+/*
+ * The SVGADXQueryDeviceState and SVGADXQueryDeviceBits are used by the device
+ * to track query state transitions, but are not intended to be used by the
+ * driver.
+ */
+#define SVGADX_QDSTATE_INVALID ((uint8)-1) /* Query has no state */
+#define SVGADX_QDSTATE_MIN 0
+#define SVGADX_QDSTATE_IDLE 0 /* Query hasn't started yet */
+#define SVGADX_QDSTATE_ACTIVE 1 /* Query is actively gathering data */
+#define SVGADX_QDSTATE_PENDING 2 /* Query is waiting for results */
+#define SVGADX_QDSTATE_FINISHED 3 /* Query has completed */
+#define SVGADX_QDSTATE_MAX 4
+typedef uint8 SVGADXQueryDeviceState;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ SVGA3dQueryTypeUint8 type; /* assumed to be one byte, going by the type name -- TODO confirm */
+ uint16 pad0; /* sits between the two small fields, not after them */
+ SVGADXQueryDeviceState state; /* uint8; one of the SVGADX_QDSTATE_* values */
+ SVGA3dDXQueryFlags flags;
+ SVGAMobId mobid;
+ uint32 offset;
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXQueryEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineQuery {
+ SVGA3dQueryId queryId;
+ SVGA3dQueryType type;
+ SVGA3dDXQueryFlags flags;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineQuery; /* SVGA_3D_CMD_DX_DEFINE_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyQuery {
+ SVGA3dQueryId queryId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyQuery; /* SVGA_3D_CMD_DX_DESTROY_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindQuery {
+ SVGA3dQueryId queryId;
+ SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindQuery; /* SVGA_3D_CMD_DX_BIND_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetQueryOffset {
+ SVGA3dQueryId queryId;
+ uint32 mobOffset;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetQueryOffset; /* SVGA_3D_CMD_DX_SET_QUERY_OFFSET */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBeginQuery {
+ SVGA3dQueryId queryId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBeginQuery; /* SVGA_3D_CMD_DX_QUERY_BEGIN */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXEndQuery {
+ SVGA3dQueryId queryId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXEndQuery; /* SVGA_3D_CMD_DX_QUERY_END */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackQuery {
+ SVGA3dQueryId queryId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackQuery; /* SVGA_3D_CMD_DX_READBACK_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXMoveQuery {
+ SVGA3dQueryId queryId;
+ SVGAMobId mobid;
+ uint32 mobOffset;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXMoveQuery; /* SVGA_3D_CMD_DX_MOVE_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindAllQuery {
+ uint32 cid;
+ SVGAMobId mobid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindAllQuery; /* SVGA_3D_CMD_DX_BIND_ALL_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackAllQuery {
+ uint32 cid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackAllQuery; /* SVGA_3D_CMD_DX_READBACK_ALL_QUERY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetPredication {
+ SVGA3dQueryId queryId;
+ uint32 predicateValue;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetPredication; /* SVGA_3D_CMD_DX_SET_PREDICATION */
+
+typedef
+#include "vmware_pack_begin.h"
+struct MKS3dDXSOState { /* NOTE(review): the struct tag uses an MKS3d prefix; the typedef name is SVGA3dDXSOState */
+ uint32 offset; /* Starting offset */
+ uint32 intOffset; /* Internal offset */
+ uint32 vertexCount; /* vertices written */
+ uint32 sizeInBytes; /* max bytes to write */
+}
+#include "vmware_pack_end.h"
+SVGA3dDXSOState;
+
+/* Set the offset field to this value to append SO values to the buffer */
+#define SVGA3D_DX_SO_OFFSET_APPEND ((uint32) ~0u)
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dSoTarget {
+ SVGA3dSurfaceId sid;
+ uint32 offset;
+ uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dSoTarget;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetSOTargets {
+ uint32 pad0; /* the only field of the fixed-size part of the command */
+ /* Followed by a variable number of SVGA3dSoTarget's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetSOTargets; /* SVGA_3D_CMD_DX_SET_SOTARGETS */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dViewport
+{
+ float x;
+ float y;
+ float width;
+ float height;
+ float minDepth;
+ float maxDepth;
+}
+#include "vmware_pack_end.h"
+SVGA3dViewport;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetViewports {
+ uint32 pad0;
+ /* Followed by a variable number of SVGA3dViewport's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetViewports; /* SVGA_3D_CMD_DX_SET_VIEWPORTS */
+
+#define SVGA3D_DX_MAX_VIEWPORTS 16
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetScissorRects {
+ uint32 pad0;
+ /* Followed by a variable number of SVGASignedRect's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetScissorRects; /* SVGA_3D_CMD_DX_SET_SCISSORRECTS */
+
+#define SVGA3D_DX_MAX_SCISSORRECTS 16
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXClearRenderTargetView {
+ SVGA3dRenderTargetViewId renderTargetViewId;
+ SVGA3dRGBAFloat rgba;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXClearRenderTargetView; /* SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXClearDepthStencilView {
+ uint16 flags;
+ uint16 stencil;
+ SVGA3dDepthStencilViewId depthStencilViewId;
+ float depth;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXClearDepthStencilView; /* SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXPredCopyRegion {
+ SVGA3dSurfaceId dstSid;
+ uint32 dstSubResource;
+ SVGA3dSurfaceId srcSid;
+ uint32 srcSubResource;
+ SVGA3dCopyBox box;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXPredCopyRegion;
+/* SVGA_3D_CMD_DX_PRED_COPY_REGION */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXPredCopy {
+ SVGA3dSurfaceId dstSid;
+ SVGA3dSurfaceId srcSid;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXPredCopy; /* SVGA_3D_CMD_DX_PRED_COPY */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBufferCopy {
+ SVGA3dSurfaceId dest;
+ SVGA3dSurfaceId src;
+ uint32 destX;
+ uint32 srcX;
+ uint32 width;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBufferCopy;
+/* SVGA_3D_CMD_DX_BUFFER_COPY */
+
+typedef uint32 SVGA3dDXStretchBltMode;
+#define SVGADX_STRETCHBLT_LINEAR (1 << 0)
+#define SVGADX_STRETCHBLT_FORCE_SRC_SRGB (1 << 1)
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXStretchBlt {
+ SVGA3dSurfaceId srcSid;
+ uint32 srcSubResource;
+ SVGA3dSurfaceId dstSid;
+ uint32 destSubResource;
+ SVGA3dBox boxSrc;
+ SVGA3dBox boxDest;
+ SVGA3dDXStretchBltMode mode;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXStretchBlt; /* SVGA_3D_CMD_DX_STRETCHBLT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXGenMips {
+ SVGA3dShaderResourceViewId shaderResourceViewId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXGenMips; /* SVGA_3D_CMD_DX_GENMIPS */
+
+/*
+ * Defines a resource/DX surface. Resources share the surfaceId namespace.
+ *
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDefineGBSurface_v2 {
+ uint32 sid;
+ SVGA3dSurfaceFlags surfaceFlags;
+ SVGA3dSurfaceFormat format;
+ uint32 numMipLevels;
+ uint32 multisampleCount;
+ SVGA3dTextureFilter autogenFilter;
+ SVGA3dSize size;
+ uint32 arraySize;
+ uint32 pad;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDefineGBSurface_v2; /* SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 */
+
+/*
+ * Update a sub-resource in a guest-backed resource.
+ * (Inform the device that the guest-contents have been updated.)
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXUpdateSubResource {
+ SVGA3dSurfaceId sid;
+ uint32 subResource;
+ SVGA3dBox box;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXUpdateSubResource; /* SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE */
+
+/*
+ * Readback a subresource in a guest-backed resource.
+ * (Request the device to flush the dirty contents into the guest.)
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackSubResource {
+ SVGA3dSurfaceId sid;
+ uint32 subResource;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackSubResource; /* SVGA_3D_CMD_DX_READBACK_SUBRESOURCE */
+
+/*
+ * Invalidate an image in a guest-backed surface.
+ * (Notify the device that the contents can be lost.)
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXInvalidateSubResource {
+ SVGA3dSurfaceId sid;
+ uint32 subResource;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXInvalidateSubResource; /* SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE */
+
+
+/*
+ * Raw byte wise transfer from a buffer surface into another surface
+ * of the requested box.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXTransferFromBuffer {
+ SVGA3dSurfaceId srcSid;
+ uint32 srcOffset;
+ uint32 srcPitch;
+ uint32 srcSlicePitch;
+ SVGA3dSurfaceId destSid;
+ uint32 destSubResource;
+ SVGA3dBox destBox;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXTransferFromBuffer; /* SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER */
+
+
+/*
+ * Raw byte wise transfer from a buffer surface into another surface
+ * of the requested box. Supported if SVGA3D_DEVCAP_DXCONTEXT is set.
+ * The context is implied from the command buffer header.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXPredTransferFromBuffer {
+ SVGA3dSurfaceId srcSid;
+ uint32 srcOffset;
+ uint32 srcPitch;
+ uint32 srcSlicePitch;
+ SVGA3dSurfaceId destSid;
+ uint32 destSubResource;
+ SVGA3dBox destBox;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXPredTransferFromBuffer;
+/* SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER */
+
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSurfaceCopyAndReadback {
+ SVGA3dSurfaceId srcSid;
+ SVGA3dSurfaceId destSid;
+ SVGA3dCopyBox box;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSurfaceCopyAndReadback;
+/* SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXHint {
+ uint32 hintId;
+
+ /*
+ * Followed by variable sized data depending on the hintId.
+ */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXHint;
+/* SVGA_3D_CMD_DX_HINT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBufferUpdate {
+ SVGA3dSurfaceId sid;
+ uint32 x;
+ uint32 width;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBufferUpdate;
+/* SVGA_3D_CMD_DX_BUFFER_UPDATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetConstantBufferOffset {
+ uint32 slot;
+ uint32 offsetInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetConstantBufferOffset; /* one layout shared by the VS, PS and GS typedefs below */
+
+typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetVSConstantBufferOffset;
+/* SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET */
+
+typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetPSConstantBufferOffset;
+/* SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET */
+
+typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetGSConstantBufferOffset;
+/* SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET */
+
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ union { /* anonymous union: three alternative layouts, each four uint32 fields */
+ struct {
+ uint32 firstElement;
+ uint32 numElements;
+ uint32 pad0; /* pad0/pad1 bring this alternative to four words like the others */
+ uint32 pad1;
+ } buffer;
+ struct {
+ uint32 mostDetailedMip;
+ uint32 firstArraySlice;
+ uint32 mipLevels;
+ uint32 arraySize;
+ } tex;
+ struct {
+ uint32 firstElement;
+ uint32 numElements;
+ uint32 flags;
+ uint32 pad0;
+ } bufferex;
+ };
+}
+#include "vmware_pack_end.h"
+SVGA3dShaderResourceViewDesc;
+
+typedef
+#include "vmware_pack_begin.h"
+struct { /* same fields as SVGA3dCmdDXDefineShaderResourceView minus the view id, plus a trailing pad */
+ SVGA3dSurfaceId sid;
+ SVGA3dSurfaceFormat format;
+ SVGA3dResourceType resourceDimension;
+ SVGA3dShaderResourceViewDesc desc;
+ uint32 pad; /* presumably rounds the entry up to a fixed table-entry size -- TODO confirm */
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXSRViewEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineShaderResourceView {
+ SVGA3dShaderResourceViewId shaderResourceViewId;
+
+ SVGA3dSurfaceId sid;
+ SVGA3dSurfaceFormat format;
+ SVGA3dResourceType resourceDimension;
+
+ SVGA3dShaderResourceViewDesc desc;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineShaderResourceView;
+/* SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyShaderResourceView {
+ SVGA3dShaderResourceViewId shaderResourceViewId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyShaderResourceView;
+/* SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dRenderTargetViewDesc {
+ union { /* anonymous union: three alternative layouts sharing storage */
+ struct {
+ uint32 firstElement;
+ uint32 numElements;
+ } buffer; /* two words; the other alternatives are three words */
+ struct {
+ uint32 mipSlice;
+ uint32 firstArraySlice;
+ uint32 arraySize;
+ } tex; /* 1d, 2d, cube */
+ struct {
+ uint32 mipSlice;
+ uint32 firstW;
+ uint32 wSize;
+ } tex3D;
+ };
+}
+#include "vmware_pack_end.h"
+SVGA3dRenderTargetViewDesc;
+
+typedef
+#include "vmware_pack_begin.h"
+struct { /* same fields as SVGA3dCmdDXDefineRenderTargetView minus the view id, plus trailing padding */
+ SVGA3dSurfaceId sid;
+ SVGA3dSurfaceFormat format;
+ SVGA3dResourceType resourceDimension;
+ SVGA3dRenderTargetViewDesc desc;
+ uint32 pad[2]; /* presumably rounds the entry up to a fixed table-entry size -- TODO confirm */
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXRTViewEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineRenderTargetView {
+ SVGA3dRenderTargetViewId renderTargetViewId;
+
+ SVGA3dSurfaceId sid;
+ SVGA3dSurfaceFormat format;
+ SVGA3dResourceType resourceDimension;
+
+ SVGA3dRenderTargetViewDesc desc;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineRenderTargetView;
+/* SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyRenderTargetView {
+ SVGA3dRenderTargetViewId renderTargetViewId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyRenderTargetView;
+/* SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW */
+
+/*
+ * Values for SVGA3DCreateDSViewFlags, the type of the 'flags' field in
+ * SVGACOTableDXDSViewEntry and SVGA3dCmdDXDefineDepthStencilView.
+ * SVGA3D_DXDSVIEW_CREATE_FLAG_MASK is the OR of the two flag bits.
+ */
+#define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_DEPTH 0x01
+#define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_STENCIL 0x02
+#define SVGA3D_DXDSVIEW_CREATE_FLAG_MASK 0x03
+typedef uint8 SVGA3DCreateDSViewFlags;
+
+typedef
+#include "vmware_pack_begin.h"
+struct { /* same fields as SVGA3dCmdDXDefineDepthStencilView minus the view id, plus pad2 */
+ SVGA3dSurfaceId sid;
+ SVGA3dSurfaceFormat format;
+ SVGA3dResourceType resourceDimension;
+ uint32 mipSlice;
+ uint32 firstArraySlice;
+ uint32 arraySize;
+ SVGA3DCreateDSViewFlags flags; /* uint8; SVGA3D_DXDSVIEW_CREATE_* bits */
+ uint8 pad0; /* flags + pad0 + pad1 together fill one 32-bit word */
+ uint16 pad1;
+ uint32 pad2; /* extra word not present in the define command */
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXDSViewEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineDepthStencilView {
+ SVGA3dDepthStencilViewId depthStencilViewId;
+
+ SVGA3dSurfaceId sid;
+ SVGA3dSurfaceFormat format;
+ SVGA3dResourceType resourceDimension;
+ uint32 mipSlice;
+ uint32 firstArraySlice;
+ uint32 arraySize;
+ SVGA3DCreateDSViewFlags flags; /* uint8; SVGA3D_DXDSVIEW_CREATE_* bits */
+ uint8 pad0; /* flags + pad0 + pad1 together fill one 32-bit word */
+ uint16 pad1;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineDepthStencilView;
+/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyDepthStencilView {
+ SVGA3dDepthStencilViewId depthStencilViewId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyDepthStencilView;
+/* SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dInputElementDesc {
+ uint32 inputSlot;
+ uint32 alignedByteOffset;
+ SVGA3dSurfaceFormat format;
+ SVGA3dInputClassification inputSlotClass;
+ uint32 instanceDataStepRate;
+ uint32 inputRegister;
+}
+#include "vmware_pack_end.h"
+SVGA3dInputElementDesc;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ /*
+ * XXX: How many of these can there be?
+ *
+ * As declared, desc[] has a fixed capacity of 32 descriptors;
+ * numDescs is presumably the number of valid leading entries
+ * (TODO confirm).
+ */
+ uint32 elid;
+ uint32 numDescs;
+ SVGA3dInputElementDesc desc[32];
+ uint32 pad[62]; /* presumably rounds the entry up to a fixed table-entry size -- TODO confirm */
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXElementLayoutEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineElementLayout {
+ SVGA3dElementLayoutId elementLayoutId;
+ /* Followed by a variable number of SVGA3dInputElementDesc's. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineElementLayout;
+/* SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyElementLayout {
+ SVGA3dElementLayoutId elementLayoutId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyElementLayout;
+/* SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT */
+
+
+#define SVGA3D_DX_MAX_RENDER_TARGETS 8 /* NOTE(review): the perRT[] arrays in the blend-state structs are sized with SVGA3D_MAX_RENDER_TARGETS, not this macro */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dDXBlendStatePerRT { /* ten one-byte fields followed by a 16-bit pad */
+ uint8 blendEnable;
+ uint8 srcBlend;
+ uint8 destBlend;
+ uint8 blendOp;
+ uint8 srcBlendAlpha;
+ uint8 destBlendAlpha;
+ uint8 blendOpAlpha;
+ uint8 renderTargetWriteMask;
+ uint8 logicOpEnable;
+ uint8 logicOp;
+ uint16 pad0;
+}
+#include "vmware_pack_end.h"
+SVGA3dDXBlendStatePerRT;
+
+typedef
+#include "vmware_pack_begin.h"
+struct { /* same fields as SVGA3dCmdDXDefineBlendState minus blendId, plus pad1 */
+ uint8 alphaToCoverageEnable;
+ uint8 independentBlendEnable;
+ uint16 pad0; /* the two enables plus pad0 fill one 32-bit word */
+ SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS]; /* bound is not defined in the visible part of this header */
+ uint32 pad1[7]; /* presumably rounds the entry up to a fixed table-entry size -- TODO confirm */
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXBlendStateEntry;
+
+/*
+ * Blend state definition command, keyed by blendId.
+ * The fields after blendId have the same names, types and order as the
+ * leading fields of SVGACOTableDXBlendStateEntry (which adds a trailing
+ * pad1).
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineBlendState {
+ SVGA3dBlendStateId blendId;
+ uint8 alphaToCoverageEnable;
+ uint8 independentBlendEnable;
+ uint16 pad0;
+ SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS];
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineBlendState; /* SVGA_3D_CMD_DX_DEFINE_BLEND_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyBlendState {
+ SVGA3dBlendStateId blendId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyBlendState; /* SVGA_3D_CMD_DX_DESTROY_BLEND_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct { /* sixteen fields, each one byte wide (uint8 or a uint8 typedef) */
+ uint8 depthEnable;
+ SVGA3dDepthWriteMask depthWriteMask; /* SVGA3D_DEPTH_WRITE_MASK_* */
+ SVGA3dComparisonFunc depthFunc; /* SVGA3D_COMPARISON_* */
+ uint8 stencilEnable;
+ uint8 frontEnable;
+ uint8 backEnable;
+ uint8 stencilReadMask;
+ uint8 stencilWriteMask;
+
+ uint8 frontStencilFailOp;
+ uint8 frontStencilDepthFailOp;
+ uint8 frontStencilPassOp;
+ SVGA3dComparisonFunc frontStencilFunc;
+
+ uint8 backStencilFailOp;
+ uint8 backStencilDepthFailOp;
+ uint8 backStencilPassOp;
+ SVGA3dComparisonFunc backStencilFunc;
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXDepthStencilEntry;
+
+/*
+ * Depth-stencil state definition command, keyed by depthStencilId.
+ * The fields after depthStencilId have the same names, types and order
+ * as the fields of SVGACOTableDXDepthStencilEntry.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineDepthStencilState {
+ SVGA3dDepthStencilStateId depthStencilId;
+
+ uint8 depthEnable;
+ SVGA3dDepthWriteMask depthWriteMask;
+ SVGA3dComparisonFunc depthFunc;
+ uint8 stencilEnable;
+ uint8 frontEnable;
+ uint8 backEnable;
+ uint8 stencilReadMask;
+ uint8 stencilWriteMask;
+
+ uint8 frontStencilFailOp;
+ uint8 frontStencilDepthFailOp;
+ uint8 frontStencilPassOp;
+ SVGA3dComparisonFunc frontStencilFunc;
+
+ uint8 backStencilFailOp;
+ uint8 backStencilDepthFailOp;
+ uint8 backStencilPassOp;
+ SVGA3dComparisonFunc backStencilFunc;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineDepthStencilState;
+/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyDepthStencilState {
+ SVGA3dDepthStencilStateId depthStencilId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyDepthStencilState;
+/* SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint8 fillMode; /* four one-byte fields precede the 32-bit depthBias */
+ SVGA3dCullMode cullMode; /* SVGA3D_CULL_* */
+ uint8 frontCounterClockwise;
+ uint8 provokingVertexLast;
+ int32 depthBias;
+ float depthBiasClamp;
+ float slopeScaledDepthBias;
+ uint8 depthClipEnable; /* four one-byte enables precede lineWidth */
+ uint8 scissorEnable;
+ uint8 multisampleEnable;
+ uint8 antialiasedLineEnable;
+ float lineWidth;
+ uint8 lineStippleEnable;
+ uint8 lineStippleFactor;
+ uint16 lineStipplePattern;
+ uint32 forcedSampleCount; /* present only here, not in SVGA3dCmdDXDefineRasterizerState */
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXRasterizerStateEntry;
+
+/*
+ * Rasterizer state definition command, keyed by rasterizerId.
+ * The fields after rasterizerId have the same names, types and order as
+ * those of SVGACOTableDXRasterizerStateEntry, except that the table
+ * entry's trailing forcedSampleCount has no counterpart here.
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineRasterizerState {
+ SVGA3dRasterizerStateId rasterizerId;
+
+ uint8 fillMode;
+ SVGA3dCullMode cullMode;
+ uint8 frontCounterClockwise;
+ uint8 provokingVertexLast;
+ int32 depthBias;
+ float depthBiasClamp;
+ float slopeScaledDepthBias;
+ uint8 depthClipEnable;
+ uint8 scissorEnable;
+ uint8 multisampleEnable;
+ uint8 antialiasedLineEnable;
+ float lineWidth;
+ uint8 lineStippleEnable;
+ uint8 lineStippleFactor;
+ uint16 lineStipplePattern;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineRasterizerState;
+/* SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyRasterizerState {
+ SVGA3dRasterizerStateId rasterizerId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyRasterizerState;
+/* SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct { /* same fields as SVGA3dCmdDXDefineSamplerState minus samplerId, plus pad2 */
+ SVGA3dFilter filter; /* SVGA3D_FILTER_* bits */
+ uint8 addressU;
+ uint8 addressV;
+ uint8 addressW;
+ uint8 pad0; /* the three address bytes plus pad0 fill one 32-bit word */
+ float mipLODBias;
+ uint8 maxAnisotropy;
+ SVGA3dComparisonFunc comparisonFunc; /* SVGA3D_COMPARISON_* */
+ uint16 pad1; /* maxAnisotropy + comparisonFunc + pad1 fill one 32-bit word */
+ SVGA3dRGBAFloat borderColor;
+ float minLOD;
+ float maxLOD;
+ uint32 pad2[6]; /* presumably rounds the entry up to a fixed table-entry size -- TODO confirm */
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXSamplerEntry;
+
+/*
+ * Sampler state definition command, keyed by samplerId.
+ * The fields after samplerId have the same names, types and order as the
+ * leading fields of SVGACOTableDXSamplerEntry (which adds a trailing
+ * pad2).
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineSamplerState {
+ SVGA3dSamplerId samplerId;
+ SVGA3dFilter filter;
+ uint8 addressU;
+ uint8 addressV;
+ uint8 addressW;
+ uint8 pad0;
+ float mipLODBias;
+ uint8 maxAnisotropy;
+ SVGA3dComparisonFunc comparisonFunc;
+ uint16 pad1;
+ SVGA3dRGBAFloat borderColor;
+ float minLOD;
+ float maxLOD;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineSamplerState; /* SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroySamplerState {
+ SVGA3dSamplerId samplerId;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroySamplerState; /* SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineShader {
+ SVGA3dShaderId shaderId; /* same id type as SVGA3dCmdDXDestroyShader.shaderId */
+ SVGA3dShaderType type; /* type and sizeInBytes also appear in SVGACOTableDXShaderEntry */
+ uint32 sizeInBytes; /* Number of bytes of shader text. */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineShader; /* SVGA_3D_CMD_DX_DEFINE_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGACOTableDXShaderEntry {
+ SVGA3dShaderType type; /* type and sizeInBytes mirror the fields of SVGA3dCmdDXDefineShader */
+ uint32 sizeInBytes;
+ uint32 offsetInBytes; /* offsetInBytes and mobid mirror the fields of SVGA3dCmdDXBindShader */
+ SVGAMobId mobid; /* NOTE(review): presumably the MOB holding the shader text, with offsetInBytes locating it inside that MOB -- confirm */
+ uint32 pad[4]; /* trailing padding; NOTE(review): presumably rounds the entry up to a fixed table-entry size -- confirm */
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXShaderEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyShader {
+ SVGA3dShaderId shaderId; /* sole payload field; same id type as SVGA3dCmdDXDefineShader.shaderId */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyShader; /* SVGA_3D_CMD_DX_DESTROY_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindShader {
+ uint32 cid; /* NOTE(review): presumably the context id -- confirm */
+ uint32 shid; /* declared as plain uint32 here, whereas the define/destroy shader commands use SVGA3dShaderId */
+ SVGAMobId mobid;
+ uint32 offsetInBytes; /* NOTE(review): presumably a byte offset into the MOB named by mobid -- confirm */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindShader; /* SVGA_3D_CMD_DX_BIND_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXBindAllShader {
+ uint32 cid; /* NOTE(review): presumably the context id -- confirm */
+ SVGAMobId mobid; /* a single MOB id; unlike SVGA3dCmdDXBindShader there is no shader id or offset field */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXBindAllShader; /* SVGA_3D_CMD_DX_BIND_ALL_SHADER */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXCondBindAllShader {
+ uint32 cid; /* NOTE(review): presumably the context id -- confirm */
+ SVGAMobId testMobid; /* NOTE(review): how the device tests this id is not specified in this header -- confirm against device docs */
+ SVGAMobId mobid; /* same field as in SVGA3dCmdDXBindAllShader */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXCondBindAllShader; /* SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER */
+
+/*
+ * The maximum number of stream-output declarations in each stream-output
+ * entry; used as the length of the decl[] arrays in the stream-output
+ * table entry and in SVGA3dCmdDXDefineStreamOutput.
+ */
+#define SVGA3D_MAX_STREAMOUT_DECLS 64
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dStreamOutputDeclarationEntry {
+ uint32 outputSlot;
+ uint32 registerIndex;
+ uint8 registerMask;
+ uint8 pad0; /* pad0 + pad1 fill the three bytes after registerMask, so stream starts on a 4-byte boundary (assuming uint8/uint16 are 1/2 bytes) */
+ uint16 pad1;
+ uint32 stream;
+}
+#include "vmware_pack_end.h"
+SVGA3dStreamOutputDeclarationEntry; /* element type of the decl[] arrays, which hold SVGA3D_MAX_STREAMOUT_DECLS of these */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGAOTableStreamOutputEntry { /* NOTE(review): tag lacks the "CO"/"DX" of the typedef name SVGACOTableDXStreamOutputEntry -- confirm whether intentional */
+ uint32 numOutputStreamEntries; /* NOTE(review): presumably the number of valid entries in decl[] -- confirm */
+ SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS];
+ uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS];
+ uint32 rasterizedStream;
+ uint32 pad[250]; /* trailing padding; NOTE(review): presumably rounds the entry up to a fixed table-entry size -- confirm */
+}
+#include "vmware_pack_end.h"
+SVGACOTableDXStreamOutputEntry;
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDefineStreamOutput {
+ SVGA3dStreamOutputId soid;
+ uint32 numOutputStreamEntries; /* from here on, the members repeat those of SVGACOTableDXStreamOutputEntry, minus its trailing pad[] */
+ SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS];
+ uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS];
+ uint32 rasterizedStream;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDefineStreamOutput; /* SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXDestroyStreamOutput {
+ SVGA3dStreamOutputId soid; /* sole payload field; same id type as SVGA3dCmdDXDefineStreamOutput.soid */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXDestroyStreamOutput; /* SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetStreamOutput {
+ SVGA3dStreamOutputId soid; /* sole payload field; same layout as SVGA3dCmdDXDestroyStreamOutput */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetStreamOutput; /* SVGA_3D_CMD_DX_SET_STREAMOUTPUT */
+
+typedef
+#include "vmware_pack_begin.h"
+struct { /* no struct tag: the typedef name below is the only way to refer to this type */
+ uint64 value; /* the only 64-bit member among the DX command structs in this part of the header */
+ uint32 mobId; /* NOTE(review): presumably value is written into this MOB at mobOffset -- confirm against device docs */
+ uint32 mobOffset;
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXMobFence64; /* SVGA_3D_CMD_DX_MOB_FENCE_64 */
+
+/*
+ * SVGA3dCmdDXSetCOTable --
+ *
+ * This command allows the guest to bind a mob to a context-object table.
+ */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXSetCOTable {
+ uint32 cid;
+ uint32 mobid; /* declared as plain uint32 here, whereas the shader-binding commands use SVGAMobId */
+ SVGACOTableType type; /* selects which context-object table the mob is bound to */
+ uint32 validSizeInBytes; /* NOTE(review): presumably the number of bytes of the mob already holding valid table data -- confirm */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXSetCOTable; /* SVGA_3D_CMD_DX_SET_COTABLE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCmdDXReadbackCOTable {
+ uint32 cid;
+ SVGACOTableType type; /* same table selector type as SVGA3dCmdDXSetCOTable.type; no mob id is carried here */
+}
+#include "vmware_pack_end.h"
+SVGA3dCmdDXReadbackCOTable; /* SVGA_3D_CMD_DX_READBACK_COTABLE */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dCOTableData {
+ uint32 mobid; /* the only member: one mob id per table */
+}
+#include "vmware_pack_end.h"
+SVGA3dCOTableData; /* element type of SVGADXContextMobFormat.cotables[] */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dBufferBinding {
+ uint32 bufferId;
+ uint32 stride; /* NOTE(review): units not stated here; presumably bytes -- confirm */
+ uint32 offset; /* NOTE(review): units not stated here; presumably bytes -- confirm */
+}
+#include "vmware_pack_end.h"
+SVGA3dBufferBinding; /* element type of SVGADXInputAssemblyMobFormat.vertexBuffers[] */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGA3dConstantBufferBinding {
+ uint32 sid; /* NOTE(review): presumably a surface id -- confirm */
+ uint32 offsetInBytes;
+ uint32 sizeInBytes;
+}
+#include "vmware_pack_end.h"
+SVGA3dConstantBufferBinding; /* element type of the per-shader constantBuffers[] array in SVGADXContextMobFormat */
+
+typedef
+#include "vmware_pack_begin.h"
+struct SVGADXInputAssemblyMobFormat {
+ uint32 layoutId;
+ SVGA3dBufferBinding vertexBuffers[SVGA3D_DX_MAX_VERTEXBUFFERS]; /* one bufferId/stride/offset triple per slot */
+ uint32 indexBufferSid;
+ uint32 pad; /* explicit padding dword between the index buffer id and its offset */
+ uint32 indexBufferOffset;
+ uint32 indexBufferFormat; /* plain uint32 rather than an enum type; NOTE(review): value space not visible here -- confirm */
+ uint32 topology; /* plain uint32 rather than an enum type; NOTE(review): value space not visible here -- confirm */
+}
+#include "vmware_pack_end.h"
+SVGADXInputAssemblyMobFormat; /* embedded by value as the first member of SVGADXContextMobFormat */
+
+typedef
+#include "vmware_pack_begin.h" /* paired with vmware_pack_end.h below; presumably expands to the compiler's struct-packing syntax -- confirm */
+struct SVGADXContextMobFormat {
+ SVGADXInputAssemblyMobFormat inputAssembly; /* embedded by value, not a pointer */
+
+ struct { /* anonymous struct type, reachable only as .renderState */
+ uint32 blendStateId;
+ uint32 blendFactor[4];
+ uint32 sampleMask;
+ uint32 depthStencilStateId;
+ uint32 stencilRef;
+ uint32 rasterizerStateId;
+ uint32 depthStencilViewId;
+ uint32 renderTargetViewIds[SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS];
+ uint32 unorderedAccessViewIds[SVGA3D_MAX_UAVIEWS];
+ } renderState;
+
+ struct { /* anonymous struct type, reachable only as .streamOut */
+ uint32 targets[SVGA3D_DX_MAX_SOTARGETS];
+ uint32 soid;
+ } streamOut;
+ uint32 pad0[11]; /* pad0..pad7 are explicit reserved space between sections; NOTE(review): presumably they pin later sections to fixed offsets -- confirm */
+
+ uint8 numViewports; /* NOTE(review): presumably the count of valid entries in viewports[] -- confirm */
+ uint8 numScissorRects; /* NOTE(review): presumably the count of valid entries in scissorRects[] -- confirm */
+ uint16 pad1[1]; /* together with the two uint8 counters above this makes four bytes (assuming 1- and 2-byte types) */
+
+ uint32 pad2[3];
+
+ SVGA3dViewport viewports[SVGA3D_DX_MAX_VIEWPORTS];
+ uint32 pad3[32];
+
+ SVGASignedRect scissorRects[SVGA3D_DX_MAX_SCISSORRECTS];
+ uint32 pad4[64];
+
+ struct { /* anonymous struct type, reachable only as .predication */
+ uint32 queryID; /* distinct from the top-level queryID[] array further down */
+ uint32 value;
+ } predication;
+ uint32 pad5[2];
+
+ struct { /* one instance of this anonymous struct per shader type, see the array bound below */
+ uint32 shaderId;
+ SVGA3dConstantBufferBinding constantBuffers[SVGA3D_DX_MAX_CONSTBUFFERS];
+ uint32 shaderResources[SVGA3D_DX_MAX_SRVIEWS];
+ uint32 samplers[SVGA3D_DX_MAX_SAMPLERS];
+ } shaderState[SVGA3D_NUM_SHADERTYPE];
+ uint32 pad6[26];
+
+ SVGA3dQueryId queryID[SVGA3D_MAX_QUERY];
+
+ SVGA3dCOTableData cotables[SVGA_COTABLE_MAX]; /* each element holds a single mobid; NOTE(review): presumably indexed by SVGACOTableType -- confirm */
+ uint32 pad7[380];
+}
+#include "vmware_pack_end.h"
+SVGADXContextMobFormat;
+
+#endif /* _SVGA3D_DX_H_ */
/**********************************************************
- * Copyright 2007-2014 VMware, Inc. All rights reserved.
+ * Copyright 2007-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
#define SVGA3D_NUM_CLIPPLANES 6
#define SVGA3D_MAX_RENDER_TARGETS 8
#define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS (SVGA3D_MAX_RENDER_TARGETS)
+#define SVGA3D_MAX_UAVIEWS 8
#define SVGA3D_MAX_CONTEXT_IDS 256
#define SVGA3D_MAX_SURFACE_IDS (32 * 1024)
#define SVGA3D_NUM_TEXTURE_UNITS 32
#define SVGA3D_NUM_LIGHTS 8
-#define SVGA3D_MAX_VIDEODECODERS 8
-#define SVGA3D_MAX_VIDEOPROCESSORS 8
-#define SVGA3D_MAX_VIDEODECODER_FRAMES 400
/*
* Maximum size in dwords of shader text the SVGA device will allow.
*/
#define SVGA3D_MAX_DRAW_PRIMITIVE_RANGES 32
-#endif // _SVGA3D_LIMITS_H_
+#endif /* _SVGA3D_LIMITS_H_ */
/**********************************************************
- * Copyright 1998-2014 VMware, Inc. All rights reserved.
+ * Copyright 1998-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
#include "svga3d_types.h"
#include "svga3d_limits.h"
#include "svga3d_cmd.h"
+#include "svga3d_dx.h"
#include "svga3d_devcaps.h"
-/**********************************************************
- * Copyright 1998-2014 VMware, Inc. All rights reserved.
+/**************************************************************************
*
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
+ * Copyright © 1998-2015 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
*
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
*
- **********************************************************/
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
/*
* svga3d_surfacedefs.h --
*/
enum svga3d_block_desc {
- SVGA3DBLOCKDESC_NONE = 0, /* No channels are active */
- SVGA3DBLOCKDESC_BLUE = 1 << 0, /* Block with red channel
- data */
- SVGA3DBLOCKDESC_U = 1 << 0, /* Block with bump U channel
- data */
- SVGA3DBLOCKDESC_UV_VIDEO = 1 << 7, /* Block with alternating video
- U and V */
- SVGA3DBLOCKDESC_GREEN = 1 << 1, /* Block with green channel
- data */
- SVGA3DBLOCKDESC_V = 1 << 1, /* Block with bump V channel
- data */
- SVGA3DBLOCKDESC_STENCIL = 1 << 1, /* Block with a stencil
- channel */
- SVGA3DBLOCKDESC_RED = 1 << 2, /* Block with blue channel
- data */
- SVGA3DBLOCKDESC_W = 1 << 2, /* Block with bump W channel
- data */
- SVGA3DBLOCKDESC_LUMINANCE = 1 << 2, /* Block with luminance channel
- data */
- SVGA3DBLOCKDESC_Y = 1 << 2, /* Block with video luminance
- data */
- SVGA3DBLOCKDESC_DEPTH = 1 << 2, /* Block with depth channel */
- SVGA3DBLOCKDESC_ALPHA = 1 << 3, /* Block with an alpha
- channel */
- SVGA3DBLOCKDESC_Q = 1 << 3, /* Block with bump Q channel
- data */
- SVGA3DBLOCKDESC_BUFFER = 1 << 4, /* Block stores 1 byte of
- data */
- SVGA3DBLOCKDESC_COMPRESSED = 1 << 5, /* Block stores n bytes of
- data depending on the
- compression method used */
- SVGA3DBLOCKDESC_IEEE_FP = 1 << 6, /* Block stores data in an IEEE
- floating point
- representation in
- all channels */
- SVGA3DBLOCKDESC_PLANAR_YUV = 1 << 8, /* Three separate blocks store
- data. */
- SVGA3DBLOCKDESC_U_VIDEO = 1 << 9, /* Block with U video data */
- SVGA3DBLOCKDESC_V_VIDEO = 1 << 10, /* Block with V video data */
- SVGA3DBLOCKDESC_EXP = 1 << 11, /* Shared exponent */
- SVGA3DBLOCKDESC_SRGB = 1 << 12, /* Data is in sRGB format */
- SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13, /* 2 planes of Y, UV,
- e.g., NV12. */
- SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14, /* 3 planes of separate
- Y, U, V, e.g., YV12. */
-
- SVGA3DBLOCKDESC_RG = SVGA3DBLOCKDESC_RED |
- SVGA3DBLOCKDESC_GREEN,
- SVGA3DBLOCKDESC_RGB = SVGA3DBLOCKDESC_RG |
- SVGA3DBLOCKDESC_BLUE,
- SVGA3DBLOCKDESC_RGB_SRGB = SVGA3DBLOCKDESC_RGB |
- SVGA3DBLOCKDESC_SRGB,
- SVGA3DBLOCKDESC_RGBA = SVGA3DBLOCKDESC_RGB |
- SVGA3DBLOCKDESC_ALPHA,
- SVGA3DBLOCKDESC_RGBA_SRGB = SVGA3DBLOCKDESC_RGBA |
- SVGA3DBLOCKDESC_SRGB,
- SVGA3DBLOCKDESC_UV = SVGA3DBLOCKDESC_U |
- SVGA3DBLOCKDESC_V,
- SVGA3DBLOCKDESC_UVL = SVGA3DBLOCKDESC_UV |
- SVGA3DBLOCKDESC_LUMINANCE,
- SVGA3DBLOCKDESC_UVW = SVGA3DBLOCKDESC_UV |
- SVGA3DBLOCKDESC_W,
- SVGA3DBLOCKDESC_UVWA = SVGA3DBLOCKDESC_UVW |
- SVGA3DBLOCKDESC_ALPHA,
- SVGA3DBLOCKDESC_UVWQ = SVGA3DBLOCKDESC_U |
- SVGA3DBLOCKDESC_V |
- SVGA3DBLOCKDESC_W |
- SVGA3DBLOCKDESC_Q,
- SVGA3DBLOCKDESC_LA = SVGA3DBLOCKDESC_LUMINANCE |
- SVGA3DBLOCKDESC_ALPHA,
- SVGA3DBLOCKDESC_R_FP = SVGA3DBLOCKDESC_RED |
- SVGA3DBLOCKDESC_IEEE_FP,
- SVGA3DBLOCKDESC_RG_FP = SVGA3DBLOCKDESC_R_FP |
- SVGA3DBLOCKDESC_GREEN,
- SVGA3DBLOCKDESC_RGB_FP = SVGA3DBLOCKDESC_RG_FP |
- SVGA3DBLOCKDESC_BLUE,
- SVGA3DBLOCKDESC_RGBA_FP = SVGA3DBLOCKDESC_RGB_FP |
- SVGA3DBLOCKDESC_ALPHA,
- SVGA3DBLOCKDESC_DS = SVGA3DBLOCKDESC_DEPTH |
- SVGA3DBLOCKDESC_STENCIL,
- SVGA3DBLOCKDESC_YUV = SVGA3DBLOCKDESC_UV_VIDEO |
- SVGA3DBLOCKDESC_Y,
- SVGA3DBLOCKDESC_AYUV = SVGA3DBLOCKDESC_ALPHA |
- SVGA3DBLOCKDESC_Y |
- SVGA3DBLOCKDESC_U_VIDEO |
- SVGA3DBLOCKDESC_V_VIDEO,
- SVGA3DBLOCKDESC_RGBE = SVGA3DBLOCKDESC_RGB |
- SVGA3DBLOCKDESC_EXP,
- SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED |
- SVGA3DBLOCKDESC_SRGB,
- SVGA3DBLOCKDESC_NV12 = SVGA3DBLOCKDESC_PLANAR_YUV |
- SVGA3DBLOCKDESC_2PLANAR_YUV,
- SVGA3DBLOCKDESC_YV12 = SVGA3DBLOCKDESC_PLANAR_YUV |
- SVGA3DBLOCKDESC_3PLANAR_YUV,
-};
-/*
- * SVGA3dSurfaceDesc describes the actual pixel data.
- *
- * This structure provides the following information:
- * 1. Block description.
- * 2. Dimensions of a block in the surface.
- * 3. Size of block in bytes.
- * 4. Bit depth of the pixel data.
- * 5. Channel bit depths and masks (if applicable).
- */
-#define SVGA3D_CHANNEL_DEF(type) \
- struct { \
- union { \
- type blue; \
- type u; \
- type uv_video; \
- type u_video; \
- }; \
- union { \
- type green; \
- type v; \
- type stencil; \
- type v_video; \
- }; \
- union { \
- type red; \
- type w; \
- type luminance; \
- type y; \
- type depth; \
- type data; \
- }; \
- union { \
- type alpha; \
- type q; \
- type exp; \
- }; \
- }
-
-struct svga3d_surface_desc {
- enum svga3d_block_desc block_desc;
- SVGA3dSize block_size;
- uint32 bytes_per_block;
- uint32 pitch_bytes_per_block;
-
- struct {
- uint32 total;
- SVGA3D_CHANNEL_DEF(uint8);
- } bit_depth;
-
- struct {
- SVGA3D_CHANNEL_DEF(uint8);
- } bit_offset;
+ SVGA3DBLOCKDESC_NONE = 0, /* No channels are active */
+ SVGA3DBLOCKDESC_BLUE = 1 << 0, /* Block with blue channel data */
+ SVGA3DBLOCKDESC_U = 1 << 0, /* Block with bump U channel data */
+ SVGA3DBLOCKDESC_GREEN = 1 << 1, /* Block with green channel data */
+ SVGA3DBLOCKDESC_V = 1 << 1, /* Block with bump V channel data */
+ SVGA3DBLOCKDESC_RED = 1 << 2, /* Block with red channel data */
+ SVGA3DBLOCKDESC_W = 1 << 2, /* Block with bump W channel data */
+ SVGA3DBLOCKDESC_LUMINANCE = 1 << 2, /* Block with luminance channel data */
+ SVGA3DBLOCKDESC_Y = 1 << 2, /* Block with video luminance data */
+ SVGA3DBLOCKDESC_ALPHA = 1 << 3, /* Block with an alpha channel */
+ SVGA3DBLOCKDESC_Q = 1 << 3, /* Block with bump Q channel data */
+ SVGA3DBLOCKDESC_BUFFER = 1 << 4, /* Block stores 1 byte of data */
+ SVGA3DBLOCKDESC_COMPRESSED = 1 << 5, /* Block stores n bytes of data depending
+ on the compression method used */
+ SVGA3DBLOCKDESC_IEEE_FP = 1 << 6, /* Block stores data in an IEEE floating point
+ representation in all channels */
+ SVGA3DBLOCKDESC_UV_VIDEO = 1 << 7, /* Block with alternating video U and V */
+ SVGA3DBLOCKDESC_PLANAR_YUV = 1 << 8, /* Three separate blocks store data. */
+ SVGA3DBLOCKDESC_U_VIDEO = 1 << 9, /* Block with U video data */
+ SVGA3DBLOCKDESC_V_VIDEO = 1 << 10, /* Block with V video data */
+ SVGA3DBLOCKDESC_EXP = 1 << 11, /* Shared exponent */
+ SVGA3DBLOCKDESC_SRGB = 1 << 12, /* Data is in sRGB format */
+ SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13, /* 2 planes of Y, UV, e.g., NV12. */
+ SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14, /* 3 planes of separate Y, U, V, e.g., YV12. */
+ SVGA3DBLOCKDESC_DEPTH = 1 << 15, /* Block with depth channel */
+ SVGA3DBLOCKDESC_STENCIL = 1 << 16, /* Block with a stencil channel */
+
+ SVGA3DBLOCKDESC_RG = SVGA3DBLOCKDESC_RED |
+ SVGA3DBLOCKDESC_GREEN,
+ SVGA3DBLOCKDESC_RGB = SVGA3DBLOCKDESC_RG |
+ SVGA3DBLOCKDESC_BLUE,
+ SVGA3DBLOCKDESC_RGB_SRGB = SVGA3DBLOCKDESC_RGB |
+ SVGA3DBLOCKDESC_SRGB,
+ SVGA3DBLOCKDESC_RGBA = SVGA3DBLOCKDESC_RGB |
+ SVGA3DBLOCKDESC_ALPHA,
+ SVGA3DBLOCKDESC_RGBA_SRGB = SVGA3DBLOCKDESC_RGBA |
+ SVGA3DBLOCKDESC_SRGB,
+ SVGA3DBLOCKDESC_UV = SVGA3DBLOCKDESC_U |
+ SVGA3DBLOCKDESC_V,
+ SVGA3DBLOCKDESC_UVL = SVGA3DBLOCKDESC_UV |
+ SVGA3DBLOCKDESC_LUMINANCE,
+ SVGA3DBLOCKDESC_UVW = SVGA3DBLOCKDESC_UV |
+ SVGA3DBLOCKDESC_W,
+ SVGA3DBLOCKDESC_UVWA = SVGA3DBLOCKDESC_UVW |
+ SVGA3DBLOCKDESC_ALPHA,
+ SVGA3DBLOCKDESC_UVWQ = SVGA3DBLOCKDESC_U |
+ SVGA3DBLOCKDESC_V |
+ SVGA3DBLOCKDESC_W |
+ SVGA3DBLOCKDESC_Q,
+ SVGA3DBLOCKDESC_LA = SVGA3DBLOCKDESC_LUMINANCE |
+ SVGA3DBLOCKDESC_ALPHA,
+ SVGA3DBLOCKDESC_R_FP = SVGA3DBLOCKDESC_RED |
+ SVGA3DBLOCKDESC_IEEE_FP,
+ SVGA3DBLOCKDESC_RG_FP = SVGA3DBLOCKDESC_R_FP |
+ SVGA3DBLOCKDESC_GREEN,
+ SVGA3DBLOCKDESC_RGB_FP = SVGA3DBLOCKDESC_RG_FP |
+ SVGA3DBLOCKDESC_BLUE,
+ SVGA3DBLOCKDESC_RGBA_FP = SVGA3DBLOCKDESC_RGB_FP |
+ SVGA3DBLOCKDESC_ALPHA,
+ SVGA3DBLOCKDESC_DS = SVGA3DBLOCKDESC_DEPTH |
+ SVGA3DBLOCKDESC_STENCIL,
+ SVGA3DBLOCKDESC_YUV = SVGA3DBLOCKDESC_UV_VIDEO |
+ SVGA3DBLOCKDESC_Y,
+ SVGA3DBLOCKDESC_AYUV = SVGA3DBLOCKDESC_ALPHA |
+ SVGA3DBLOCKDESC_Y |
+ SVGA3DBLOCKDESC_U_VIDEO |
+ SVGA3DBLOCKDESC_V_VIDEO,
+ SVGA3DBLOCKDESC_RGBE = SVGA3DBLOCKDESC_RGB |
+ SVGA3DBLOCKDESC_EXP,
+ SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED |
+ SVGA3DBLOCKDESC_SRGB,
+ SVGA3DBLOCKDESC_NV12 = SVGA3DBLOCKDESC_PLANAR_YUV |
+ SVGA3DBLOCKDESC_2PLANAR_YUV,
+ SVGA3DBLOCKDESC_YV12 = SVGA3DBLOCKDESC_PLANAR_YUV |
+ SVGA3DBLOCKDESC_3PLANAR_YUV,
};
-static const struct svga3d_surface_desc svga3d_surface_descs[] = {
- {SVGA3DBLOCKDESC_NONE,
- {1, 1, 1}, 0, 0, {0, {{0}, {0}, {0}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_FORMAT_INVALID */
-
- {SVGA3DBLOCKDESC_RGB,
- {1, 1, 1}, 4, 4, {24, {{8}, {8}, {8}, {0} } },
- {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_X8R8G8B8 */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } },
- {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_A8R8G8B8 */
-
- {SVGA3DBLOCKDESC_RGB,
- {1, 1, 1}, 2, 2, {16, {{5}, {6}, {5}, {0} } },
- {{{0}, {5}, {11}, {0} } } }, /* SVGA3D_R5G6B5 */
-
- {SVGA3DBLOCKDESC_RGB,
- {1, 1, 1}, 2, 2, {15, {{5}, {5}, {5}, {0} } },
- {{{0}, {5}, {10}, {0} } } }, /* SVGA3D_X1R5G5B5 */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 2, 2, {16, {{5}, {5}, {5}, {1} } },
- {{{0}, {5}, {10}, {15} } } }, /* SVGA3D_A1R5G5B5 */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 2, 2, {16, {{4}, {4}, {4}, {4} } },
- {{{0}, {4}, {8}, {12} } } }, /* SVGA3D_A4R4G4B4 */
-
- {SVGA3DBLOCKDESC_DEPTH,
- {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_Z_D32 */
-
- {SVGA3DBLOCKDESC_DEPTH,
- {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_Z_D16 */
-
- {SVGA3DBLOCKDESC_DS,
- {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } },
- {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_Z_D24S8 */
-
- {SVGA3DBLOCKDESC_DS,
- {1, 1, 1}, 2, 2, {16, {{0}, {1}, {15}, {0} } },
- {{{0}, {15}, {0}, {0} } } }, /* SVGA3D_Z_D15S1 */
-
- {SVGA3DBLOCKDESC_LUMINANCE,
- {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_LUMINANCE8 */
-
- {SVGA3DBLOCKDESC_LA,
- {1, 1, 1}, 1, 1, {8, {{0}, {0}, {4}, {4} } },
- {{{0}, {0}, {0}, {4} } } }, /* SVGA3D_LUMINANCE4_ALPHA4 */
-
- {SVGA3DBLOCKDESC_LUMINANCE,
- {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_LUMINANCE16 */
-
- {SVGA3DBLOCKDESC_LA,
- {1, 1, 1}, 2, 2, {16, {{0}, {0}, {8}, {8} } },
- {{{0}, {0}, {0}, {8} } } }, /* SVGA3D_LUMINANCE8_ALPHA8 */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT1 */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT2 */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT3 */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT4 */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_DXT5 */
-
- {SVGA3DBLOCKDESC_UV,
- {1, 1, 1}, 2, 2, {16, {{0}, {0}, {8}, {8} } },
- {{{0}, {0}, {0}, {8} } } }, /* SVGA3D_BUMPU8V8 */
-
- {SVGA3DBLOCKDESC_UVL,
- {1, 1, 1}, 2, 2, {16, {{5}, {5}, {6}, {0} } },
- {{{11}, {6}, {0}, {0} } } }, /* SVGA3D_BUMPL6V5U5 */
-
- {SVGA3DBLOCKDESC_UVL,
- {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {0} } },
- {{{16}, {8}, {0}, {0} } } }, /* SVGA3D_BUMPX8L8V8U8 */
-
- {SVGA3DBLOCKDESC_UVL,
- {1, 1, 1}, 3, 3, {24, {{8}, {8}, {8}, {0} } },
- {{{16}, {8}, {0}, {0} } } }, /* SVGA3D_BUMPL8V8U8 */
-
- {SVGA3DBLOCKDESC_RGBA_FP,
- {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } },
- {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_ARGB_S10E5 */
-
- {SVGA3DBLOCKDESC_RGBA_FP,
- {1, 1, 1}, 16, 16, {128, {{32}, {32}, {32}, {32} } },
- {{{64}, {32}, {0}, {96} } } }, /* SVGA3D_ARGB_S23E8 */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } },
- {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_A2R10G10B10 */
-
- {SVGA3DBLOCKDESC_UV,
- {1, 1, 1}, 2, 2, {16, {{8}, {8}, {0}, {0} } },
- {{{8}, {0}, {0}, {0} } } }, /* SVGA3D_V8U8 */
-
- {SVGA3DBLOCKDESC_UVWQ,
- {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } },
- {{{24}, {16}, {8}, {0} } } }, /* SVGA3D_Q8W8V8U8 */
-
- {SVGA3DBLOCKDESC_UV,
- {1, 1, 1}, 2, 2, {16, {{8}, {8}, {0}, {0} } },
- {{{8}, {0}, {0}, {0} } } }, /* SVGA3D_CxV8U8 */
-
- {SVGA3DBLOCKDESC_UVL,
- {1, 1, 1}, 4, 4, {24, {{8}, {8}, {8}, {0} } },
- {{{16}, {8}, {0}, {0} } } }, /* SVGA3D_X8L8V8U8 */
-
- {SVGA3DBLOCKDESC_UVWA,
- {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } },
- {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_A2W10V10U10 */
-
- {SVGA3DBLOCKDESC_ALPHA,
- {1, 1, 1}, 1, 1, {8, {{0}, {0}, {0}, {8} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_ALPHA8 */
-
- {SVGA3DBLOCKDESC_R_FP,
- {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R_S10E5 */
-
- {SVGA3DBLOCKDESC_R_FP,
- {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R_S23E8 */
-
- {SVGA3DBLOCKDESC_RG_FP,
- {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } },
- {{{0}, {16}, {0}, {0} } } }, /* SVGA3D_RG_S10E5 */
-
- {SVGA3DBLOCKDESC_RG_FP,
- {1, 1, 1}, 8, 8, {64, {{0}, {32}, {32}, {0} } },
- {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_RG_S23E8 */
-
- {SVGA3DBLOCKDESC_BUFFER,
- {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BUFFER */
-
- {SVGA3DBLOCKDESC_DEPTH,
- {1, 1, 1}, 4, 4, {32, {{0}, {0}, {24}, {0} } },
- {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_Z_D24X8 */
-
- {SVGA3DBLOCKDESC_UV,
- {1, 1, 1}, 4, 4, {32, {{16}, {16}, {0}, {0} } },
- {{{16}, {0}, {0}, {0} } } }, /* SVGA3D_V16U16 */
-
- {SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } },
- {{{0}, {0}, {16}, {0} } } }, /* SVGA3D_G16R16 */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } },
- {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_A16B16G16R16 */
-
- {SVGA3DBLOCKDESC_YUV,
- {1, 1, 1}, 2, 2, {16, {{8}, {0}, {8}, {0} } },
- {{{0}, {0}, {8}, {0} } } }, /* SVGA3D_UYVY */
-
- {SVGA3DBLOCKDESC_YUV,
- {1, 1, 1}, 2, 2, {16, {{8}, {0}, {8}, {0} } },
- {{{8}, {0}, {0}, {0} } } }, /* SVGA3D_YUY2 */
-
- {SVGA3DBLOCKDESC_NV12,
- {2, 2, 1}, 6, 2, {48, {{0}, {0}, {48}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_NV12 */
-
- {SVGA3DBLOCKDESC_AYUV,
- {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } },
- {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_AYUV */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 16, 16, {128, {{32}, {32}, {32}, {32} } },
- {{{64}, {32}, {0}, {96} } } }, /* SVGA3D_R32G32B32A32_TYPELESS */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 16, 16, {128, {{32}, {32}, {32}, {32} } },
- {{{64}, {32}, {0}, {96} } } }, /* SVGA3D_R32G32B32A32_UINT */
-
- {SVGA3DBLOCKDESC_UVWQ,
- {1, 1, 1}, 16, 16, {128, {{32}, {32}, {32}, {32} } },
- {{{64}, {32}, {0}, {96} } } }, /* SVGA3D_R32G32B32A32_SINT */
-
- {SVGA3DBLOCKDESC_RGB,
- {1, 1, 1}, 12, 12, {96, {{32}, {32}, {32}, {0} } },
- {{{64}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32B32_TYPELESS */
-
- {SVGA3DBLOCKDESC_RGB_FP,
- {1, 1, 1}, 12, 12, {96, {{32}, {32}, {32}, {0} } },
- {{{64}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32B32_FLOAT */
-
- {SVGA3DBLOCKDESC_RGB,
- {1, 1, 1}, 12, 12, {96, {{32}, {32}, {32}, {0} } },
- {{{64}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32B32_UINT */
-
- {SVGA3DBLOCKDESC_UVW,
- {1, 1, 1}, 12, 12, {96, {{32}, {32}, {32}, {0} } },
- {{{64}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32B32_SINT */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } },
- {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_R16G16B16A16_TYPELESS */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } },
- {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_R16G16B16A16_UINT */
-
- {SVGA3DBLOCKDESC_UVWQ,
- {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } },
- {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_R16G16B16A16_SNORM */
-
- {SVGA3DBLOCKDESC_UVWQ,
- {1, 1, 1}, 8, 8, {64, {{16}, {16}, {16}, {16} } },
- {{{32}, {16}, {0}, {48} } } }, /* SVGA3D_R16G16B16A16_SINT */
-
- {SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 8, 8, {64, {{0}, {32}, {32}, {0} } },
- {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32_TYPELESS */
-
- {SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 8, 8, {64, {{0}, {32}, {32}, {0} } },
- {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32_UINT */
- {SVGA3DBLOCKDESC_UV,
- {1, 1, 1}, 8, 8, {64, {{0}, {32}, {32}, {0} } },
- {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_R32G32_SINT */
+typedef struct SVGA3dChannelDef { /* four uint8 slots; each slot is an anonymous union giving one byte several channel names */
+ union { /* slot 0: blue / u / uv_video / u_video all alias the same byte */
+ uint8 blue;
+ uint8 u;
+ uint8 uv_video;
+ uint8 u_video;
+ };
+ union { /* slot 1: green / v / stencil / v_video all alias the same byte */
+ uint8 green;
+ uint8 v;
+ uint8 stencil;
+ uint8 v_video;
+ };
+ union { /* slot 2: red / w / luminance / y / depth / data all alias the same byte */
+ uint8 red;
+ uint8 w;
+ uint8 luminance;
+ uint8 y;
+ uint8 depth;
+ uint8 data;
+ };
+ union { /* slot 3: alpha / q / exp all alias the same byte */
+ uint8 alpha;
+ uint8 q;
+ uint8 exp;
+ };
+} SVGA3dChannelDef; /* the unions are unnamed, so members are accessed directly, e.g. def.red (needs C11 or a compiler extension) */
- {SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 8, 8, {64, {{0}, {8}, {32}, {0} } },
- {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_R32G8X24_TYPELESS */
-
- {SVGA3DBLOCKDESC_DS,
- {1, 1, 1}, 8, 8, {64, {{0}, {8}, {32}, {0} } },
- {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_D32_FLOAT_S8X24_UINT */
-
- {SVGA3DBLOCKDESC_R_FP,
- {1, 1, 1}, 8, 8, {64, {{0}, {0}, {32}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R32_FLOAT_X8_X24_TYPELESS */
-
- {SVGA3DBLOCKDESC_GREEN,
- {1, 1, 1}, 8, 8, {64, {{0}, {8}, {0}, {0} } },
- {{{0}, {32}, {0}, {0} } } }, /* SVGA3D_X32_TYPELESS_G8X24_UINT */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } },
- {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_R10G10B10A2_TYPELESS */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } },
- {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_R10G10B10A2_UINT */
-
- {SVGA3DBLOCKDESC_RGB_FP,
- {1, 1, 1}, 4, 4, {32, {{10}, {11}, {11}, {0} } },
- {{{0}, {10}, {21}, {0} } } }, /* SVGA3D_R11G11B10_FLOAT */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } },
- {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_TYPELESS */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } },
- {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_UNORM */
-
- {SVGA3DBLOCKDESC_RGBA_SRGB,
- {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } },
- {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_UNORM_SRGB */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } },
- {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_UINT */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } },
- {{{16}, {8}, {0}, {24} } } }, /* SVGA3D_R8G8B8A8_SINT */
-
- {SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } },
- {{{0}, {16}, {0}, {0} } } }, /* SVGA3D_R16G16_TYPELESS */
-
- {SVGA3DBLOCKDESC_RG_FP,
- {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } },
- {{{0}, {16}, {0}, {0} } } }, /* SVGA3D_R16G16_UINT */
-
- {SVGA3DBLOCKDESC_UV,
- {1, 1, 1}, 4, 4, {32, {{0}, {16}, {16}, {0} } },
- {{{0}, {16}, {0}, {0} } } }, /* SVGA3D_R16G16_SINT */
-
- {SVGA3DBLOCKDESC_RED,
- {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R32_TYPELESS */
-
- {SVGA3DBLOCKDESC_DEPTH,
- {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_D32_FLOAT */
-
- {SVGA3DBLOCKDESC_RED,
- {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R32_UINT */
-
- {SVGA3DBLOCKDESC_RED,
- {1, 1, 1}, 4, 4, {32, {{0}, {0}, {32}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R32_SINT */
-
- {SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } },
- {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_R24G8_TYPELESS */
-
- {SVGA3DBLOCKDESC_DS,
- {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } },
- {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_D24_UNORM_S8_UINT */
-
- {SVGA3DBLOCKDESC_RED,
- {1, 1, 1}, 4, 4, {32, {{0}, {0}, {24}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R24_UNORM_X8_TYPELESS */
-
- {SVGA3DBLOCKDESC_GREEN,
- {1, 1, 1}, 4, 4, {32, {{0}, {8}, {0}, {0} } },
- {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_X24_TYPELESS_G8_UINT */
-
- {SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } },
- {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_TYPELESS */
-
- {SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } },
- {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_UNORM */
-
- {SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } },
- {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_UINT */
-
- {SVGA3DBLOCKDESC_UV,
- {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } },
- {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_SINT */
-
- {SVGA3DBLOCKDESC_RED,
- {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_TYPELESS */
-
- {SVGA3DBLOCKDESC_RED,
- {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_UNORM */
-
- {SVGA3DBLOCKDESC_RED,
- {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_UINT */
-
- {SVGA3DBLOCKDESC_U,
- {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_SNORM */
-
- {SVGA3DBLOCKDESC_U,
- {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R16_SINT */
-
- {SVGA3DBLOCKDESC_RED,
- {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_TYPELESS */
-
- {SVGA3DBLOCKDESC_RED,
- {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_UNORM */
-
- {SVGA3DBLOCKDESC_RED,
- {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_UINT */
-
- {SVGA3DBLOCKDESC_U,
- {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_SNORM */
-
- {SVGA3DBLOCKDESC_U,
- {1, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R8_SINT */
-
- {SVGA3DBLOCKDESC_RED,
- {8, 1, 1}, 1, 1, {8, {{0}, {0}, {8}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_R1_UNORM */
-
- {SVGA3DBLOCKDESC_RGBE,
- {1, 1, 1}, 4, 4, {32, {{9}, {9}, {9}, {5} } },
- {{{18}, {9}, {0}, {27} } } }, /* SVGA3D_R9G9B9E5_SHAREDEXP */
-
- {SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } },
- {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_R8G8_B8G8_UNORM */
-
- {SVGA3DBLOCKDESC_RG,
- {1, 1, 1}, 2, 2, {16, {{0}, {8}, {8}, {0} } },
- {{{0}, {8}, {0}, {0} } } }, /* SVGA3D_G8R8_G8B8_UNORM */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC1_TYPELESS */
-
- {SVGA3DBLOCKDESC_COMPRESSED_SRGB,
- {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC1_UNORM_SRGB */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC2_TYPELESS */
-
- {SVGA3DBLOCKDESC_COMPRESSED_SRGB,
- {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC2_UNORM_SRGB */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC3_TYPELESS */
-
- {SVGA3DBLOCKDESC_COMPRESSED_SRGB,
- {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC3_UNORM_SRGB */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC4_TYPELESS */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC4_UNORM */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 8, 8, {64, {{0}, {0}, {64}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC4_SNORM */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC5_TYPELESS */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC5_UNORM */
-
- {SVGA3DBLOCKDESC_COMPRESSED,
- {4, 4, 1}, 16, 16, {128, {{0}, {0}, {128}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_BC5_SNORM */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 4, 4, {32, {{10}, {10}, {10}, {2} } },
- {{{0}, {10}, {20}, {30} } } }, /* SVGA3D_R10G10B10_XR_BIAS_A2_UNORM */
-
- {SVGA3DBLOCKDESC_RGBA,
- {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } },
- {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_B8G8R8A8_TYPELESS */
-
- {SVGA3DBLOCKDESC_RGBA_SRGB,
- {1, 1, 1}, 4, 4, {32, {{8}, {8}, {8}, {8} } },
- {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_B8G8R8A8_UNORM_SRGB */
-
- {SVGA3DBLOCKDESC_RGB,
- {1, 1, 1}, 4, 4, {24, {{8}, {8}, {8}, {0} } },
- {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_B8G8R8X8_TYPELESS */
-
- {SVGA3DBLOCKDESC_RGB_SRGB,
- {1, 1, 1}, 4, 4, {24, {{8}, {8}, {8}, {0} } },
- {{{0}, {8}, {16}, {24} } } }, /* SVGA3D_B8G8R8X8_UNORM_SRGB */
-
- {SVGA3DBLOCKDESC_DEPTH,
- {1, 1, 1}, 2, 2, {16, {{0}, {0}, {16}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_Z_DF16 */
+struct svga3d_surface_desc {
+ SVGA3dSurfaceFormat format;
+ enum svga3d_block_desc block_desc;
- {SVGA3DBLOCKDESC_DS,
- {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } },
- {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_Z_DF24 */
+ SVGA3dSize block_size;
+ uint32 bytes_per_block;
+ uint32 pitch_bytes_per_block;
- {SVGA3DBLOCKDESC_DS,
- {1, 1, 1}, 4, 4, {32, {{0}, {8}, {24}, {0} } },
- {{{0}, {24}, {0}, {0} } } }, /* SVGA3D_Z_D24S8_INT */
+ uint32 totalBitDepth;
+ SVGA3dChannelDef bitDepth;
+ SVGA3dChannelDef bitOffset;
+};
- {SVGA3DBLOCKDESC_YV12,
- {2, 2, 1}, 6, 2, {48, {{0}, {0}, {48}, {0} } },
- {{{0}, {0}, {0}, {0} } } }, /* SVGA3D_YV12 */
+static const struct svga3d_surface_desc svga3d_surface_descs[] = {
+ {SVGA3D_FORMAT_INVALID, SVGA3DBLOCKDESC_NONE,
+ {1, 1, 1}, 0, 0,
+ 0, {{0}, {0}, {0}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_X8R8G8B8, SVGA3DBLOCKDESC_RGB,
+ {1, 1, 1}, 4, 4,
+ 24, {{8}, {8}, {8}, {0}},
+ {{0}, {8}, {16}, {24}}},
+
+ {SVGA3D_A8R8G8B8, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {8}},
+ {{0}, {8}, {16}, {24}}},
+
+ {SVGA3D_R5G6B5, SVGA3DBLOCKDESC_RGB,
+ {1, 1, 1}, 2, 2,
+ 16, {{5}, {6}, {5}, {0}},
+ {{0}, {5}, {11}, {0}}},
+
+ {SVGA3D_X1R5G5B5, SVGA3DBLOCKDESC_RGB,
+ {1, 1, 1}, 2, 2,
+ 15, {{5}, {5}, {5}, {0}},
+ {{0}, {5}, {10}, {0}}},
+
+ {SVGA3D_A1R5G5B5, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 2, 2,
+ 16, {{5}, {5}, {5}, {1}},
+ {{0}, {5}, {10}, {15}}},
+
+ {SVGA3D_A4R4G4B4, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 2, 2,
+ 16, {{4}, {4}, {4}, {4}},
+ {{0}, {4}, {8}, {12}}},
+
+ {SVGA3D_Z_D32, SVGA3DBLOCKDESC_DEPTH,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_Z_D16, SVGA3DBLOCKDESC_DEPTH,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_Z_D24S8, SVGA3DBLOCKDESC_DS,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {8}, {24}, {0}},
+ {{0}, {24}, {0}, {0}}},
+
+ {SVGA3D_Z_D15S1, SVGA3DBLOCKDESC_DS,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {1}, {15}, {0}},
+ {{0}, {15}, {0}, {0}}},
+
+ {SVGA3D_LUMINANCE8, SVGA3DBLOCKDESC_LUMINANCE,
+ {1, 1, 1}, 1, 1,
+ 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_LUMINANCE4_ALPHA4, SVGA3DBLOCKDESC_LA,
+ {1 , 1, 1}, 1, 1,
+ 8, {{0}, {0}, {4}, {4}},
+ {{0}, {0}, {0}, {4}}},
+
+ {SVGA3D_LUMINANCE16, SVGA3DBLOCKDESC_LUMINANCE,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_LUMINANCE8_ALPHA8, SVGA3DBLOCKDESC_LA,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {8}, {8}},
+ {{0}, {0}, {0}, {8}}},
+
+ {SVGA3D_DXT1, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 8, 8,
+ 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_DXT2, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_DXT3, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_DXT4, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_DXT5, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BUMPU8V8, SVGA3DBLOCKDESC_UV,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {8}, {8}},
+ {{0}, {0}, {0}, {8}}},
+
+ {SVGA3D_BUMPL6V5U5, SVGA3DBLOCKDESC_UVL,
+ {1, 1, 1}, 2, 2,
+ 16, {{5}, {5}, {6}, {0}},
+ {{11}, {6}, {0}, {0}}},
+
+ {SVGA3D_BUMPX8L8V8U8, SVGA3DBLOCKDESC_UVL,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {0}},
+ {{16}, {8}, {0}, {0}}},
+
+ {SVGA3D_FORMAT_DEAD1, SVGA3DBLOCKDESC_UVL,
+ {0, 0, 0}, 0, 0,
+ 0, {{0}, {0}, {0}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_ARGB_S10E5, SVGA3DBLOCKDESC_RGBA_FP,
+ {1, 1, 1}, 8, 8,
+ 64, {{16}, {16}, {16}, {16}},
+ {{32}, {16}, {0}, {48}}},
+
+ {SVGA3D_ARGB_S23E8, SVGA3DBLOCKDESC_RGBA_FP,
+ {1, 1, 1}, 16, 16,
+ 128, {{32}, {32}, {32}, {32}},
+ {{64}, {32}, {0}, {96}}},
+
+ {SVGA3D_A2R10G10B10, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{10}, {10}, {10}, {2}},
+ {{0}, {10}, {20}, {30}}},
+
+ {SVGA3D_V8U8, SVGA3DBLOCKDESC_UV,
+ {1, 1, 1}, 2, 2,
+ 16, {{8}, {8}, {0}, {0}},
+ {{8}, {0}, {0}, {0}}},
+
+ {SVGA3D_Q8W8V8U8, SVGA3DBLOCKDESC_UVWQ,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {8}},
+ {{24}, {16}, {8}, {0}}},
+
+ {SVGA3D_CxV8U8, SVGA3DBLOCKDESC_UV,
+ {1, 1, 1}, 2, 2,
+ 16, {{8}, {8}, {0}, {0}},
+ {{8}, {0}, {0}, {0}}},
+
+ {SVGA3D_X8L8V8U8, SVGA3DBLOCKDESC_UVL,
+ {1, 1, 1}, 4, 4,
+ 24, {{8}, {8}, {8}, {0}},
+ {{16}, {8}, {0}, {0}}},
+
+ {SVGA3D_A2W10V10U10, SVGA3DBLOCKDESC_UVWA,
+ {1, 1, 1}, 4, 4,
+ 32, {{10}, {10}, {10}, {2}},
+ {{0}, {10}, {20}, {30}}},
+
+ {SVGA3D_ALPHA8, SVGA3DBLOCKDESC_ALPHA,
+ {1, 1, 1}, 1, 1,
+ 8, {{0}, {0}, {0}, {8}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R_S10E5, SVGA3DBLOCKDESC_R_FP,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R_S23E8, SVGA3DBLOCKDESC_R_FP,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_RG_S10E5, SVGA3DBLOCKDESC_RG_FP,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {0}, {0}}},
+
+ {SVGA3D_RG_S23E8, SVGA3DBLOCKDESC_RG_FP,
+ {1, 1, 1}, 8, 8,
+ 64, {{0}, {32}, {32}, {0}},
+ {{0}, {32}, {0}, {0}}},
+
+ {SVGA3D_BUFFER, SVGA3DBLOCKDESC_BUFFER,
+ {1, 1, 1}, 1, 1,
+ 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_Z_D24X8, SVGA3DBLOCKDESC_DEPTH,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {0}, {24}, {0}},
+ {{0}, {24}, {0}, {0}}},
+
+ {SVGA3D_V16U16, SVGA3DBLOCKDESC_UV,
+ {1, 1, 1}, 4, 4,
+ 32, {{16}, {16}, {0}, {0}},
+ {{16}, {0}, {0}, {0}}},
+
+ {SVGA3D_G16R16, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {16}, {16}, {0}},
+ {{0}, {0}, {16}, {0}}},
+
+ {SVGA3D_A16B16G16R16, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 8, 8,
+ 64, {{16}, {16}, {16}, {16}},
+ {{32}, {16}, {0}, {48}}},
+
+ {SVGA3D_UYVY, SVGA3DBLOCKDESC_YUV,
+ {1, 1, 1}, 2, 2,
+ 16, {{8}, {0}, {8}, {0}},
+ {{0}, {0}, {8}, {0}}},
+
+ {SVGA3D_YUY2, SVGA3DBLOCKDESC_YUV,
+ {1, 1, 1}, 2, 2,
+ 16, {{8}, {0}, {8}, {0}},
+ {{8}, {0}, {0}, {0}}},
+
+ {SVGA3D_NV12, SVGA3DBLOCKDESC_NV12,
+ {2, 2, 1}, 6, 2,
+ 48, {{0}, {0}, {48}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_AYUV, SVGA3DBLOCKDESC_AYUV,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {8}},
+ {{0}, {8}, {16}, {24}}},
+
+ {SVGA3D_R32G32B32A32_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 16, 16,
+ 128, {{32}, {32}, {32}, {32}},
+ {{64}, {32}, {0}, {96}}},
+
+ {SVGA3D_R32G32B32A32_UINT, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 16, 16,
+ 128, {{32}, {32}, {32}, {32}},
+ {{64}, {32}, {0}, {96}}},
+
+ {SVGA3D_R32G32B32A32_SINT, SVGA3DBLOCKDESC_UVWQ,
+ {1, 1, 1}, 16, 16,
+ 128, {{32}, {32}, {32}, {32}},
+ {{64}, {32}, {0}, {96}}},
+
+ {SVGA3D_R32G32B32_TYPELESS, SVGA3DBLOCKDESC_RGB,
+ {1, 1, 1}, 12, 12,
+ 96, {{32}, {32}, {32}, {0}},
+ {{64}, {32}, {0}, {0}}},
+
+ {SVGA3D_R32G32B32_FLOAT, SVGA3DBLOCKDESC_RGB_FP,
+ {1, 1, 1}, 12, 12,
+ 96, {{32}, {32}, {32}, {0}},
+ {{64}, {32}, {0}, {0}}},
+
+ {SVGA3D_R32G32B32_UINT, SVGA3DBLOCKDESC_RGB,
+ {1, 1, 1}, 12, 12,
+ 96, {{32}, {32}, {32}, {0}},
+ {{64}, {32}, {0}, {0}}},
+
+ {SVGA3D_R32G32B32_SINT, SVGA3DBLOCKDESC_UVW,
+ {1, 1, 1}, 12, 12,
+ 96, {{32}, {32}, {32}, {0}},
+ {{64}, {32}, {0}, {0}}},
+
+ {SVGA3D_R16G16B16A16_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 8, 8,
+ 64, {{16}, {16}, {16}, {16}},
+ {{32}, {16}, {0}, {48}}},
+
+ {SVGA3D_R16G16B16A16_UINT, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 8, 8,
+ 64, {{16}, {16}, {16}, {16}},
+ {{32}, {16}, {0}, {48}}},
+
+ {SVGA3D_R16G16B16A16_SNORM, SVGA3DBLOCKDESC_UVWQ,
+ {1, 1, 1}, 8, 8,
+ 64, {{16}, {16}, {16}, {16}},
+ {{32}, {16}, {0}, {48}}},
+
+ {SVGA3D_R16G16B16A16_SINT, SVGA3DBLOCKDESC_UVWQ,
+ {1, 1, 1}, 8, 8,
+ 64, {{16}, {16}, {16}, {16}},
+ {{32}, {16}, {0}, {48}}},
+
+ {SVGA3D_R32G32_TYPELESS, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 8, 8,
+ 64, {{0}, {32}, {32}, {0}},
+ {{0}, {32}, {0}, {0}}},
+
+ {SVGA3D_R32G32_UINT, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 8, 8,
+ 64, {{0}, {32}, {32}, {0}},
+ {{0}, {32}, {0}, {0}}},
+
+ {SVGA3D_R32G32_SINT, SVGA3DBLOCKDESC_UV,
+ {1, 1, 1}, 8, 8,
+ 64, {{0}, {32}, {32}, {0}},
+ {{0}, {32}, {0}, {0}}},
+
+ {SVGA3D_R32G8X24_TYPELESS, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 8, 8,
+ 64, {{0}, {8}, {32}, {0}},
+ {{0}, {32}, {0}, {0}}},
+
+ {SVGA3D_D32_FLOAT_S8X24_UINT, SVGA3DBLOCKDESC_DS,
+ {1, 1, 1}, 8, 8,
+ 64, {{0}, {8}, {32}, {0}},
+ {{0}, {32}, {0}, {0}}},
+
+ {SVGA3D_R32_FLOAT_X8X24_TYPELESS, SVGA3DBLOCKDESC_R_FP,
+ {1, 1, 1}, 8, 8,
+ 64, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_X32_TYPELESS_G8X24_UINT, SVGA3DBLOCKDESC_GREEN,
+ {1, 1, 1}, 8, 8,
+ 64, {{0}, {8}, {0}, {0}},
+ {{0}, {32}, {0}, {0}}},
+
+ {SVGA3D_R10G10B10A2_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{10}, {10}, {10}, {2}},
+ {{0}, {10}, {20}, {30}}},
+
+ {SVGA3D_R10G10B10A2_UINT, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{10}, {10}, {10}, {2}},
+ {{0}, {10}, {20}, {30}}},
+
+ {SVGA3D_R11G11B10_FLOAT, SVGA3DBLOCKDESC_RGB_FP,
+ {1, 1, 1}, 4, 4,
+ 32, {{10}, {11}, {11}, {0}},
+ {{0}, {10}, {21}, {0}}},
+
+ {SVGA3D_R8G8B8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {8}},
+ {{16}, {8}, {0}, {24}}},
+
+ {SVGA3D_R8G8B8A8_UNORM, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {8}},
+ {{16}, {8}, {0}, {24}}},
+
+ {SVGA3D_R8G8B8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {8}},
+ {{16}, {8}, {0}, {24}}},
+
+ {SVGA3D_R8G8B8A8_UINT, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {8}},
+ {{16}, {8}, {0}, {24}}},
+
+ {SVGA3D_R8G8B8A8_SINT, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {8}},
+ {{16}, {8}, {0}, {24}}},
+
+ {SVGA3D_R16G16_TYPELESS, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {0}, {0}}},
+
+ {SVGA3D_R16G16_UINT, SVGA3DBLOCKDESC_RG_FP,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {0}, {0}}},
+
+ {SVGA3D_R16G16_SINT, SVGA3DBLOCKDESC_UV,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {0}, {0}}},
+
+ {SVGA3D_R32_TYPELESS, SVGA3DBLOCKDESC_RED,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_D32_FLOAT, SVGA3DBLOCKDESC_DEPTH,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R32_UINT, SVGA3DBLOCKDESC_RED,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R32_SINT, SVGA3DBLOCKDESC_RED,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R24G8_TYPELESS, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {8}, {24}, {0}},
+ {{0}, {24}, {0}, {0}}},
+
+ {SVGA3D_D24_UNORM_S8_UINT, SVGA3DBLOCKDESC_DS,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {8}, {24}, {0}},
+ {{0}, {24}, {0}, {0}}},
+
+ {SVGA3D_R24_UNORM_X8_TYPELESS, SVGA3DBLOCKDESC_RED,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {0}, {24}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_X24_TYPELESS_G8_UINT, SVGA3DBLOCKDESC_GREEN,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {8}, {0}, {0}},
+ {{0}, {24}, {0}, {0}}},
+
+ {SVGA3D_R8G8_TYPELESS, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {0}, {0}}},
+
+ {SVGA3D_R8G8_UNORM, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {0}, {0}}},
+
+ {SVGA3D_R8G8_UINT, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {0}, {0}}},
+
+ {SVGA3D_R8G8_SINT, SVGA3DBLOCKDESC_UV,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {0}, {0}}},
+
+ {SVGA3D_R16_TYPELESS, SVGA3DBLOCKDESC_RED,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R16_UNORM, SVGA3DBLOCKDESC_RED,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R16_UINT, SVGA3DBLOCKDESC_RED,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R16_SNORM, SVGA3DBLOCKDESC_U,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R16_SINT, SVGA3DBLOCKDESC_U,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R8_TYPELESS, SVGA3DBLOCKDESC_RED,
+ {1, 1, 1}, 1, 1,
+ 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R8_UNORM, SVGA3DBLOCKDESC_RED,
+ {1, 1, 1}, 1, 1,
+ 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R8_UINT, SVGA3DBLOCKDESC_RED,
+ {1, 1, 1}, 1, 1,
+ 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R8_SNORM, SVGA3DBLOCKDESC_U,
+ {1, 1, 1}, 1, 1,
+ 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R8_SINT, SVGA3DBLOCKDESC_U,
+ {1, 1, 1}, 1, 1,
+ 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_P8, SVGA3DBLOCKDESC_RED,
+ {1, 1, 1}, 1, 1,
+ 8, {{0}, {0}, {8}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R9G9B9E5_SHAREDEXP, SVGA3DBLOCKDESC_RGBE,
+ {1, 1, 1}, 4, 4,
+ 32, {{9}, {9}, {9}, {5}},
+ {{18}, {9}, {0}, {27}}},
+
+ {SVGA3D_R8G8_B8G8_UNORM, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {0}, {0}}},
+
+ {SVGA3D_G8R8_G8B8_UNORM, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {8}, {8}, {0}},
+ {{0}, {8}, {0}, {0}}},
+
+ {SVGA3D_BC1_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 8, 8,
+ 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC1_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+ {4, 4, 1}, 8, 8,
+ 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC2_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC2_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC3_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC3_UNORM_SRGB, SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC4_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 8, 8,
+ 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_ATI1, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 8, 8,
+ 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC4_SNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 8, 8,
+ 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC5_TYPELESS, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_ATI2, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC5_SNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{10}, {10}, {10}, {2}},
+ {{0}, {10}, {20}, {30}}},
+
+ {SVGA3D_B8G8R8A8_TYPELESS, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {8}},
+ {{0}, {8}, {16}, {24}}},
+
+ {SVGA3D_B8G8R8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_SRGB,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {8}},
+ {{0}, {8}, {16}, {24}}},
+
+ {SVGA3D_B8G8R8X8_TYPELESS, SVGA3DBLOCKDESC_RGB,
+ {1, 1, 1}, 4, 4,
+ 24, {{8}, {8}, {8}, {0}},
+ {{0}, {8}, {16}, {24}}},
+
+ {SVGA3D_B8G8R8X8_UNORM_SRGB, SVGA3DBLOCKDESC_RGB_SRGB,
+ {1, 1, 1}, 4, 4,
+ 24, {{8}, {8}, {8}, {0}},
+ {{0}, {8}, {16}, {24}}},
+
+ {SVGA3D_Z_DF16, SVGA3DBLOCKDESC_DEPTH,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_Z_DF24, SVGA3DBLOCKDESC_DEPTH,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {8}, {24}, {0}},
+ {{0}, {24}, {0}, {0}}},
+
+ {SVGA3D_Z_D24S8_INT, SVGA3DBLOCKDESC_DS,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {8}, {24}, {0}},
+ {{0}, {24}, {0}, {0}}},
+
+ {SVGA3D_YV12, SVGA3DBLOCKDESC_YV12,
+ {2, 2, 1}, 6, 2,
+ 48, {{0}, {0}, {48}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R32G32B32A32_FLOAT, SVGA3DBLOCKDESC_RGBA_FP,
+ {1, 1, 1}, 16, 16,
+ 128, {{32}, {32}, {32}, {32}},
+ {{64}, {32}, {0}, {96}}},
+
+ {SVGA3D_R16G16B16A16_FLOAT, SVGA3DBLOCKDESC_RGBA_FP,
+ {1, 1, 1}, 8, 8,
+ 64, {{16}, {16}, {16}, {16}},
+ {{32}, {16}, {0}, {48}}},
+
+ {SVGA3D_R16G16B16A16_UNORM, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 8, 8,
+ 64, {{16}, {16}, {16}, {16}},
+ {{32}, {16}, {0}, {48}}},
+
+ {SVGA3D_R32G32_FLOAT, SVGA3DBLOCKDESC_RG_FP,
+ {1, 1, 1}, 8, 8,
+ 64, {{0}, {32}, {32}, {0}},
+ {{0}, {32}, {0}, {0}}},
+
+ {SVGA3D_R10G10B10A2_UNORM, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{10}, {10}, {10}, {2}},
+ {{0}, {10}, {20}, {30}}},
+
+ {SVGA3D_R8G8B8A8_SNORM, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {8}},
+ {{24}, {16}, {8}, {0}}},
+
+ {SVGA3D_R16G16_FLOAT, SVGA3DBLOCKDESC_RG_FP,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {16}, {16}, {0}},
+ {{0}, {16}, {0}, {0}}},
+
+ {SVGA3D_R16G16_UNORM, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {16}, {16}, {0}},
+ {{0}, {0}, {16}, {0}}},
+
+ {SVGA3D_R16G16_SNORM, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 4, 4,
+ 32, {{16}, {16}, {0}, {0}},
+ {{16}, {0}, {0}, {0}}},
+
+ {SVGA3D_R32_FLOAT, SVGA3DBLOCKDESC_R_FP,
+ {1, 1, 1}, 4, 4,
+ 32, {{0}, {0}, {32}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_R8G8_SNORM, SVGA3DBLOCKDESC_RG,
+ {1, 1, 1}, 2, 2,
+ 16, {{8}, {8}, {0}, {0}},
+ {{8}, {0}, {0}, {0}}},
+
+ {SVGA3D_R16_FLOAT, SVGA3DBLOCKDESC_R_FP,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_D16_UNORM, SVGA3DBLOCKDESC_DEPTH,
+ {1, 1, 1}, 2, 2,
+ 16, {{0}, {0}, {16}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_A8_UNORM, SVGA3DBLOCKDESC_ALPHA,
+ {1, 1, 1}, 1, 1,
+ 8, {{0}, {0}, {0}, {8}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC1_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 8, 8,
+ 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC2_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC3_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_B5G6R5_UNORM, SVGA3DBLOCKDESC_RGB,
+ {1, 1, 1}, 2, 2,
+ 16, {{5}, {6}, {5}, {0}},
+ {{0}, {5}, {11}, {0}}},
+
+ {SVGA3D_B5G5R5A1_UNORM, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 2, 2,
+ 16, {{5}, {5}, {5}, {1}},
+ {{0}, {5}, {10}, {15}}},
+
+ {SVGA3D_B8G8R8A8_UNORM, SVGA3DBLOCKDESC_RGBA,
+ {1, 1, 1}, 4, 4,
+ 32, {{8}, {8}, {8}, {8}},
+ {{0}, {8}, {16}, {24}}},
+
+ {SVGA3D_B8G8R8X8_UNORM, SVGA3DBLOCKDESC_RGB,
+ {1, 1, 1}, 4, 4,
+ 24, {{8}, {8}, {8}, {0}},
+ {{0}, {8}, {16}, {24}}},
+
+ {SVGA3D_BC4_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 8, 8,
+ 64, {{0}, {0}, {64}, {0}},
+ {{0}, {0}, {0}, {0}}},
+
+ {SVGA3D_BC5_UNORM, SVGA3DBLOCKDESC_COMPRESSED,
+ {4, 4, 1}, 16, 16,
+ 128, {{0}, {0}, {128}, {0}},
+ {{0}, {0}, {0}, {0}}},
};
return (tmp > (uint64_t) ((uint32) -1)) ? (uint32) -1 : tmp;
}
+/*
+ * clamped_uadd32 --
+ *
+ *    Add two unsigned 32-bit values, saturating instead of wrapping.
+ *
+ *    Returns a + b, or MAX_UINT32 when the 32-bit sum wrapped around.
+ */
+static inline uint32 clamped_uadd32(uint32 a, uint32 b)
+{
+   const uint32 sum = a + b;
+
+   /*
+    * Unsigned addition wraps modulo 2^32; a wrapped sum is smaller than
+    * either operand, so comparing against one of them detects overflow.
+    */
+   return (sum < a) ? MAX_UINT32 : sum;
+}
+
+
static inline const struct svga3d_surface_desc *
svga3dsurface_get_desc(SVGA3dSurfaceFormat format)
{
svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
SVGA3dSize baseLevelSize,
uint32 numMipLevels,
- uint32 face,
+ uint32 layer,
uint32 mip)
{
}
}
- offset = mipChainBytes * face + mipChainBytesToLevel;
+ offset = mipChainBytes * layer + mipChainBytesToLevel;
return offset;
}
svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format,
SVGA3dSize base_level_size,
uint32 num_mip_levels,
- bool cubemap)
+ uint32 num_layers)
{
const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format);
uint64_t total_size = 0;
&size, 0);
}
- if (cubemap)
- total_size *= SVGA3D_MAX_SURFACE_FACES;
+ total_size *= num_layers;
return (total_size > (uint64_t) MAX_UINT32) ? MAX_UINT32 :
(uint32) total_size;
/**********************************************************
- * Copyright 1998-2014 VMware, Inc. All rights reserved.
+ * Copyright 2007-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
*/
#define SVGA3D_INVALID_ID ((uint32)-1)
-#define SVGA3D_INVALID_CID SVGA3D_INVALID_ID
-#define SVGA3D_INVALID_SID SVGA3D_INVALID_ID
-#define SVGA3D_INVALID_SHID SVGA3D_INVALID_ID
-
typedef uint32 SVGA3dBool; /* 32-bit Bool definition */
typedef uint32 SVGA3dColor; /* a, r, g, b */
/*
* Surface formats.
- *
- * If you modify this list, be sure to keep GLUtil.c in sync. It
- * includes the internal format definition of each surface in
- * GLUtil_ConvertSurfaceFormat, and it contains a table of
- * human-readable names in GLUtil_GetFormatName.
*/
-
typedef enum SVGA3dSurfaceFormat {
SVGA3D_FORMAT_INVALID = 0,
SVGA3D_BUMPU8V8 = 20,
SVGA3D_BUMPL6V5U5 = 21,
SVGA3D_BUMPX8L8V8U8 = 22,
- SVGA3D_BUMPL8V8U8 = 23,
+ SVGA3D_FORMAT_DEAD1 = 23,
SVGA3D_ARGB_S10E5 = 24, /* 16-bit floating-point ARGB */
SVGA3D_ARGB_S23E8 = 25, /* 32-bit floating-point ARGB */
SVGA3D_B8G8R8X8_TYPELESS = 116,
SVGA3D_B8G8R8X8_UNORM_SRGB = 117,
- /* Advanced D3D9 depth formats. */
+ /* Advanced depth formats. */
SVGA3D_Z_DF16 = 118,
SVGA3D_Z_DF24 = 119,
SVGA3D_Z_D24S8_INT = 120,
SVGA3D_FORMAT_MAX
} SVGA3dSurfaceFormat;
+typedef uint32 SVGA3dSurfaceFlags;
+#define SVGA3D_SURFACE_CUBEMAP (1 << 0)
+
+/*
+ * HINT flags are not enforced by the device but are useful for
+ * performance.
+ */
+#define SVGA3D_SURFACE_HINT_STATIC (1 << 1)
+#define SVGA3D_SURFACE_HINT_DYNAMIC (1 << 2)
+#define SVGA3D_SURFACE_HINT_INDEXBUFFER (1 << 3)
+#define SVGA3D_SURFACE_HINT_VERTEXBUFFER (1 << 4)
+#define SVGA3D_SURFACE_HINT_TEXTURE (1 << 5)
+#define SVGA3D_SURFACE_HINT_RENDERTARGET (1 << 6)
+#define SVGA3D_SURFACE_HINT_DEPTHSTENCIL (1 << 7)
+#define SVGA3D_SURFACE_HINT_WRITEONLY (1 << 8)
+#define SVGA3D_SURFACE_MASKABLE_ANTIALIAS (1 << 9)
+#define SVGA3D_SURFACE_AUTOGENMIPMAPS (1 << 10)
+#define SVGA3D_SURFACE_DECODE_RENDERTARGET (1 << 11)
+
+/*
+ * Is this surface using a base-level pitch for its mob backing?
+ *
+ * This flag is not intended to be set by guest-drivers, but is instead
+ * set by the device when the surface is bound to a mob with a specified
+ * pitch.
+ */
+#define SVGA3D_SURFACE_MOB_PITCH (1 << 12)
+
+#define SVGA3D_SURFACE_INACTIVE (1 << 13)
+#define SVGA3D_SURFACE_HINT_RT_LOCKABLE (1 << 14)
+#define SVGA3D_SURFACE_VOLUME (1 << 15)
+
+/*
+ * Required to be set on a surface to bind it to a screen target.
+ */
+#define SVGA3D_SURFACE_SCREENTARGET (1 << 16)
+
+/*
+ * Align images in the guest-backing mob to 16-bytes.
+ */
+#define SVGA3D_SURFACE_ALIGN16 (1 << 17)
+
+#define SVGA3D_SURFACE_1D (1 << 18)
+#define SVGA3D_SURFACE_ARRAY (1 << 19)
+
+/*
+ * Bind flags.
+ * These are enforced for any surface defined with DefineGBSurface_v2.
+ */
+#define SVGA3D_SURFACE_BIND_VERTEX_BUFFER (1 << 20)
+#define SVGA3D_SURFACE_BIND_INDEX_BUFFER (1 << 21)
+#define SVGA3D_SURFACE_BIND_CONSTANT_BUFFER (1 << 22)
+#define SVGA3D_SURFACE_BIND_SHADER_RESOURCE (1 << 23)
+#define SVGA3D_SURFACE_BIND_RENDER_TARGET (1 << 24)
+#define SVGA3D_SURFACE_BIND_DEPTH_STENCIL (1 << 25)
+#define SVGA3D_SURFACE_BIND_STREAM_OUTPUT (1 << 26)
+
+/*
+ * The STAGING flags note that the surface will not be used directly by the
+ * drawing pipeline, i.e. that it will not be bound to any bind point.
+ * Staging surfaces may be used by copy operations to move data in and out
+ * of other surfaces. No bind flags may be set on surfaces with this flag.
+ *
+ * The HINT_INDIRECT_UPDATE flag suggests that the surface will receive
+ * updates indirectly, i.e. the surface will not be updated directly, but
+ * will receive copies from staging surfaces.
+ */
+#define SVGA3D_SURFACE_STAGING_UPLOAD (1 << 27)
+#define SVGA3D_SURFACE_STAGING_DOWNLOAD (1 << 28)
+#define SVGA3D_SURFACE_HINT_INDIRECT_UPDATE (1 << 29)
+
+/*
+ * Setting this flag allows this surface to be used with the
+ * SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER command. It is only valid for
+ * buffer surfaces, and no bind flags are allowed to be set on surfaces
+ * with this flag.
+ */
+#define SVGA3D_SURFACE_TRANSFER_FROM_BUFFER (1 << 30)
/*
- * These are really the D3DFORMAT_OP defines from the wdk. We need
- * them so that we can query the host for what the supported surface
- * operations are (when we're using the D3D backend, in particular),
- * and so we can send those operations to the guest.
+ * Marker for the last defined bit in SVGA3dSurfaceFlags.
*/
+/* 1u: shifting a signed 1 into bit 31 is undefined behavior in C. */
+#define SVGA3D_SURFACE_FLAG_MAX (1u << 31)
+
+#define SVGA3D_SURFACE_HB_DISALLOWED_MASK \
+ ( SVGA3D_SURFACE_MOB_PITCH | \
+ SVGA3D_SURFACE_SCREENTARGET | \
+ SVGA3D_SURFACE_ALIGN16 | \
+ SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \
+ SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
+ SVGA3D_SURFACE_STAGING_UPLOAD | \
+ SVGA3D_SURFACE_STAGING_DOWNLOAD | \
+ SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \
+ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \
+ )
+
+#define SVGA3D_SURFACE_2D_DISALLOWED_MASK \
+ ( SVGA3D_SURFACE_CUBEMAP | \
+ SVGA3D_SURFACE_MASKABLE_ANTIALIAS | \
+ SVGA3D_SURFACE_AUTOGENMIPMAPS | \
+ SVGA3D_SURFACE_DECODE_RENDERTARGET | \
+ SVGA3D_SURFACE_VOLUME | \
+ SVGA3D_SURFACE_1D | \
+ SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \
+ SVGA3D_SURFACE_BIND_INDEX_BUFFER | \
+ SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \
+ SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \
+ SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
+ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \
+ )
+
+#define SVGA3D_SURFACE_SCREENTARGET_DISALLOWED_MASK \
+ ( SVGA3D_SURFACE_CUBEMAP | \
+ SVGA3D_SURFACE_AUTOGENMIPMAPS | \
+ SVGA3D_SURFACE_DECODE_RENDERTARGET | \
+ SVGA3D_SURFACE_VOLUME | \
+ SVGA3D_SURFACE_1D | \
+ SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \
+ SVGA3D_SURFACE_BIND_INDEX_BUFFER | \
+ SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \
+ SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \
+ SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
+ SVGA3D_SURFACE_INACTIVE | \
+ SVGA3D_SURFACE_STAGING_UPLOAD | \
+ SVGA3D_SURFACE_STAGING_DOWNLOAD | \
+ SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \
+ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \
+ )
+
+#define SVGA3D_SURFACE_DX_ONLY_MASK \
+ ( SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
+ SVGA3D_SURFACE_STAGING_UPLOAD | \
+ SVGA3D_SURFACE_STAGING_DOWNLOAD | \
+ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER \
+ )
+
+#define SVGA3D_SURFACE_STAGING_MASK \
+ ( SVGA3D_SURFACE_STAGING_UPLOAD | \
+ SVGA3D_SURFACE_STAGING_DOWNLOAD \
+ )
+
+#define SVGA3D_SURFACE_BIND_MASK \
+ ( SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \
+ SVGA3D_SURFACE_BIND_INDEX_BUFFER | \
+ SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \
+ SVGA3D_SURFACE_BIND_SHADER_RESOURCE | \
+ SVGA3D_SURFACE_BIND_RENDER_TARGET | \
+ SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \
+ SVGA3D_SURFACE_BIND_STREAM_OUTPUT \
+ )
+
typedef enum {
SVGA3DFORMAT_OP_TEXTURE = 0x00000001,
SVGA3DFORMAT_OP_VOLUMETEXTURE = 0x00000002,
SVGA3dLinePattern;
typedef enum {
- SVGA3D_BLENDOP_INVALID = 0,
- SVGA3D_BLENDOP_MIN = 1,
- SVGA3D_BLENDOP_ZERO = 1,
- SVGA3D_BLENDOP_ONE = 2,
- SVGA3D_BLENDOP_SRCCOLOR = 3,
- SVGA3D_BLENDOP_INVSRCCOLOR = 4,
- SVGA3D_BLENDOP_SRCALPHA = 5,
- SVGA3D_BLENDOP_INVSRCALPHA = 6,
- SVGA3D_BLENDOP_DESTALPHA = 7,
- SVGA3D_BLENDOP_INVDESTALPHA = 8,
- SVGA3D_BLENDOP_DESTCOLOR = 9,
- SVGA3D_BLENDOP_INVDESTCOLOR = 10,
- SVGA3D_BLENDOP_SRCALPHASAT = 11,
- SVGA3D_BLENDOP_BLENDFACTOR = 12,
- SVGA3D_BLENDOP_INVBLENDFACTOR = 13,
- SVGA3D_BLENDOP_SRC1COLOR = 14,
- SVGA3D_BLENDOP_INVSRC1COLOR = 15,
- SVGA3D_BLENDOP_SRC1ALPHA = 16,
- SVGA3D_BLENDOP_INVSRC1ALPHA = 17,
+ SVGA3D_BLENDOP_INVALID = 0,
+ SVGA3D_BLENDOP_MIN = 1,
+ SVGA3D_BLENDOP_ZERO = 1,
+ SVGA3D_BLENDOP_ONE = 2,
+ SVGA3D_BLENDOP_SRCCOLOR = 3,
+ SVGA3D_BLENDOP_INVSRCCOLOR = 4,
+ SVGA3D_BLENDOP_SRCALPHA = 5,
+ SVGA3D_BLENDOP_INVSRCALPHA = 6,
+ SVGA3D_BLENDOP_DESTALPHA = 7,
+ SVGA3D_BLENDOP_INVDESTALPHA = 8,
+ SVGA3D_BLENDOP_DESTCOLOR = 9,
+ SVGA3D_BLENDOP_INVDESTCOLOR = 10,
+ SVGA3D_BLENDOP_SRCALPHASAT = 11,
+ SVGA3D_BLENDOP_BLENDFACTOR = 12,
+ SVGA3D_BLENDOP_INVBLENDFACTOR = 13,
+ SVGA3D_BLENDOP_SRC1COLOR = 14,
+ SVGA3D_BLENDOP_INVSRC1COLOR = 15,
+ SVGA3D_BLENDOP_SRC1ALPHA = 16,
+ SVGA3D_BLENDOP_INVSRC1ALPHA = 17,
+ SVGA3D_BLENDOP_BLENDFACTORALPHA = 18,
+ SVGA3D_BLENDOP_INVBLENDFACTORALPHA = 19,
SVGA3D_BLENDOP_MAX
} SVGA3dBlendOp;
} SVGA3dBlendEquation;
typedef enum {
+ SVGA3D_DX11_LOGICOP_MIN = 0,
+ SVGA3D_DX11_LOGICOP_CLEAR = 0,
+ SVGA3D_DX11_LOGICOP_SET = 1,
+ SVGA3D_DX11_LOGICOP_COPY = 2,
+ SVGA3D_DX11_LOGICOP_COPY_INVERTED = 3,
+ SVGA3D_DX11_LOGICOP_NOOP = 4,
+ SVGA3D_DX11_LOGICOP_INVERT = 5,
+ SVGA3D_DX11_LOGICOP_AND = 6,
+ SVGA3D_DX11_LOGICOP_NAND = 7,
+ SVGA3D_DX11_LOGICOP_OR = 8,
+ SVGA3D_DX11_LOGICOP_NOR = 9,
+ SVGA3D_DX11_LOGICOP_XOR = 10,
+ SVGA3D_DX11_LOGICOP_EQUIV = 11,
+ SVGA3D_DX11_LOGICOP_AND_REVERSE = 12,
+ SVGA3D_DX11_LOGICOP_AND_INVERTED = 13,
+ SVGA3D_DX11_LOGICOP_OR_REVERSE = 14,
+ SVGA3D_DX11_LOGICOP_OR_INVERTED = 15,
+ SVGA3D_DX11_LOGICOP_MAX
+} SVGA3dDX11LogicOp;
+
+typedef enum {
SVGA3D_FRONTWINDING_INVALID = 0,
SVGA3D_FRONTWINDING_CW = 1,
SVGA3D_FRONTWINDING_CCW = 2,
SVGA3D_TEX_FILTER_NEAREST = 1,
SVGA3D_TEX_FILTER_LINEAR = 2,
SVGA3D_TEX_FILTER_ANISOTROPIC = 3,
- SVGA3D_TEX_FILTER_FLATCUBIC = 4, // Deprecated, not implemented
- SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, // Deprecated, not implemented
- SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, // Not currently implemented
- SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, // Not currently implemented
+ SVGA3D_TEX_FILTER_FLATCUBIC = 4, /* Deprecated, not implemented */
+ SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, /* Deprecated, not implemented */
+ SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, /* Not currently implemented */
+ SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, /* Not currently implemented */
SVGA3D_TEX_FILTER_MAX
} SVGA3dTextureFilter;
typedef enum {
SVGA3D_DECLUSAGE_POSITION = 0,
- SVGA3D_DECLUSAGE_BLENDWEIGHT, // 1
- SVGA3D_DECLUSAGE_BLENDINDICES, // 2
- SVGA3D_DECLUSAGE_NORMAL, // 3
- SVGA3D_DECLUSAGE_PSIZE, // 4
- SVGA3D_DECLUSAGE_TEXCOORD, // 5
- SVGA3D_DECLUSAGE_TANGENT, // 6
- SVGA3D_DECLUSAGE_BINORMAL, // 7
- SVGA3D_DECLUSAGE_TESSFACTOR, // 8
- SVGA3D_DECLUSAGE_POSITIONT, // 9
- SVGA3D_DECLUSAGE_COLOR, // 10
- SVGA3D_DECLUSAGE_FOG, // 11
- SVGA3D_DECLUSAGE_DEPTH, // 12
- SVGA3D_DECLUSAGE_SAMPLE, // 13
+ SVGA3D_DECLUSAGE_BLENDWEIGHT,
+ SVGA3D_DECLUSAGE_BLENDINDICES,
+ SVGA3D_DECLUSAGE_NORMAL,
+ SVGA3D_DECLUSAGE_PSIZE,
+ SVGA3D_DECLUSAGE_TEXCOORD,
+ SVGA3D_DECLUSAGE_TANGENT,
+ SVGA3D_DECLUSAGE_BINORMAL,
+ SVGA3D_DECLUSAGE_TESSFACTOR,
+ SVGA3D_DECLUSAGE_POSITIONT,
+ SVGA3D_DECLUSAGE_COLOR,
+ SVGA3D_DECLUSAGE_FOG,
+ SVGA3D_DECLUSAGE_DEPTH,
+ SVGA3D_DECLUSAGE_SAMPLE,
SVGA3D_DECLUSAGE_MAX
} SVGA3dDeclUsage;
SVGA3D_DECLMETHOD_DEFAULT = 0,
SVGA3D_DECLMETHOD_PARTIALU,
SVGA3D_DECLMETHOD_PARTIALV,
- SVGA3D_DECLMETHOD_CROSSUV, // Normal
+ SVGA3D_DECLMETHOD_CROSSUV, /* Normal */
SVGA3D_DECLMETHOD_UV,
- SVGA3D_DECLMETHOD_LOOKUP, // Lookup a displacement map
- SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map
+ SVGA3D_DECLMETHOD_LOOKUP, /* Lookup a displacement map */
+ SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, /* Lookup a pre-sampled displacement */
+ /* map */
} SVGA3dDeclMethod;
typedef enum {
SVGA3D_SHADERTYPE_MIN = 1,
SVGA3D_SHADERTYPE_VS = 1,
SVGA3D_SHADERTYPE_PS = 2,
- SVGA3D_SHADERTYPE_MAX = 3,
SVGA3D_SHADERTYPE_PREDX_MAX = 3,
SVGA3D_SHADERTYPE_GS = 3,
- SVGA3D_SHADERTYPE_DX_MAX = 4,
+ SVGA3D_SHADERTYPE_DX10_MAX = 4,
+ SVGA3D_SHADERTYPE_HS = 4,
+ SVGA3D_SHADERTYPE_DS = 5,
+ SVGA3D_SHADERTYPE_CS = 6,
+ SVGA3D_SHADERTYPE_MAX = 7
} SVGA3dShaderType;
#define SVGA3D_NUM_SHADERTYPE_PREDX \
(SVGA3D_SHADERTYPE_PREDX_MAX - SVGA3D_SHADERTYPE_MIN)
-#define SVGA3D_NUM_SHADERTYPE_DX \
- (SVGA3D_SHADERTYPE_DX_MAX - SVGA3D_SHADERTYPE_MIN)
+#define SVGA3D_NUM_SHADERTYPE_DX10 \
+ (SVGA3D_SHADERTYPE_DX10_MAX - SVGA3D_SHADERTYPE_MIN)
+
+#define SVGA3D_NUM_SHADERTYPE \
+ (SVGA3D_SHADERTYPE_MAX - SVGA3D_SHADERTYPE_MIN)
typedef enum {
SVGA3D_CONST_TYPE_MIN = 0,
} SVGA3dStretchBltMode;
typedef enum {
- SVGA3D_QUERYTYPE_INVALID = ((uint32)-1),
+ SVGA3D_QUERYTYPE_INVALID = ((uint8)-1),
SVGA3D_QUERYTYPE_MIN = 0,
SVGA3D_QUERYTYPE_OCCLUSION = 0,
- SVGA3D_QUERYTYPE_EVENT = 1,
- SVGA3D_QUERYTYPE_TIMESTAMP = 2,
- SVGA3D_QUERYTYPE_TIMESTAMPDISJOINT = 3,
- SVGA3D_QUERYTYPE_PIPELINESTATS = 4,
- SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE = 5,
- SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS = 6,
- SVGA3D_QUERYTYPE_STREAMOVERFLOWPREDICATE = 7,
- SVGA3D_QUERYTYPE_OCCLUSION64 = 8,
+ SVGA3D_QUERYTYPE_TIMESTAMP = 1,
+ SVGA3D_QUERYTYPE_TIMESTAMPDISJOINT = 2,
+ SVGA3D_QUERYTYPE_PIPELINESTATS = 3,
+ SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE = 4,
+ SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS = 5,
+ SVGA3D_QUERYTYPE_STREAMOVERFLOWPREDICATE = 6,
+ SVGA3D_QUERYTYPE_OCCLUSION64 = 7,
+ SVGA3D_QUERYTYPE_DX10_MAX = 8,
+ SVGA3D_QUERYTYPE_SOSTATS_STREAM0 = 8,
+ SVGA3D_QUERYTYPE_SOSTATS_STREAM1 = 9,
+ SVGA3D_QUERYTYPE_SOSTATS_STREAM2 = 10,
+ SVGA3D_QUERYTYPE_SOSTATS_STREAM3 = 11,
+ SVGA3D_QUERYTYPE_SOP_STREAM0 = 12,
+ SVGA3D_QUERYTYPE_SOP_STREAM1 = 13,
+ SVGA3D_QUERYTYPE_SOP_STREAM2 = 14,
+ SVGA3D_QUERYTYPE_SOP_STREAM3 = 15,
SVGA3D_QUERYTYPE_MAX
} SVGA3dQueryType;
+typedef uint8 SVGA3dQueryTypeUint8;
+
#define SVGA3D_NUM_QUERYTYPE (SVGA3D_QUERYTYPE_MAX - SVGA3D_QUERYTYPE_MIN)
/*
* This is the maximum number of queries per context that can be active
* simultaneously between a beginQuery and endQuery.
*/
-#define SVGA3D_MAX_QUERY_PER_CONTEXT 64
+#define SVGA3D_MAX_QUERY 64
+
+/*
+ * Query result buffer formats
+ */
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint32 samplesRendered;
+}
+#include "vmware_pack_end.h"
+SVGADXOcclusionQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint32 passed;
+}
+#include "vmware_pack_end.h"
+SVGADXEventQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint64 timestamp;
+}
+#include "vmware_pack_end.h"
+SVGADXTimestampQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint64 realFrequency;
+ uint32 disjoint;
+}
+#include "vmware_pack_end.h"
+SVGADXTimestampDisjointQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint64 inputAssemblyVertices;
+ uint64 inputAssemblyPrimitives;
+ uint64 vertexShaderInvocations;
+ uint64 geometryShaderInvocations;
+ uint64 geometryShaderPrimitives;
+ uint64 clipperInvocations;
+ uint64 clipperPrimitives;
+ uint64 pixelShaderInvocations;
+ uint64 hullShaderInvocations;
+ uint64 domainShaderInvocations;
+ uint64 computeShaderInvocations;
+}
+#include "vmware_pack_end.h"
+SVGADXPipelineStatisticsQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint32 anySamplesRendered;
+}
+#include "vmware_pack_end.h"
+SVGADXOcclusionPredicateQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint64 numPrimitivesWritten;
+ uint64 numPrimitivesRequired;
+}
+#include "vmware_pack_end.h"
+SVGADXStreamOutStatisticsQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint32 overflowed;
+}
+#include "vmware_pack_end.h"
+SVGADXStreamOutPredicateQueryResult;
+
+typedef
+#include "vmware_pack_begin.h"
+struct {
+ uint64 samplesRendered;
+}
+#include "vmware_pack_end.h"
+SVGADXOcclusion64QueryResult;
+
+/*
+ * SVGADXQueryResultUnion is not intended for use in the protocol, but is
+ * very helpful when working with queries generically.
+ */
+typedef
+#include "vmware_pack_begin.h"
+union SVGADXQueryResultUnion {
+ SVGADXOcclusionQueryResult occ;
+ SVGADXEventQueryResult event;
+ SVGADXTimestampQueryResult ts;
+ SVGADXTimestampDisjointQueryResult tsDisjoint;
+ SVGADXPipelineStatisticsQueryResult pipelineStats;
+ SVGADXOcclusionPredicateQueryResult occPred;
+ SVGADXStreamOutStatisticsQueryResult soStats;
+ SVGADXStreamOutPredicateQueryResult soPred;
+ SVGADXOcclusion64QueryResult occ64;
+}
+#include "vmware_pack_end.h"
+SVGADXQueryResultUnion;
+
typedef enum {
- SVGA3D_QUERYSTATE_PENDING = 0, /* Waiting on the host (set by guest) */
- SVGA3D_QUERYSTATE_SUCCEEDED = 1, /* Completed successfully (set by host) */
- SVGA3D_QUERYSTATE_FAILED = 2, /* Completed unsuccessfully (set by host) */
- SVGA3D_QUERYSTATE_NEW = 3, /* Never submitted (For guest use only) */
+ SVGA3D_QUERYSTATE_PENDING = 0, /* Query is not finished yet */
+ SVGA3D_QUERYSTATE_SUCCEEDED = 1, /* Completed successfully */
+ SVGA3D_QUERYSTATE_FAILED = 2, /* Completed unsuccessfully */
+ SVGA3D_QUERYSTATE_NEW = 3, /* Never submitted (guest only) */
} SVGA3dQueryState;
typedef enum {
struct {
union {
struct {
- uint16 function; // SVGA3dFogFunction
- uint8 type; // SVGA3dFogType
- uint8 base; // SVGA3dFogBase
+ uint16 function; /* SVGA3dFogFunction */
+ uint8 type; /* SVGA3dFogType */
+ uint8 base; /* SVGA3dFogBase */
};
uint32 uintValue;
};
/*
* Guest-backed objects definitions.
*/
+typedef enum {
+ SVGA_OTABLE_MOB = 0,
+ SVGA_OTABLE_MIN = 0,
+ SVGA_OTABLE_SURFACE = 1,
+ SVGA_OTABLE_CONTEXT = 2,
+ SVGA_OTABLE_SHADER = 3,
+ SVGA_OTABLE_SCREENTARGET = 4,
+
+ SVGA_OTABLE_DX9_MAX = 5,
-typedef uint32 SVGAMobId;
+ SVGA_OTABLE_DXCONTEXT = 5,
+ SVGA_OTABLE_MAX = 6
+} SVGAOTableType;
+
+/*
+ * Deprecated.
+ */
+#define SVGA_OTABLE_COUNT 4
+
+typedef enum {
+ SVGA_COTABLE_MIN = 0,
+ SVGA_COTABLE_RTVIEW = 0,
+ SVGA_COTABLE_DSVIEW = 1,
+ SVGA_COTABLE_SRVIEW = 2,
+ SVGA_COTABLE_ELEMENTLAYOUT = 3,
+ SVGA_COTABLE_BLENDSTATE = 4,
+ SVGA_COTABLE_DEPTHSTENCIL = 5,
+ SVGA_COTABLE_RASTERIZERSTATE = 6,
+ SVGA_COTABLE_SAMPLER = 7,
+ SVGA_COTABLE_STREAMOUTPUT = 8,
+ SVGA_COTABLE_DXQUERY = 9,
+ SVGA_COTABLE_DXSHADER = 10,
+ SVGA_COTABLE_DX10_MAX = 11,
+ SVGA_COTABLE_UAVIEW = 11,
+ SVGA_COTABLE_MAX
+} SVGACOTableType;
+
+/*
+ * The largest size (number of entries) allowed in a COTable.
+ */
+#define SVGA_COTABLE_MAX_IDS (MAX_UINT16 - 2)
typedef enum SVGAMobFormat {
SVGA3D_MOBFMT_INVALID = SVGA3D_INVALID_ID,
SVGA3D_MOBFMT_PTDEPTH64_0 = 4,
SVGA3D_MOBFMT_PTDEPTH64_1 = 5,
SVGA3D_MOBFMT_PTDEPTH64_2 = 6,
+ SVGA3D_MOBFMT_PREDX_MAX = 7,
+ SVGA3D_MOBFMT_EMPTY = 7,
SVGA3D_MOBFMT_MAX,
} SVGAMobFormat;
-#endif // _SVGA3D_TYPES_H_
+#define SVGA3D_MOB_EMPTY_BASE 1
+
+#endif /* _SVGA3D_TYPES_H_ */
/**********************************************************
- * Copyright 2007-2014 VMware, Inc. All rights reserved.
+ * Copyright 2007-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
*/
#define SVGA_ESCAPE_VMWARE_HINT 0x00030000
-#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN 0x00030001 // Deprecated
+#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN 0x00030001 /* Deprecated */
typedef
struct {
/**********************************************************
- * Copyright 2007-2014 VMware, Inc. All rights reserved.
+ * Copyright 2007-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* Video formats we support
*/
-#define VMWARE_FOURCC_YV12 0x32315659 // 'Y' 'V' '1' '2'
-#define VMWARE_FOURCC_YUY2 0x32595559 // 'Y' 'U' 'Y' '2'
-#define VMWARE_FOURCC_UYVY 0x59565955 // 'U' 'Y' 'V' 'Y'
+#define VMWARE_FOURCC_YV12 0x32315659 /* 'Y' 'V' '1' '2' */
+#define VMWARE_FOURCC_YUY2 0x32595559 /* 'Y' 'U' 'Y' '2' */
+#define VMWARE_FOURCC_UYVY 0x59565955 /* 'U' 'Y' 'V' 'Y' */
typedef enum {
SVGA_OVERLAY_FORMAT_INVALID = 0,
uint32 streamId;
} header;
- // May include zero or more items.
+ /* May include zero or more items. */
struct {
uint32 registerId;
uint32 value;
*/
static inline Bool
-VMwareVideoGetAttributes(const SVGAOverlayFormat format, // IN
- uint32 *width, // IN / OUT
- uint32 *height, // IN / OUT
- uint32 *size, // OUT
- uint32 *pitches, // OUT (optional)
- uint32 *offsets) // OUT (optional)
+VMwareVideoGetAttributes(const SVGAOverlayFormat format, /* IN */
+ uint32 *width, /* IN / OUT */
+ uint32 *height, /* IN / OUT */
+ uint32 *size, /* OUT */
+ uint32 *pitches, /* OUT (optional) */
+ uint32 *offsets) /* OUT (optional) */
{
int tmp;
return TRUE;
}
-#endif // _SVGA_OVERLAY_H_
+#endif /* _SVGA_OVERLAY_H_ */
/**********************************************************
- * Copyright 1998-2014 VMware, Inc. All rights reserved.
+ * Copyright 1998-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
SVGA_REG_ENABLE_HIDE = (1 << 1),
} SvgaRegEnable;
+typedef uint32 SVGAMobId;
+
/*
* Arbitrary and meaningless limits. Please ignore these when writing
* new drivers.
struct {
uint32 bitsPerPixel : 8;
uint32 colorDepth : 8;
- uint32 reserved : 16; // Must be zero
+ uint32 reserved : 16; /* Must be zero */
};
uint32 value;
uint32 b : 8;
uint32 g : 8;
uint32 r : 8;
- uint32 x : 8; // Unused
+ uint32 x : 8; /* Unused */
};
uint32 value;
* SVGA_CAP_COMMAND_BUFFERS --
* Enable register based command buffer submission.
*
+ * SVGA_CAP_DEAD1 --
+ * This cap was incorrectly used by old drivers and should not be
+ * reused.
+ *
+ * SVGA_CAP_CMD_BUFFERS_2 --
+ * Enable support for the prepend command buffer submission
+ * registers, SVGA_REG_CMD_PREPEND_LOW and
+ * SVGA_REG_CMD_PREPEND_HIGH.
+ *
* SVGA_CAP_GBOBJECTS --
* Enable guest-backed objects and surfaces.
*
+ * SVGA_CAP_CMD_BUFFERS_3 --
+ * Enable support for command buffers in a mob.
*/
#define SVGA_CAP_NONE 0x00000000
#define SVGA_CAP_RECT_COPY 0x00000002
#define SVGA_CAP_CURSOR 0x00000020
-#define SVGA_CAP_CURSOR_BYPASS 0x00000040 // Legacy (Use Cursor Bypass 3 instead)
-#define SVGA_CAP_CURSOR_BYPASS_2 0x00000080 // Legacy (Use Cursor Bypass 3 instead)
+#define SVGA_CAP_CURSOR_BYPASS 0x00000040
+#define SVGA_CAP_CURSOR_BYPASS_2 0x00000080
#define SVGA_CAP_8BIT_EMULATION 0x00000100
#define SVGA_CAP_ALPHA_CURSOR 0x00000200
#define SVGA_CAP_3D 0x00004000
#define SVGA_CAP_EXTENDED_FIFO 0x00008000
-#define SVGA_CAP_MULTIMON 0x00010000 // Legacy multi-monitor support
+#define SVGA_CAP_MULTIMON 0x00010000
#define SVGA_CAP_PITCHLOCK 0x00020000
#define SVGA_CAP_IRQMASK 0x00040000
-#define SVGA_CAP_DISPLAY_TOPOLOGY 0x00080000 // Legacy multi-monitor support
+#define SVGA_CAP_DISPLAY_TOPOLOGY 0x00080000
#define SVGA_CAP_GMR 0x00100000
#define SVGA_CAP_TRACES 0x00200000
#define SVGA_CAP_GMR2 0x00400000
#define SVGA_CAP_DEAD1 0x02000000
#define SVGA_CAP_CMD_BUFFERS_2 0x04000000
#define SVGA_CAP_GBOBJECTS 0x08000000
+#define SVGA_CAP_CMD_BUFFERS_3 0x10000000
+
+#define SVGA_CAP_CMD_RESERVED 0x80000000
/*
SVGA_FIFO_CAPABILITIES = 4,
SVGA_FIFO_FLAGS,
- // Valid with SVGA_FIFO_CAP_FENCE:
+ /* Valid with SVGA_FIFO_CAP_FENCE: */
SVGA_FIFO_FENCE,
/*
* These in block 3a, the VMX currently considers mandatory for the
* extended FIFO.
*/
-
- // Valid if exists (i.e. if extended FIFO enabled):
+
+ /* Valid if exists (i.e. if extended FIFO enabled): */
SVGA_FIFO_3D_HWVERSION, /* See SVGA3dHardwareVersion in svga3d_reg.h */
- // Valid with SVGA_FIFO_CAP_PITCHLOCK:
+ /* Valid with SVGA_FIFO_CAP_PITCHLOCK: */
SVGA_FIFO_PITCHLOCK,
- // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3:
+ /* Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: */
SVGA_FIFO_CURSOR_ON, /* Cursor bypass 3 show/hide register */
SVGA_FIFO_CURSOR_X, /* Cursor bypass 3 x register */
SVGA_FIFO_CURSOR_Y, /* Cursor bypass 3 y register */
SVGA_FIFO_CURSOR_COUNT, /* Incremented when any of the other 3 change */
SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */
- // Valid with SVGA_FIFO_CAP_RESERVE:
+ /* Valid with SVGA_FIFO_CAP_RESERVE: */
SVGA_FIFO_RESERVED, /* Bytes past NEXT_CMD with real contents */
/*
* sets SVGA_FIFO_MIN high enough to leave room for them.
*/
- // Valid if register exists:
+ /* Valid if register exists: */
SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */
SVGA_FIFO_FENCE_GOAL, /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */
SVGA_FIFO_BUSY, /* See "FIFO Synchronization Registers" */
#define SVGA_FIFO_FLAG_NONE 0
#define SVGA_FIFO_FLAG_ACCELFRONT (1<<0)
-#define SVGA_FIFO_FLAG_RESERVED (1<<31) // Internal use only
+#define SVGA_FIFO_FLAG_RESERVED (1<<31) /* Internal use only */
/*
* FIFO reservation sentinel value
SVGA_VIDEO_DATA_OFFSET,
SVGA_VIDEO_FORMAT,
SVGA_VIDEO_COLORKEY,
- SVGA_VIDEO_SIZE, // Deprecated
+ SVGA_VIDEO_SIZE, /* Deprecated */
SVGA_VIDEO_WIDTH,
SVGA_VIDEO_HEIGHT,
SVGA_VIDEO_SRC_X,
SVGA_VIDEO_SRC_Y,
SVGA_VIDEO_SRC_WIDTH,
SVGA_VIDEO_SRC_HEIGHT,
- SVGA_VIDEO_DST_X, // Signed int32
- SVGA_VIDEO_DST_Y, // Signed int32
+ SVGA_VIDEO_DST_X, /* Signed int32 */
+ SVGA_VIDEO_DST_Y, /* Signed int32 */
SVGA_VIDEO_DST_WIDTH,
SVGA_VIDEO_DST_HEIGHT,
SVGA_VIDEO_PITCH_1,
SVGA_VIDEO_PITCH_2,
SVGA_VIDEO_PITCH_3,
- SVGA_VIDEO_DATA_GMRID, // Optional, defaults to SVGA_GMR_FRAMEBUFFER
- SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID)
+ SVGA_VIDEO_DATA_GMRID, /* Optional, defaults to SVGA_GMR_FRAMEBUFFER */
+ SVGA_VIDEO_DST_SCREEN_ID, /* Optional, defaults to virtual coords */
+ /* (SVGA_ID_INVALID) */
SVGA_VIDEO_NUM_REGS
};
* value of zero means no cloning should happen.
*/
-#define SVGA_SCREEN_MUST_BE_SET (1 << 0) // Must be set or results undefined
-#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET // Deprecated
-#define SVGA_SCREEN_IS_PRIMARY (1 << 1) // Guest considers this screen to be 'primary'
-#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2) // Guest is running a fullscreen app here
+#define SVGA_SCREEN_MUST_BE_SET (1 << 0)
+#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET /* Deprecated */
+#define SVGA_SCREEN_IS_PRIMARY (1 << 1)
+#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2)
/*
* Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2. When the screen is
typedef
struct {
- uint32 structSize; // sizeof(SVGAScreenObject)
+ uint32 structSize; /* sizeof(SVGAScreenObject) */
uint32 id;
uint32 flags;
struct {
* with SVGA_FIFO_CAP_SCREEN_OBJECT.
*/
SVGAGuestImage backingStore;
+
+ /*
+ * The cloneCount field is treated as a hint from the guest that
+ * the user wants this display to be cloned, cloneCount times.
+ *
+ * A value of zero means no cloning should happen.
+ */
uint32 cloneCount;
} SVGAScreenObject;
* Note the holes in the command ID numbers: These commands have been
* deprecated, and the old IDs must not be reused.
*
- * Command IDs from 1000 to 1999 are reserved for use by the SVGA3D
+ * Command IDs from 1000 to 2999 are reserved for use by the SVGA3D
* protocol.
*
* Each command's parameters are described by the comments and
SVGA_CMD_REMAP_GMR2 = 42,
SVGA_CMD_DEAD = 43,
SVGA_CMD_DEAD_2 = 44,
+ SVGA_CMD_NOP = 45,
+ SVGA_CMD_NOP_ERROR = 46,
SVGA_CMD_MAX
} SVGAFifoCmdId;
typedef
struct {
- uint32 id; // Reserved, must be zero.
+ uint32 id; /* Reserved, must be zero. */
uint32 hotspotX;
uint32 hotspotY;
uint32 width;
uint32 height;
- uint32 andMaskDepth; // Value must be 1 or equal to BITS_PER_PIXEL
- uint32 xorMaskDepth; // Value must be 1 or equal to BITS_PER_PIXEL
+ uint32 andMaskDepth; /* Value must be 1 or equal to BITS_PER_PIXEL */
+ uint32 xorMaskDepth; /* Value must be 1 or equal to BITS_PER_PIXEL */
/*
* Followed by scanline data for AND mask, then XOR mask.
* Each scanline is padded to a 32-bit boundary.
typedef
struct {
- uint32 id; // Reserved, must be zero.
+ uint32 id; /* Reserved, must be zero. */
uint32 hotspotX;
uint32 hotspotY;
uint32 width;
typedef
struct {
- uint32 color; // In the same format as the GFB
+ uint32 color; /* In the same format as the GFB */
uint32 x;
uint32 y;
uint32 width;
uint32 height;
- uint32 rop; // Must be SVGA_ROP_COPY
+ uint32 rop; /* Must be SVGA_ROP_COPY */
} SVGAFifoCmdFrontRopFill;
typedef
struct {
- SVGAScreenObject screen; // Variable-length according to version
+ SVGAScreenObject screen; /* Variable-length according to version */
} SVGAFifoCmdDefineScreen;
struct {
uint32 gmrId;
SVGARemapGMR2Flags flags;
- uint32 offsetPages; // offset in pages to begin remap
- uint32 numPages; // number of pages to remap
+ uint32 offsetPages; /* offset in pages to begin remap */
+ uint32 numPages; /* number of pages to remap */
/*
* Followed by additional data depending on SVGARemapGMR2Flags.
*
/*
* Size of SVGA device memory such as frame buffer and FIFO.
*/
-#define SVGA_VRAM_MIN_SIZE (4 * 640 * 480) // bytes
+#define SVGA_VRAM_MIN_SIZE (4 * 640 * 480) /* bytes */
#define SVGA_VRAM_MIN_SIZE_3D (16 * 1024 * 1024)
#define SVGA_VRAM_MAX_SIZE (128 * 1024 * 1024)
#define SVGA_MEMORY_SIZE_MAX (1024 * 1024 * 1024)
#define SVGA_GRAPHICS_MEMORY_KB_MAX (2 * 1024 * 1024)
#define SVGA_GRAPHICS_MEMORY_KB_DEFAULT (256 * 1024)
-#define SVGA_VRAM_SIZE_W2K (64 * 1024 * 1024) // 64 MB
+#define SVGA_VRAM_SIZE_W2K (64 * 1024 * 1024) /* 64 MB */
/*
* To simplify autoDetect display configuration, support a minimum of
#define SVGA_VRAM_SIZE (4 * 1024 * 1024)
#define SVGA_VRAM_SIZE_3D (64 * 1024 * 1024)
#define SVGA_FIFO_SIZE (256 * 1024)
-#define SVGA_FIFO_SIZE_3D (516 * 1024) // Bump to 516KB to workaround WDDM driver issue (see bug# 744318)
+#define SVGA_FIFO_SIZE_3D (516 * 1024)
#define SVGA_MEMORY_SIZE_DEFAULT (160 * 1024 * 1024)
#define SVGA_AUTODETECT_DEFAULT FALSE
#else
if (surface) {
struct svga_surface *s = svga_surface(surface);
swc->surface_relocation(swc, &id->sid, NULL, s->handle, flags);
- id->face = s->real_face; /* faces have the same order */
+ id->face = s->real_layer; /* faces have the same order */
id->mipmap = s->real_level;
}
else {
swc->surface_relocation(swc, &cmd->host.sid, NULL,
texture->handle, surface_flags);
- cmd->host.face = st->face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
+ cmd->host.face = st->slice; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
cmd->host.mipmap = st->base.level;
cmd->transfer = transfer;
{
SVGA3dCmdSetShader *cmd;
+ assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_PS);
+
cmd = SVGA3D_FIFOReserve(swc,
SVGA_3D_CMD_SET_SHADER, sizeof *cmd,
0);
if(!cmd)
return PIPE_ERROR_OUT_OF_MEMORY;
- swc->context_relocation(swc, &cmd->cid);
+ cmd->cid = swc->cid;
cmd->type = type;
swc->commit(swc);
if(!cmd)
return PIPE_ERROR_OUT_OF_MEMORY;
- swc->context_relocation(swc, &cmd->cid);
+ cmd->cid = swc->cid;
cmd->type = type;
swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer,
if(!cmd)
return PIPE_ERROR_OUT_OF_MEMORY;
- swc->context_relocation(swc, &cmd->cid);
+ cmd->cid = swc->cid;
cmd->type = type;
swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer,
enum pipe_error
-SVGA3D_DefineGBShader(struct svga_winsys_context *swc,
- struct svga_winsys_gb_shader *gbshader,
- SVGA3dShaderType type,
- uint32 sizeInBytes)
-{
- SVGA3dCmdDefineGBShader *cmd;
-
- assert(sizeInBytes % 4 == 0);
- assert(type == SVGA3D_SHADERTYPE_VS ||
- type == SVGA3D_SHADERTYPE_PS);
-
- cmd = SVGA3D_FIFOReserve(swc,
- SVGA_3D_CMD_DEFINE_GB_SHADER,
- sizeof *cmd,
- 1); /* one relocation */
-
- if (!cmd)
- return PIPE_ERROR_OUT_OF_MEMORY;
-
- swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader);
- cmd->type = type;
- cmd->sizeInBytes = sizeInBytes;
-
- swc->commit(swc);
-
- return PIPE_OK;
-}
-
-
-enum pipe_error
SVGA3D_BindGBShader(struct svga_winsys_context *swc,
struct svga_winsys_gb_shader *gbshader)
{
return PIPE_ERROR_OUT_OF_MEMORY;
swc->shader_relocation(swc, &cmd->shid, &cmd->mobid,
- &cmd->offsetInBytes, gbshader);
+ &cmd->offsetInBytes, gbshader, 0);
swc->commit(swc);
struct svga_winsys_gb_shader *gbshader)
{
SVGA3dCmdSetShader *cmd;
+
+ assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_PS);
cmd = SVGA3D_FIFOReserve(swc,
SVGA_3D_CMD_SET_SHADER,
if (!cmd)
return PIPE_ERROR_OUT_OF_MEMORY;
- swc->context_relocation(swc, &cmd->cid);
+ cmd->cid = swc->cid;
cmd->type = type;
if (gbshader)
- swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader);
+ swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader, 0);
else
cmd->shid = SVGA_ID_INVALID;
swc->commit(swc);
}
-enum pipe_error
-SVGA3D_DestroyGBShader(struct svga_winsys_context *swc,
- struct svga_winsys_gb_shader *gbshader)
-{
- SVGA3dCmdDestroyGBShader *cmd =
- SVGA3D_FIFOReserve(swc,
- SVGA_3D_CMD_DESTROY_GB_SHADER,
- sizeof *cmd,
- 1); /* one relocation */
-
- if (!cmd)
- return PIPE_ERROR_OUT_OF_MEMORY;
-
- swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader);
-
- swc->commit(swc);
-
- return PIPE_OK;
-}
-
-
/**
* \param flags mask of SVGA_RELOC_READ / _WRITE
*/
}
-enum pipe_error
-SVGA3D_DefineGBContext(struct svga_winsys_context *swc)
-{
- SVGA3dCmdDefineGBContext *cmd =
- SVGA3D_FIFOReserve(swc,
- SVGA_3D_CMD_DEFINE_GB_CONTEXT,
- sizeof *cmd,
- 1); /* one relocation */
-
- if (!cmd)
- return PIPE_ERROR_OUT_OF_MEMORY;
-
- swc->context_relocation(swc, &cmd->cid);
-
- swc->commit(swc);
-
- return PIPE_OK;
-}
-
-
-enum pipe_error
-SVGA3D_DestroyGBContext(struct svga_winsys_context *swc)
-{
- SVGA3dCmdDestroyGBContext *cmd =
- SVGA3D_FIFOReserve(swc,
- SVGA_3D_CMD_DESTROY_GB_CONTEXT,
- sizeof *cmd,
- 1); /* one relocation */
-
- if (!cmd)
- return PIPE_ERROR_OUT_OF_MEMORY;
-
- swc->context_relocation(swc, &cmd->cid);
-
- swc->commit(swc);
-
- return PIPE_OK;
-}
-
-
-enum pipe_error
-SVGA3D_BindGBContext(struct svga_winsys_context *swc)
-{
- SVGA3dCmdBindGBContext *cmd =
- SVGA3D_FIFOReserve(swc,
- SVGA_3D_CMD_BIND_GB_CONTEXT,
- sizeof *cmd,
- 2); /* two relocations */
-
- if (!cmd)
- return PIPE_ERROR_OUT_OF_MEMORY;
-
- swc->context_relocation(swc, &cmd->cid);
- swc->context_relocation(swc, &cmd->mobid);
- cmd->validContents = 0; /* XXX pass as a parameter? */
-
- swc->commit(swc);
-
- return PIPE_OK;
-}
-
-
-enum pipe_error
-SVGA3D_InvalidateGBContext(struct svga_winsys_context *swc)
-{
- SVGA3dCmdInvalidateGBContext *cmd =
- SVGA3D_FIFOReserve(swc,
- SVGA_3D_CMD_INVALIDATE_GB_CONTEXT,
- sizeof *cmd,
- 1); /* one relocation */
-
- if (!cmd)
- return PIPE_ERROR_OUT_OF_MEMORY;
-
- swc->context_relocation(swc, &cmd->cid);
-
- swc->commit(swc);
-
- return PIPE_OK;
-}
-
-
-
/**
* Update an image in a guest-backed surface.
* (Inform the device that the guest-contents have been updated.)
struct svga_winsys_buffer;
struct svga_winsys_surface;
struct svga_winsys_gb_shader;
+struct svga_winsys_gb_query;
/*
*/
enum pipe_error
-SVGA3D_DefineGBShader(struct svga_winsys_context *swc,
- struct svga_winsys_gb_shader *gbshader,
- SVGA3dShaderType type,
- uint32 sizeInBytes);
-
-enum pipe_error
SVGA3D_BindGBShader(struct svga_winsys_context *swc,
struct svga_winsys_gb_shader *gbshader);
struct svga_winsys_gb_shader *gbshader);
enum pipe_error
-SVGA3D_DestroyGBShader(struct svga_winsys_context *swc,
- struct svga_winsys_gb_shader *gbshader);
-
-enum pipe_error
SVGA3D_BindGBSurface(struct svga_winsys_context *swc,
struct svga_winsys_surface *surface);
enum pipe_error
-SVGA3D_DefineGBContext(struct svga_winsys_context *swc);
-
-enum pipe_error
-SVGA3D_DestroyGBContext(struct svga_winsys_context *swc);
-
-enum pipe_error
-SVGA3D_BindGBContext(struct svga_winsys_context *swc);
-
-enum pipe_error
-SVGA3D_InvalidateGBContext(struct svga_winsys_context *swc);
-
-enum pipe_error
SVGA3D_UpdateGBImage(struct svga_winsys_context *swc,
struct svga_winsys_surface *surface,
const SVGA3dBox *box,
SVGA3dQueryType type,
struct svga_winsys_buffer *buffer);
+
+
+/*
+ * VGPU10 commands
+ */
+
+enum pipe_error
+SVGA3D_vgpu10_PredCopyRegion(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *dstSurf,
+ uint32 dstSubResource,
+ struct svga_winsys_surface *srcSurf,
+ uint32 srcSubResource,
+ const SVGA3dCopyBox *box);
+
+enum pipe_error
+SVGA3D_vgpu10_PredCopy(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *dstSurf,
+ struct svga_winsys_surface *srcSurf);
+
+enum pipe_error
+SVGA3D_vgpu10_SetViewports(struct svga_winsys_context *swc,
+ unsigned count, const SVGA3dViewport *viewports);
+
+enum pipe_error
+SVGA3D_vgpu10_SetShader(struct svga_winsys_context *swc,
+ SVGA3dShaderType type,
+ struct svga_winsys_gb_shader *gbshader,
+ SVGA3dShaderId shaderId);
+
+enum pipe_error
+SVGA3D_vgpu10_SetShaderResources(struct svga_winsys_context *swc,
+ SVGA3dShaderType type,
+ uint32 startView,
+ unsigned count,
+ const SVGA3dShaderResourceViewId ids[],
+ struct svga_winsys_surface **views);
+
+enum pipe_error
+SVGA3D_vgpu10_SetSamplers(struct svga_winsys_context *swc,
+ unsigned count,
+ uint32 startSampler,
+ SVGA3dShaderType type,
+ const SVGA3dSamplerId *samplerIds);
+
+enum pipe_error
+SVGA3D_vgpu10_SetRenderTargets(struct svga_winsys_context *swc,
+ unsigned color_count,
+ struct pipe_surface **color_surfs,
+ struct pipe_surface *depth_stencil_surf);
+
+enum pipe_error
+SVGA3D_vgpu10_SetBlendState(struct svga_winsys_context *swc,
+ SVGA3dBlendStateId blendId,
+ const float *blendFactor, uint32 sampleMask);
+
+enum pipe_error
+SVGA3D_vgpu10_SetDepthStencilState(struct svga_winsys_context *swc,
+ SVGA3dDepthStencilStateId depthStencilId,
+ uint32 stencilRef);
+
+enum pipe_error
+SVGA3D_vgpu10_SetRasterizerState(struct svga_winsys_context *swc,
+ SVGA3dRasterizerStateId rasterizerId);
+
+enum pipe_error
+SVGA3D_vgpu10_SetPredication(struct svga_winsys_context *swc,
+ SVGA3dQueryId queryId,
+ uint32 predicateValue);
+
+enum pipe_error
+SVGA3D_vgpu10_SetSOTargets(struct svga_winsys_context *swc,
+ unsigned count, const SVGA3dSoTarget *targets,
+ struct svga_winsys_surface **surfaces);
+
+enum pipe_error
+SVGA3D_vgpu10_SetScissorRects(struct svga_winsys_context *swc,
+ unsigned count,
+ const SVGASignedRect *rects);
+
+enum pipe_error
+SVGA3D_vgpu10_SetStreamOutput(struct svga_winsys_context *swc,
+ SVGA3dStreamOutputId soid);
+
+enum pipe_error
+SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc,
+ uint32 vertexCount, uint32 startVertexLocation);
+
+enum pipe_error
+SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc,
+ uint32 indexCount, uint32 startIndexLocation,
+ int32 baseVertexLocation);
+
+enum pipe_error
+SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc,
+ uint32 vertexCountPerInstance,
+ uint32 instanceCount,
+ uint32 startVertexLocation,
+ uint32 startInstanceLocation);
+
+enum pipe_error
+SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc,
+ uint32 indexCountPerInstance,
+ uint32 instanceCount,
+ uint32 startIndexLocation,
+ int32 baseVertexLocation,
+ uint32 startInstanceLocation);
+
+enum pipe_error
+SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineQuery(struct svga_winsys_context *swc,
+ SVGA3dQueryId queryId,
+ SVGA3dQueryType type,
+ SVGA3dDXQueryFlags flags);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyQuery(struct svga_winsys_context *swc,
+ SVGA3dQueryId queryId);
+
+enum pipe_error
+SVGA3D_vgpu10_BindQuery(struct svga_winsys_context *swc,
+ struct svga_winsys_gb_query *gbQuery,
+ SVGA3dQueryId queryId);
+
+enum pipe_error
+SVGA3D_vgpu10_SetQueryOffset(struct svga_winsys_context *swc,
+ SVGA3dQueryId queryId,
+ uint32 mobOffset);
+
+enum pipe_error
+SVGA3D_vgpu10_BeginQuery(struct svga_winsys_context *swc,
+ SVGA3dQueryId queryId);
+
+enum pipe_error
+SVGA3D_vgpu10_EndQuery(struct svga_winsys_context *swc,
+ SVGA3dQueryId queryId);
+
+enum pipe_error
+SVGA3D_vgpu10_ClearRenderTargetView(struct svga_winsys_context *swc,
+ struct pipe_surface *color_surf,
+ const float *rgba);
+
+enum pipe_error
+SVGA3D_vgpu10_ClearDepthStencilView(struct svga_winsys_context *swc,
+ struct pipe_surface *ds_surf,
+ uint16 flags, uint16 stencil, float depth);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineShaderResourceView(struct svga_winsys_context *swc,
+ SVGA3dShaderResourceViewId shaderResourceViewId,
+ struct svga_winsys_surface *surf,
+ SVGA3dSurfaceFormat format,
+ SVGA3dResourceType resourceDimension,
+ const SVGA3dShaderResourceViewDesc *desc);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyShaderResourceView(struct svga_winsys_context *swc,
+ SVGA3dShaderResourceViewId shaderResourceViewId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineRenderTargetView(struct svga_winsys_context *swc,
+ SVGA3dRenderTargetViewId renderTargetViewId,
+ struct svga_winsys_surface *surface,
+ SVGA3dSurfaceFormat format,
+ SVGA3dResourceType resourceDimension,
+ const SVGA3dRenderTargetViewDesc *desc);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyRenderTargetView(struct svga_winsys_context *swc,
+ SVGA3dRenderTargetViewId renderTargetViewId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineDepthStencilView(struct svga_winsys_context *swc,
+ SVGA3dDepthStencilViewId depthStencilViewId,
+ struct svga_winsys_surface *surface,
+ SVGA3dSurfaceFormat format,
+ SVGA3dResourceType resourceDimension,
+ const SVGA3dRenderTargetViewDesc *desc);
+
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyDepthStencilView(struct svga_winsys_context *swc,
+ SVGA3dDepthStencilViewId depthStencilViewId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineElementLayout(struct svga_winsys_context *swc,
+ unsigned count,
+ SVGA3dElementLayoutId elementLayoutId,
+ const SVGA3dInputElementDesc *elements);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyElementLayout(struct svga_winsys_context *swc,
+ SVGA3dElementLayoutId elementLayoutId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineBlendState(struct svga_winsys_context *swc,
+ SVGA3dBlendStateId blendId,
+ uint8 alphaToCoverageEnable,
+ uint8 independentBlendEnable,
+ const SVGA3dDXBlendStatePerRT *perRT);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyBlendState(struct svga_winsys_context *swc,
+ SVGA3dBlendStateId blendId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineDepthStencilState(struct svga_winsys_context *swc,
+ SVGA3dDepthStencilStateId depthStencilId,
+ uint8 depthEnable,
+ SVGA3dDepthWriteMask depthWriteMask,
+ SVGA3dComparisonFunc depthFunc,
+ uint8 stencilEnable,
+ uint8 frontEnable,
+ uint8 backEnable,
+ uint8 stencilReadMask,
+ uint8 stencilWriteMask,
+ uint8 frontStencilFailOp,
+ uint8 frontStencilDepthFailOp,
+ uint8 frontStencilPassOp,
+ SVGA3dComparisonFunc frontStencilFunc,
+ uint8 backStencilFailOp,
+ uint8 backStencilDepthFailOp,
+ uint8 backStencilPassOp,
+ SVGA3dComparisonFunc backStencilFunc);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyDepthStencilState(struct svga_winsys_context *swc,
+ SVGA3dDepthStencilStateId depthStencilId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineRasterizerState(struct svga_winsys_context *swc,
+ SVGA3dRasterizerStateId rasterizerId,
+ uint8 fillMode,
+ SVGA3dCullMode cullMode,
+ uint8 frontCounterClockwise,
+ int32 depthBias,
+ float depthBiasClamp,
+ float slopeScaledDepthBias,
+ uint8 depthClipEnable,
+ uint8 scissorEnable,
+ uint8 multisampleEnable,
+ uint8 antialiasedLineEnable,
+ float lineWidth,
+ uint8 lineStippleEnable,
+ uint8 lineStippleFactor,
+ uint16 lineStipplePattern,
+ uint8 provokingVertexLast);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyRasterizerState(struct svga_winsys_context *swc,
+ SVGA3dRasterizerStateId rasterizerId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineSamplerState(struct svga_winsys_context *swc,
+ SVGA3dSamplerId samplerId,
+ SVGA3dFilter filter,
+ uint8 addressU,
+ uint8 addressV,
+ uint8 addressW,
+ float mipLODBias,
+ uint8 maxAnisotropy,
+ uint8 comparisonFunc,
+ SVGA3dRGBAFloat borderColor,
+ float minLOD,
+ float maxLOD);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroySamplerState(struct svga_winsys_context *swc,
+ SVGA3dSamplerId samplerId);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyShader(struct svga_winsys_context *swc,
+ SVGA3dShaderId shaderId);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineAndBindShader(struct svga_winsys_context *swc,
+ struct svga_winsys_gb_shader *gbshader,
+ SVGA3dShaderId shaderId,
+ SVGA3dShaderType type,
+ uint32 sizeInBytes);
+
+enum pipe_error
+SVGA3D_vgpu10_DefineStreamOutput(struct svga_winsys_context *swc,
+ SVGA3dStreamOutputId soid,
+ uint32 numOutputStreamEntries,
+ uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS],
+ const SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS]);
+
+enum pipe_error
+SVGA3D_vgpu10_DestroyStreamOutput(struct svga_winsys_context *swc,
+ SVGA3dStreamOutputId soid);
+
+enum pipe_error
+SVGA3D_vgpu10_ReadbackSubResource(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *surface,
+ unsigned subResource);
+
+enum pipe_error
+SVGA3D_vgpu10_SetInputLayout(struct svga_winsys_context *swc,
+ SVGA3dElementLayoutId elementLayoutId);
+
+enum pipe_error
+SVGA3D_vgpu10_SetVertexBuffers(struct svga_winsys_context *swc,
+ unsigned count,
+ uint32 startBuffer,
+ const SVGA3dVertexBuffer *bufferInfo,
+ struct svga_winsys_surface **surfaces);
+
+enum pipe_error
+SVGA3D_vgpu10_SetTopology(struct svga_winsys_context *swc,
+ SVGA3dPrimitiveType topology);
+
+enum pipe_error
+SVGA3D_vgpu10_SetIndexBuffer(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *indexes,
+ SVGA3dSurfaceFormat format, uint32 offset);
+
+enum pipe_error
+SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc,
+ unsigned slot,
+ SVGA3dShaderType type,
+ struct svga_winsys_surface *surface,
+ uint32 offsetInBytes,
+ uint32 sizeInBytes);
+
+enum pipe_error
+SVGA3D_vgpu10_UpdateSubResource(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *surface,
+ const SVGA3dBox *box,
+ unsigned subResource);
+
#endif /* __SVGA3D_H__ */
--- /dev/null
+/**********************************************************
+ * Copyright 2008-2013 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file svga_cmd_vgpu10.c
+ *
+ * Command construction utility for the vgpu10 SVGA3D protocol.
+ *
+ * \author Mingcheng Chen
+ * \author Brian Paul
+ */
+
+
+#include "svga_winsys.h"
+#include "svga_resource_buffer.h"
+#include "svga_resource_texture.h"
+#include "svga_surface.h"
+#include "svga_cmd.h"
+
+
+/**
+ * Emit a surface relocation for a RenderTargetViewId.
+ *
+ * When \p surface is NULL the relocation is still emitted, but against a
+ * NULL surface handle.  Otherwise the handle of the corresponding
+ * svga_surface is used (and asserted to be non-NULL).
+ */
+static void
+view_relocation(struct svga_winsys_context *swc, // IN
+                struct pipe_surface *surface,    // IN
+                SVGA3dRenderTargetViewId *id,    // OUT
+                unsigned flags)                  // IN
+{
+   struct svga_surface *s;
+
+   if (!surface) {
+      swc->surface_relocation(swc, id, NULL, NULL, flags);
+      return;
+   }
+
+   s = svga_surface(surface);
+   assert(s->handle);
+   swc->surface_relocation(swc, id, NULL, s->handle, flags);
+}
+
+
+/**
+ * Emit a surface relocation for a ResourceId.
+ *
+ * \p surface may be NULL; it is handed to the winsys unchanged, so a NULL
+ * surface produces a relocation against NULL.
+ */
+static void
+surface_to_resourceid(struct svga_winsys_context *swc,     // IN
+                      struct svga_winsys_surface *surface, // IN
+                      SVGA3dSurfaceId *sid,                // OUT
+                      unsigned flags)                      // IN
+{
+   swc->surface_relocation(swc, sid, NULL, surface, flags);
+}
+
+
+/*
+ * Declare a local 'cmd' of type SVGA3dCmdDX<CommandName>, reserve FIFO
+ * space for it (no relocations) and return PIPE_ERROR_OUT_OF_MEMORY from
+ * the enclosing function if the reservation fails.  Expects 'swc' in scope.
+ */
+#define SVGA3D_CREATE_COMMAND(CommandName, CommandCode) \
+SVGA3dCmdDX##CommandName *cmd; \
+{ \
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_##CommandCode, \
+                            sizeof *cmd, 0); \
+   if (cmd == NULL) { \
+      return PIPE_ERROR_OUT_OF_MEMORY; \
+   } \
+}
+
+/*
+ * Same as SVGA3D_CREATE_COMMAND, but the reservation also covers 'count'
+ * trailing elements of type ElementClassName.  Expects 'swc' and 'count'
+ * in scope and asserts that 'count' is non-zero.
+ */
+#define SVGA3D_CREATE_CMD_COUNT(CommandName, CommandCode, ElementClassName) \
+SVGA3dCmdDX##CommandName *cmd; \
+{ \
+   assert(count > 0); \
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_##CommandCode, \
+                            sizeof *cmd + \
+                            count * sizeof(ElementClassName), 0); \
+   if (cmd == NULL) { \
+      return PIPE_ERROR_OUT_OF_MEMORY; \
+   } \
+}
+
+#define SVGA3D_COPY_BASIC(VariableName) \
+{ \
+ cmd->VariableName = VariableName; \
+}
+
+#define SVGA3D_COPY_BASIC_2(VariableName1, VariableName2) \
+{ \
+ SVGA3D_COPY_BASIC(VariableName1); \
+ SVGA3D_COPY_BASIC(VariableName2); \
+}
+
+#define SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3) \
+{ \
+ SVGA3D_COPY_BASIC_2(VariableName1, VariableName2); \
+ SVGA3D_COPY_BASIC(VariableName3); \
+}
+
+#define SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \
+ VariableName4) \
+{ \
+ SVGA3D_COPY_BASIC_2(VariableName1, VariableName2); \
+ SVGA3D_COPY_BASIC_2(VariableName3, VariableName4); \
+}
+
+#define SVGA3D_COPY_BASIC_5(VariableName1, VariableName2, VariableName3, \
+ VariableName4, VariableName5) \
+{\
+ SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3); \
+ SVGA3D_COPY_BASIC_2(VariableName4, VariableName5); \
+}
+
+#define SVGA3D_COPY_BASIC_6(VariableName1, VariableName2, VariableName3, \
+ VariableName4, VariableName5, VariableName6) \
+{\
+ SVGA3D_COPY_BASIC_3(VariableName1, VariableName2, VariableName3); \
+ SVGA3D_COPY_BASIC_3(VariableName4, VariableName5, VariableName6); \
+}
+
+#define SVGA3D_COPY_BASIC_7(VariableName1, VariableName2, VariableName3, \
+ VariableName4, VariableName5, VariableName6, \
+ VariableName7) \
+{\
+ SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \
+ VariableName4); \
+ SVGA3D_COPY_BASIC_3(VariableName5, VariableName6, VariableName7); \
+}
+
+#define SVGA3D_COPY_BASIC_8(VariableName1, VariableName2, VariableName3, \
+ VariableName4, VariableName5, VariableName6, \
+ VariableName7, VariableName8) \
+{\
+ SVGA3D_COPY_BASIC_4(VariableName1, VariableName2, VariableName3, \
+ VariableName4); \
+ SVGA3D_COPY_BASIC_4(VariableName5, VariableName6, VariableName7, \
+ VariableName8); \
+}
+
+#define SVGA3D_COPY_BASIC_9(VariableName1, VariableName2, VariableName3, \
+ VariableName4, VariableName5, VariableName6, \
+ VariableName7, VariableName8, VariableName9) \
+{\
+ SVGA3D_COPY_BASIC_5(VariableName1, VariableName2, VariableName3, \
+ VariableName4, VariableName5); \
+ SVGA3D_COPY_BASIC_4(VariableName6, VariableName7, VariableName8, \
+ VariableName9); \
+}
+
+
+/**
+ * Emit a DX PRED_COPY_REGION command copying \p box from one subresource
+ * of \p srcSurf to one subresource of \p dstSurf.
+ *
+ * \return PIPE_ERROR_OUT_OF_MEMORY if FIFO space could not be reserved,
+ *         PIPE_OK otherwise.
+ */
+enum pipe_error
+SVGA3D_vgpu10_PredCopyRegion(struct svga_winsys_context *swc,
+                             struct svga_winsys_surface *dstSurf,
+                             uint32 dstSubResource,
+                             struct svga_winsys_surface *srcSurf,
+                             uint32 srcSubResource,
+                             const SVGA3dCopyBox *box)
+{
+   SVGA3dCmdDXPredCopyRegion *cmd;
+
+   /* destination + source => two relocations */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_PRED_COPY_REGION,
+                            sizeof *cmd, 2);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   swc->surface_relocation(swc, &cmd->dstSid, NULL, dstSurf, SVGA_RELOC_WRITE);
+   swc->surface_relocation(swc, &cmd->srcSid, NULL, srcSurf, SVGA_RELOC_READ);
+   cmd->dstSubResource = dstSubResource;
+   cmd->srcSubResource = srcSubResource;
+   cmd->box = *box;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a DX PRED_COPY command from \p srcSurf to \p dstSurf.
+ *
+ * \return PIPE_ERROR_OUT_OF_MEMORY if FIFO space could not be reserved,
+ *         PIPE_OK otherwise.
+ */
+enum pipe_error
+SVGA3D_vgpu10_PredCopy(struct svga_winsys_context *swc,
+                       struct svga_winsys_surface *dstSurf,
+                       struct svga_winsys_surface *srcSurf)
+{
+   SVGA3dCmdDXPredCopy *cmd;
+
+   /* destination + source => two relocations */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_PRED_COPY, sizeof *cmd, 2);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   swc->surface_relocation(swc, &cmd->dstSid, NULL, dstSurf, SVGA_RELOC_WRITE);
+   swc->surface_relocation(swc, &cmd->srcSid, NULL, srcSurf, SVGA_RELOC_READ);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_VIEWPORTS command followed by \p count viewports copied
+ * from \p viewports.  \p count must be non-zero (asserted).
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetViewports(struct svga_winsys_context *swc,
+                           unsigned count,
+                           const SVGA3dViewport *viewports)
+{
+   SVGA3dCmdDXSetViewports *cmd;
+
+   assert(count > 0);
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_VIEWPORTS,
+                            sizeof(SVGA3dCmdDXSetViewports) +
+                            count * sizeof(SVGA3dViewport),
+                            0);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* the viewport array immediately follows the fixed-size command */
+   memcpy(&cmd[1], viewports, count * sizeof(SVGA3dViewport));
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a DX SET_SHADER command binding \p shaderId for shader stage
+ * \p type.
+ *
+ * A shader relocation for \p gbshader is emitted against cmd->shaderId
+ * first; the field is then overwritten with the caller's \p shaderId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetShader(struct svga_winsys_context *swc,
+                        SVGA3dShaderType type,
+                        struct svga_winsys_gb_shader *gbshader,
+                        SVGA3dShaderId shaderId)
+{
+   SVGA3dCmdDXSetShader *cmd;
+
+   /* one relocation */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SHADER,
+                            sizeof(SVGA3dCmdDXSetShader), 1);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   swc->shader_relocation(swc, &cmd->shaderId, NULL, NULL, gbshader, 0);
+
+   cmd->type = type;
+   cmd->shaderId = shaderId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a DX SET_SHADER_RESOURCES command binding \p count shader resource
+ * views starting at slot \p startView for shader stage \p type.
+ *
+ * For each entry a read relocation is emitted for views[i] against the
+ * id slot, after which the slot is overwritten with ids[i].
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetShaderResources(struct svga_winsys_context *swc,
+                                 SVGA3dShaderType type,
+                                 uint32 startView,
+                                 unsigned count,
+                                 const SVGA3dShaderResourceViewId ids[],
+                                 struct svga_winsys_surface **views)
+{
+   SVGA3dCmdDXSetShaderResources *cmd;
+   SVGA3dShaderResourceViewId *slot;
+   unsigned idx;
+
+   /* one relocation per view */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SHADER_RESOURCES,
+                            sizeof *cmd +
+                            count * sizeof(SVGA3dShaderResourceViewId),
+                            count);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->type = type;
+   cmd->startView = startView;
+
+   /* the id array immediately follows the fixed-size command */
+   slot = (SVGA3dShaderResourceViewId *) &cmd[1];
+   for (idx = 0; idx < count; idx++) {
+      swc->surface_relocation(swc, &slot[idx], NULL, views[idx],
+                              SVGA_RELOC_READ);
+      slot[idx] = ids[idx];
+   }
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a DX SET_SAMPLERS command followed by \p count sampler ids copied
+ * from \p samplerIds.  \p count must be non-zero (asserted).
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetSamplers(struct svga_winsys_context *swc,
+                          unsigned count,
+                          uint32 startSampler,
+                          SVGA3dShaderType type,
+                          const SVGA3dSamplerId *samplerIds)
+{
+   SVGA3dCmdDXSetSamplers *cmd;
+
+   assert(count > 0);
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SAMPLERS,
+                            sizeof(SVGA3dCmdDXSetSamplers) +
+                            count * sizeof(SVGA3dSamplerId),
+                            0);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->startSampler = startSampler;
+   cmd->type = type;
+   memcpy(&cmd[1], samplerIds, count * sizeof(SVGA3dSamplerId));
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a DX CLEAR_RENDERTARGET_VIEW command clearing \p color_surf to the
+ * four-component color \p rgba.
+ */
+enum pipe_error
+SVGA3D_vgpu10_ClearRenderTargetView(struct svga_winsys_context *swc,
+                                    struct pipe_surface *color_surf,
+                                    const float *rgba)
+{
+   struct svga_surface *ss = svga_surface(color_surf);
+   SVGA3dCmdDXClearRenderTargetView *cmd;
+
+   /* one relocation */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW,
+                            sizeof *cmd, 1);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* NOTE: order matters here.  The view/surface relocation must be given
+    * a pointer to an ID that lives inside the command space we just
+    * reserved; only afterwards is that ID overwritten with the real
+    * RenderTargetViewId.
+    */
+   view_relocation(swc, color_surf, &cmd->renderTargetViewId,
+                   SVGA_RELOC_WRITE);
+   cmd->renderTargetViewId = ss->view_id;
+
+   COPY_4V(cmd->rgba.value, rgba);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a DX SET_RENDERTARGETS command binding \p color_count color views
+ * plus an optional depth/stencil view.
+ *
+ * A relocation is emitted for every slot, including NULL ones; NULL slots
+ * get SVGA3D_INVALID_ID as their view id.  In each case the relocation is
+ * emitted first and the slot is then overwritten with the view id.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetRenderTargets(struct svga_winsys_context *swc,
+                               unsigned color_count,
+                               struct pipe_surface **color_surfs,
+                               struct pipe_surface *depth_stencil_surf)
+{
+   /* one relocation per color buffer plus one for depth/stencil */
+   const unsigned surf_count = color_count + 1;
+   SVGA3dCmdDXSetRenderTargets *cmd;
+   SVGA3dRenderTargetViewId *color_ids;
+   unsigned idx;
+
+   assert(surf_count > 0);
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_RENDERTARGETS,
+                            sizeof *cmd +
+                            color_count * sizeof(SVGA3dRenderTargetViewId),
+                            surf_count);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* Depth / Stencil buffer */
+   if (!depth_stencil_surf) {
+      /* no depth/stencil buffer - still need a relocation */
+      view_relocation(swc, NULL, &cmd->depthStencilViewId,
+                      SVGA_RELOC_WRITE);
+      cmd->depthStencilViewId = SVGA3D_INVALID_ID;
+   }
+   else {
+      struct svga_surface *ds = svga_surface(depth_stencil_surf);
+
+      view_relocation(swc, depth_stencil_surf, &cmd->depthStencilViewId,
+                      SVGA_RELOC_WRITE);
+      cmd->depthStencilViewId = ds->view_id;
+   }
+
+   /* Color buffers: the id array follows the fixed-size command */
+   color_ids = (SVGA3dRenderTargetViewId *) (cmd + 1);
+   for (idx = 0; idx < color_count; idx++) {
+      struct pipe_surface *surf = color_surfs[idx];
+
+      if (!surf) {
+         view_relocation(swc, NULL, &color_ids[idx], SVGA_RELOC_WRITE);
+         color_ids[idx] = SVGA3D_INVALID_ID;
+      }
+      else {
+         struct svga_surface *cs = svga_surface(surf);
+
+         view_relocation(swc, surf, &color_ids[idx], SVGA_RELOC_WRITE);
+         color_ids[idx] = cs->view_id;
+      }
+   }
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a DX SET_BLEND_STATE command.  \p blendFactor must point to four
+ * floats, which are copied into the command.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetBlendState(struct svga_winsys_context *swc,
+                            SVGA3dBlendStateId blendId,
+                            const float *blendFactor,
+                            uint32 sampleMask)
+{
+   SVGA3dCmdDXSetBlendState *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_BLEND_STATE,
+                         sizeof(SVGA3dCmdDXSetBlendState), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->blendId = blendId;
+   cmd->sampleMask = sampleMask;
+   memcpy(cmd->blendFactor, blendFactor, sizeof(float) * 4);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_DEPTHSTENCIL_STATE command with the given state id and
+ * stencil reference value.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetDepthStencilState(struct svga_winsys_context *swc,
+                                   SVGA3dDepthStencilStateId depthStencilId,
+                                   uint32 stencilRef)
+{
+   SVGA3dCmdDXSetDepthStencilState *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE,
+                         sizeof(SVGA3dCmdDXSetDepthStencilState), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->depthStencilId = depthStencilId;
+   cmd->stencilRef = stencilRef;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_RASTERIZER_STATE command for \p rasterizerId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetRasterizerState(struct svga_winsys_context *swc,
+                                 SVGA3dRasterizerStateId rasterizerId)
+{
+   SVGA3dCmdDXSetRasterizerState *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_RASTERIZER_STATE,
+                         sizeof(SVGA3dCmdDXSetRasterizerState), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(rasterizerId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_PREDICATION command with the given query id and
+ * predicate value.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetPredication(struct svga_winsys_context *swc,
+                             SVGA3dQueryId queryId,
+                             uint32 predicateValue)
+{
+   SVGA3D_CREATE_COMMAND(SetPredication, SET_PREDICATION);
+
+   SVGA3D_COPY_BASIC_2(queryId, predicateValue);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_SOTARGETS command with \p count stream-output targets.
+ *
+ * A write relocation is emitted for every entry.  Entries whose surface
+ * is NULL get offset 0 and sizeInBytes ~0u and are relocated against NULL.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetSOTargets(struct svga_winsys_context *swc,
+                           unsigned count,
+                           const SVGA3dSoTarget *targets,
+                           struct svga_winsys_surface **surfaces)
+{
+   SVGA3dCmdDXSetSOTargets *cmd;
+   SVGA3dSoTarget *out;
+   unsigned idx;
+
+   /* one relocation per target */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SOTARGETS,
+                            sizeof *cmd + count * sizeof(SVGA3dSoTarget),
+                            count);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->pad0 = 0;
+
+   /* the target array immediately follows the fixed-size command */
+   out = (SVGA3dSoTarget *) &cmd[1];
+   for (idx = 0; idx < count; idx++) {
+      struct svga_winsys_surface *surf = surfaces[idx];
+
+      if (!surf) {
+         out[idx].offset = 0;
+         out[idx].sizeInBytes = ~0u;
+      }
+      else {
+         out[idx].offset = targets[idx].offset;
+         out[idx].sizeInBytes = targets[idx].sizeInBytes;
+      }
+      swc->surface_relocation(swc, &out[idx].sid, NULL, surf,
+                              SVGA_RELOC_WRITE);
+   }
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_SCISSORRECTS command followed by \p count rectangles
+ * copied from \p rects.  \p count must be non-zero (asserted).
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetScissorRects(struct svga_winsys_context *swc,
+                              unsigned count,
+                              const SVGASignedRect *rects)
+{
+   SVGA3D_CREATE_CMD_COUNT(SetScissorRects, SET_SCISSORRECTS, SVGASignedRect);
+
+   /* the rect array immediately follows the fixed-size command */
+   memcpy(&cmd[1], rects, count * sizeof(SVGASignedRect));
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_STREAMOUTPUT command for \p soid.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetStreamOutput(struct svga_winsys_context *swc,
+                              SVGA3dStreamOutputId soid)
+{
+   SVGA3dCmdDXSetStreamOutput *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_STREAMOUTPUT,
+                         sizeof(SVGA3dCmdDXSetStreamOutput), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(soid);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DRAW command for \p vertexCount vertices starting at
+ * \p startVertexLocation.
+ */
+enum pipe_error
+SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc,
+                   uint32 vertexCount,
+                   uint32 startVertexLocation)
+{
+   SVGA3dCmdDXDraw *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DRAW,
+                         sizeof(SVGA3dCmdDXDraw), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->vertexCount = vertexCount;
+   cmd->startVertexLocation = startVertexLocation;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DRAW_INDEXED command.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc,
+                          uint32 indexCount,
+                          uint32 startIndexLocation,
+                          int32 baseVertexLocation)
+{
+   SVGA3dCmdDXDrawIndexed *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DRAW_INDEXED,
+                         sizeof(SVGA3dCmdDXDrawIndexed), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->indexCount = indexCount;
+   cmd->startIndexLocation = startIndexLocation;
+   cmd->baseVertexLocation = baseVertexLocation;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DRAW_INSTANCED command.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc,
+                            uint32 vertexCountPerInstance,
+                            uint32 instanceCount,
+                            uint32 startVertexLocation,
+                            uint32 startInstanceLocation)
+{
+   SVGA3dCmdDXDrawInstanced *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DRAW_INSTANCED,
+                         sizeof(SVGA3dCmdDXDrawInstanced), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->vertexCountPerInstance = vertexCountPerInstance;
+   cmd->instanceCount = instanceCount;
+   cmd->startVertexLocation = startVertexLocation;
+   cmd->startInstanceLocation = startInstanceLocation;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DRAW_INDEXED_INSTANCED command.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc,
+                                   uint32 indexCountPerInstance,
+                                   uint32 instanceCount,
+                                   uint32 startIndexLocation,
+                                   int32 baseVertexLocation,
+                                   uint32 startInstanceLocation)
+{
+   SVGA3dCmdDXDrawIndexedInstanced *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED,
+                         sizeof(SVGA3dCmdDXDrawIndexedInstanced), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->indexCountPerInstance = indexCountPerInstance;
+   cmd->instanceCount = instanceCount;
+   cmd->startIndexLocation = startIndexLocation;
+   cmd->baseVertexLocation = baseVertexLocation;
+   cmd->startInstanceLocation = startInstanceLocation;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DRAW_AUTO command.  No command fields are written here; the
+ * reserved space is committed as-is.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc)
+{
+   SVGA3dCmdDXDrawAuto *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DRAW_AUTO,
+                         sizeof(SVGA3dCmdDXDrawAuto), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DEFINE_QUERY command with the given id, type and flags.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineQuery(struct svga_winsys_context *swc,
+                          SVGA3dQueryId queryId,
+                          SVGA3dQueryType type,
+                          SVGA3dDXQueryFlags flags)
+{
+   SVGA3dCmdDXDefineQuery *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_QUERY,
+                         sizeof(SVGA3dCmdDXDefineQuery), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->queryId = queryId;
+   cmd->type = type;
+   cmd->flags = flags;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DESTROY_QUERY command for \p queryId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyQuery(struct svga_winsys_context *swc,
+                           SVGA3dQueryId queryId)
+{
+   SVGA3dCmdDXDestroyQuery *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_QUERY,
+                         sizeof(SVGA3dCmdDXDestroyQuery), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(queryId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX BIND_QUERY command for \p queryId, with a query relocation
+ * for \p gbQuery emitted against the command's mobid field.
+ */
+enum pipe_error
+SVGA3D_vgpu10_BindQuery(struct svga_winsys_context *swc,
+                        struct svga_winsys_gb_query *gbQuery,
+                        SVGA3dQueryId queryId)
+{
+   SVGA3dCmdDXBindQuery *cmd;
+
+   /* one relocation */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_BIND_QUERY,
+                            sizeof(SVGA3dCmdDXBindQuery), 1);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->queryId = queryId;
+   swc->query_relocation(swc, &cmd->mobid, gbQuery);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_QUERY_OFFSET command with the given query id and offset.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetQueryOffset(struct svga_winsys_context *swc,
+                             SVGA3dQueryId queryId,
+                             uint32 mobOffset)
+{
+   SVGA3dCmdDXSetQueryOffset *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_QUERY_OFFSET,
+                         sizeof(SVGA3dCmdDXSetQueryOffset), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->queryId = queryId;
+   cmd->mobOffset = mobOffset;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX BEGIN_QUERY command for \p queryId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_BeginQuery(struct svga_winsys_context *swc,
+                         SVGA3dQueryId queryId)
+{
+   SVGA3dCmdDXBeginQuery *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_BEGIN_QUERY,
+                         sizeof(SVGA3dCmdDXBeginQuery), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(queryId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX END_QUERY command for \p queryId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_EndQuery(struct svga_winsys_context *swc,
+                       SVGA3dQueryId queryId)
+{
+   SVGA3dCmdDXEndQuery *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_END_QUERY,
+                         sizeof(SVGA3dCmdDXEndQuery), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(queryId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a DX CLEAR_DEPTHSTENCIL_VIEW command for \p ds_surf with the given
+ * clear flags, stencil value and depth value.
+ */
+enum pipe_error
+SVGA3D_vgpu10_ClearDepthStencilView(struct svga_winsys_context *swc,
+                                    struct pipe_surface *ds_surf,
+                                    uint16 flags,
+                                    uint16 stencil,
+                                    float depth)
+{
+   struct svga_surface *ss = svga_surface(ds_surf);
+   SVGA3dCmdDXClearDepthStencilView *cmd;
+
+   /* one relocation */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW,
+                            sizeof *cmd, 1);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* NOTE: order matters here.  The view/surface relocation must be given
+    * a pointer to an ID that lives inside the command space we just
+    * reserved; only afterwards is that ID overwritten with the real
+    * DepthStencilViewId.
+    */
+   view_relocation(swc, ds_surf, &cmd->depthStencilViewId,
+                   SVGA_RELOC_WRITE);
+   cmd->depthStencilViewId = ss->view_id;
+
+   SVGA3D_COPY_BASIC_3(flags, stencil, depth);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DEFINE_SHADERRESOURCE_VIEW command describing a view of
+ * \p surface.  \p desc is copied by value into the command.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineShaderResourceView(struct svga_winsys_context *swc,
+                             SVGA3dShaderResourceViewId shaderResourceViewId,
+                             struct svga_winsys_surface *surface,
+                             SVGA3dSurfaceFormat format,
+                             SVGA3dResourceType resourceDimension,
+                             const SVGA3dShaderResourceViewDesc *desc)
+{
+   SVGA3dCmdDXDefineShaderResourceView *cmd;
+
+   /* one relocation */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW,
+                            sizeof *cmd, 1);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->shaderResourceViewId = shaderResourceViewId;
+   cmd->format = format;
+   cmd->resourceDimension = resourceDimension;
+
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface,
+                           SVGA_RELOC_READ);
+
+   cmd->desc = *desc;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DESTROY_SHADERRESOURCE_VIEW command for
+ * \p shaderResourceViewId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyShaderResourceView(struct svga_winsys_context *swc,
+                             SVGA3dShaderResourceViewId shaderResourceViewId)
+{
+   SVGA3dCmdDXDestroyShaderResourceView *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW,
+                         sizeof(SVGA3dCmdDXDestroyShaderResourceView), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(shaderResourceViewId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a DX DEFINE_RENDERTARGET_VIEW command describing a view of
+ * \p surface.  \p desc is copied by value into the command and the
+ * surface is relocated with both read and write flags.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineRenderTargetView(struct svga_winsys_context *swc,
+                                 SVGA3dRenderTargetViewId renderTargetViewId,
+                                 struct svga_winsys_surface *surface,
+                                 SVGA3dSurfaceFormat format,
+                                 SVGA3dResourceType resourceDimension,
+                                 const SVGA3dRenderTargetViewDesc *desc)
+{
+   const unsigned reloc_flags = SVGA_RELOC_READ | SVGA_RELOC_WRITE;
+   SVGA3dCmdDXDefineRenderTargetView *cmd;
+
+   /* one relocation */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW,
+                            sizeof *cmd, 1);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->renderTargetViewId = renderTargetViewId;
+   cmd->format = format;
+   cmd->resourceDimension = resourceDimension;
+   cmd->desc = *desc;
+
+   surface_to_resourceid(swc, surface, &cmd->sid, reloc_flags);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DESTROY_RENDERTARGET_VIEW command for \p renderTargetViewId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyRenderTargetView(struct svga_winsys_context *swc,
+                                 SVGA3dRenderTargetViewId renderTargetViewId)
+{
+   SVGA3dCmdDXDestroyRenderTargetView *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW,
+                         sizeof(SVGA3dCmdDXDestroyRenderTargetView), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(renderTargetViewId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a DX DEFINE_DEPTHSTENCIL_VIEW command describing a view of
+ * \p surface.  Only the 'tex' member of \p desc is read (mipSlice,
+ * firstArraySlice, arraySize).
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineDepthStencilView(struct svga_winsys_context *swc,
+                                 SVGA3dDepthStencilViewId depthStencilViewId,
+                                 struct svga_winsys_surface *surface,
+                                 SVGA3dSurfaceFormat format,
+                                 SVGA3dResourceType resourceDimension,
+                                 const SVGA3dRenderTargetViewDesc *desc)
+{
+   const unsigned reloc_flags = SVGA_RELOC_READ | SVGA_RELOC_WRITE;
+   SVGA3dCmdDXDefineDepthStencilView *cmd;
+
+   /* one relocation */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW,
+                            sizeof *cmd, 1);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->depthStencilViewId = depthStencilViewId;
+   cmd->format = format;
+   cmd->resourceDimension = resourceDimension;
+   cmd->mipSlice = desc->tex.mipSlice;
+   cmd->firstArraySlice = desc->tex.firstArraySlice;
+   cmd->arraySize = desc->tex.arraySize;
+
+   surface_to_resourceid(swc, surface, &cmd->sid, reloc_flags);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DESTROY_DEPTHSTENCIL_VIEW command for \p depthStencilViewId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyDepthStencilView(struct svga_winsys_context *swc,
+                                 SVGA3dDepthStencilViewId depthStencilViewId)
+{
+   SVGA3dCmdDXDestroyDepthStencilView *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW,
+                         sizeof(SVGA3dCmdDXDestroyDepthStencilView), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(depthStencilViewId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DEFINE_ELEMENTLAYOUT command followed by \p count input
+ * element descriptors copied from \p elements.
+ *
+ * In debug builds every element's alignedByteOffset is asserted to be a
+ * multiple of four.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineElementLayout(struct svga_winsys_context *swc,
+                                  unsigned count,
+                                  SVGA3dElementLayoutId elementLayoutId,
+                                  const SVGA3dInputElementDesc *elements)
+{
+   const unsigned elements_size = count * sizeof(SVGA3dInputElementDesc);
+   SVGA3dCmdDXDefineElementLayout *cmd;
+   unsigned idx;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT,
+                            sizeof(SVGA3dCmdDXDefineElementLayout) +
+                            elements_size, 0);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* check that all offsets are multiples of four */
+   for (idx = 0; idx < count; idx++) {
+      assert(elements[idx].alignedByteOffset % 4 == 0);
+   }
+   (void) idx; /* silence unused var in release build */
+
+   cmd->elementLayoutId = elementLayoutId;
+   memcpy(&cmd[1], elements, elements_size);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DESTROY_ELEMENTLAYOUT command for \p elementLayoutId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyElementLayout(struct svga_winsys_context *swc,
+                                   SVGA3dElementLayoutId elementLayoutId)
+{
+   SVGA3dCmdDXDestroyElementLayout *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT,
+                         sizeof(SVGA3dCmdDXDestroyElementLayout), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(elementLayoutId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DEFINE_BLEND_STATE command.
+ *
+ * \p perRT must point to at least sizeof(cmd->perRT) bytes of per-render-
+ * target state; that many bytes are copied into the command.  The pad0
+ * field is zeroed.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineBlendState(struct svga_winsys_context *swc,
+                               SVGA3dBlendStateId blendId,
+                               uint8 alphaToCoverageEnable,
+                               uint8 independentBlendEnable,
+                               const SVGA3dDXBlendStatePerRT *perRT)
+{
+   SVGA3dCmdDXDefineBlendState *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_BLEND_STATE,
+                         sizeof(SVGA3dCmdDXDefineBlendState), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC_3(blendId, alphaToCoverageEnable,
+                       independentBlendEnable);
+   memcpy(cmd->perRT, perRT, sizeof(cmd->perRT));
+   cmd->pad0 = 0;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DESTROY_BLEND_STATE command for \p blendId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyBlendState(struct svga_winsys_context *swc,
+                                SVGA3dBlendStateId blendId)
+{
+   SVGA3dCmdDXDestroyBlendState *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_BLEND_STATE,
+                         sizeof(SVGA3dCmdDXDestroyBlendState), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(blendId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DEFINE_DEPTHSTENCIL_STATE command.  Every argument is copied
+ * into the command field of the same name.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineDepthStencilState(struct svga_winsys_context *swc,
+                                  SVGA3dDepthStencilStateId depthStencilId,
+                                  uint8 depthEnable,
+                                  SVGA3dDepthWriteMask depthWriteMask,
+                                  SVGA3dComparisonFunc depthFunc,
+                                  uint8 stencilEnable,
+                                  uint8 frontEnable,
+                                  uint8 backEnable,
+                                  uint8 stencilReadMask,
+                                  uint8 stencilWriteMask,
+                                  uint8 frontStencilFailOp,
+                                  uint8 frontStencilDepthFailOp,
+                                  uint8 frontStencilPassOp,
+                                  SVGA3dComparisonFunc frontStencilFunc,
+                                  uint8 backStencilFailOp,
+                                  uint8 backStencilDepthFailOp,
+                                  uint8 backStencilPassOp,
+                                  SVGA3dComparisonFunc backStencilFunc)
+{
+   SVGA3dCmdDXDefineDepthStencilState *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE,
+                         sizeof(SVGA3dCmdDXDefineDepthStencilState), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* depth test and global stencil state */
+   cmd->depthStencilId = depthStencilId;
+   cmd->depthEnable = depthEnable;
+   cmd->depthWriteMask = depthWriteMask;
+   cmd->depthFunc = depthFunc;
+   cmd->stencilEnable = stencilEnable;
+   cmd->frontEnable = frontEnable;
+   cmd->backEnable = backEnable;
+   cmd->stencilReadMask = stencilReadMask;
+   cmd->stencilWriteMask = stencilWriteMask;
+
+   /* front-face stencil ops */
+   cmd->frontStencilFailOp = frontStencilFailOp;
+   cmd->frontStencilDepthFailOp = frontStencilDepthFailOp;
+   cmd->frontStencilPassOp = frontStencilPassOp;
+   cmd->frontStencilFunc = frontStencilFunc;
+
+   /* back-face stencil ops */
+   cmd->backStencilFailOp = backStencilFailOp;
+   cmd->backStencilDepthFailOp = backStencilDepthFailOp;
+   cmd->backStencilPassOp = backStencilPassOp;
+   cmd->backStencilFunc = backStencilFunc;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DESTROY_DEPTHSTENCIL_STATE command for \p depthStencilId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyDepthStencilState(struct svga_winsys_context *swc,
+                                  SVGA3dDepthStencilStateId depthStencilId)
+{
+   SVGA3dCmdDXDestroyDepthStencilState *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE,
+                         sizeof(SVGA3dCmdDXDestroyDepthStencilState), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(depthStencilId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DEFINE_RASTERIZER_STATE command.  Every argument is copied
+ * into the command field of the same name.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineRasterizerState(struct svga_winsys_context *swc,
+                                    SVGA3dRasterizerStateId rasterizerId,
+                                    uint8 fillMode,
+                                    SVGA3dCullMode cullMode,
+                                    uint8 frontCounterClockwise,
+                                    int32 depthBias,
+                                    float depthBiasClamp,
+                                    float slopeScaledDepthBias,
+                                    uint8 depthClipEnable,
+                                    uint8 scissorEnable,
+                                    uint8 multisampleEnable,
+                                    uint8 antialiasedLineEnable,
+                                    float lineWidth,
+                                    uint8 lineStippleEnable,
+                                    uint8 lineStippleFactor,
+                                    uint16 lineStipplePattern,
+                                    uint8 provokingVertexLast)
+{
+   SVGA3dCmdDXDefineRasterizerState *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE,
+                         sizeof(SVGA3dCmdDXDefineRasterizerState), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->rasterizerId = rasterizerId;
+   cmd->fillMode = fillMode;
+   cmd->cullMode = cullMode;
+   cmd->frontCounterClockwise = frontCounterClockwise;
+   cmd->depthBias = depthBias;
+   cmd->depthBiasClamp = depthBiasClamp;
+   cmd->slopeScaledDepthBias = slopeScaledDepthBias;
+   cmd->depthClipEnable = depthClipEnable;
+   cmd->scissorEnable = scissorEnable;
+   cmd->multisampleEnable = multisampleEnable;
+   cmd->antialiasedLineEnable = antialiasedLineEnable;
+   SVGA3D_COPY_BASIC_5(lineWidth, lineStippleEnable,
+                       lineStippleFactor, lineStipplePattern,
+                       provokingVertexLast);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DESTROY_RASTERIZER_STATE command for \p rasterizerId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyRasterizerState(struct svga_winsys_context *swc,
+                                     SVGA3dRasterizerStateId rasterizerId)
+{
+   SVGA3dCmdDXDestroyRasterizerState *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE,
+                         sizeof(SVGA3dCmdDXDestroyRasterizerState), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(rasterizerId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DEFINE_SAMPLER_STATE command.  Every argument is copied into
+ * the command field of the same name; \p borderColor is copied by value.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineSamplerState(struct svga_winsys_context *swc,
+                                 SVGA3dSamplerId samplerId,
+                                 SVGA3dFilter filter,
+                                 uint8 addressU,
+                                 uint8 addressV,
+                                 uint8 addressW,
+                                 float mipLODBias,
+                                 uint8 maxAnisotropy,
+                                 uint8 comparisonFunc,
+                                 SVGA3dRGBAFloat borderColor,
+                                 float minLOD,
+                                 float maxLOD)
+{
+   SVGA3dCmdDXDefineSamplerState *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE,
+                         sizeof(SVGA3dCmdDXDefineSamplerState), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->samplerId = samplerId;
+   cmd->filter = filter;
+   cmd->addressU = addressU;
+   cmd->addressV = addressV;
+   cmd->addressW = addressW;
+   cmd->mipLODBias = mipLODBias;
+   cmd->maxAnisotropy = maxAnisotropy;
+   cmd->comparisonFunc = comparisonFunc;
+   cmd->borderColor = borderColor;
+   cmd->minLOD = minLOD;
+   cmd->maxLOD = maxLOD;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DESTROY_SAMPLER_STATE command for \p samplerId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroySamplerState(struct svga_winsys_context *swc,
+                                  SVGA3dSamplerId samplerId)
+{
+   SVGA3dCmdDXDestroySamplerState *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE,
+                         sizeof(SVGA3dCmdDXDestroySamplerState), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(samplerId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a DX DEFINE_SHADER command immediately followed by a DX
+ * BIND_SHADER command, using a single reservation.
+ *
+ * Unlike most functions in this file this calls swc->reserve() directly
+ * and writes both command headers by hand.  The bind command carries a
+ * shader relocation for \p gbshader against its mobid and offsetInBytes
+ * fields.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineAndBindShader(struct svga_winsys_context *swc,
+                                  struct svga_winsys_gb_shader *gbshader,
+                                  SVGA3dShaderId shaderId,
+                                  SVGA3dShaderType type,
+                                  uint32 sizeInBytes)
+{
+   SVGA3dCmdHeader *define_hdr, *bind_hdr;
+   SVGA3dCmdDXDefineShader *define_cmd;
+   SVGA3dCmdDXBindShader *bind_cmd;
+   unsigned totalSize = 2 * sizeof(SVGA3dCmdHeader) +
+                        sizeof(SVGA3dCmdDXDefineShader) +
+                        sizeof(SVGA3dCmdDXBindShader);
+
+   /* Make sure there is room for both commands */
+   define_hdr = swc->reserve(swc, totalSize, 2);
+   if (define_hdr == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* DXDefineShader command: header, then body */
+   define_hdr->id = SVGA_3D_CMD_DX_DEFINE_SHADER;
+   define_hdr->size = sizeof(SVGA3dCmdDXDefineShader);
+   define_cmd = (SVGA3dCmdDXDefineShader *) &define_hdr[1];
+   define_cmd->shaderId = shaderId;
+   define_cmd->type = type;
+   define_cmd->sizeInBytes = sizeInBytes;
+
+   /* DXBindShader command: starts right after the define body */
+   bind_hdr = (SVGA3dCmdHeader *) &define_cmd[1];
+   bind_hdr->id = SVGA_3D_CMD_DX_BIND_SHADER;
+   bind_hdr->size = sizeof(SVGA3dCmdDXBindShader);
+   bind_cmd = (SVGA3dCmdDXBindShader *) &bind_hdr[1];
+
+   bind_cmd->cid = swc->cid;
+   swc->shader_relocation(swc, NULL, &bind_cmd->mobid,
+                          &bind_cmd->offsetInBytes, gbshader, 0);
+
+   bind_cmd->shid = shaderId;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DESTROY_SHADER command for \p shaderId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyShader(struct svga_winsys_context *swc,
+                            SVGA3dShaderId shaderId)
+{
+   SVGA3dCmdDXDestroyShader *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_SHADER,
+                         sizeof(SVGA3dCmdDXDestroyShader), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(shaderId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DEFINE_STREAMOUTPUT command.
+ *
+ * All entries of the command's stride array are filled from
+ * \p streamOutputStrideInBytes, and SVGA3D_MAX_STREAMOUT_DECLS declaration
+ * entries are copied from \p decl regardless of
+ * \p numOutputStreamEntries, so both arrays must be full-sized.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DefineStreamOutput(struct svga_winsys_context *swc,
+       SVGA3dStreamOutputId soid,
+       uint32 numOutputStreamEntries,
+       uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS],
+       const SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_STREAMOUT_DECLS])
+{
+   SVGA3dCmdDXDefineStreamOutput *cmd;
+   unsigned idx;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT,
+                            sizeof(SVGA3dCmdDXDefineStreamOutput), 0);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC_2(soid, numOutputStreamEntries);
+
+   for (idx = 0; idx < Elements(cmd->streamOutputStrideInBytes); idx++) {
+      cmd->streamOutputStrideInBytes[idx] = streamOutputStrideInBytes[idx];
+   }
+
+   memcpy(cmd->decl, decl,
+          sizeof(SVGA3dStreamOutputDeclarationEntry)
+          * SVGA3D_MAX_STREAMOUT_DECLS);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX DESTROY_STREAMOUTPUT command for \p soid.
+ */
+enum pipe_error
+SVGA3D_vgpu10_DestroyStreamOutput(struct svga_winsys_context *swc,
+                                  SVGA3dStreamOutputId soid)
+{
+   SVGA3dCmdDXDestroyStreamOutput *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT,
+                         sizeof(SVGA3dCmdDXDestroyStreamOutput), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(soid);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_INPUT_LAYOUT command for \p elementLayoutId.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetInputLayout(struct svga_winsys_context *swc,
+                             SVGA3dElementLayoutId elementLayoutId)
+{
+   SVGA3dCmdDXSetInputLayout *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_INPUT_LAYOUT,
+                         sizeof(SVGA3dCmdDXSetInputLayout), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(elementLayoutId);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_VERTEX_BUFFERS command binding \p count vertex buffers
+ * starting at slot \p startBuffer.
+ *
+ * Stride and offset come from bufferInfo[i] and are asserted to be
+ * multiples of four; each buffer's sid is filled in through a read
+ * relocation against surfaces[i].  \p count must be non-zero (asserted).
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetVertexBuffers(struct svga_winsys_context *swc,
+                               unsigned count,
+                               uint32 startBuffer,
+                               const SVGA3dVertexBuffer *bufferInfo,
+                               struct svga_winsys_surface **surfaces)
+{
+   SVGA3dCmdDXSetVertexBuffers *cmd;
+   SVGA3dVertexBuffer *out;
+   unsigned idx;
+
+   assert(count > 0);
+
+   /* one relocation per buffer */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS,
+                            sizeof *cmd + count * sizeof(SVGA3dVertexBuffer),
+                            count);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   cmd->startBuffer = startBuffer;
+
+   /* the buffer array immediately follows the fixed-size command */
+   out = (SVGA3dVertexBuffer *) (cmd + 1);
+   for (idx = 0; idx < count; idx++) {
+      SVGA3dVertexBuffer *vb = &out[idx];
+
+      vb->stride = bufferInfo[idx].stride;
+      vb->offset = bufferInfo[idx].offset;
+      assert(vb->stride % 4 == 0);
+      assert(vb->offset % 4 == 0);
+      swc->surface_relocation(swc, &vb->sid, NULL, surfaces[idx],
+                              SVGA_RELOC_READ);
+   }
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_TOPOLOGY command for primitive type \p topology.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetTopology(struct svga_winsys_context *swc,
+                          SVGA3dPrimitiveType topology)
+{
+   SVGA3dCmdDXSetTopology *cmd =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_TOPOLOGY,
+                         sizeof(SVGA3dCmdDXSetTopology), 0);
+
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC(topology);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_INDEX_BUFFER command.  The buffer's sid is filled in
+ * through a read relocation against \p indexes.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetIndexBuffer(struct svga_winsys_context *swc,
+                             struct svga_winsys_surface *indexes,
+                             SVGA3dSurfaceFormat format,
+                             uint32 offset)
+{
+   SVGA3dCmdDXSetIndexBuffer *cmd;
+
+   /* one relocation */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_INDEX_BUFFER,
+                            sizeof *cmd, 1);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   swc->surface_relocation(swc, &cmd->sid, NULL, indexes, SVGA_RELOC_READ);
+   cmd->format = format;
+   cmd->offset = offset;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX SET_SINGLE_CONSTANT_BUFFER command binding \p surface to
+ * constant buffer \p slot of shader stage \p type.
+ *
+ * Debug-build preconditions: \p offsetInBytes is a multiple of 256, and
+ * \p sizeInBytes is zero exactly when \p surface is NULL.
+ */
+enum pipe_error
+SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc,
+                                      unsigned slot,
+                                      SVGA3dShaderType type,
+                                      struct svga_winsys_surface *surface,
+                                      uint32 offsetInBytes,
+                                      uint32 sizeInBytes)
+{
+   SVGA3dCmdDXSetSingleConstantBuffer *cmd;
+
+   assert(offsetInBytes % 256 == 0);
+   assert(surface ? sizeInBytes > 0 : sizeInBytes == 0);
+
+   /* one relocation */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER,
+                            sizeof *cmd, 1);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   SVGA3D_COPY_BASIC_2(slot, type);
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface, SVGA_RELOC_READ);
+   SVGA3D_COPY_BASIC_2(offsetInBytes, sizeInBytes);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/**
+ * Emit a DX READBACK_SUBRESOURCE command for \p subResource of
+ * \p surface.  The surface is relocated with the READ and INTERNAL flags.
+ */
+enum pipe_error
+SVGA3D_vgpu10_ReadbackSubResource(struct svga_winsys_context *swc,
+                                  struct svga_winsys_surface *surface,
+                                  unsigned subResource)
+{
+   const unsigned reloc_flags = SVGA_RELOC_READ | SVGA_RELOC_INTERNAL;
+   SVGA3dCmdDXReadbackSubResource *cmd;
+
+   /* one relocation */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_READBACK_SUBRESOURCE,
+                            sizeof *cmd, 1);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface, reloc_flags);
+   cmd->subResource = subResource;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/**
+ * Emit a DX UPDATE_SUBRESOURCE command for region \p box of
+ * \p subResource of \p surface.  The surface is relocated with the WRITE
+ * and INTERNAL flags; \p box is copied by value.
+ */
+enum pipe_error
+SVGA3D_vgpu10_UpdateSubResource(struct svga_winsys_context *swc,
+                                struct svga_winsys_surface *surface,
+                                const SVGA3dBox *box,
+                                unsigned subResource)
+{
+   const unsigned reloc_flags = SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL;
+   SVGA3dCmdDXUpdateSubResource *cmd;
+
+   /* one relocation */
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE,
+                            sizeof *cmd, 1);
+   if (cmd == NULL) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface, reloc_flags);
+   cmd->subResource = subResource;
+   cmd->box = *box;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
#include "pipe/p_screen.h"
#include "util/u_memory.h"
#include "util/u_bitmask.h"
+#include "util/u_upload_mgr.h"
#include "svga_context.h"
#include "svga_screen.h"
#include "svga_draw.h"
#include "svga_debug.h"
#include "svga_state.h"
+#include "svga_winsys.h"
+
+#define CONST0_UPLOAD_DEFAULT_SIZE 65536
+#define CONST0_UPLOAD_ALIGNMENT 256
DEBUG_GET_ONCE_BOOL_OPTION(no_swtnl, "SVGA_NO_SWTNL", FALSE)
DEBUG_GET_ONCE_BOOL_OPTION(force_swtnl, "SVGA_FORCE_SWTNL", FALSE);
static void svga_destroy( struct pipe_context *pipe )
{
struct svga_context *svga = svga_context( pipe );
- struct svga_winsys_screen *sws = svga_screen(pipe->screen)->sws;
- unsigned shader;
+ unsigned shader, i;
+
+ /* free any alternate rasterizer states used for point sprite */
+ for (i = 0; i < Elements(svga->rasterizer_no_cull); i++) {
+ if (svga->rasterizer_no_cull[i]) {
+ pipe->delete_rasterizer_state(pipe, svga->rasterizer_no_cull[i]);
+ }
+ }
+
+ /* free polygon stipple state */
+ if (svga->polygon_stipple.sampler) {
+ pipe->delete_sampler_state(pipe, svga->polygon_stipple.sampler);
+ }
+ if (svga->polygon_stipple.sampler_view) {
+ pipe->sampler_view_destroy(pipe,
+ &svga->polygon_stipple.sampler_view->base);
+ }
+ pipe_resource_reference(&svga->polygon_stipple.texture, NULL);
+
+ /* free HW constant buffers */
+ for (shader = 0; shader < Elements(svga->state.hw_draw.constbuf); shader++) {
+ pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], NULL);
+ }
+
+ pipe->delete_blend_state(pipe, svga->noop_blend);
+
+ /* free query gb object */
+ if (svga->gb_query) {
+ pipe->destroy_query(pipe, NULL);
+ svga->gb_query = NULL;
+ }
util_blitter_destroy(svga->blitter);
svga_cleanup_framebuffer( svga );
svga_cleanup_tss_binding( svga );
- svga_hwtnl_destroy( svga->hwtnl );
-
svga_cleanup_vertex_state(svga);
- svga->swc->destroy(svga->swc);
-
svga_destroy_swtnl( svga );
+ svga_hwtnl_destroy( svga->hwtnl );
- util_bitmask_destroy( svga->shader_id_bm );
+ svga->swc->destroy(svga->swc);
+ util_bitmask_destroy(svga->blend_object_id_bm);
+ util_bitmask_destroy(svga->ds_object_id_bm);
+ util_bitmask_destroy(svga->input_element_object_id_bm);
+ util_bitmask_destroy(svga->rast_object_id_bm);
+ util_bitmask_destroy(svga->sampler_object_id_bm);
+ util_bitmask_destroy(svga->sampler_view_id_bm);
+ util_bitmask_destroy(svga->shader_id_bm);
+ util_bitmask_destroy(svga->surface_view_id_bm);
+ util_bitmask_destroy(svga->stream_output_id_bm);
+ util_bitmask_destroy(svga->query_id_bm);
+ u_upload_destroy(svga->const0_upload);
+
+ /* free user's constant buffers */
for (shader = 0; shader < PIPE_SHADER_TYPES; ++shader) {
- pipe_resource_reference( &svga->curr.cbufs[shader].buffer, NULL );
- sws->surface_reference(sws, &svga->state.hw_draw.hw_cb[shader], NULL);
+ for (i = 0; i < Elements(svga->curr.constbufs[shader]); ++i) {
+ pipe_resource_reference(&svga->curr.constbufs[shader][i].buffer, NULL);
+ }
}
FREE( svga );
svga = CALLOC_STRUCT(svga_context);
if (svga == NULL)
- goto no_svga;
+ goto cleanup;
LIST_INITHEAD(&svga->dirty_buffers);
svga->pipe.clear = svga_clear;
svga->swc = svgascreen->sws->context_create(svgascreen->sws);
- if(!svga->swc)
- goto no_swc;
+ if (!svga->swc)
+ goto cleanup;
svga_init_resource_functions(svga);
svga_init_blend_functions(svga);
svga_init_sampler_functions(svga);
svga_init_fs_functions(svga);
svga_init_vs_functions(svga);
+ svga_init_gs_functions(svga);
svga_init_vertex_functions(svga);
svga_init_constbuffer_functions(svga);
svga_init_query_functions(svga);
svga_init_surface_functions(svga);
+ svga_init_stream_output_functions(svga);
+ /* init misc state */
+ svga->curr.sample_mask = ~0;
/* debug */
svga->debug.no_swtnl = debug_get_option_no_swtnl();
svga->debug.no_line_width = debug_get_option_no_line_width();
svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple();
- svga->shader_id_bm = util_bitmask_create();
- if (svga->shader_id_bm == NULL)
- goto no_shader_bm;
+ if (!(svga->blend_object_id_bm = util_bitmask_create()))
+ goto cleanup;
+
+ if (!(svga->ds_object_id_bm = util_bitmask_create()))
+ goto cleanup;
+
+ if (!(svga->input_element_object_id_bm = util_bitmask_create()))
+ goto cleanup;
+
+ if (!(svga->rast_object_id_bm = util_bitmask_create()))
+ goto cleanup;
+
+ if (!(svga->sampler_object_id_bm = util_bitmask_create()))
+ goto cleanup;
+
+ if (!(svga->sampler_view_id_bm = util_bitmask_create()))
+ goto cleanup;
+
+ if (!(svga->shader_id_bm = util_bitmask_create()))
+ goto cleanup;
+
+ if (!(svga->surface_view_id_bm = util_bitmask_create()))
+ goto cleanup;
+
+ if (!(svga->stream_output_id_bm = util_bitmask_create()))
+ goto cleanup;
+
+ if (!(svga->query_id_bm = util_bitmask_create()))
+ goto cleanup;
svga->hwtnl = svga_hwtnl_create(svga);
if (svga->hwtnl == NULL)
- goto no_hwtnl;
+ goto cleanup;
if (!svga_init_swtnl(svga))
- goto no_swtnl;
+ goto cleanup;
ret = svga_emit_initial_state( svga );
if (ret != PIPE_OK)
- goto no_state;
-
+ goto cleanup;
+
+ svga->const0_upload = u_upload_create(&svga->pipe,
+ CONST0_UPLOAD_DEFAULT_SIZE,
+ CONST0_UPLOAD_ALIGNMENT,
+ PIPE_BIND_CONSTANT_BUFFER);
+ if (!svga->const0_upload)
+ goto cleanup;
+
/* Avoid shortcircuiting state with initial value of zero.
*/
memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear));
memset(&svga->state.hw_draw, 0xcd, sizeof(svga->state.hw_draw));
memset(&svga->state.hw_draw.views, 0x0, sizeof(svga->state.hw_draw.views));
+ memset(&svga->state.hw_draw.num_sampler_views, 0,
+ sizeof(svga->state.hw_draw.num_sampler_views));
svga->state.hw_draw.num_views = 0;
- memset(&svga->state.hw_draw.hw_cb, 0x0, sizeof(svga->state.hw_draw.hw_cb));
+
+ /* Initialize the shader pointers */
+ svga->state.hw_draw.vs = NULL;
+ svga->state.hw_draw.gs = NULL;
+ svga->state.hw_draw.fs = NULL;
+ memset(svga->state.hw_draw.constbuf, 0,
+ sizeof(svga->state.hw_draw.constbuf));
+ memset(svga->state.hw_draw.default_constbuf_size, 0,
+ sizeof(svga->state.hw_draw.default_constbuf_size));
+ memset(svga->state.hw_draw.enabled_constbufs, 0,
+ sizeof(svga->state.hw_draw.enabled_constbufs));
+
+ /* Create a no-operation blend state which we will bind whenever the
+ * requested blend state is impossible (e.g. due to having an integer
+ * render target attached).
+ *
+ * XXX: We will probably actually need 16 of these, one for each possible
+ * RGBA color mask (4 bits). Then, we would bind the one with a color mask
+ * matching the blend state it is replacing.
+ */
+ {
+ struct pipe_blend_state noop_tmpl = {0};
+ unsigned i;
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
+ // Set the color mask to all-ones. Later this may change.
+ noop_tmpl.rt[i].colormask = PIPE_MASK_RGBA;
+ }
+ svga->noop_blend = svga->pipe.create_blend_state(&svga->pipe, &noop_tmpl);
+ }
svga->dirty = ~0;
return &svga->pipe;
-no_state:
+cleanup:
svga_destroy_swtnl(svga);
-no_swtnl:
- svga_hwtnl_destroy( svga->hwtnl );
-no_hwtnl:
- util_bitmask_destroy( svga->shader_id_bm );
-no_shader_bm:
- svga->swc->destroy(svga->swc);
-no_swc:
+
+ if (svga->const0_upload)
+ u_upload_destroy(svga->const0_upload);
+ if (svga->hwtnl)
+ svga_hwtnl_destroy(svga->hwtnl);
+ if (svga->swc)
+ svga->swc->destroy(svga->swc);
+ util_bitmask_destroy(svga->blend_object_id_bm);
+ util_bitmask_destroy(svga->ds_object_id_bm);
+ util_bitmask_destroy(svga->input_element_object_id_bm);
+ util_bitmask_destroy(svga->rast_object_id_bm);
+ util_bitmask_destroy(svga->sampler_object_id_bm);
+ util_bitmask_destroy(svga->sampler_view_id_bm);
+ util_bitmask_destroy(svga->shader_id_bm);
+ util_bitmask_destroy(svga->surface_view_id_bm);
+ util_bitmask_destroy(svga->stream_output_id_bm);
+ util_bitmask_destroy(svga->query_id_bm);
FREE(svga);
-no_svga:
return NULL;
}
/* To force the re-emission of rendertargets and texture sampler bindings on
* the next command buffer.
*/
- svga->rebind.rendertargets = TRUE;
- svga->rebind.texture_samplers = TRUE;
+ svga->rebind.flags.rendertargets = TRUE;
+ svga->rebind.flags.texture_samplers = TRUE;
+
if (svga_have_gb_objects(svga)) {
- svga->rebind.vs = TRUE;
- svga->rebind.fs = TRUE;
+
+ svga->rebind.flags.constbufs = TRUE;
+ svga->rebind.flags.vs = TRUE;
+ svga->rebind.flags.fs = TRUE;
+ svga->rebind.flags.gs = TRUE;
+
+ if (svga_need_to_rebind_resources(svga)) {
+ svga->rebind.flags.query = TRUE;
+ }
}
if (SVGA_DEBUG & DEBUG_SYNC) {
}
+/**
+ * Flush the context's pending commands and block until the fence returned
+ * by that flush has completed.
+ */
+void
+svga_context_finish(struct svga_context *svga)
+{
+   struct pipe_fence_handle *fence = NULL;
+   struct pipe_screen *screen = svga->pipe.screen;
+
+   svga_context_flush(svga, &fence);
+
+   /* Wait without a timeout, then release our fence reference. */
+   screen->fence_finish(screen, fence, PIPE_TIMEOUT_INFINITE);
+   screen->fence_reference(screen, &fence, NULL);
+}
+
+
+/**
+ * Emit pending drawing commands to the command buffer.
+ * If the command buffer overflows, we flush it and retry.
+ * \sa svga_hwtnl_flush()
+ */
void svga_hwtnl_flush_retry( struct svga_context *svga )
{
enum pipe_error ret = PIPE_OK;
ret = svga_hwtnl_flush( svga->hwtnl );
}
- assert(ret == 0);
+ assert(ret == PIPE_OK);
}
#include "svga_screen.h"
#include "svga_state.h"
-#include "svga_tgsi.h"
#include "svga_winsys.h"
#include "svga_hw_reg.h"
#include "svga3d_shaderdefs.h"
#define SVGA_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
#define SVGA_QUERY_FALLBACKS (PIPE_QUERY_DRIVER_SPECIFIC + 1)
#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 2)
+#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 3)
+/**
+ * Maximum supported number of constant buffers per shader
+ */
+#define SVGA_MAX_CONST_BUFS 14
+
+/**
+ * Maximum constant buffer size, in bytes, that can be set with the
+ * DXSetSingleConstantBuffer command: the DX10 constant buffer element
+ * count (4096) times four 4-byte components per element.
+ */
+#define SVGA_MAX_CONST_BUF_SIZE (4096 * 4 * sizeof(int))
struct draw_vertex_shader;
struct draw_fragment_shader;
struct util_bitmask;
-struct svga_shader
-{
- const struct tgsi_token *tokens;
-
- struct tgsi_shader_info info;
-
- /** Head of linked list of variants */
- struct svga_shader_variant *variants;
-
- unsigned id; /**< for debugging only */
-};
-
-
-struct svga_fragment_shader
-{
- struct svga_shader base;
-
- struct draw_fragment_shader *draw_shader;
-
- /** Mask of which generic varying variables are read by this shader */
- unsigned generic_inputs;
- /** Table mapping original TGSI generic indexes to low integers */
- int8_t generic_remap_table[MAX_GENERIC_VARYING];
-};
-
-
-struct svga_vertex_shader
-{
- struct svga_shader base;
-
- struct draw_vertex_shader *draw_shader;
-};
-
-
struct svga_cache_context;
struct svga_tracked_state;
struct svga_blend_state {
+ unsigned need_white_fragments:1;
+ unsigned independent_blend_enable:1;
+ unsigned alpha_to_coverage:1;
+ unsigned blend_color_alpha:1; /**< set blend color to alpha value */
- boolean need_white_fragments;
-
- /* Should be per-render-target:
- */
+ /** Per-render target state */
struct {
uint8_t writemask;
uint8_t srcblend_alpha;
uint8_t dstblend_alpha;
uint8_t blendeq_alpha;
+ } rt[PIPE_MAX_COLOR_BUFS];
- } rt[1];
+ SVGA3dBlendStateId id; /**< vgpu10 */
};
struct svga_depth_stencil_state {
unsigned stencil_writemask:8;
float alpharef;
+
+ SVGA3dDepthStencilStateId id; /**< vgpu10 */
};
#define SVGA_UNFILLED_DISABLE 0
float pointsize;
float linewidth;
- unsigned hw_unfilled:16; /* PIPE_POLYGON_MODE_x */
+ unsigned hw_fillmode:2; /* PIPE_POLYGON_MODE_x */
/** Which prims do we need help for? Bitmask of (1 << PIPE_PRIM_x) flags */
unsigned need_pipeline:16;
+ SVGA3dRasterizerStateId id; /**< vgpu10 */
+
/** For debugging: */
const char* need_pipeline_tris_str;
const char* need_pipeline_lines_str;
unsigned min_lod;
unsigned view_min_lod;
unsigned view_max_lod;
+
+ SVGA3dSamplerId id;
};
+
+struct svga_pipe_sampler_view /* svga wrapper around a pipe_sampler_view */
+{
+ struct pipe_sampler_view base; /**< first member: svga_pipe_sampler_view() casts a base pointer to this struct */
+
+ SVGA3dShaderResourceViewId id; /**< presumably the vgpu10 shader resource view ID -- TODO confirm */
+};
+
+
+/** Cast a generic pipe_sampler_view pointer to the svga struct embedding it. */
+static inline struct svga_pipe_sampler_view *
+svga_pipe_sampler_view(struct pipe_sampler_view *v)
+{
+   struct svga_pipe_sampler_view *sv = (struct svga_pipe_sampler_view *) v;
+
+   return sv;
+}
+
+
struct svga_velems_state {
unsigned count;
struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
SVGA3dDeclType decl_type[PIPE_MAX_ATTRIBS]; /**< vertex attrib formats */
- unsigned adjust_attrib_range; /* bitmask of attrs needing range adjustment */
- unsigned adjust_attrib_w_1; /* bitmask of attrs needing w = 1 */
+
+ /** Bitmasks indicating which attributes need format conversion */
+ unsigned adjust_attrib_range; /**< range adjustment */
+ unsigned attrib_is_pure_int; /**< pure int */
+ unsigned adjust_attrib_w_1; /**< set w = 1 */
+ unsigned adjust_attrib_itof; /**< int->float */
+ unsigned adjust_attrib_utof; /**< uint->float */
+ unsigned attrib_is_bgra; /**< R / B swizzling */
+ unsigned attrib_puint_to_snorm; /**< 10_10_10_2 packed uint -> snorm */
+ unsigned attrib_puint_to_uscaled; /**< 10_10_10_2 packed uint -> uscaled */
+ unsigned attrib_puint_to_sscaled; /**< 10_10_10_2 packed uint -> sscaled */
+
boolean need_swvfetch;
+
+ SVGA3dElementLayoutId id; /**< VGPU10 */
};
/* Use to calculate differences between state emitted to hardware and
const struct svga_blend_state *blend;
const struct svga_depth_stencil_state *depth;
const struct svga_rasterizer_state *rast;
- const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ const struct svga_sampler_state *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
const struct svga_velems_state *velems;
- struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; /* or texture ID's? */
+ struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; /* or texture ID's? */
struct svga_fragment_shader *fs;
struct svga_vertex_shader *vs;
+ struct svga_geometry_shader *user_gs; /* user-specified GS */
+ struct svga_geometry_shader *gs; /* derived GS */
struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
struct pipe_index_buffer ib;
- struct pipe_constant_buffer cbufs[PIPE_SHADER_TYPES];
+ /** Constant buffers for each shader.
+ * The size should probably always match with that of
+ * svga_shader_emitter_v10.num_shader_consts.
+ */
+ struct pipe_constant_buffer constbufs[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS];
struct pipe_framebuffer_state framebuffer;
float depthscale;
struct pipe_clip_state clip;
struct pipe_viewport_state viewport;
- unsigned num_samplers;
- unsigned num_sampler_views;
+ unsigned num_samplers[PIPE_SHADER_TYPES];
+ unsigned num_sampler_views[PIPE_SHADER_TYPES];
unsigned num_vertex_buffers;
unsigned reduced_prim;
unsigned flag_1d;
unsigned flag_srgb;
} tex_flags;
+
+ unsigned sample_mask;
};
struct svga_prescale {
*/
struct svga_hw_clear_state
{
- struct {
- unsigned x,y,w,h;
- } viewport;
+ SVGA3dRect viewport;
struct {
float zmin, zmax;
unsigned ts[SVGA3D_PIXEL_SAMPLERREG_MAX][SVGA3D_TS_MAX];
float cb[PIPE_SHADER_TYPES][SVGA3D_CONSTREG_MAX][4];
- /**
- * For guest backed shader constants only.
- */
- struct svga_winsys_surface *hw_cb[PIPE_SHADER_TYPES];
-
struct svga_shader_variant *fs;
struct svga_shader_variant *vs;
+ struct svga_shader_variant *gs;
struct svga_hw_view_state views[PIPE_MAX_SAMPLERS];
-
unsigned num_views;
+ struct pipe_resource *constbuf[PIPE_SHADER_TYPES];
+
+ /* Bitmask of enabled constant buffers */
+ unsigned enabled_constbufs[PIPE_SHADER_TYPES];
+
+ /* VGPU10 HW state (used to prevent emitting redundant state) */
+ SVGA3dDepthStencilStateId depth_stencil_id;
+ unsigned stencil_ref;
+ SVGA3dBlendStateId blend_id;
+ float blend_factor[4];
+ unsigned blend_sample_mask;
+ SVGA3dRasterizerStateId rasterizer_id;
+ SVGA3dElementLayoutId layout_id;
+ SVGA3dPrimitiveType topology;
+
+ /* used for rebinding */
+ unsigned num_sampler_views[PIPE_SHADER_TYPES];
+ unsigned default_constbuf_size[PIPE_SHADER_TYPES];
};
struct svga_hw_queue;
struct svga_query;
+struct svga_qmem_alloc_entry;
struct svga_context
{
struct pipe_context pipe;
struct svga_winsys_context *swc;
struct blitter_context *blitter;
+ struct u_upload_mgr *const0_upload;
struct {
boolean no_swtnl;
boolean new_vdecl;
} swtnl;
+ /* Bitmask of blend state objects IDs */
+ struct util_bitmask *blend_object_id_bm;
+
+ /* Bitmask of depth/stencil state objects IDs */
+ struct util_bitmask *ds_object_id_bm;
+
+ /* Bitmask of input element object IDs */
+ struct util_bitmask *input_element_object_id_bm;
+
+ /* Bitmask of rasterizer object IDs */
+ struct util_bitmask *rast_object_id_bm;
+
+ /* Bitmask of sampler state objects IDs */
+ struct util_bitmask *sampler_object_id_bm;
+
+ /* Bitmask of sampler view IDs */
+ struct util_bitmask *sampler_view_id_bm;
+
/* Bitmask of used shader IDs */
struct util_bitmask *shader_id_bm;
+ /* Bitmask of used surface view IDs */
+ struct util_bitmask *surface_view_id_bm;
+
+ /* Bitmask of used stream output IDs */
+ struct util_bitmask *stream_output_id_bm;
+
+ /* Bitmask of used query IDs */
+ struct util_bitmask *query_id_bm;
+
struct {
unsigned dirty[SVGA_STATE_MAX];
+ /** bitmasks of which const buffers are changed */
+ unsigned dirty_constbufs[PIPE_SHADER_TYPES];
+
unsigned texture_timestamp;
/*
struct svga_state curr; /* state from the state tracker */
unsigned dirty; /* statechanges since last update_state() */
- struct {
- unsigned rendertargets:1;
- unsigned texture_samplers:1;
- unsigned vs:1;
- unsigned fs:1;
+ union {
+ struct {
+ unsigned rendertargets:1;
+ unsigned texture_samplers:1;
+ unsigned constbufs:1;
+ unsigned vs:1;
+ unsigned fs:1;
+ unsigned gs:1;
+ unsigned query:1;
+ } flags;
+ unsigned val;
} rebind;
struct svga_hwtnl *hwtnl;
- /** The occlusion query currently in progress */
- struct svga_query *sq;
+ /** Query state */
+ struct svga_winsys_gb_query *gb_query; /**< gb query object, one per context */
+ unsigned gb_query_len; /**< gb query object size */
+ struct util_bitmask *gb_query_alloc_mask; /**< gb query object allocation mask */
+ struct svga_qmem_alloc_entry *gb_query_map[SVGA_QUERY_MAX];
+ /**< query mem block mapping */
+ struct svga_query *sq[SVGA_QUERY_MAX]; /**< queries currently in progress */
/** List of buffers with queued transfers */
struct list_head dirty_buffers;
/** performance / info queries */
uint64_t num_draw_calls; /**< SVGA_QUERY_DRAW_CALLS */
uint64_t num_fallbacks; /**< SVGA_QUERY_FALLBACKS */
+
+ /** The currently bound stream output targets */
+ unsigned num_so_targets;
+ struct svga_winsys_surface *so_surfaces[SVGA3D_DX_MAX_SOTARGETS];
+ struct pipe_stream_output_target *so_targets[SVGA3D_DX_MAX_SOTARGETS];
+ struct svga_stream_output *current_so;
+
+ /** A blend state with blending disabled, for falling back to when blending
+ * is illegal (e.g. an integer texture is bound)
+ */
+ struct svga_blend_state *noop_blend;
+
+ struct {
+ struct pipe_resource *texture;
+ struct svga_pipe_sampler_view *sampler_view;
+ void *sampler;
+ } polygon_stipple;
+
+ /** Alternate rasterizer states created for point sprite */
+ struct svga_rasterizer_state *rasterizer_no_cull[2];
};
/* A flag for each state_tracker state object:
*/
#define SVGA_NEW_BLEND 0x1
-#define SVGA_NEW_DEPTH_STENCIL 0x2
+#define SVGA_NEW_DEPTH_STENCIL_ALPHA 0x2
#define SVGA_NEW_RAST 0x4
#define SVGA_NEW_SAMPLER 0x8
#define SVGA_NEW_TEXTURE 0x10
#define SVGA_NEW_VS_VARIANT 0x1000000
#define SVGA_NEW_TEXTURE_FLAGS 0x4000000
#define SVGA_NEW_STENCIL_REF 0x8000000
-
+#define SVGA_NEW_GS 0x10000000
+#define SVGA_NEW_GS_CONST_BUFFER 0x20000000
+#define SVGA_NEW_GS_VARIANT 0x40000000
void svga_init_sampler_functions( struct svga_context *svga );
void svga_init_fs_functions( struct svga_context *svga );
void svga_init_vs_functions( struct svga_context *svga );
+void svga_init_gs_functions( struct svga_context *svga );
void svga_init_vertex_functions( struct svga_context *svga );
void svga_init_constbuffer_functions( struct svga_context *svga );
void svga_init_draw_functions( struct svga_context *svga );
void svga_init_query_functions( struct svga_context *svga );
void svga_init_surface_functions(struct svga_context *svga);
+void svga_init_stream_output_functions( struct svga_context *svga );
void svga_cleanup_vertex_state( struct svga_context *svga );
void svga_cleanup_tss_binding( struct svga_context *svga );
void svga_context_flush( struct svga_context *svga,
struct pipe_fence_handle **pfence );
+void svga_context_finish(struct svga_context *svga);
+
void svga_hwtnl_flush_retry( struct svga_context *svga );
void svga_hwtnl_flush_buffer( struct svga_context *svga,
struct pipe_resource *buffer );
return svga_screen(svga->pipe.screen)->sws->have_gb_dma;
}
+/** Return the have_vgpu10 flag of the winsys screen behind this context. */
+static inline boolean
+svga_have_vgpu10(const struct svga_context *svga)
+{
+   const struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+
+   return sws->have_vgpu10;
+}
+
+/**
+ * Return the need_to_rebind_resources flag of the winsys screen behind
+ * this context.
+ */
+static inline boolean
+svga_need_to_rebind_resources(const struct svga_context *svga)
+{
+   const struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+
+   return sws->need_to_rebind_resources;
+}
+
+/** Return true when the two rectangles are byte-for-byte identical. */
+static inline boolean
+svga_rects_equal(const SVGA3dRect *r1, const SVGA3dRect *r2)
+{
+   return !memcmp(r1, r2, sizeof *r1);
+}
#endif
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
-#define DEBUG_DMA 0x1
-#define DEBUG_TGSI 0x4
-#define DEBUG_PIPE 0x8
-#define DEBUG_STATE 0x10
-#define DEBUG_SCREEN 0x20
-#define DEBUG_TEX 0x40
-#define DEBUG_SWTNL 0x80
-#define DEBUG_CONSTS 0x100
-#define DEBUG_VIEWPORT 0x200
-#define DEBUG_VIEWS 0x400
-#define DEBUG_PERF 0x800 /* print something when we hit any slow path operation */
-#define DEBUG_FLUSH 0x1000 /* flush after every draw */
-#define DEBUG_SYNC 0x2000 /* sync after every flush */
-#define DEBUG_QUERY 0x4000
-#define DEBUG_CACHE 0x8000
+#define DEBUG_DMA 0x1
+#define DEBUG_TGSI 0x4
+#define DEBUG_PIPE 0x8
+#define DEBUG_STATE 0x10
+#define DEBUG_SCREEN 0x20
+#define DEBUG_TEX 0x40
+#define DEBUG_SWTNL 0x80
+#define DEBUG_CONSTS 0x100
+#define DEBUG_VIEWPORT 0x200
+#define DEBUG_VIEWS 0x400
+#define DEBUG_PERF 0x800 /* print something when we hit any slow path operation */
+#define DEBUG_FLUSH 0x1000 /* flush after every draw */
+#define DEBUG_SYNC 0x2000 /* sync after every flush */
+#define DEBUG_QUERY 0x4000
+#define DEBUG_CACHE 0x8000
+#define DEBUG_STREAMOUT 0x10000
#ifdef DEBUG
extern int SVGA_DEBUG;
#include "pipe/p_compiler.h"
#include "util/u_inlines.h"
#include "pipe/p_defines.h"
+#include "util/u_helpers.h"
#include "util/u_memory.h"
#include "util/u_math.h"
-#include "util/u_upload_mgr.h"
#include "svga_context.h"
#include "svga_draw.h"
#include "svga_draw_private.h"
#include "svga_debug.h"
#include "svga_screen.h"
+#include "svga_resource.h"
#include "svga_resource_buffer.h"
#include "svga_resource_texture.h"
+#include "svga_shader.h"
#include "svga_surface.h"
#include "svga_winsys.h"
#include "svga_cmd.h"
}
}
- for (i = 0; i < hwtnl->cmd.vdecl_count; i++)
- pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], NULL);
+ for (i = 0; i < hwtnl->cmd.vbuf_count; i++)
+ pipe_resource_reference(&hwtnl->cmd.vbufs[i].buffer, NULL);
for (i = 0; i < hwtnl->cmd.prim_count; i++)
pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl,
boolean flatshade, boolean flatshade_first)
{
- hwtnl->hw_pv = PV_FIRST;
+ struct svga_screen *svgascreen = svga_screen(hwtnl->svga->pipe.screen);
+
+ /* User-specified PV */
hwtnl->api_pv = (flatshade && !flatshade_first) ? PV_LAST : PV_FIRST;
+
+ /* Device supported PV */
+ if (svgascreen->haveProvokingVertex) {
+ /* use the mode specified by the user */
+ hwtnl->hw_pv = hwtnl->api_pv;
+ }
+ else {
+ /* the device only supports the first provoking vertex */
+ hwtnl->hw_pv = PV_FIRST;
+ }
}
void
-svga_hwtnl_set_unfilled(struct svga_hwtnl *hwtnl, unsigned mode)
+svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode)
{
hwtnl->api_fillmode = mode;
}
void
-svga_hwtnl_reset_vdecl(struct svga_hwtnl *hwtnl, unsigned count)
+svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl, /* stores the vertex declarations in hwtnl->cmd */
+ unsigned count, /* number of entries in decls and buffer_indexes */
+ const SVGA3dVertexDecl * decls, /* copied into hwtnl->cmd.vdecl */
+ const unsigned *buffer_indexes, /* copied into hwtnl->cmd.vdecl_buffer_index */
+ SVGA3dElementLayoutId layout_id) /* stored in hwtnl->cmd.vdecl_layout_id */
{
- unsigned i;
-
assert(hwtnl->cmd.prim_count == 0);
-
- for (i = count; i < hwtnl->cmd.vdecl_count; i++) {
- pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], NULL);
- }
-
hwtnl->cmd.vdecl_count = count;
+ hwtnl->cmd.vdecl_layout_id = layout_id;
+ memcpy(hwtnl->cmd.vdecl, decls, count * sizeof(*decls)); /* NOTE(review): count is not checked against the capacity of cmd.vdecl here -- confirm callers bound it */
+ memcpy(hwtnl->cmd.vdecl_buffer_index, buffer_indexes,
+ count * sizeof(unsigned));
}
+/**
+ * Specify vertex buffers for hardware drawing.
+ *
+ * Passes \p buffers and \p count to util_set_vertex_buffers_count(), with
+ * hwtnl->cmd.vbufs and hwtnl->cmd.vbuf_count as the destination and 0 as
+ * the starting slot.
+ */
void
-svga_hwtnl_vdecl(struct svga_hwtnl *hwtnl,
- unsigned i,
- const SVGA3dVertexDecl * decl, struct pipe_resource *vb)
+svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl,
+ unsigned count, struct pipe_vertex_buffer *buffers)
{
- assert(hwtnl->cmd.prim_count == 0);
-
- assert(i < hwtnl->cmd.vdecl_count);
-
- hwtnl->cmd.vdecl[i] = *decl;
-
- pipe_resource_reference(&hwtnl->cmd.vdecl_vb[i], vb);
+ util_set_vertex_buffers_count(hwtnl->cmd.vbufs,
+ &hwtnl->cmd.vbuf_count, buffers, 0, count);
}
return FALSE;
}
- for (i = 0; i < hwtnl->cmd.vdecl_count; ++i) {
- if (hwtnl->cmd.vdecl_vb[i] == buffer) {
+ for (i = 0; i < hwtnl->cmd.vbuf_count; ++i) {
+ if (hwtnl->cmd.vbufs[i].buffer == buffer) {
return TRUE;
}
}
}
-enum pipe_error
-svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
+static enum pipe_error
+draw_vgpu9(struct svga_hwtnl *hwtnl)
{
struct svga_winsys_context *swc = hwtnl->cmd.swc;
struct svga_context *svga = hwtnl->svga;
enum pipe_error ret;
+ struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
+ struct svga_winsys_surface *ib_handle[QSZ];
+ struct svga_winsys_surface *handle;
+ SVGA3dVertexDecl *vdecl;
+ SVGA3dPrimitiveRange *prim;
+ unsigned i;
- if (hwtnl->cmd.prim_count) {
- struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
- struct svga_winsys_surface *ib_handle[QSZ];
- struct svga_winsys_surface *handle;
- SVGA3dVertexDecl *vdecl;
- SVGA3dPrimitiveRange *prim;
- unsigned i;
+ for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+ unsigned j = hwtnl->cmd.vdecl_buffer_index[i];
+ handle = svga_buffer_handle(svga, hwtnl->cmd.vbufs[j].buffer);
+ if (handle == NULL)
+ return PIPE_ERROR_OUT_OF_MEMORY;
- for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
- assert(!svga_buffer_is_user_buffer(hwtnl->cmd.vdecl_vb[i]));
- handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]);
+ vb_handle[i] = handle;
+ }
+
+ for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+ if (hwtnl->cmd.prim_ib[i]) {
+ handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
if (handle == NULL)
return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+ else
+ handle = NULL;
+
+ ib_handle[i] = handle;
+ }
+
+ if (svga->rebind.flags.rendertargets) {
+ ret = svga_reemit_framebuffer_bindings(svga);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+
+ if (svga->rebind.flags.texture_samplers) {
+ ret = svga_reemit_tss_bindings(svga);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+
+ if (svga->rebind.flags.vs) {
+ ret = svga_reemit_vs_bindings(svga);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+
+ if (svga->rebind.flags.fs) {
+ ret = svga_reemit_fs_bindings(svga);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+ }
+
+ SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
+ svga->curr.framebuffer.cbufs[0] ?
+ svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
+ hwtnl->cmd.prim_count);
- vb_handle[i] = handle;
+ ret = SVGA3D_BeginDrawPrimitives(swc,
+ &vdecl,
+ hwtnl->cmd.vdecl_count,
+ &prim, hwtnl->cmd.prim_count);
+ if (ret != PIPE_OK)
+ return ret;
+
+ memcpy(vdecl,
+ hwtnl->cmd.vdecl,
+ hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);
+
+ for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+ /* check for 4-byte alignment */
+ assert(vdecl[i].array.offset % 4 == 0);
+ assert(vdecl[i].array.stride % 4 == 0);
+
+ /* Since rangeHint is considered to be relative to indexBias, and
+ * indexBias varies per primitive, we cannot accurately supply a
+ * rangeHint when emitting more than one primitive per draw command.
+ */
+ if (hwtnl->cmd.prim_count == 1) {
+ vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
+ vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
+ }
+ else {
+ vdecl[i].rangeHint.first = 0;
+ vdecl[i].rangeHint.last = 0;
}
- for (i = 0; i < hwtnl->cmd.prim_count; i++) {
- if (hwtnl->cmd.prim_ib[i]) {
- assert(!svga_buffer_is_user_buffer(hwtnl->cmd.prim_ib[i]));
- handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
- if (handle == NULL)
- return PIPE_ERROR_OUT_OF_MEMORY;
+ swc->surface_relocation(swc,
+ &vdecl[i].array.surfaceId,
+ NULL, vb_handle[i], SVGA_RELOC_READ);
+ }
+
+ memcpy(prim,
+ hwtnl->cmd.prim, hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);
+
+ for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+ swc->surface_relocation(swc,
+ &prim[i].indexArray.surfaceId,
+ NULL, ib_handle[i], SVGA_RELOC_READ);
+ pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
+ }
+
+ SVGA_FIFOCommitAll(swc);
+
+ hwtnl->cmd.prim_count = 0;
+
+ return PIPE_OK;
+}
+
+
+/**
+ * Translate an index size in bytes (2 or 4) to the matching SVGA3D surface
+ * format.  Any other width trips an assertion and falls back to
+ * SVGA3D_R32_UINT.
+ */
+static SVGA3dSurfaceFormat
+xlate_index_format(unsigned indexWidth)
+{
+   switch (indexWidth) {
+   case 2:
+      return SVGA3D_R16_UINT;
+   case 4:
+      return SVGA3D_R32_UINT;
+   default:
+      assert(!"Bad indexWidth");
+      return SVGA3D_R32_UINT;
+   }
+}
+
+
+/**
+ * For each shader stage from PIPE_SHADER_VERTEX through
+ * PIPE_SHADER_GEOMETRY, look up the winsys surface behind every bound
+ * sampler view (plus the polygon stipple texture for the fragment stage
+ * when stippling is enabled) and, if svga->rebind.flags.texture_samplers
+ * is set, rebind each non-NULL surface with SVGA_RELOC_READ.
+ *
+ * The rebind flag is cleared once all stages have been processed.
+ * Requires vgpu10 (asserted).
+ *
+ * \return PIPE_OK, or the first error returned by resource_rebind().
+ */
+static enum pipe_error
+validate_sampler_resources(struct svga_context *svga)
+{
+ unsigned shader;
+
+ assert(svga_have_vgpu10(svga));
+
+ for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+ unsigned count = svga->curr.num_sampler_views[shader];
+ unsigned i;
+ /* Zero-initialized: the polygon stipple unit may lie beyond the bound
+ * view count, and the rebind loop below then visits the slots in
+ * between, which nothing else writes.
+ */
+ struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS] = { NULL };
+ enum pipe_error ret;
+
+ /*
+ * Reference bound sampler resources to ensure pending updates are
+ * noticed by the device.
+ */
+ for (i = 0; i < count; i++) {
+ struct svga_pipe_sampler_view *sv =
+ svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);
+
+ if (sv) {
+ if (sv->base.texture->target == PIPE_BUFFER) {
+ surfaces[i] = svga_buffer_handle(svga, sv->base.texture);
+ }
+ else {
+ surfaces[i] = svga_texture(sv->base.texture)->handle;
+ }
}
else {
- handle = NULL;
+ surfaces[i] = NULL;
}
-
- ib_handle[i] = handle;
}
- if (svga->rebind.rendertargets) {
- ret = svga_reemit_framebuffer_bindings(svga);
- if (ret != PIPE_OK) {
- return ret;
- }
+ if (shader == PIPE_SHADER_FRAGMENT &&
+ svga->curr.rast->templ.poly_stipple_enable) {
+ const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+ struct svga_pipe_sampler_view *sv =
+ svga->polygon_stipple.sampler_view;
+
+ assert(sv);
+ surfaces[unit] = svga_texture(sv->base.texture)->handle;
+ count = MAX2(count, unit+1); /* make the rebind loop reach the stipple unit */
}
- if (svga->rebind.texture_samplers) {
- ret = svga_reemit_tss_bindings(svga);
- if (ret != PIPE_OK) {
- return ret;
+ /* rebind the shader resources if needed */
+ if (svga->rebind.flags.texture_samplers) {
+ for (i = 0; i < count; i++) {
+ if (surfaces[i]) {
+ ret = svga->swc->resource_rebind(svga->swc,
+ surfaces[i],
+ NULL,
+ SVGA_RELOC_READ);
+ if (ret != PIPE_OK)
+ return ret;
+ }
}
}
+ }
+ svga->rebind.flags.texture_samplers = FALSE;
- if (svga->rebind.vs) {
- ret = svga_reemit_vs_bindings(svga);
- if (ret != PIPE_OK) {
- return ret;
+ return PIPE_OK;
+}
+
+/**
+ * For each shader stage from PIPE_SHADER_VERTEX through
+ * PIPE_SHADER_GEOMETRY: when svga->rebind.flags.constbufs is set, rebind
+ * the buffer in svga->state.hw_draw.constbuf[shader]; then, for every bit
+ * other than bit 0 in hw_draw.enabled_constbufs[shader], fetch the buffer's
+ * surface handle and rebind it too when the flag is set.
+ *
+ * The rebind flag is cleared once all stages have been processed.
+ * Requires vgpu10 (asserted).
+ *
+ * Returns PIPE_OK, or the first error returned by resource_rebind().
+ */
+static enum pipe_error
+validate_constant_buffers(struct svga_context *svga)
+{
+ unsigned shader;
+
+ assert(svga_have_vgpu10(svga));
+
+ for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+ enum pipe_error ret;
+ struct svga_buffer *buffer;
+ struct svga_winsys_surface *handle;
+ unsigned enabled_constbufs;
+
+ /* Rebind the default constant buffer if needed */
+ if (svga->rebind.flags.constbufs) {
+ buffer = svga_buffer(svga->state.hw_draw.constbuf[shader]);
+ if (buffer) {
+ ret = svga->swc->resource_rebind(svga->swc,
+ buffer->handle,
+ NULL,
+ SVGA_RELOC_READ);
+ if (ret != PIPE_OK)
+ return ret;
}
}
- if (svga->rebind.fs) {
- ret = svga_reemit_fs_bindings(svga);
- if (ret != PIPE_OK) {
- return ret;
+ /*
+ * Reference other bound constant buffers to ensure pending updates are
+ * noticed by the device.
+ */
+ enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] & ~1u; /* bit 0 is masked off here */
+ while (enabled_constbufs) {
+ unsigned i = u_bit_scan(&enabled_constbufs);
+ buffer = svga_buffer(svga->curr.constbufs[shader][i].buffer);
+ if (buffer) {
+ handle = svga_buffer_handle(svga, &buffer->b.b); /* NOTE(review): result is not NULL-checked before resource_rebind() -- confirm that is safe */
+
+ if (svga->rebind.flags.constbufs) {
+ ret = svga->swc->resource_rebind(svga->swc,
+ handle,
+ NULL,
+ SVGA_RELOC_READ);
+ if (ret != PIPE_OK)
+ return ret;
+ }
}
}
+ }
+ svga->rebind.flags.constbufs = FALSE; /* every stage handled; clear the request */
+
+ return PIPE_OK;
+}
- SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
- svga->curr.framebuffer.cbufs[0] ?
- svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
- hwtnl->cmd.prim_count);
- ret = SVGA3D_BeginDrawPrimitives(swc, &vdecl, hwtnl->cmd.vdecl_count,
- &prim, hwtnl->cmd.prim_count);
+/**
+ * Emit one draw to the command buffer on the VGPU10 path.
+ *
+ * Steps, in order: re-emit framebuffer and shader bindings when
+ * svga->rebind.val is non-zero, validate sampler resources and constant
+ * buffers, look up the surface handle of every vertex buffer and of the
+ * optional index buffer, emit input layout / vertex buffers / topology
+ * (layout and topology only when they differ from the values cached in
+ * svga->state.hw_draw), then emit the Draw, DrawInstanced, DrawIndexed or
+ * DrawIndexedInstanced command.
+ *
+ * \param range primitive range; primType, indexWidth, indexArray.offset
+ * and indexBias are read
+ * \param vcount count passed to the draw command
+ * \param min_index not referenced by this function
+ * \param max_index not referenced by this function
+ * \param ib index buffer, or NULL for non-indexed drawing
+ * \param start_instance only passed on when instance_count > 1
+ * \param instance_count the instanced commands are used when this is > 1
+ * \return PIPE_OK, PIPE_ERROR_OUT_OF_MEMORY when a buffer handle cannot be
+ * obtained, or the error of the first failing command
+ */
+static enum pipe_error
+draw_vgpu10(struct svga_hwtnl *hwtnl,
+ const SVGA3dPrimitiveRange *range,
+ unsigned vcount,
+ unsigned min_index,
+ unsigned max_index, struct pipe_resource *ib,
+ unsigned start_instance, unsigned instance_count)
+{
+ struct svga_context *svga = hwtnl->svga;
+ struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
+ struct svga_winsys_surface *ib_handle;
+ const unsigned vbuf_count = hwtnl->cmd.vbuf_count;
+ enum pipe_error ret;
+ unsigned i;
+
+ assert(svga_have_vgpu10(svga));
+ /* primitives are only queued on the VGPU9 path, see svga_hwtnl_prim() */
+ assert(hwtnl->cmd.prim_count == 0);
+
+ /* We need to reemit all the current resource bindings along with the Draw
+ * command to be sure that the referenced resources are available for the
+ * Draw command, just in case the surfaces associated with the resources
+ * are paged out.
+ */
+ if (svga->rebind.val) {
+ ret = svga_rebind_framebuffer_bindings(svga);
if (ret != PIPE_OK)
return ret;
- memcpy(vdecl, hwtnl->cmd.vdecl,
- hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);
-
- for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
- /* Given rangeHint is considered to be relative to indexBias, and
- * indexBias varies per primitive, we cannot accurately supply an
- * rangeHint when emitting more than one primitive per draw command.
- */
- if (hwtnl->cmd.prim_count == 1) {
- vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
- vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
- }
- else {
- vdecl[i].rangeHint.first = 0;
- vdecl[i].rangeHint.last = 0;
- }
+ ret = svga_rebind_shaders(svga);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+
+ ret = validate_sampler_resources(svga);
+ if (ret != PIPE_OK)
+ return ret;
- swc->surface_relocation(swc, &vdecl[i].array.surfaceId, NULL,
- vb_handle[i], SVGA_RELOC_READ);
+ ret = validate_constant_buffers(svga);
+ if (ret != PIPE_OK)
+ return ret;
+
+ /* Get handle for each referenced vertex buffer */
+ for (i = 0; i < vbuf_count; i++) {
+ struct svga_buffer *sbuf = svga_buffer(hwtnl->cmd.vbufs[i].buffer);
+
+ if (sbuf) {
+ assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER);
+ vb_handle[i] = svga_buffer_handle(svga, &sbuf->b.b);
+ if (vb_handle[i] == NULL)
+ return PIPE_ERROR_OUT_OF_MEMORY;
}
+ else {
+ vb_handle[i] = NULL;
+ }
+ }
- memcpy(prim, hwtnl->cmd.prim,
- hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);
+ /* Get handles for the index buffers */
+ if (ib) {
+ struct svga_buffer *sbuf = svga_buffer(ib);
- for (i = 0; i < hwtnl->cmd.prim_count; i++) {
- swc->surface_relocation(swc, &prim[i].indexArray.surfaceId, NULL,
- ib_handle[i], SVGA_RELOC_READ);
- pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
+ assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER);
+ (void) sbuf; /* silence unused var warning */
+
+ ib_handle = svga_buffer_handle(svga, ib);
+ if (ib_handle == NULL)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+ else {
+ ib_handle = NULL;
+ }
+
+ /* setup vertex attribute input layout */
+ if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) {
+ ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
+ hwtnl->cmd.vdecl_layout_id);
+ if (ret != PIPE_OK)
+ return ret;
+
+ svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id;
+ }
+
+ /* setup vertex buffers */
+ {
+ SVGA3dVertexBuffer buffers[PIPE_MAX_ATTRIBS];
+
+ for (i = 0; i < vbuf_count; i++) {
+ buffers[i].stride = hwtnl->cmd.vbufs[i].stride;
+ buffers[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
+ }
+ if (vbuf_count > 0) {
+ ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, vbuf_count,
+ 0, /* startBuffer */
+ buffers, vb_handle);
+ if (ret != PIPE_OK)
+ return ret;
}
+ }
+
+ /* Set primitive type (line, tri, etc) */
+ if (svga->state.hw_draw.topology != range->primType) {
+ ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType);
+ if (ret != PIPE_OK)
+ return ret;
+
+ svga->state.hw_draw.topology = range->primType;
+ }
- SVGA_FIFOCommitAll(swc);
- hwtnl->cmd.prim_count = 0;
+ if (ib_handle) {
+ /* indexed drawing */
+ SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth);
+
+ /* setup index buffer */
+ ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
+ indexFormat,
+ range->indexArray.offset);
+ if (ret != PIPE_OK)
+ return ret;
+
+ if (instance_count > 1) {
+ ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
+ vcount,
+ instance_count,
+ 0, /* startIndexLocation */
+ range->indexBias,
+ start_instance);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ else {
+ /* non-instanced drawing */
+ ret = SVGA3D_vgpu10_DrawIndexed(svga->swc,
+ vcount,
+ 0, /* startIndexLocation */
+ range->indexBias);
+ if (ret != PIPE_OK)
+ return ret;
+ }
}
+ else {
+ /* non-indexed drawing */
+ if (instance_count > 1) {
+ ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
+ vcount,
+ instance_count,
+ range->indexBias,
+ start_instance);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ else {
+ /* non-instanced */
+ ret = SVGA3D_vgpu10_Draw(svga->swc,
+ vcount,
+ range->indexBias);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ }
+
+ hwtnl->cmd.prim_count = 0;
return PIPE_OK;
}
+
+/**
+ * Emit any pending drawing commands to the command buffer.
+ * When we receive VGPU9 drawing commands we accumulate them and don't
+ * immediately emit them into the command buffer.
+ * This function needs to be called before we change state that could
+ * affect those pending draws.
+ */
+enum pipe_error
+svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
+{
+   /* VGPU10 draws are emitted immediately; we only queue up primitives
+    * for VGPU9, so there is nothing to flush here.
+    */
+   if (svga_have_vgpu10(hwtnl->svga))
+      return PIPE_OK;
+
+   /* VGPU9 with an empty queue: nothing pending */
+   if (hwtnl->cmd.prim_count == 0)
+      return PIPE_OK;
+
+   /* VGPU9: emit the accumulated primitives */
+   return draw_vgpu9(hwtnl);
+}
+
+
void
svga_hwtnl_set_index_bias(struct svga_hwtnl *hwtnl, int index_bias)
{
{
unsigned i;
+ assert(!svga_have_vgpu10(hwtnl->svga));
+
for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
- struct pipe_resource *vb = hwtnl->cmd.vdecl_vb[i];
- unsigned size = vb ? vb->width0 : 0;
+ unsigned j = hwtnl->cmd.vdecl_buffer_index[i];
+ const struct pipe_vertex_buffer *vb = &hwtnl->cmd.vbufs[j];
+ unsigned size = vb->buffer ? vb->buffer->width0 : 0;
unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
int index_bias = (int) range->indexBias + hwtnl->index_bias;
unsigned width;
+ if (size == 0)
+ continue;
+
assert(vb);
assert(size);
assert(offset < size);
assert(min_index <= max_index);
+ (void) width;
+ (void) stride;
+ (void) offset;
+ (void) size;
switch (hwtnl->cmd.vdecl[i].identity.type) {
case SVGA3D_DECLTYPE_FLOAT1:
assert(size);
assert(offset < size);
assert(stride);
+ (void) size;
+ (void) offset;
+ (void) stride;
switch (range->primType) {
case SVGA3D_PRIMITIVE_POINTLIST:
}
+/**
+ * All drawing filters down into this function, either directly
+ * on the hardware path or after doing software vertex processing.
+ *
+ * With VGPU10 the draw is emitted immediately through draw_vgpu10(); if
+ * that fails, the context is flushed and the draw is attempted once more.
+ * Otherwise the primitive range, its index buffer reference and its
+ * min/max indices are appended to the hwtnl->cmd queue, after flushing the
+ * queue when prim_count + 1 would reach QSZ.
+ *
+ * \return PIPE_OK, the result of the VGPU10 retry, or the error from
+ * svga_hwtnl_flush() when the queue could not be flushed
+ */
enum pipe_error
svga_hwtnl_prim(struct svga_hwtnl *hwtnl,
const SVGA3dPrimitiveRange * range,
+ unsigned vcount,
unsigned min_index,
- unsigned max_index, struct pipe_resource *ib)
+ unsigned max_index, struct pipe_resource *ib,
+ unsigned start_instance, unsigned instance_count)
{
enum pipe_error ret = PIPE_OK;
+ if (svga_have_vgpu10(hwtnl->svga)) {
+ /* draw immediately */
+ ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib,
+ start_instance, instance_count);
+ if (ret != PIPE_OK) {
+ svga_context_flush(hwtnl->svga, NULL);
+ ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib,
+ start_instance, instance_count);
+ assert(ret == PIPE_OK);
+ }
+ }
+ else {
+ /* batch up drawing commands */
#ifdef DEBUG
- check_draw_params(hwtnl, range, min_index, max_index, ib);
+ check_draw_params(hwtnl, range, min_index, max_index, ib);
+ assert(start_instance == 0);
+ assert(instance_count <= 1);
+#else
+ (void) check_draw_params;
#endif
- if (hwtnl->cmd.prim_count + 1 >= QSZ) {
- ret = svga_hwtnl_flush(hwtnl);
- if (ret != PIPE_OK)
- return ret;
- }
+ if (hwtnl->cmd.prim_count + 1 >= QSZ) {
+ ret = svga_hwtnl_flush(hwtnl);
+ if (ret != PIPE_OK)
+ return ret;
+ }
- /* min/max indices are relative to bias */
- hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
- hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
+ /* min/max indices are relative to bias */
+ hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
+ hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
- hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
- hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
+ hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
+ hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
- pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
- hwtnl->cmd.prim_count++;
+ pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
+ hwtnl->cmd.prim_count++;
+ }
return ret;
}
struct svga_screen;
struct svga_context;
struct pipe_resource;
+struct u_upload_mgr;
-struct svga_hwtnl *
-svga_hwtnl_create(struct svga_context *svga);
+struct svga_hwtnl *svga_hwtnl_create(struct svga_context *svga);
-void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl );
+void svga_hwtnl_destroy(struct svga_hwtnl *hwtnl);
-void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl,
- boolean flatshade,
- boolean flatshade_first );
+void svga_hwtnl_set_flatshade(struct svga_hwtnl *hwtnl,
+ boolean flatshade, boolean flatshade_first);
-void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl,
- unsigned mode );
+void svga_hwtnl_set_fillmode(struct svga_hwtnl *hwtnl, unsigned mode);
-void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl,
- unsigned i,
- const SVGA3dVertexDecl *decl,
- struct pipe_resource *vb);
+void
+svga_hwtnl_vertex_decls(struct svga_hwtnl *hwtnl,
+ unsigned count,
+ const SVGA3dVertexDecl * decls,
+ const unsigned *buffer_indexes,
+ SVGA3dElementLayoutId layoutId);
-void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl,
- unsigned count );
+void
+svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl,
+ unsigned count, struct pipe_vertex_buffer *buffers);
-
-enum pipe_error
-svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl,
- unsigned prim,
- unsigned start,
- unsigned count);
+enum pipe_error
+svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
+ unsigned prim, unsigned start, unsigned count,
+ unsigned start_instance, unsigned instance_count);
enum pipe_error
-svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
- struct pipe_resource *indexBuffer,
- unsigned index_size,
- int index_bias,
- unsigned min_index,
- unsigned max_index,
- unsigned prim,
- unsigned start,
- unsigned count );
+svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
+ struct pipe_resource *indexBuffer,
+ unsigned index_size,
+ int index_bias,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned prim, unsigned start, unsigned count,
+ unsigned start_instance, unsigned instance_count);
boolean
-svga_hwtnl_is_buffer_referred( struct svga_hwtnl *hwtnl,
- struct pipe_resource *buffer );
+svga_hwtnl_is_buffer_referred(struct svga_hwtnl *hwtnl,
+ struct pipe_resource *buffer);
-enum pipe_error
-svga_hwtnl_flush( struct svga_hwtnl *hwtnl );
+enum pipe_error svga_hwtnl_flush(struct svga_hwtnl *hwtnl);
-void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
- int index_bias);
+void svga_hwtnl_set_index_bias(struct svga_hwtnl *hwtnl, int index_bias);
#endif /* SVGA_DRAW_H_ */
struct pipe_resource *dst = NULL;
void *dst_map = NULL;
- dst = pipe_buffer_create(pipe->screen,
- PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_DEFAULT, size);
+ dst = pipe_buffer_create(pipe->screen, PIPE_BIND_INDEX_BUFFER,
+ PIPE_USAGE_IMMUTABLE, size);
if (dst == NULL)
goto fail;
static enum pipe_error
simple_draw_arrays(struct svga_hwtnl *hwtnl,
- unsigned prim, unsigned start, unsigned count)
+ unsigned prim, unsigned start, unsigned count,
+ unsigned start_instance, unsigned instance_count)
{
SVGA3dPrimitiveRange range;
unsigned hw_prim;
* looking at those numbers knows to adjust them by
* range.indexBias.
*/
- return svga_hwtnl_prim(hwtnl, &range, 0, count - 1, NULL);
+ return svga_hwtnl_prim(hwtnl, &range, count,
+ 0, count - 1, NULL,
+ start_instance, instance_count);
}
enum pipe_error
svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
- unsigned prim, unsigned start, unsigned count)
+ unsigned prim, unsigned start, unsigned count,
+ unsigned start_instance, unsigned instance_count)
{
unsigned gen_prim, gen_size, gen_nr, gen_type;
u_generate_func gen_func;
}
if (gen_type == U_GENERATE_LINEAR) {
- return simple_draw_arrays(hwtnl, gen_prim, start, count);
+ return simple_draw_arrays(hwtnl, gen_prim, start, count,
+ start_instance, instance_count);
}
else {
struct pipe_resource *gen_buf = NULL;
start,
0,
count - 1,
- gen_prim, 0, gen_nr);
-
+ gen_prim, 0, gen_nr,
+ start_instance,
+ instance_count);
if (ret != PIPE_OK)
goto done;
#include "util/u_inlines.h"
#include "util/u_prim.h"
+#include "util/u_upload_mgr.h"
#include "indices/u_indices.h"
#include "svga_cmd.h"
struct pipe_context *pipe = &hwtnl->svga->pipe;
struct pipe_transfer *src_transfer = NULL;
struct pipe_transfer *dst_transfer = NULL;
- unsigned size;
+ unsigned size = index_size * nr;
const void *src_map = NULL;
struct pipe_resource *dst = NULL;
void *dst_map = NULL;
unsigned index_size, int index_bias,
unsigned min_index, unsigned max_index,
unsigned prim, unsigned start,
- unsigned count)
+ unsigned count,
+ unsigned start_instance,
+ unsigned instance_count)
{
SVGA3dPrimitiveRange range;
unsigned hw_prim;
if (hw_count == 0)
return PIPE_OK; /* nothing to draw */
- /* We should never see user-space buffers in the driver. The vbuf
- * module should have converted them into real buffers.
- */
- if (index_buffer)
- assert(!svga_buffer_is_user_buffer(index_buffer));
-
range.primType = hw_prim;
range.primitiveCount = hw_count;
range.indexArray.offset = index_offset;
range.indexWidth = index_size;
range.indexBias = index_bias;
- return svga_hwtnl_prim(hwtnl, &range, min_index, max_index, index_buffer);
+ return svga_hwtnl_prim(hwtnl, &range, count,
+ min_index, max_index, index_buffer,
+ start_instance, instance_count);
}
struct pipe_resource *index_buffer,
unsigned index_size, int index_bias,
unsigned min_index, unsigned max_index,
- unsigned prim, unsigned start, unsigned count)
+ unsigned prim, unsigned start, unsigned count,
+ unsigned start_instance, unsigned instance_count)
{
unsigned gen_prim, gen_size, gen_nr, gen_type;
u_translate_func gen_func;
index_bias,
min_index,
max_index,
- gen_prim, start, count);
+ gen_prim, start, count,
+ start_instance,
+ instance_count);
}
else {
struct pipe_resource *gen_buf = NULL;
index_bias,
min_index,
max_index,
- gen_prim, 0, gen_nr);
+ gen_prim, 0, gen_nr,
+ start_instance,
+ instance_count);
if (ret != PIPE_OK)
goto done;
(1 << PIPE_PRIM_LINE_STRIP) |
(1 << PIPE_PRIM_TRIANGLES) |
(1 << PIPE_PRIM_TRIANGLE_STRIP) |
- (1 << PIPE_PRIM_TRIANGLE_FAN));
+ (1 << PIPE_PRIM_TRIANGLE_FAN) |
+ (1 << PIPE_PRIM_LINES_ADJACENCY) |
+ (1 << PIPE_PRIM_LINE_STRIP_ADJACENCY) |
+ (1 << PIPE_PRIM_TRIANGLES_ADJACENCY) |
+ (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY));
/**
* PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP or PIPE_PRIM_POLYGON. We convert
* those to other types of primitives with index/translation code.
*/
-static inline unsigned
-svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count)
+static inline SVGA3dPrimitiveType
+svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count)
{
switch (mode) {
case PIPE_PRIM_POINTS:
*prim_count = vcount - 2;
return SVGA3D_PRIMITIVE_TRIANGLEFAN;
+ case PIPE_PRIM_LINES_ADJACENCY:
+ *prim_count = vcount / 4;
+ return SVGA3D_PRIMITIVE_LINELIST_ADJ;
+
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ *prim_count = vcount - 3;
+ return SVGA3D_PRIMITIVE_LINESTRIP_ADJ;
+
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ *prim_count = vcount / 6;
+ return SVGA3D_PRIMITIVE_TRIANGLELIST_ADJ;
+
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ *prim_count = vcount / 2 - 2 ;
+ return SVGA3D_PRIMITIVE_TRIANGLESTRIP_ADJ;
+
default:
assert(0);
*prim_count = 0;
+/**
+ * Draw state held by the hwtnl module: the current vertex declarations and
+ * vertex buffers, plus a queue of up to QSZ primitive ranges together with
+ * the index buffer and min/max index recorded for each queued primitive.
+ */
struct draw_cmd {
struct svga_winsys_context *swc;
+ /* vertex layout info */
SVGA3dVertexDecl vdecl[SVGA3D_INPUTREG_MAX];
- struct pipe_resource *vdecl_vb[SVGA3D_INPUTREG_MAX];
unsigned vdecl_count;
+ SVGA3dElementLayoutId vdecl_layout_id;
+ unsigned vdecl_buffer_index[SVGA3D_INPUTREG_MAX];
+
+ /* vertex buffer info */
+ struct pipe_vertex_buffer vbufs[SVGA3D_INPUTREG_MAX];
+ unsigned vbuf_count;
+ /* queued primitives; entry N of prim, prim_ib, min_index and max_index
+ * all describe the same queued primitive
+ */
SVGA3dPrimitiveRange prim[QSZ];
struct pipe_resource *prim_ib[QSZ];
- unsigned prim_count;
+ unsigned prim_count; /**< number of primitives for this draw */
unsigned min_index[QSZ];
unsigned max_index[QSZ];
};
enum pipe_error
svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
const SVGA3dPrimitiveRange *range,
+ unsigned vcount,
unsigned min_index,
unsigned max_index,
- struct pipe_resource *ib );
+ struct pipe_resource *ib,
+ unsigned start_instance, unsigned instance_count);
enum pipe_error
svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
unsigned max_index,
unsigned prim,
unsigned start,
- unsigned count );
+ unsigned count,
+ unsigned start_instance,
+ unsigned instance_count);
#endif
#include "pipe/p_format.h"
#include "util/u_debug.h"
+#include "util/u_format.h"
#include "util/u_memory.h"
#include "svga_winsys.h"
#include "svga_format.h"
+/**
+ * Describes mapping from gallium formats to SVGA vertex/pixel formats.
+ * Entries are written as positional initializers, so the field order below
+ * must not change.
+ */
+struct vgpu10_format_entry
+{
+ enum pipe_format pformat; /**< gallium format this entry describes */
+ SVGA3dSurfaceFormat vertex_format; /**< SVGA3D vertex format, SVGA3D_FORMAT_INVALID where the table has none */
+ SVGA3dSurfaceFormat pixel_format; /**< SVGA3D pixel format, SVGA3D_FORMAT_INVALID where the table has none */
+ unsigned flags; /**< 0 or a bitwise OR of VF_* flags */
+};
+
+
+static const struct vgpu10_format_entry format_conversion_table[] =
+{
+ /* Gallium format SVGA3D vertex format SVGA3D pixel format Flags */
+ { PIPE_FORMAT_NONE, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_B8G8R8A8_UNORM, SVGA3D_B8G8R8A8_UNORM, SVGA3D_B8G8R8A8_UNORM, 0 },
+ { PIPE_FORMAT_B8G8R8X8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8X8_UNORM, 0 },
+ { PIPE_FORMAT_A8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_X8R8G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_B5G5R5A1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G5R5A1_UNORM, 0 },
+ { PIPE_FORMAT_B4G4R4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_A4R4G4B4, 0 },
+ { PIPE_FORMAT_B5G6R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_B5G6R5_UNORM, 0 },
+ { PIPE_FORMAT_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, 0 },
+ { PIPE_FORMAT_L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_LUMINANCE8, 0 },
+ { PIPE_FORMAT_A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_A8_UNORM, 0 },
+ { PIPE_FORMAT_I8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L8A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_UYVY, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_YUYV, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_Z16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_Z_D16, 0 },
+ { PIPE_FORMAT_Z32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_Z32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT, 0 },
+ { PIPE_FORMAT_Z24_UNORM_S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, 0 },
+ { PIPE_FORMAT_S8_UINT_Z24_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_Z24X8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, 0 },
+ { PIPE_FORMAT_X8Z24_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R64_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R64G64_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R64G64B64_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R64G64B64A64_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32_FLOAT, SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, 0 },
+ { PIPE_FORMAT_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, 0 },
+ { PIPE_FORMAT_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, 0 },
+ { PIPE_FORMAT_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, 0 },
+ { PIPE_FORMAT_R32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32G32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32G32B32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32G32B32A32_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32_USCALED, SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
+ { PIPE_FORMAT_R32G32_USCALED, SVGA3D_R32G32_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
+ { PIPE_FORMAT_R32G32B32_USCALED, SVGA3D_R32G32B32_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
+ { PIPE_FORMAT_R32G32B32A32_USCALED, SVGA3D_R32G32B32A32_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
+ { PIPE_FORMAT_R32_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32G32_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32G32B32_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32G32B32A32_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32_SSCALED, SVGA3D_R32_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
+ { PIPE_FORMAT_R32G32_SSCALED, SVGA3D_R32G32_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
+ { PIPE_FORMAT_R32G32B32_SSCALED, SVGA3D_R32G32B32_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
+ { PIPE_FORMAT_R32G32B32A32_SSCALED, SVGA3D_R32G32B32A32_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
+ { PIPE_FORMAT_R16_UNORM, SVGA3D_R16_UNORM, SVGA3D_R16_UNORM, 0 },
+ { PIPE_FORMAT_R16G16_UNORM, SVGA3D_R16G16_UNORM, SVGA3D_R16G16_UNORM, 0 },
+ { PIPE_FORMAT_R16G16B16_UNORM, SVGA3D_R16G16B16A16_UNORM, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
+ { PIPE_FORMAT_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, SVGA3D_R16G16B16A16_UNORM, 0 },
+ { PIPE_FORMAT_R16_USCALED, SVGA3D_R16_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
+ { PIPE_FORMAT_R16G16_USCALED, SVGA3D_R16G16_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
+ { PIPE_FORMAT_R16G16B16_USCALED, SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_U_TO_F_CAST },
+ { PIPE_FORMAT_R16G16B16A16_USCALED, SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
+ { PIPE_FORMAT_R16_SNORM, SVGA3D_R16_SNORM, SVGA3D_R16_SNORM, 0 },
+ { PIPE_FORMAT_R16G16_SNORM, SVGA3D_R16G16_SNORM, SVGA3D_R16G16_SNORM, 0 },
+ { PIPE_FORMAT_R16G16B16_SNORM, SVGA3D_R16G16B16A16_SNORM, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
+ { PIPE_FORMAT_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, SVGA3D_R16G16B16A16_SNORM, 0 },
+ { PIPE_FORMAT_R16_SSCALED, SVGA3D_R16_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
+ { PIPE_FORMAT_R16G16_SSCALED, SVGA3D_R16G16_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
+ { PIPE_FORMAT_R16G16B16_SSCALED, SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_I_TO_F_CAST },
+ { PIPE_FORMAT_R16G16B16A16_SSCALED, SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
+ { PIPE_FORMAT_R8_UNORM, SVGA3D_R8_UNORM, SVGA3D_R8_UNORM, 0 },
+ { PIPE_FORMAT_R8G8_UNORM, SVGA3D_R8G8_UNORM, SVGA3D_R8G8_UNORM, 0 },
+ { PIPE_FORMAT_R8G8B8_UNORM, SVGA3D_R8G8B8A8_UNORM, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
+ { PIPE_FORMAT_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, SVGA3D_R8G8B8A8_UNORM, 0 },
+ { PIPE_FORMAT_X8B8G8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8_USCALED, SVGA3D_R8_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
+ { PIPE_FORMAT_R8G8_USCALED, SVGA3D_R8G8_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
+ { PIPE_FORMAT_R8G8B8_USCALED, SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_U_TO_F_CAST },
+ { PIPE_FORMAT_R8G8B8A8_USCALED, SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
+ { 73, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8_SNORM, SVGA3D_R8_SNORM, SVGA3D_R8_SNORM, 0 },
+ { PIPE_FORMAT_R8G8_SNORM, SVGA3D_R8G8_SNORM, SVGA3D_R8G8_SNORM, 0 },
+ { PIPE_FORMAT_R8G8B8_SNORM, SVGA3D_R8G8B8A8_SNORM, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
+ { PIPE_FORMAT_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, SVGA3D_R8G8B8A8_SNORM, 0 },
+ { 78, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { 79, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { 80, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { 81, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8_SSCALED, SVGA3D_R8_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
+ { PIPE_FORMAT_R8G8_SSCALED, SVGA3D_R8G8_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
+ { PIPE_FORMAT_R8G8B8_SSCALED, SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 | VF_I_TO_F_CAST },
+ { PIPE_FORMAT_R8G8B8A8_SSCALED, SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, VF_I_TO_F_CAST },
+ { 86, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32_FIXED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32G32_FIXED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32G32B32_FIXED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32G32B32A32_FIXED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R16_FLOAT, SVGA3D_R16_FLOAT, SVGA3D_R16_FLOAT, 0 },
+ { PIPE_FORMAT_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, SVGA3D_R16G16_FLOAT, 0 },
+ { PIPE_FORMAT_R16G16B16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
+ { PIPE_FORMAT_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, SVGA3D_R16G16B16A16_FLOAT, 0 },
+ { PIPE_FORMAT_L8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A8B8G8R8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_X8B8G8R8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_B8G8R8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8A8_UNORM_SRGB, 0 },
+ { PIPE_FORMAT_B8G8R8X8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_B8G8R8X8_UNORM_SRGB, 0 },
+ { PIPE_FORMAT_A8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_X8R8G8B8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8G8B8A8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_R8G8B8A8_UNORM_SRGB, 0 },
+ { PIPE_FORMAT_DXT1_RGB, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 },
+ { PIPE_FORMAT_DXT1_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 },
+ { PIPE_FORMAT_DXT3_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT3, 0 },
+ { PIPE_FORMAT_DXT5_RGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT5, 0 },
+ { PIPE_FORMAT_DXT1_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 },
+ { PIPE_FORMAT_DXT1_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT1, 0 },
+ { PIPE_FORMAT_DXT3_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT3, 0 },
+ { PIPE_FORMAT_DXT5_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_DXT5, 0 },
+ { PIPE_FORMAT_RGTC1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_UNORM, 0 },
+ { PIPE_FORMAT_RGTC1_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC4_SNORM, 0 },
+ { PIPE_FORMAT_RGTC2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC5_UNORM, 0 },
+ { PIPE_FORMAT_RGTC2_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_BC5_SNORM, 0 },
+ { PIPE_FORMAT_R8G8_B8G8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_G8R8_G8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8SG8SB8UX8U_NORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R5SG5SB6U_NORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A8B8G8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_B5G5R5X1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R10G10B10A2_USCALED, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_PUINT_TO_USCALED },
+ { PIPE_FORMAT_R11G11B10_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_R11G11B10_FLOAT, 0 },
+ { PIPE_FORMAT_R9G9B9E5_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_R9G9B9E5_SHAREDEXP, 0 },
+ { PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT_S8X24_UINT, 0 },
+ { PIPE_FORMAT_R1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R10G10B10X2_USCALED, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R10G10B10X2_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_B10G10R10A2_UNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_BGRA },
+ { PIPE_FORMAT_R10SG10SB10SA2U_NORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8G8Bx_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8G8B8X8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_B4G4R4X4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_X24S8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_S8X24_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_X32_S8X24_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_B2G3R3_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L16A16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_I16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_LATC1_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_LATC1_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_LATC2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_LATC2_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L8A8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_I8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L16A16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_I16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L16A16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_I16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L32A32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_I32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_YV12, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_YV16, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_IYUV, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_NV12, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_NV21, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A4R4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R4A4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8A8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R10G10B10A2_SSCALED, SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, VF_PUINT_TO_SSCALED },
+ { PIPE_FORMAT_R10G10B10A2_SNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_PUINT_TO_SNORM },
+ { PIPE_FORMAT_B10G10R10A2_USCALED, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_USCALED },
+ { PIPE_FORMAT_B10G10R10A2_SSCALED, SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_SSCALED },
+ { PIPE_FORMAT_B10G10R10A2_SNORM, SVGA3D_R10G10B10A2_UNORM, SVGA3D_FORMAT_INVALID, VF_BGRA | VF_PUINT_TO_SNORM },
+ { PIPE_FORMAT_R8_UINT, SVGA3D_R8_UINT, SVGA3D_R8_UINT, 0 },
+ { PIPE_FORMAT_R8G8_UINT, SVGA3D_R8G8_UINT, SVGA3D_R8G8_UINT, 0 },
+ { PIPE_FORMAT_R8G8B8_UINT, SVGA3D_R8G8B8A8_UINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
+ { PIPE_FORMAT_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, SVGA3D_R8G8B8A8_UINT, 0 },
+ { PIPE_FORMAT_R8_SINT, SVGA3D_R8_SINT, SVGA3D_R8_SINT, 0 },
+ { PIPE_FORMAT_R8G8_SINT, SVGA3D_R8G8_SINT, SVGA3D_R8G8_SINT, 0 },
+ { PIPE_FORMAT_R8G8B8_SINT, SVGA3D_R8G8B8A8_SINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
+ { PIPE_FORMAT_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, SVGA3D_R8G8B8A8_SINT, 0 },
+ { PIPE_FORMAT_R16_UINT, SVGA3D_R16_UINT, SVGA3D_R16_UINT, 0 },
+ { PIPE_FORMAT_R16G16_UINT, SVGA3D_R16G16_UINT, SVGA3D_R16G16_UINT, 0 },
+ { PIPE_FORMAT_R16G16B16_UINT, SVGA3D_R16G16B16A16_UINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
+ { PIPE_FORMAT_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, SVGA3D_R16G16B16A16_UINT, 0 },
+ { PIPE_FORMAT_R16_SINT, SVGA3D_R16_SINT, SVGA3D_R16_SINT, 0 },
+ { PIPE_FORMAT_R16G16_SINT, SVGA3D_R16G16_SINT, SVGA3D_R16G16_SINT, 0 },
+ { PIPE_FORMAT_R16G16B16_SINT, SVGA3D_R16G16B16A16_SINT, SVGA3D_FORMAT_INVALID, VF_W_TO_1 },
+ { PIPE_FORMAT_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, 0 },
+ { PIPE_FORMAT_R32_UINT, SVGA3D_R32_UINT, SVGA3D_R32_UINT, 0 },
+ { PIPE_FORMAT_R32G32_UINT, SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, 0 },
+ { PIPE_FORMAT_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, 0 },
+ { PIPE_FORMAT_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, 0 },
+ { PIPE_FORMAT_R32_SINT, SVGA3D_R32_SINT, SVGA3D_R32_SINT, 0 },
+ { PIPE_FORMAT_R32G32_SINT, SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, 0 },
+ { PIPE_FORMAT_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, 0 },
+ { PIPE_FORMAT_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, 0 },
+ { PIPE_FORMAT_A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_I8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L8A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_I8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L8A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_I16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L16A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_I16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L16A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_I32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L32A32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_I32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_L32A32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_B10G10R10A2_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ETC1_RGB8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8G8_R8B8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_G8R8_B8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8G8B8X8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8G8B8X8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8G8B8X8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8G8B8X8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_B10G10R10X2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R16G16B16X16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R16G16B16X16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R16G16B16X16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R16G16B16X16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R16G16B16X16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32G32B32X32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32G32B32X32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32G32B32X32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8A8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R16A16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R16A16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R16A16_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32A32_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8A8_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R8A8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R16A16_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R16A16_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32A32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R32A32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, 0 },
+ { PIPE_FORMAT_B5G6R5_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_BPTC_RGBA_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_BPTC_SRGBA, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_BPTC_RGB_FLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_BPTC_RGB_UFLOAT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A8L8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A8L8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A8L8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A16L16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_G8R8_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_G8R8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_G16R16_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_G16R16_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A8B8G8R8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_X8B8G8R8_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ETC2_RGB8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ETC2_SRGB8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ETC2_RGB8A1, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ETC2_SRGB8A1, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ETC2_RGBA8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ETC2_SRGBA8, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ETC2_R11_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ETC2_R11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ETC2_RG11_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_ETC2_RG11_SNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+};
+
+
+/**
+ * Look up a gallium format in format_conversion_table and return its vgpu10 vertex
+ * format and VF_x flags; out-of-range formats assert, then fall back to PIPE_FORMAT_NONE.
+ */
+void
+svga_translate_vertex_format_vgpu10(enum pipe_format format,
+ SVGA3dSurfaceFormat *svga_format,
+ unsigned *vf_flags)
+{
+ assert(format < Elements(format_conversion_table));
+ if (format >= Elements(format_conversion_table)) {
+ format = PIPE_FORMAT_NONE;
+ }
+ *svga_format = format_conversion_table[format].vertex_format;
+ *vf_flags = format_conversion_table[format].flags;
+}
+
+
/*
* Translate from gallium format to SVGA3D format.
*/
enum pipe_format format,
unsigned bind)
{
- switch(format) {
+ if (ss->sws->have_vgpu10) {
+ if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
+ return format_conversion_table[format].vertex_format;
+ }
+ else {
+ return format_conversion_table[format].pixel_format;
+ }
+ }
+ switch(format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
return SVGA3D_A8R8G8B8;
case PIPE_FORMAT_B8G8R8X8_UNORM:
return SVGA3D_A16B16G16R16;
case PIPE_FORMAT_Z16_UNORM:
+ assert(!ss->sws->have_vgpu10);
return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.z16 : SVGA3D_Z_D16;
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ assert(!ss->sws->have_vgpu10);
return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.s8z24 : SVGA3D_Z_D24S8;
case PIPE_FORMAT_X8Z24_UNORM:
+ assert(!ss->sws->have_vgpu10);
return bind & PIPE_BIND_SAMPLER_VIEW ? ss->depth.x8z24 : SVGA3D_Z_D24X8;
case PIPE_FORMAT_A8_UNORM:
* Format capability description entry.
*/
struct format_cap {
+ const char *name;
+
SVGA3dSurfaceFormat format;
/*
* Capability index corresponding to the format.
*/
- SVGA3dDevCapIndex index;
+ SVGA3dDevCapIndex devcap;
+
+ /* size of each pixel/block */
+ unsigned block_width, block_height, block_bytes;
/*
* Mask of supported SVGA3dFormatOp operations, to be inferred when the
/*
* Format capability description table.
*
- * Ordererd by increasing SVGA3dSurfaceFormat value, but with gaps.
+ * Ordered by increasing SVGA3dSurfaceFormat value, but with gaps.
+ *
+ * Note: there are some special cases below where we set devcap=0 and
+ * avoid querying the host. In particular, depth/stencil formats which
+ * can be rendered to and sampled from. For example, the gallium format
+ * PIPE_FORMAT_Z24_UNORM_S8_UINT is converted to SVGA3D_D24_UNORM_S8_UINT
+ * for rendering but converted to SVGA3D_R24_UNORM_X8_TYPELESS for sampling.
+ * If we want to query if a format supports both rendering and sampling the
+ * host will tell us no for both SVGA3D_D24_UNORM_S8_UINT and
+ * SVGA3D_R24_UNORM_X8_TYPELESS. So we override the host query for those
+ * formats and report that both can do rendering and sampling.
*/
static const struct format_cap format_cap_table[] = {
{
+ "SVGA3D_FORMAT_INVALID",
+ SVGA3D_FORMAT_INVALID, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_X8R8G8B8",
SVGA3D_X8R8G8B8,
SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8,
+ 1, 1, 4,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
SVGA3DFORMAT_OP_DISPLAYMODE |
- SVGA3DFORMAT_OP_3DACCELERATION |
- SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
- SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
+ "SVGA3D_A8R8G8B8",
SVGA3D_A8R8G8B8,
SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8,
+ 1, 1, 4,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
- SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
- SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
+ "SVGA3D_R5G6B5",
SVGA3D_R5G6B5,
SVGA3D_DEVCAP_SURFACEFMT_R5G6B5,
+ 1, 1, 2,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
SVGA3DFORMAT_OP_DISPLAYMODE |
- SVGA3DFORMAT_OP_3DACCELERATION |
- SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
- SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
+ "SVGA3D_X1R5G5B5",
SVGA3D_X1R5G5B5,
SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5,
+ 1, 1, 2,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
- SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
+ "SVGA3D_A1R5G5B5",
SVGA3D_A1R5G5B5,
SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5,
+ 1, 1, 2,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
- SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
- SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
+ "SVGA3D_A4R4G4B4",
SVGA3D_A4R4G4B4,
SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4,
+ 1, 1, 2,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
- SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
- /*
- * SVGA3D_Z_D32 is not yet supported, and has no corresponding
- * SVGA3D_DEVCAP_xxx.
- */
{
+ /*
+ * SVGA3D_Z_D32 is not yet supported, and has no corresponding
+ * SVGA3D_DEVCAP_xxx.
+ */
+ "SVGA3D_Z_D32",
+ SVGA3D_Z_D32, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_Z_D16",
SVGA3D_Z_D16,
SVGA3D_DEVCAP_SURFACEFMT_Z_D16,
- SVGA3DFORMAT_OP_ZSTENCIL |
- SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_ZSTENCIL
},
{
+ "SVGA3D_Z_D24S8",
SVGA3D_Z_D24S8,
SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8,
- SVGA3DFORMAT_OP_ZSTENCIL |
- SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_ZSTENCIL
},
{
+ "SVGA3D_Z_D15S1",
SVGA3D_Z_D15S1,
SVGA3D_DEVCAP_MAX,
- SVGA3DFORMAT_OP_ZSTENCIL |
- SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_ZSTENCIL
},
{
+ "SVGA3D_LUMINANCE8",
SVGA3D_LUMINANCE8,
SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8,
+ 1, 1, 1,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ SVGA3DFORMAT_OP_VOLUMETEXTURE
},
{
- SVGA3D_LUMINANCE8_ALPHA8,
- SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8,
- SVGA3DFORMAT_OP_TEXTURE |
- SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ /*
+ * SVGA3D_LUMINANCE4_ALPHA4 is not supported, and has no corresponding
+ * SVGA3D_DEVCAP_xxx.
+ */
+ "SVGA3D_LUMINANCE4_ALPHA4",
+ SVGA3D_LUMINANCE4_ALPHA4, 0, 0, 0, 0, 0
},
- /*
- * SVGA3D_LUMINANCE4_ALPHA4 is not supported, and has no corresponding
- * SVGA3D_DEVCAP_xxx.
- */
{
+ "SVGA3D_LUMINANCE16",
SVGA3D_LUMINANCE16,
SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16,
+ 1, 1, 2,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ SVGA3DFORMAT_OP_VOLUMETEXTURE
+ },
+ {
+ "SVGA3D_LUMINANCE8_ALPHA8",
+ SVGA3D_LUMINANCE8_ALPHA8,
+ SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE
},
{
+ "SVGA3D_DXT1",
SVGA3D_DXT1,
SVGA3D_DEVCAP_SURFACEFMT_DXT1,
+ 4, 4, 8,
SVGA3DFORMAT_OP_TEXTURE |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
+ "SVGA3D_DXT2",
SVGA3D_DXT2,
SVGA3D_DEVCAP_SURFACEFMT_DXT2,
+ 4, 4, 8,
SVGA3DFORMAT_OP_TEXTURE |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
+ "SVGA3D_DXT3",
SVGA3D_DXT3,
SVGA3D_DEVCAP_SURFACEFMT_DXT3,
+ 4, 4, 16,
SVGA3DFORMAT_OP_TEXTURE |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
+ "SVGA3D_DXT4",
SVGA3D_DXT4,
SVGA3D_DEVCAP_SURFACEFMT_DXT4,
+ 4, 4, 16,
SVGA3DFORMAT_OP_TEXTURE |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
+ "SVGA3D_DXT5",
SVGA3D_DXT5,
SVGA3D_DEVCAP_SURFACEFMT_DXT5,
+ 4, 4, 8,
SVGA3DFORMAT_OP_TEXTURE |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
+ "SVGA3D_BUMPU8V8",
SVGA3D_BUMPU8V8,
SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8,
+ 1, 1, 2,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_BUMPMAP |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ SVGA3DFORMAT_OP_VOLUMETEXTURE
},
- /*
- * SVGA3D_BUMPL6V5U5 is unsupported; it has no corresponding
- * SVGA3D_DEVCAP_xxx.
- */
{
+ /*
+ * SVGA3D_BUMPL6V5U5 is unsupported; it has no corresponding
+ * SVGA3D_DEVCAP_xxx.
+ */
+ "SVGA3D_BUMPL6V5U5",
+ SVGA3D_BUMPL6V5U5, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BUMPX8L8V8U8",
SVGA3D_BUMPX8L8V8U8,
SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8,
+ 1, 1, 4,
SVGA3DFORMAT_OP_TEXTURE |
- SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_BUMPMAP |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ SVGA3DFORMAT_OP_CUBETEXTURE
+ },
+ {
+ "SVGA3D_FORMAT_DEAD1",
+ SVGA3D_FORMAT_DEAD1, 0, 0, 0, 0, 0
},
- /*
- * SVGA3D_BUMPL8V8U8 is unsupported; it has no corresponding
- * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead.
- */
{
+ "SVGA3D_ARGB_S10E5",
SVGA3D_ARGB_S10E5,
SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5,
+ 1, 1, 2,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
+ "SVGA3D_ARGB_S23E8",
SVGA3D_ARGB_S23E8,
SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8,
+ 1, 1, 4,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
+ "SVGA3D_A2R10G10B10",
SVGA3D_A2R10G10B10,
SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10,
+ 1, 1, 4,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_CONVERT_TO_ARGB |
- SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
- /*
- * SVGA3D_V8U8 is unsupported; it has no corresponding
- * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPU8V8 should be used instead.
- */
{
+ /*
+ * SVGA3D_V8U8 is unsupported; it has no corresponding
+ * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPU8V8 should be used instead.
+ */
+ "SVGA3D_V8U8",
+ SVGA3D_V8U8, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_Q8W8V8U8",
SVGA3D_Q8W8V8U8,
SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8,
+ 1, 1, 4,
SVGA3DFORMAT_OP_TEXTURE |
- SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_BUMPMAP |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ SVGA3DFORMAT_OP_CUBETEXTURE
},
{
+ "SVGA3D_CxV8U8",
SVGA3D_CxV8U8,
SVGA3D_DEVCAP_SURFACEFMT_CxV8U8,
- SVGA3DFORMAT_OP_TEXTURE |
- SVGA3DFORMAT_OP_BUMPMAP |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE
+ },
+ {
+ /*
+ * SVGA3D_X8L8V8U8 is unsupported; it has no corresponding
+ * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead.
+ */
+ "SVGA3D_X8L8V8U8",
+ SVGA3D_X8L8V8U8, 0, 0, 0, 0, 0
},
- /*
- * SVGA3D_X8L8V8U8 is unsupported; it has no corresponding
- * SVGA3D_DEVCAP_xxx. SVGA3D_BUMPX8L8V8U8 should be used instead.
- */
{
+ "SVGA3D_A2W10V10U10",
SVGA3D_A2W10V10U10,
SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10,
- SVGA3DFORMAT_OP_TEXTURE |
- SVGA3DFORMAT_OP_BUMPMAP |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE
},
{
+ "SVGA3D_ALPHA8",
SVGA3D_ALPHA8,
SVGA3D_DEVCAP_SURFACEFMT_ALPHA8,
+ 1, 1, 1,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ SVGA3DFORMAT_OP_VOLUMETEXTURE
},
{
+ "SVGA3D_R_S10E5",
SVGA3D_R_S10E5,
SVGA3D_DEVCAP_SURFACEFMT_R_S10E5,
+ 1, 1, 2,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
+ "SVGA3D_R_S23E8",
SVGA3D_R_S23E8,
SVGA3D_DEVCAP_SURFACEFMT_R_S23E8,
+ 1, 1, 4,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
+ "SVGA3D_RG_S10E5",
SVGA3D_RG_S10E5,
SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5,
+ 1, 1, 2,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
+ "SVGA3D_RG_S23E8",
SVGA3D_RG_S23E8,
SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8,
+ 1, 1, 4,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SRGBREAD |
- SVGA3DFORMAT_OP_SRGBWRITE |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
- /*
- * SVGA3D_BUFFER is a placeholder format for index/vertex buffers.
- */
{
+ /*
+ * SVGA3D_BUFFER is a placeholder format for index/vertex buffers.
+ */
+ "SVGA3D_BUFFER",
+ SVGA3D_BUFFER, 0, 1, 1, 1, 0
+ },
+ {
+ "SVGA3D_Z_D24X8",
SVGA3D_Z_D24X8,
SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8,
- SVGA3DFORMAT_OP_ZSTENCIL |
- SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_ZSTENCIL
},
{
+ "SVGA3D_V16U16",
SVGA3D_V16U16,
SVGA3D_DEVCAP_SURFACEFMT_V16U16,
+ 1, 1, 4,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
- SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_BUMPMAP |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN
+ SVGA3DFORMAT_OP_VOLUMETEXTURE
},
{
+ "SVGA3D_G16R16",
SVGA3D_G16R16,
SVGA3D_DEVCAP_SURFACEFMT_G16R16,
+ 1, 1, 4,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
+ "SVGA3D_A16B16G16R16",
SVGA3D_A16B16G16R16,
SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16,
+ 1, 1, 8,
SVGA3DFORMAT_OP_TEXTURE |
SVGA3DFORMAT_OP_CUBETEXTURE |
SVGA3DFORMAT_OP_VOLUMETEXTURE |
- SVGA3DFORMAT_OP_OFFSCREENPLAIN |
- SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET |
SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
+ "SVGA3D_UYVY",
SVGA3D_UYVY,
SVGA3D_DEVCAP_SURFACEFMT_UYVY,
+ 0, 0, 0,
0
},
{
+ "SVGA3D_YUY2",
SVGA3D_YUY2,
SVGA3D_DEVCAP_SURFACEFMT_YUY2,
+ 0, 0, 0,
0
},
{
+ "SVGA3D_NV12",
SVGA3D_NV12,
SVGA3D_DEVCAP_SURFACEFMT_NV12,
+ 0, 0, 0,
0
},
{
+ "SVGA3D_AYUV",
SVGA3D_AYUV,
SVGA3D_DEVCAP_SURFACEFMT_AYUV,
+ 0, 0, 0,
0
},
{
- SVGA3D_Z_DF16,
- SVGA3D_DEVCAP_SURFACEFMT_Z_DF16,
- 0
+ "SVGA3D_R32G32B32A32_TYPELESS",
+ SVGA3D_R32G32B32A32_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS,
+ 1, 1, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
- SVGA3D_Z_DF24,
- SVGA3D_DEVCAP_SURFACEFMT_Z_DF24,
- 0
+ "SVGA3D_R32G32B32A32_UINT",
+ SVGA3D_R32G32B32A32_UINT,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT,
+ 1, 1, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
{
- SVGA3D_Z_D24S8_INT,
- SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT,
- 0
+ "SVGA3D_R32G32B32A32_SINT",
+ SVGA3D_R32G32B32A32_SINT,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT,
+ 1, 1, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
},
-};
-
-
-/*
- * Get format capabilities from the host. It takes in consideration
- * deprecated/unsupported formats, and formats which are implicitely assumed to
- * be supported when the host does not provide an explicit capability entry.
- */
-void
-svga_get_format_cap(struct svga_screen *ss,
- SVGA3dSurfaceFormat format,
- SVGA3dSurfaceFormatCaps *caps)
-{
- const struct format_cap *entry;
-
- for (entry = format_cap_table; entry < format_cap_table + Elements(format_cap_table); ++entry) {
- if (entry->format == format) {
- struct svga_winsys_screen *sws = ss->sws;
- SVGA3dDevCapResult result;
-
- if (sws->get_cap(sws, entry->index, &result)) {
- /* Explicitly advertised format */
- caps->value = result.u;
- } else {
- /* Implicitly advertised format -- use default caps */
- caps->value = entry->defaultOperations;
- }
-
- return;
- }
- }
-
- /* Unsupported format */
- caps->value = 0;
-}
-
-
-/**
- * Return block size and bytes per block for the given SVGA3D format.
- * block_width and block_height are one for uncompressed formats and
- * greater than one for compressed formats.
- * Note: we don't handle formats that are unsupported, according to
- * the format_cap_table above.
- */
-void
-svga_format_size(SVGA3dSurfaceFormat format,
- unsigned *block_width,
- unsigned *block_height,
- unsigned *bytes_per_block)
-{
- *block_width = *block_height = 1;
-
- switch (format) {
- case SVGA3D_X8R8G8B8:
- case SVGA3D_A8R8G8B8:
- *bytes_per_block = 4;
- return;
-
- case SVGA3D_R5G6B5:
- case SVGA3D_X1R5G5B5:
- case SVGA3D_A1R5G5B5:
- case SVGA3D_A4R4G4B4:
- *bytes_per_block = 2;
- return;
-
- case SVGA3D_Z_D32:
- *bytes_per_block = 4;
- return;
-
- case SVGA3D_Z_D16:
- *bytes_per_block = 2;
- return;
-
- case SVGA3D_Z_D24S8:
- *bytes_per_block = 4;
- return;
-
- case SVGA3D_Z_D15S1:
- *bytes_per_block = 2;
- return;
-
- case SVGA3D_LUMINANCE8:
- case SVGA3D_LUMINANCE4_ALPHA4:
- *bytes_per_block = 1;
- return;
-
- case SVGA3D_LUMINANCE16:
- case SVGA3D_LUMINANCE8_ALPHA8:
- *bytes_per_block = 2;
- return;
-
- case SVGA3D_DXT1:
- case SVGA3D_DXT2:
- *block_width = *block_height = 4;
- *bytes_per_block = 8;
- return;
-
- case SVGA3D_DXT3:
- case SVGA3D_DXT4:
- case SVGA3D_DXT5:
- *block_width = *block_height = 4;
- *bytes_per_block = 16;
- return;
-
- case SVGA3D_BUMPU8V8:
- case SVGA3D_BUMPL6V5U5:
- *bytes_per_block = 2;
- return;
-
- case SVGA3D_BUMPX8L8V8U8:
- *bytes_per_block = 4;
- return;
-
- case SVGA3D_ARGB_S10E5:
- *bytes_per_block = 8;
- return;
-
- case SVGA3D_ARGB_S23E8:
- *bytes_per_block = 16;
- return;
-
- case SVGA3D_A2R10G10B10:
- *bytes_per_block = 4;
- return;
-
- case SVGA3D_Q8W8V8U8:
- *bytes_per_block = 4;
- return;
-
- case SVGA3D_CxV8U8:
- *bytes_per_block = 2;
- return;
-
- case SVGA3D_X8L8V8U8:
- case SVGA3D_A2W10V10U10:
- *bytes_per_block = 4;
- return;
-
- case SVGA3D_ALPHA8:
- *bytes_per_block = 1;
- return;
-
- case SVGA3D_R_S10E5:
- *bytes_per_block = 2;
- return;
- case SVGA3D_R_S23E8:
- *bytes_per_block = 4;
- return;
- case SVGA3D_RG_S10E5:
- *bytes_per_block = 4;
- return;
- case SVGA3D_RG_S23E8:
- *bytes_per_block = 8;
- return;
-
- case SVGA3D_BUFFER:
- *bytes_per_block = 1;
- return;
-
- case SVGA3D_Z_D24X8:
- *bytes_per_block = 4;
- return;
-
- case SVGA3D_V16U16:
- *bytes_per_block = 4;
- return;
-
- case SVGA3D_G16R16:
- *bytes_per_block = 4;
- return;
-
- case SVGA3D_A16B16G16R16:
- *bytes_per_block = 8;
- return;
-
- case SVGA3D_Z_DF16:
- *bytes_per_block = 2;
- return;
- case SVGA3D_Z_DF24:
- *bytes_per_block = 4;
- return;
- case SVGA3D_Z_D24S8_INT:
- *bytes_per_block = 4;
- return;
-
+ {
+ "SVGA3D_R32G32B32_TYPELESS",
+ SVGA3D_R32G32B32_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32_TYPELESS,
+ 1, 1, 12,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R32G32B32_FLOAT",
+ SVGA3D_R32G32B32_FLOAT,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32_FLOAT,
+ 1, 1, 12,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R32G32B32_UINT",
+ SVGA3D_R32G32B32_UINT,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32_UINT,
+ 1, 1, 12,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R32G32B32_SINT",
+ SVGA3D_R32G32B32_SINT,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32_SINT,
+ 1, 1, 12,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16G16B16A16_TYPELESS",
+ SVGA3D_R16G16B16A16_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R16G16B16A16_TYPELESS,
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16G16B16A16_UINT",
+ SVGA3D_R16G16B16A16_UINT,
+ SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UINT,
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16G16B16A16_SNORM",
+ SVGA3D_R16G16B16A16_SNORM,
+ SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SNORM,
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16G16B16A16_SINT",
+ SVGA3D_R16G16B16A16_SINT,
+ SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SINT,
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R32G32_TYPELESS",
+ SVGA3D_R32G32_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R32G32_TYPELESS,
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R32G32_UINT",
+ SVGA3D_R32G32_UINT,
+ SVGA3D_DEVCAP_DXFMT_R32G32_UINT,
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R32G32_SINT",
+ SVGA3D_R32G32_SINT,
+ SVGA3D_DEVCAP_DXFMT_R32G32_SINT,
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R32G8X24_TYPELESS",
+ SVGA3D_R32G8X24_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R32G8X24_TYPELESS,
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_ZSTENCIL
+ },
+ {
+ /* Special case: no devcap / report sampler and depth/stencil ability
+ */
+ "SVGA3D_D32_FLOAT_S8X24_UINT",
+ SVGA3D_D32_FLOAT_S8X24_UINT,
+ 0, /*SVGA3D_DEVCAP_DXFMT_D32_FLOAT_S8X24_UINT*/
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_ZSTENCIL
+ },
+ {
+ /* Special case: no devcap / report sampler and depth/stencil ability
+ */
+ "SVGA3D_R32_FLOAT_X8X24_TYPELESS",
+ SVGA3D_R32_FLOAT_X8X24_TYPELESS,
+ 0, /*SVGA3D_DEVCAP_DXFMT_R32_FLOAT_X8X24_TYPELESS*/
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_ZSTENCIL
+ },
+ {
+ "SVGA3D_X32_TYPELESS_G8X24_UINT",
+ SVGA3D_X32_TYPELESS_G8X24_UINT,
+ SVGA3D_DEVCAP_DXFMT_X32_TYPELESS_G8X24_UINT,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R10G10B10A2_TYPELESS",
+ SVGA3D_R10G10B10A2_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R10G10B10A2_TYPELESS,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R10G10B10A2_UINT",
+ SVGA3D_R10G10B10A2_UINT,
+ SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UINT,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R11G11B10_FLOAT",
+ SVGA3D_R11G11B10_FLOAT,
+ SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8G8B8A8_TYPELESS",
+ SVGA3D_R8G8B8A8_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R8G8B8A8_TYPELESS,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8G8B8A8_UNORM",
+ SVGA3D_R8G8B8A8_UNORM,
+ SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8G8B8A8_UNORM_SRGB",
+ SVGA3D_R8G8B8A8_UNORM_SRGB,
+ SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM_SRGB,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8G8B8A8_UINT",
+ SVGA3D_R8G8B8A8_UINT,
+ SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UINT,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8G8B8A8_SINT",
+ SVGA3D_R8G8B8A8_SINT,
+ SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SINT,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16G16_TYPELESS",
+ SVGA3D_R16G16_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R16G16_TYPELESS,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16G16_UINT",
+ SVGA3D_R16G16_UINT,
+ SVGA3D_DEVCAP_DXFMT_R16G16_UINT,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16G16_SINT",
+ SVGA3D_R16G16_SINT,
+ SVGA3D_DEVCAP_DXFMT_R16G16_SINT,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R32_TYPELESS",
+ SVGA3D_R32_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R32_TYPELESS,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_ZSTENCIL |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ /* Special case: no devcap / report sampler and depth/stencil ability
+ */
+ "SVGA3D_D32_FLOAT",
+ SVGA3D_D32_FLOAT,
+ 0, /*SVGA3D_DEVCAP_DXFMT_D32_FLOAT*/
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_ZSTENCIL
+ },
+ {
+ "SVGA3D_R32_UINT",
+ SVGA3D_R32_UINT,
+ SVGA3D_DEVCAP_DXFMT_R32_UINT,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R32_SINT",
+ SVGA3D_R32_SINT,
+ SVGA3D_DEVCAP_DXFMT_R32_SINT,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R24G8_TYPELESS",
+ SVGA3D_R24G8_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R24G8_TYPELESS,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_ZSTENCIL
+ },
+ {
+ /* Special case: no devcap / report sampler and depth/stencil ability
+ */
+ "SVGA3D_D24_UNORM_S8_UINT",
+ SVGA3D_D24_UNORM_S8_UINT,
+ 0, /*SVGA3D_DEVCAP_DXFMT_D24_UNORM_S8_UINT*/
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_ZSTENCIL
+ },
+ {
+ /* Special case: no devcap / report sampler and depth/stencil ability
+ */
+ "SVGA3D_R24_UNORM_X8_TYPELESS",
+ SVGA3D_R24_UNORM_X8_TYPELESS,
+ 0, /*SVGA3D_DEVCAP_DXFMT_R24_UNORM_X8_TYPELESS*/
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_ZSTENCIL
+ },
+ {
+ "SVGA3D_X24_TYPELESS_G8_UINT",
+ SVGA3D_X24_TYPELESS_G8_UINT,
+ SVGA3D_DEVCAP_DXFMT_X24_TYPELESS_G8_UINT,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_ZSTENCIL
+ },
+ {
+ "SVGA3D_R8G8_TYPELESS",
+ SVGA3D_R8G8_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R8G8_TYPELESS,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8G8_UNORM",
+ SVGA3D_R8G8_UNORM,
+ SVGA3D_DEVCAP_DXFMT_R8G8_UNORM,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8G8_UINT",
+ SVGA3D_R8G8_UINT,
+ SVGA3D_DEVCAP_DXFMT_R8G8_UINT,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8G8_SINT",
+ SVGA3D_R8G8_SINT,
+ SVGA3D_DEVCAP_DXFMT_R8G8_SINT,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16_TYPELESS",
+ SVGA3D_R16_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R16_TYPELESS,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_ZSTENCIL |
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16_UNORM",
+ SVGA3D_R16_UNORM,
+ SVGA3D_DEVCAP_DXFMT_R16_UNORM,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16_UINT",
+ SVGA3D_R16_UINT,
+ SVGA3D_DEVCAP_DXFMT_R16_UINT,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16_SNORM",
+ SVGA3D_R16_SNORM,
+ SVGA3D_DEVCAP_DXFMT_R16_SNORM,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16_SINT",
+ SVGA3D_R16_SINT,
+ SVGA3D_DEVCAP_DXFMT_R16_SINT,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8_TYPELESS",
+ SVGA3D_R8_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_R8_TYPELESS,
+ 1, 1, 1,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8_UNORM",
+ SVGA3D_R8_UNORM,
+ SVGA3D_DEVCAP_DXFMT_R8_UNORM,
+ 1, 1, 1,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8_UINT",
+ SVGA3D_R8_UINT,
+ SVGA3D_DEVCAP_DXFMT_R8_UINT,
+ 1, 1, 1,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8_SNORM",
+ SVGA3D_R8_SNORM,
+ SVGA3D_DEVCAP_DXFMT_R8_SNORM,
+ 1, 1, 1,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8_SINT",
+ SVGA3D_R8_SINT,
+ SVGA3D_DEVCAP_DXFMT_R8_SINT,
+ 1, 1, 1,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_P8",
+ SVGA3D_P8, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_R9G9B9E5_SHAREDEXP",
+ SVGA3D_R9G9B9E5_SHAREDEXP,
+ SVGA3D_DEVCAP_DXFMT_R9G9B9E5_SHAREDEXP,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8G8_B8G8_UNORM",
+ SVGA3D_R8G8_B8G8_UNORM, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_G8R8_G8B8_UNORM",
+ SVGA3D_G8R8_G8B8_UNORM, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BC1_TYPELESS",
+ SVGA3D_BC1_TYPELESS, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BC1_UNORM_SRGB",
+ SVGA3D_BC1_UNORM_SRGB, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BC2_TYPELESS",
+ SVGA3D_BC2_TYPELESS, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BC2_UNORM_SRGB",
+ SVGA3D_BC2_UNORM_SRGB, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BC3_TYPELESS",
+ SVGA3D_BC3_TYPELESS, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BC3_UNORM_SRGB",
+ SVGA3D_BC3_UNORM_SRGB, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BC4_TYPELESS",
+ SVGA3D_BC4_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_BC4_TYPELESS,
+ 4, 4, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
+ },
+ {
+ "SVGA3D_ATI1",
+ SVGA3D_ATI1, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BC4_SNORM",
+ SVGA3D_BC4_SNORM,
+ SVGA3D_DEVCAP_DXFMT_BC4_SNORM,
+ 4, 4, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
+ },
+ {
+ "SVGA3D_BC5_TYPELESS",
+ SVGA3D_BC5_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_BC5_TYPELESS,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
+ },
+ {
+ "SVGA3D_ATI2",
+ SVGA3D_ATI2, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BC5_SNORM",
+ SVGA3D_BC5_SNORM,
+ SVGA3D_DEVCAP_DXFMT_BC5_SNORM,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
+ },
+ {
+ "SVGA3D_R10G10B10_XR_BIAS_A2_UNORM",
+ SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_B8G8R8A8_TYPELESS",
+ SVGA3D_B8G8R8A8_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_B8G8R8A8_TYPELESS,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_B8G8R8A8_UNORM_SRGB",
+ SVGA3D_B8G8R8A8_UNORM_SRGB,
+ SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM_SRGB,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_B8G8R8X8_TYPELESS",
+ SVGA3D_B8G8R8X8_TYPELESS,
+ SVGA3D_DEVCAP_DXFMT_B8G8R8X8_TYPELESS,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_B8G8R8X8_UNORM_SRGB",
+ SVGA3D_B8G8R8X8_UNORM_SRGB,
+ SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM_SRGB,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_Z_DF16",
+ SVGA3D_Z_DF16,
+ SVGA3D_DEVCAP_SURFACEFMT_Z_DF16,
+ 1, 1, 2,
+ 0
+ },
+ {
+ "SVGA3D_Z_DF24",
+ SVGA3D_Z_DF24,
+ SVGA3D_DEVCAP_SURFACEFMT_Z_DF24,
+ 1, 1, 4,
+ 0
+ },
+ {
+ "SVGA3D_Z_D24S8_INT",
+ SVGA3D_Z_D24S8_INT,
+ SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_ZSTENCIL
+ },
+ {
+ "SVGA3D_YV12",
+ SVGA3D_YV12, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_R32G32B32A32_FLOAT",
+ SVGA3D_R32G32B32A32_FLOAT,
+ SVGA3D_DEVCAP_DXFMT_R32G32B32A32_FLOAT,
+ 1, 1, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16G16B16A16_FLOAT",
+ SVGA3D_R16G16B16A16_FLOAT,
+ SVGA3D_DEVCAP_DXFMT_R16G16B16A16_FLOAT,
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16G16B16A16_UNORM",
+ SVGA3D_R16G16B16A16_UNORM,
+ SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UNORM,
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R32G32_FLOAT",
+ SVGA3D_R32G32_FLOAT,
+ SVGA3D_DEVCAP_DXFMT_R32G32_FLOAT,
+ 1, 1, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R10G10B10A2_UNORM",
+ SVGA3D_R10G10B10A2_UNORM,
+ SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UNORM,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R8G8B8A8_SNORM",
+ SVGA3D_R8G8B8A8_SNORM,
+ SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SNORM,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16G16_FLOAT",
+ SVGA3D_R16G16_FLOAT,
+ SVGA3D_DEVCAP_DXFMT_R16G16_FLOAT,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16G16_UNORM",
+ SVGA3D_R16G16_UNORM,
+ SVGA3D_DEVCAP_DXFMT_R16G16_UNORM,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16G16_SNORM",
+ SVGA3D_R16G16_SNORM,
+ SVGA3D_DEVCAP_DXFMT_R16G16_SNORM,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ /* Special case: no devcap / report sampler, render target and
+ * depth/stencil ability
+ */
+ "SVGA3D_R32_FLOAT",
+ SVGA3D_R32_FLOAT,
+ 0, /*SVGA3D_DEVCAP_DXFMT_R32_FLOAT*/
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET |
+ SVGA3DFORMAT_OP_ZSTENCIL
+ },
+ {
+ "SVGA3D_R8G8_SNORM",
+ SVGA3D_R8G8_SNORM,
+ SVGA3D_DEVCAP_DXFMT_R8G8_SNORM,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_R16_FLOAT",
+ SVGA3D_R16_FLOAT,
+ SVGA3D_DEVCAP_DXFMT_R16_FLOAT,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_D16_UNORM",
+ SVGA3D_D16_UNORM,
+ SVGA3D_DEVCAP_DXFMT_D16_UNORM,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_ZSTENCIL
+ },
+ {
+ "SVGA3D_A8_UNORM",
+ SVGA3D_A8_UNORM,
+ SVGA3D_DEVCAP_DXFMT_A8_UNORM,
+ 1, 1, 1,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_BC1_UNORM",
+ SVGA3D_BC1_UNORM, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BC2_UNORM",
+ SVGA3D_BC2_UNORM, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_BC3_UNORM",
+ SVGA3D_BC3_UNORM, 0, 0, 0, 0, 0
+ },
+ {
+ "SVGA3D_B5G6R5_UNORM",
+ SVGA3D_B5G6R5_UNORM,
+ SVGA3D_DEVCAP_DXFMT_B5G6R5_UNORM,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_DISPLAYMODE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_B5G5R5A1_UNORM",
+ SVGA3D_B5G5R5A1_UNORM,
+ SVGA3D_DEVCAP_DXFMT_B5G5R5A1_UNORM,
+ 1, 1, 2,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_DISPLAYMODE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_B8G8R8A8_UNORM",
+ SVGA3D_B8G8R8A8_UNORM,
+ SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_B8G8R8X8_UNORM",
+ SVGA3D_B8G8R8X8_UNORM,
+ SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM,
+ 1, 1, 4,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE |
+ SVGA3DFORMAT_OP_VOLUMETEXTURE |
+ SVGA3DFORMAT_OP_DISPLAYMODE |
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET
+ },
+ {
+ "SVGA3D_BC4_UNORM",
+ SVGA3D_BC4_UNORM,
+ SVGA3D_DEVCAP_DXFMT_BC4_UNORM,
+ 4, 4, 8,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
+ },
+ {
+ "SVGA3D_BC5_UNORM",
+ SVGA3D_BC5_UNORM,
+ SVGA3D_DEVCAP_DXFMT_BC5_UNORM,
+ 4, 4, 16,
+ SVGA3DFORMAT_OP_TEXTURE |
+ SVGA3DFORMAT_OP_CUBETEXTURE
+ }
+};
+
+
+/**
+ * Debug only: sanity-check the two static format tables.
+ *  - entry i of format_cap_table must describe SVGA3D format i
+ *  - entry i of format_conversion_table must describe pipe format i
+ * Only the first call does any work; later calls return immediately.
+ */
+static void
+check_format_tables(void)
+{
+   static boolean first_call = TRUE;
+   unsigned idx;
+
+   if (!first_call)
+      return;
+
+   STATIC_ASSERT(Elements(format_cap_table) == SVGA3D_FORMAT_MAX);
+   for (idx = 0; idx < Elements(format_cap_table); idx++) {
+      assert(format_cap_table[idx].format == idx);
+   }
+
+   STATIC_ASSERT(Elements(format_conversion_table) == PIPE_FORMAT_COUNT);
+   for (idx = 0; idx < Elements(format_conversion_table); idx++) {
+      assert(format_conversion_table[idx].pformat == idx);
+   }
+
+   first_call = FALSE;
+}
+
+
+/*
+ * Get format capabilities from the host.  It takes into consideration
+ * deprecated/unsupported formats, and formats which are implicitly assumed
+ * to be supported when the host does not provide an explicit capability
+ * entry.
+ *
+ * If the format's table entry names a devcap and the winsys returns a value
+ * for it, that value is used: caps at or below SVGA3D_DEVCAP_DX are returned
+ * as-is, caps above it are translated from SVGA3D_DXFMT_x bits to
+ * SVGA3DFORMAT_OP_x bits.  Otherwise the table's default operations are
+ * returned.
+ */
+void
+svga_get_format_cap(struct svga_screen *ss,
+                    SVGA3dSurfaceFormat format,
+                    SVGA3dSurfaceFormatCaps *caps)
+{
+   struct svga_winsys_screen *sws = ss->sws;
+   const struct format_cap *entry;
+   SVGA3dDevCapResult result;
+
+#ifdef DEBUG
+   check_format_tables();
+#else
+   (void) check_format_tables;
+#endif
+
+   assert(format < Elements(format_cap_table));
+   entry = &format_cap_table[format];
+   assert(entry->format == format);
+
+   if (!entry->devcap || !sws->get_cap(sws, entry->devcap, &result)) {
+      /* Implicitly advertised format -- use default caps */
+      caps->value = entry->defaultOperations;
+      return;
+   }
+
+   /* Explicitly advertised format */
+   if (entry->devcap <= SVGA3D_DEVCAP_DX) {
+      /* Return VGPU9 format cap as-is */
+      caps->value = result.u;
+      return;
+   }
+
+   /* Translate DX/VGPU10 format cap to VGPU9 cap */
+   caps->value = 0;
+   if (result.u & SVGA3D_DXFMT_COLOR_RENDERTARGET) {
+      caps->value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET;
+   }
+   if (!(result.u & SVGA3D_DXFMT_BLENDABLE)) {
+      caps->value |= SVGA3DFORMAT_OP_NOALPHABLEND;
+   }
+   if (result.u & SVGA3D_DXFMT_DEPTH_RENDERTARGET) {
+      caps->value |= SVGA3DFORMAT_OP_ZSTENCIL;
+   }
+   if (result.u & SVGA3D_DXFMT_SHADER_SAMPLE) {
+      caps->value |= (SVGA3DFORMAT_OP_TEXTURE |
+                      SVGA3DFORMAT_OP_CUBETEXTURE);
+   }
+   if (result.u & SVGA3D_DXFMT_VOLUME) {
+      caps->value |= SVGA3DFORMAT_OP_VOLUMETEXTURE;
+   }
+}
+
+
+/**
+ * Return the block width, block height and bytes per block recorded in
+ * format_cap_table for the given SVGA3D surface format.
+ * A zero block width is reported with debug_printf(); all three values
+ * are asserted to be non-zero.
+ */
+void
+svga_format_size(SVGA3dSurfaceFormat format,
+                 unsigned *block_width,
+                 unsigned *block_height,
+                 unsigned *bytes_per_block)
+{
+   const struct format_cap *entry;
+
+   assert(format < Elements(format_cap_table));
+   entry = &format_cap_table[format];
+
+   *block_width = entry->block_width;
+   *block_height = entry->block_height;
+   *bytes_per_block = entry->block_bytes;
+
+   /* Make sure the table entry was valid */
+   if (*block_width == 0) {
+      debug_printf("Bad table entry for %s\n", svga_format_name(format));
+   }
+   assert(*block_width);
+   assert(*block_height);
+   assert(*bytes_per_block);
+}
+
+
+/**
+ * Return the name string stored in format_cap_table for the given
+ * SVGA3D surface format.
+ */
+const char *
+svga_format_name(SVGA3dSurfaceFormat format)
+{
+   const struct format_cap *entry;
+
+   assert(format < Elements(format_cap_table));
+   entry = &format_cap_table[format];
+   return entry->name;
+}
+
+
+/**
+ * Return TRUE if the given SVGA3dSurfaceFormat is one of the signed or
+ * unsigned integer (_SINT / _UINT) color formats listed below,
+ * FALSE for every other format.
+ */
+boolean
+svga_format_is_integer(SVGA3dSurfaceFormat format)
+{
+   boolean is_integer;
+
+   switch (format) {
+   /* 32 bits per channel */
+   case SVGA3D_R32G32B32A32_UINT:
+   case SVGA3D_R32G32B32A32_SINT:
+   case SVGA3D_R32G32B32_UINT:
+   case SVGA3D_R32G32B32_SINT:
+   case SVGA3D_R32G32_UINT:
+   case SVGA3D_R32G32_SINT:
+   case SVGA3D_R32_UINT:
+   case SVGA3D_R32_SINT:
+   /* 16 bits per channel */
+   case SVGA3D_R16G16B16A16_UINT:
+   case SVGA3D_R16G16B16A16_SINT:
+   case SVGA3D_R16G16_UINT:
+   case SVGA3D_R16G16_SINT:
+   case SVGA3D_R16_UINT:
+   case SVGA3D_R16_SINT:
+   /* 8 bits per channel */
+   case SVGA3D_R8G8B8A8_UINT:
+   case SVGA3D_R8G8B8A8_SINT:
+   case SVGA3D_R8G8_UINT:
+   case SVGA3D_R8G8_SINT:
+   case SVGA3D_R8_UINT:
+   case SVGA3D_R8_SINT:
+   /* packed */
+   case SVGA3D_R10G10B10A2_UINT:
+      is_integer = TRUE;
+      break;
+   default:
+      is_integer = FALSE;
+      break;
+   }
+
+   return is_integer;
+}
+
+
+/**
+ * Given a texture format, return the expected data type returned from
+ * the texture sampler.  For example, UNORM8 formats return floating point
+ * values while SINT formats return signed integer values.
+ * Note: this function could be moved into the gallium u_format.[ch] code
+ * if it's useful to anyone else.
+ */
+enum tgsi_return_type
+svga_get_texture_datatype(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+      /* compressed format, shared exponent format, etc. */
+      switch (format) {
+      case PIPE_FORMAT_DXT1_RGB:
+      case PIPE_FORMAT_DXT1_RGBA:
+      case PIPE_FORMAT_DXT3_RGBA:
+      case PIPE_FORMAT_DXT5_RGBA:
+      case PIPE_FORMAT_DXT1_SRGB:
+      case PIPE_FORMAT_DXT1_SRGBA:
+      case PIPE_FORMAT_DXT3_SRGBA:
+      case PIPE_FORMAT_DXT5_SRGBA:
+      case PIPE_FORMAT_RGTC1_UNORM:
+      case PIPE_FORMAT_RGTC2_UNORM:
+      case PIPE_FORMAT_LATC1_UNORM:
+      case PIPE_FORMAT_LATC2_UNORM:
+      case PIPE_FORMAT_ETC1_RGB8:
+         return TGSI_RETURN_TYPE_UNORM;
+      case PIPE_FORMAT_RGTC1_SNORM:
+      case PIPE_FORMAT_RGTC2_SNORM:
+      case PIPE_FORMAT_LATC1_SNORM:
+      case PIPE_FORMAT_LATC2_SNORM:
+      case PIPE_FORMAT_R10G10B10X2_SNORM:
+         return TGSI_RETURN_TYPE_SNORM;
+      case PIPE_FORMAT_R11G11B10_FLOAT:
+      case PIPE_FORMAT_R9G9B9E5_FLOAT:
+         return TGSI_RETURN_TYPE_FLOAT;
+      default:
+         assert(!"Unexpected channel type in svga_get_texture_datatype()");
+         return TGSI_RETURN_TYPE_FLOAT;
+      }
+   }
+
+   /* Plain layout: depth/stencil first, then decide from channel 0 */
+   if (util_format_is_depth_or_stencil(format)) {
+      return TGSI_RETURN_TYPE_FLOAT; /* XXX revisit this */
+   }
+
+   switch (desc->channel[0].type) {
+   case UTIL_FORMAT_TYPE_FLOAT:
+      return TGSI_RETURN_TYPE_FLOAT;
+   case UTIL_FORMAT_TYPE_UNSIGNED:
+      if (desc->channel[0].normalized)
+         return TGSI_RETURN_TYPE_UNORM;
+      return TGSI_RETURN_TYPE_UINT;
+   case UTIL_FORMAT_TYPE_SIGNED:
+      if (desc->channel[0].normalized)
+         return TGSI_RETURN_TYPE_SNORM;
+      return TGSI_RETURN_TYPE_SINT;
+   default:
+      assert(!"Unexpected channel type in svga_get_texture_datatype()");
+      return TGSI_RETURN_TYPE_FLOAT;
+   }
+}
+
+
+/**
+ * Return TRUE if any color buffer currently attached to the svga context's
+ * framebuffer has a pure integer format, FALSE otherwise.
+ * Unbound (NULL) color buffer slots are skipped.
+ */
+boolean
+svga_has_any_integer_cbufs(const struct svga_context *svga)
+{
+   unsigned slot;
+
+   for (slot = 0; slot < PIPE_MAX_COLOR_BUFS; slot++) {
+      struct pipe_surface *surf = svga->curr.framebuffer.cbufs[slot];
+
+      if (!surf)
+         continue;
+
+      if (util_format_is_pure_integer(surf->format))
+         return TRUE;
+   }
+
+   return FALSE;
+}
+
+
+/**
+ * Given an SVGA format, return the corresponding typeless format.
+ * If there is no typeless format, return the format unchanged.
+ * Formats that are not handled by any case below are reported with
+ * debug_printf() and are also returned unchanged.
+ */
+SVGA3dSurfaceFormat
+svga_typeless_format(SVGA3dSurfaceFormat format)
+{
+ switch (format) {
+ case SVGA3D_R32G32B32A32_UINT:
+ case SVGA3D_R32G32B32A32_SINT:
+ case SVGA3D_R32G32B32A32_FLOAT:
+ return SVGA3D_R32G32B32A32_TYPELESS;
+ case SVGA3D_R32G32B32_FLOAT:
+ case SVGA3D_R32G32B32_UINT:
+ case SVGA3D_R32G32B32_SINT:
+ return SVGA3D_R32G32B32_TYPELESS;
+ case SVGA3D_R16G16B16A16_UINT:
+ case SVGA3D_R16G16B16A16_UNORM:
+ case SVGA3D_R16G16B16A16_SNORM:
+ case SVGA3D_R16G16B16A16_SINT:
+ case SVGA3D_R16G16B16A16_FLOAT:
+ return SVGA3D_R16G16B16A16_TYPELESS;
+ case SVGA3D_R32G32_UINT:
+ case SVGA3D_R32G32_SINT:
+ case SVGA3D_R32G32_FLOAT:
+ return SVGA3D_R32G32_TYPELESS;
+ case SVGA3D_D32_FLOAT_S8X24_UINT:
+ return SVGA3D_R32G8X24_TYPELESS;
+ case SVGA3D_X32_TYPELESS_G8X24_UINT:
+ return SVGA3D_R32_FLOAT_X8X24_TYPELESS;
+ case SVGA3D_R10G10B10A2_UINT:
+ case SVGA3D_R10G10B10A2_UNORM:
+ return SVGA3D_R10G10B10A2_TYPELESS;
+ case SVGA3D_R8G8B8A8_UNORM:
+ case SVGA3D_R8G8B8A8_SNORM:
+ case SVGA3D_R8G8B8A8_UNORM_SRGB:
+ case SVGA3D_R8G8B8A8_UINT:
+ case SVGA3D_R8G8B8A8_SINT:
+ return SVGA3D_R8G8B8A8_TYPELESS;
+ case SVGA3D_R16G16_UINT:
+ case SVGA3D_R16G16_SINT:
+ case SVGA3D_R16G16_UNORM:
+ case SVGA3D_R16G16_SNORM:
+ case SVGA3D_R16G16_FLOAT:
+ return SVGA3D_R16G16_TYPELESS;
+ /* The 32-bit depth format shares its typeless format with the
+ * 32-bit single-channel color formats.
+ */
+ case SVGA3D_D32_FLOAT:
+ case SVGA3D_R32_FLOAT:
+ case SVGA3D_R32_UINT:
+ case SVGA3D_R32_SINT:
+ return SVGA3D_R32_TYPELESS;
+ case SVGA3D_D24_UNORM_S8_UINT:
+ return SVGA3D_R24G8_TYPELESS;
+ case SVGA3D_X24_TYPELESS_G8_UINT:
+ return SVGA3D_R24_UNORM_X8_TYPELESS;
+ case SVGA3D_R8G8_UNORM:
+ case SVGA3D_R8G8_SNORM:
+ case SVGA3D_R8G8_UINT:
+ case SVGA3D_R8G8_SINT:
+ return SVGA3D_R8G8_TYPELESS;
+ case SVGA3D_R16_UNORM:
+ case SVGA3D_R16_UINT:
+ case SVGA3D_R16_SNORM:
+ case SVGA3D_R16_SINT:
+ case SVGA3D_R16_FLOAT:
+ return SVGA3D_R16_TYPELESS;
+ case SVGA3D_R8_UNORM:
+ case SVGA3D_R8_UINT:
+ case SVGA3D_R8_SNORM:
+ case SVGA3D_R8_SINT:
+ return SVGA3D_R8_TYPELESS;
+ case SVGA3D_B8G8R8A8_UNORM_SRGB:
+ case SVGA3D_B8G8R8A8_UNORM:
+ return SVGA3D_B8G8R8A8_TYPELESS;
+ case SVGA3D_B8G8R8X8_UNORM_SRGB:
+ case SVGA3D_B8G8R8X8_UNORM:
+ return SVGA3D_B8G8R8X8_TYPELESS;
+ case SVGA3D_BC4_UNORM:
+ case SVGA3D_BC4_SNORM:
+ return SVGA3D_BC4_TYPELESS;
+ case SVGA3D_BC5_UNORM:
+ case SVGA3D_BC5_SNORM:
+ return SVGA3D_BC5_TYPELESS;
+
+ /* Special cases (no corresponding _TYPELESS formats) */
+ case SVGA3D_A8_UNORM:
+ case SVGA3D_A4R4G4B4:
+ case SVGA3D_B5G5R5A1_UNORM:
+ case SVGA3D_B5G6R5_UNORM:
+ case SVGA3D_DXT1:
+ case SVGA3D_DXT2:
+ case SVGA3D_DXT3:
+ case SVGA3D_DXT4:
+ case SVGA3D_DXT5:
+ case SVGA3D_R11G11B10_FLOAT:
+ case SVGA3D_R9G9B9E5_SHAREDEXP:
+ case SVGA3D_Z_D32:
+ case SVGA3D_Z_D16:
+ return format;
default:
- debug_printf("format %u\n", (unsigned) format);
- assert(!"unexpected format in svga_format_size()");
- *bytes_per_block = 4;
+ /* Format not handled above: log it and return it unchanged */
+ debug_printf("Unexpected format %s in %s\n",
+ svga_format_name(format), __FUNCTION__);
+ return format;
}
}
#include "pipe/p_format.h"
+#include "svga_context.h"
#include "svga_types.h"
#include "svga_reg.h"
#include "svga3d_reg.h"
struct svga_screen;
+/**
+ * Vertex format flags. These are used to specify that some vertex formats
+ * need extra processing/conversion in the vertex shader. For example,
+ * setting the W component to 1, or swapping R/B, or converting packed uint
+ * types to signed int/snorm.
+ */
+#define VF_ADJUST_RANGE (1 << 0)
+#define VF_W_TO_1 (1 << 1)
+#define VF_U_TO_F_CAST (1 << 2) /* convert uint to float */
+#define VF_I_TO_F_CAST (1 << 3) /* convert sint to float */
+#define VF_BGRA (1 << 4) /* swap R/B */
+#define VF_PUINT_TO_SNORM (1 << 5) /* 10_10_10_2 to snorm */
+#define VF_PUINT_TO_USCALED (1 << 6) /* 10_10_10_2 to uscaled */
+#define VF_PUINT_TO_SSCALED (1 << 7) /* 10_10_10_2 to sscaled */
+
+
+void
+svga_translate_vertex_format_vgpu10(enum pipe_format format,
+ SVGA3dSurfaceFormat *svga_format,
+ unsigned *vf_flags);
+
enum SVGA3dSurfaceFormat
svga_translate_format(struct svga_screen *ss,
enum pipe_format format,
unsigned *block_height,
unsigned *bytes_per_block);
+/** Return the name string of the given SVGA3D surface format. */
+const char *
+svga_format_name(SVGA3dSurfaceFormat format);
+
+/** Is the given format a signed or unsigned integer color format? */
+boolean
+svga_format_is_integer(SVGA3dSurfaceFormat format);
+
+/** Return the data type the texture sampler returns for a pipe format. */
+enum tgsi_return_type
+svga_get_texture_datatype(enum pipe_format format);
+
+
+/** Are any integer color buffers attached to the current framebuffer? */
+// XXX: Move this to svga_context?
+boolean
+svga_has_any_integer_cbufs(const struct svga_context *svga);
+
+
+/**
+ * Return the typeless format corresponding to the given format, or the
+ * format itself if there is none.
+ */
+SVGA3dSurfaceFormat
+svga_typeless_format(SVGA3dSurfaceFormat format);
+
#endif /* SVGA_FORMAT_H_ */
--- /dev/null
+/*
+ * Copyright 2013 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include "svga_context.h"
+#include "svga_link.h"
+
+#include "tgsi/tgsi_strings.h"
+
+
+#define INVALID_INDEX 255
+
+
+/**
+ * Examine input and output shaders info to link outputs from the
+ * output shader to inputs from the input shader.
+ * Basically, we'll remap input shader's input slots to new numbers
+ * based on semantic name/index of the outputs from the output shader.
+ *
+ * On return, linkage->num_inputs is the input shader's input count and
+ * linkage->input_map[i] is the slot assigned to input i:
+ *  - a clip distance input gets slot num_outputs + 1 + semantic index,
+ *  - an input whose semantic name/index matches an output of the output
+ *    shader gets that output's index,
+ *  - every other input gets the next free slot after those.
+ *
+ * \param outshader_info  info for the shader producing the outputs
+ * \param inshader_info   info for the shader consuming them as inputs
+ * \param linkage         returns the input slot mapping
+ */
+void
+svga_link_shaders(const struct tgsi_shader_info *outshader_info,
+                  const struct tgsi_shader_info *inshader_info,
+                  struct shader_linkage *linkage)
+{
+   unsigned i, free_slot;
+
+   /* Mark every input as unassigned */
+   for (i = 0; i < Elements(linkage->input_map); i++) {
+      linkage->input_map[i] = INVALID_INDEX;
+   }
+
+   /* Assign input slots for input shader inputs.
+    * Basically, we want to use the same index for the output shader's outputs
+    * and the input shader's inputs that should be linked together.
+    * We'll modify the input shader's inputs to match the output shader.
+    */
+   assert(inshader_info->num_inputs <=
+          Elements(inshader_info->input_semantic_name));
+
+   /* free register index that can be used for built-in varyings */
+   free_slot = outshader_info->num_outputs + 1;
+
+   for (i = 0; i < inshader_info->num_inputs; i++) {
+      unsigned sem_name = inshader_info->input_semantic_name[i];
+      unsigned sem_index = inshader_info->input_semantic_index[i];
+      unsigned j;
+      /**
+       * Get the clip distance inputs from the output shader's
+       * clip distance shadow copy.
+       */
+      if (sem_name == TGSI_SEMANTIC_CLIPDIST) {
+         linkage->input_map[i] = outshader_info->num_outputs + 1 + sem_index;
+         /* make sure free_slot includes this extra output */
+         free_slot = MAX2(free_slot, linkage->input_map[i] + 1);
+      }
+      else {
+         /* search output shader outputs for same item */
+         for (j = 0; j < outshader_info->num_outputs; j++) {
+            assert(j < Elements(outshader_info->output_semantic_name));
+            if (outshader_info->output_semantic_name[j] == sem_name &&
+                outshader_info->output_semantic_index[j] == sem_index) {
+               linkage->input_map[i] = j;
+               break;
+            }
+         }
+      }
+   }
+
+   linkage->num_inputs = inshader_info->num_inputs;
+
+   /* Things like the front-face register are handled here */
+   for (i = 0; i < inshader_info->num_inputs; i++) {
+      if (linkage->input_map[i] == INVALID_INDEX) {
+         unsigned j = free_slot++;
+         linkage->input_map[i] = j;
+      }
+   }
+
+   /* Debug */
+   if (0) {
+      /* One flag per possible ubyte slot value.  A bitmask built with
+       * "1 << slot" would be an undefined shift once a slot number
+       * reaches the width of the mask, so use an array instead.
+       */
+      ubyte slot_used[INVALID_INDEX + 1] = { 0 };
+
+      for (i = 0; i < linkage->num_inputs; i++) {
+         const unsigned slot = linkage->input_map[i];
+
+         assert(slot != INVALID_INDEX);
+
+         debug_printf("input shader input[%u] slot %u %s %u %s\n",
+                      i,
+                      slot,
+                      tgsi_semantic_names[inshader_info->input_semantic_name[i]],
+                      inshader_info->input_semantic_index[i],
+                      tgsi_interpolate_names[inshader_info->input_interpolate[i]]);
+
+         /* make sure no repeating register index */
+         if (slot_used[slot]) {
+            assert(0);
+         }
+         slot_used[slot] = 1;
+      }
+   }
+}
--- /dev/null
+
+#ifndef SVGA_LINK_H
+#define SVGA_LINK_H
+
+#include "pipe/p_defines.h"
+
+struct svga_context;
+
+/**
+ * Input slot mapping filled in by svga_link_shaders().
+ */
+struct shader_linkage
+{
+ /* number of inputs of the input shader */
+ unsigned num_inputs;
+ /* input_map[i] is the slot assigned to input i of the input shader */
+ ubyte input_map[PIPE_MAX_SHADER_INPUTS];
+};
+
+void
+svga_link_shaders(const struct tgsi_shader_info *outshader_info,
+ const struct tgsi_shader_info *inshader_info,
+ struct shader_linkage *linkage);
+
+#endif /* SVGA_LINK_H */
#include "pipe/p_defines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_bitmask.h"
#include "svga_context.h"
-
#include "svga_hw_reg.h"
+#include "svga_cmd.h"
static inline unsigned
-svga_translate_blend_factor(unsigned factor)
+svga_translate_blend_factor(const struct svga_context *svga, unsigned factor)
{
switch (factor) {
case PIPE_BLENDFACTOR_ZERO: return SVGA3D_BLENDOP_ZERO;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return SVGA3D_BLENDOP_SRCALPHASAT;
case PIPE_BLENDFACTOR_CONST_COLOR: return SVGA3D_BLENDOP_BLENDFACTOR;
case PIPE_BLENDFACTOR_INV_CONST_COLOR: return SVGA3D_BLENDOP_INVBLENDFACTOR;
- case PIPE_BLENDFACTOR_CONST_ALPHA: return SVGA3D_BLENDOP_BLENDFACTOR; /* ? */
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return SVGA3D_BLENDOP_INVBLENDFACTOR; /* ? */
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ if (svga_have_vgpu10(svga))
+ return SVGA3D_BLENDOP_BLENDFACTORALPHA;
+ else
+ return SVGA3D_BLENDOP_BLENDFACTOR; /* as close as we can get */
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ if (svga_have_vgpu10(svga))
+ return SVGA3D_BLENDOP_INVBLENDFACTORALPHA;
+ else
+ return SVGA3D_BLENDOP_INVBLENDFACTOR; /* as close as we can get */
+ case PIPE_BLENDFACTOR_SRC1_COLOR: return SVGA3D_BLENDOP_SRC1COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return SVGA3D_BLENDOP_INVSRC1COLOR;
+ case PIPE_BLENDFACTOR_SRC1_ALPHA: return SVGA3D_BLENDOP_SRC1ALPHA;
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return SVGA3D_BLENDOP_INVSRC1ALPHA;
+ case 0: return SVGA3D_BLENDOP_ONE;
default:
assert(0);
return SVGA3D_BLENDOP_ZERO;
}
+/**
+ * Define a vgpu10 blend state object for the given
+ * svga blend state.
+ *
+ * Allocates an object id from svga->blend_object_id_bm into bs->id, copies
+ * the per-render-target terms of \p bs into an SVGA3dDXBlendStatePerRT
+ * array and emits the DefineBlendState command.  Only valid on a VGPU10
+ * context (asserted).
+ *
+ * NOTE(review): if the command still fails after the flush-and-retry the
+ * function returns without reporting an error, leaving bs->id allocated --
+ * confirm that is acceptable to callers.
+ */
+static void
+define_blend_state_object(struct svga_context *svga,
+ struct svga_blend_state *bs)
+{
+ SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS];
+ unsigned try;
+ int i;
+
+ assert(svga_have_vgpu10(svga));
+
+ bs->id = util_bitmask_add(svga->blend_object_id_bm);
+
+ /* NOTE(review): perRT[] is sized with SVGA3D_MAX_RENDER_TARGETS while
+ * this loop is bounded by SVGA3D_DX_MAX_RENDER_TARGETS; presumably the
+ * two constants are equal -- TODO confirm.
+ */
+ for (i = 0; i < SVGA3D_DX_MAX_RENDER_TARGETS; i++) {
+ perRT[i].blendEnable = bs->rt[i].blend_enable;
+ perRT[i].srcBlend = bs->rt[i].srcblend;
+ perRT[i].destBlend = bs->rt[i].dstblend;
+ perRT[i].blendOp = bs->rt[i].blendeq;
+ perRT[i].srcBlendAlpha = bs->rt[i].srcblend_alpha;
+ perRT[i].destBlendAlpha = bs->rt[i].dstblend_alpha;
+ perRT[i].blendOpAlpha = bs->rt[i].blendeq_alpha;
+ perRT[i].renderTargetWriteMask = bs->rt[i].writemask;
+ /* logic ops are never enabled through this object */
+ perRT[i].logicOpEnable = 0;
+ perRT[i].logicOp = SVGA3D_LOGICOP_COPY;
+ /* every target is expected to carry target 0's source blend term */
+ assert(perRT[i].srcBlend == perRT[0].srcBlend);
+ }
+
+ /* Loop in case command buffer is full and we need to flush and retry */
+ for (try = 0; try < 2; try++) {
+ enum pipe_error ret;
+
+ ret = SVGA3D_vgpu10_DefineBlendState(svga->swc,
+ bs->id,
+ bs->alpha_to_coverage,
+ bs->independent_blend_enable,
+ perRT);
+ if (ret == PIPE_OK)
+ return;
+ svga_context_flush(svga, NULL);
+ }
+}
+
+
static void *
svga_create_blend_state(struct pipe_context *pipe,
const struct pipe_blend_state *templ)
{
+ struct svga_context *svga = svga_context(pipe);
struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state );
unsigned i;
-
/* Fill in the per-rendertarget blend state. We currently only
- * have one rendertarget.
+ * support independent blend enable and colormask per render target.
*/
- for (i = 0; i < 1; i++) {
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
/* No way to set this in SVGA3D, and no way to correctly implement it on
* top of D3D9 API. Instead we try to simulate with various blend modes.
*/
break;
case PIPE_LOGICOP_COPY:
blend->rt[i].blend_enable = FALSE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
break;
case PIPE_LOGICOP_COPY_INVERTED:
blend->rt[i].blend_enable = TRUE;
case PIPE_LOGICOP_EQUIV:
/* Fill these in with plausible values */
blend->rt[i].blend_enable = FALSE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
break;
default:
assert(0);
break;
}
+ blend->rt[i].srcblend_alpha = blend->rt[i].srcblend;
+ blend->rt[i].dstblend_alpha = blend->rt[i].dstblend;
+ blend->rt[i].blendeq_alpha = blend->rt[i].blendeq;
}
else {
- blend->rt[i].blend_enable = templ->rt[0].blend_enable;
-
- if (templ->rt[0].blend_enable) {
- blend->rt[i].srcblend = svga_translate_blend_factor(templ->rt[0].rgb_src_factor);
- blend->rt[i].dstblend = svga_translate_blend_factor(templ->rt[0].rgb_dst_factor);
- blend->rt[i].blendeq = svga_translate_blend_func(templ->rt[0].rgb_func);
- blend->rt[i].srcblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_src_factor);
- blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->rt[0].alpha_dst_factor);
- blend->rt[i].blendeq_alpha = svga_translate_blend_func(templ->rt[0].alpha_func);
+ /* Note: the vgpu10 device does not yet support independent
+ * blend terms per render target. Target[0] always specifies the
+ * blending terms.
+ */
+ if (templ->independent_blend_enable || templ->rt[0].blend_enable) {
+ /* always use the 0th target's blending terms for now */
+ blend->rt[i].srcblend =
+ svga_translate_blend_factor(svga, templ->rt[0].rgb_src_factor);
+ blend->rt[i].dstblend =
+ svga_translate_blend_factor(svga, templ->rt[0].rgb_dst_factor);
+ blend->rt[i].blendeq =
+ svga_translate_blend_func(templ->rt[0].rgb_func);
+ blend->rt[i].srcblend_alpha =
+ svga_translate_blend_factor(svga, templ->rt[0].alpha_src_factor);
+ blend->rt[i].dstblend_alpha =
+ svga_translate_blend_factor(svga, templ->rt[0].alpha_dst_factor);
+ blend->rt[i].blendeq_alpha =
+ svga_translate_blend_func(templ->rt[0].alpha_func);
if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend ||
blend->rt[i].dstblend_alpha != blend->rt[i].dstblend ||
- blend->rt[i].blendeq_alpha != blend->rt[i].blendeq)
- {
+ blend->rt[i].blendeq_alpha != blend->rt[i].blendeq) {
blend->rt[i].separate_alpha_blend_enable = TRUE;
}
}
+ else {
+ /* disabled - default blend terms */
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
+ blend->rt[i].srcblend_alpha = SVGA3D_BLENDOP_ONE;
+ blend->rt[i].dstblend_alpha = SVGA3D_BLENDOP_ZERO;
+ blend->rt[i].blendeq_alpha = SVGA3D_BLENDEQ_ADD;
+ }
+
+ if (templ->independent_blend_enable) {
+ blend->rt[i].blend_enable = templ->rt[i].blend_enable;
+ }
+ else {
+ blend->rt[i].blend_enable = templ->rt[0].blend_enable;
+ }
}
- blend->rt[i].writemask = templ->rt[0].colormask;
+ /* Some GL blend modes are not supported by the VGPU9 device (there's
+ * no equivalent of PIPE_BLENDFACTOR_[INV_]CONST_ALPHA).
+ * When we set this flag, we copy the constant blend alpha value
+ * to the R, G, B components.
+ * This works as long as the src/dst RGB blend factors doesn't use
+ * PIPE_BLENDFACTOR_CONST_COLOR and PIPE_BLENDFACTOR_CONST_ALPHA
+ * at the same time. There's no work-around for that.
+ */
+ if (!svga_have_vgpu10(svga)) {
+ if (templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_CONST_ALPHA ||
+ templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_CONST_ALPHA ||
+ templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA ||
+ templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA) {
+ blend->blend_color_alpha = TRUE;
+ }
+ }
+
+ if (templ->independent_blend_enable) {
+ blend->rt[i].writemask = templ->rt[i].colormask;
+ }
+ else {
+ blend->rt[i].writemask = templ->rt[0].colormask;
+ }
+ }
+
+ blend->independent_blend_enable = templ->independent_blend_enable;
+
+ blend->alpha_to_coverage = templ->alpha_to_coverage;
+
+ if (svga_have_vgpu10(svga)) {
+ define_blend_state_object(svga, blend);
}
return blend;
}
+
static void svga_bind_blend_state(struct pipe_context *pipe,
void *blend)
{
svga->dirty |= SVGA_NEW_BLEND;
}
-
-static void svga_delete_blend_state(struct pipe_context *pipe, void *blend)
+/**
+ * Delete a blend state object.
+ *
+ * On a VGPU10 context the matching device blend object is destroyed
+ * first (flushing and retrying once if the command buffer is full), the
+ * cached hw_draw binding is invalidated if it refers to this object, and
+ * the object id is returned to svga->blend_object_id_bm.
+ *
+ * \param pipe   the context the state was created on
+ * \param blend  the svga_blend_state to free
+ */
+static void svga_delete_blend_state(struct pipe_context *pipe,
+ void *blend)
{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_blend_state *bs =
+ (struct svga_blend_state *) blend;
+
+ /* A device object id is only assigned on the VGPU10 path of
+ * svga_create_blend_state(); otherwise bs->id keeps the zero it got
+ * from CALLOC_STRUCT, which must not be mistaken for a live object id.
+ */
+ if (svga_have_vgpu10(svga) && bs->id != SVGA3D_INVALID_ID) {
+ enum pipe_error ret;
+
+ ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id);
+ if (ret != PIPE_OK) {
+ /* command buffer full: flush and try once more */
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id);
+ assert(ret == PIPE_OK);
+ }
+
+ /* drop the stale reference so a reused id is seen as a change */
+ if (bs->id == svga->state.hw_draw.blend_id)
+ svga->state.hw_draw.blend_id = SVGA3D_INVALID_ID;
+
+ util_bitmask_clear(svga->blend_object_id_bm, bs->id);
+ bs->id = SVGA3D_INVALID_ID;
+ }
+
FREE(blend);
}
svga->pipe.set_blend_color = svga_set_blend_color;
}
-
-
-
#include "svga_cmd.h"
#include "svga_surface.h"
+//#include "util/u_blit_sw.h"
#include "util/u_format.h"
#include "util/u_surface.h"
struct svga_context *svga = svga_context(pipe);
struct pipe_blit_info info = *blit_info;
- if (info.src.resource->nr_samples > 1 &&
+ if (!svga_have_vgpu10(svga) &&
+ info.src.resource->nr_samples > 1 &&
info.dst.resource->nr_samples <= 1 &&
!util_format_is_depth_or_stencil(info.src.resource->format) &&
!util_format_is_pure_integer(info.src.resource->format)) {
return; /* done */
}
- if (info.mask & PIPE_MASK_S) {
- debug_printf("svga: cannot blit stencil, skipping\n");
- info.mask &= ~PIPE_MASK_S;
- }
-
- if (!util_blitter_is_blit_supported(svga->blitter, &info)) {
+ if ((info.mask & PIPE_MASK_S) ||
+ !util_blitter_is_blit_supported(svga->blitter, &info)) {
debug_printf("svga: blit unsupported %s -> %s\n",
util_format_short_name(info.src.resource->format),
util_format_short_name(info.dst.resource->format));
util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb);
util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems);
util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs);
- /*util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs);*/
- /*util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
- (struct pipe_stream_output_target**)svga->so_targets);*/
+ util_blitter_save_geometry_shader(svga->blitter, svga->curr.user_gs);
+ util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
+ (struct pipe_stream_output_target**)svga->so_targets);
util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast);
util_blitter_save_viewport(svga->blitter, &svga->curr.viewport);
util_blitter_save_scissor(svga->blitter, &svga->curr.scissor);
util_blitter_save_depth_stencil_alpha(svga->blitter,
(void*)svga->curr.depth);
util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref);
- /*util_blitter_save_sample_mask(svga->blitter, svga->sample_mask);*/
+ util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask);
util_blitter_save_framebuffer(svga->blitter, &svga->curr.framebuffer);
util_blitter_save_fragment_sampler_states(svga->blitter,
- svga->curr.num_samplers,
- (void**)svga->curr.sampler);
+ svga->curr.num_samplers[PIPE_SHADER_FRAGMENT],
+ (void**)svga->curr.sampler[PIPE_SHADER_FRAGMENT]);
util_blitter_save_fragment_sampler_views(svga->blitter,
- svga->curr.num_sampler_views,
- svga->curr.sampler_views);
+ svga->curr.num_sampler_views[PIPE_SHADER_FRAGMENT],
+ svga->curr.sampler_views[PIPE_SHADER_FRAGMENT]);
/*util_blitter_save_render_condition(svga->blitter, svga->render_cond_query,
svga->render_cond_cond, svga->render_cond_mode);*/
util_blitter_blit(svga->blitter, &info);
#include "svga_surface.h"
+/**
+ * Clear the whole color buffer(s) by drawing a quad. For VGPU10 we use
+ * this when clearing integer render targets. We'll also clear the
+ * depth and/or stencil buffers if the clear_buffers mask specifies them.
+ *
+ * \param svga           the context whose current framebuffer is cleared
+ * \param clear_buffers  mask of buffers to clear, passed to the blitter
+ * \param color          clear color
+ * \param depth          depth clear value
+ * \param stencil        stencil clear value
+ */
+static void
+clear_buffers_with_quad(struct svga_context *svga,
+ unsigned clear_buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil)
+{
+ const struct pipe_framebuffer_state *fb = &svga->curr.framebuffer;
+
+ /* Hand the current pipeline state to the blitter before it draws
+ * (presumably so it can be restored afterwards).
+ * NOTE(review): this saves svga->curr.gs whereas the driver's blit path
+ * saves svga->curr.user_gs -- confirm which one is intended here.
+ */
+ util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb);
+ util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems);
+ util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs);
+ util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs);
+ util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
+ (struct pipe_stream_output_target**)svga->so_targets);
+ util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast);
+ util_blitter_save_viewport(svga->blitter, &svga->curr.viewport);
+ util_blitter_save_scissor(svga->blitter, &svga->curr.scissor);
+ util_blitter_save_fragment_shader(svga->blitter, svga->curr.fs);
+ util_blitter_save_blend(svga->blitter, (void*)svga->curr.blend);
+ util_blitter_save_depth_stencil_alpha(svga->blitter,
+ (void*)svga->curr.depth);
+ util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref);
+ util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask);
+
+ /* clear the full framebuffer extent, a single layer */
+ util_blitter_clear(svga->blitter,
+ fb->width, fb->height,
+ 1, /* num_layers */
+ clear_buffers, color,
+ depth, stencil);
+}
+
+
+/**
+ * Report whether at least one color buffer that is both bound in \p fb
+ * and selected by the \p buffers clear mask has a pure-integer format.
+ */
+static boolean
+is_integer_target(struct pipe_framebuffer_state *fb, unsigned buffers)
+{
+   unsigned buf;
+
+   for (buf = 0; buf < fb->nr_cbufs; buf++) {
+      /* skip color buffers the caller did not ask to clear */
+      if (!(buffers & (PIPE_CLEAR_COLOR0 << buf)))
+         continue;
+
+      /* skip unbound slots */
+      if (!fb->cbufs[buf])
+         continue;
+
+      if (util_format_is_pure_integer(fb->cbufs[buf]->format))
+         return TRUE;
+   }
+
+   return FALSE;
+}
+
+
+/**
+ * Check if the integer values in the clear color can be represented
+ * by floats. If so, we can use the VGPU10 ClearRenderTargetView command.
+ * Otherwise, we need to clear with a quad.
+ *
+ * A single-precision float has a 24-bit significand, so every integer
+ * whose magnitude is at most 2^24 is exactly representable.  Both ends of
+ * the range are checked: a large negative value does not fit either, and
+ * an unsigned component >= 2^31 shows up as a negative number when read
+ * through color->i[].
+ *
+ * \return TRUE if all four components lie in [-2^24, 2^24]
+ */
+static boolean
+ints_fit_in_floats(const union pipe_color_union *color)
+{
+   const int max = 1 << 24;
+   unsigned chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      if (color->i[chan] > max || color->i[chan] < -max)
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
static enum pipe_error
try_clear(struct svga_context *svga,
unsigned buffers,
if (ret != PIPE_OK)
return ret;
- if (svga->rebind.rendertargets) {
+ if (svga->rebind.flags.rendertargets) {
ret = svga_reemit_framebuffer_bindings(svga);
if (ret != PIPE_OK) {
return ret;
if (buffers & PIPE_CLEAR_DEPTH)
flags |= SVGA3D_CLEAR_DEPTH;
- if ((svga->curr.framebuffer.zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&
- (buffers & PIPE_CLEAR_STENCIL))
+ if (buffers & PIPE_CLEAR_STENCIL)
flags |= SVGA3D_CLEAR_STENCIL;
rect.w = MAX2(rect.w, fb->zsbuf->width);
rect.h = MAX2(rect.h, fb->zsbuf->height);
}
- if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
+ if (!svga_have_vgpu10(svga) &&
+ !svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) {
restore_viewport = TRUE;
ret = SVGA3D_SetViewport(svga->swc, &rect);
if (ret != PIPE_OK)
return ret;
}
- ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui[0], (float) depth, stencil,
- rect.x, rect.y, rect.w, rect.h);
- if (ret != PIPE_OK)
- return ret;
+ if (svga_have_vgpu10(svga)) {
+ if (flags & SVGA3D_CLEAR_COLOR) {
+ unsigned i;
+
+ if (is_integer_target(fb, buffers) && !ints_fit_in_floats(color)) {
+ clear_buffers_with_quad(svga, buffers, color, depth, stencil);
+ /* We also cleared depth/stencil, so that's done */
+ flags &= ~(SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL);
+ }
+ else {
+ struct pipe_surface *rtv;
+
+ /* Issue VGPU10 Clear commands */
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ if ((fb->cbufs[i] == NULL) ||
+ !(buffers & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
+
+ rtv = svga_validate_surface_view(svga,
+ svga_surface(fb->cbufs[i]));
+ if (rtv == NULL)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc,
+ rtv, color->f);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ }
+ }
+ if (flags & (SVGA3D_CLEAR_DEPTH | SVGA3D_CLEAR_STENCIL)) {
+ struct pipe_surface *dsv =
+ svga_validate_surface_view(svga, svga_surface(fb->zsbuf));
+ if (dsv == NULL)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ ret = SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv, flags,
+ stencil, (float) depth);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ }
+ else {
+ ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui[0], (float) depth, stencil,
+ rect.x, rect.y, rect.w, rect.h);
+ if (ret != PIPE_OK)
+ return ret;
+ }
if (restore_viewport) {
- memcpy(&rect, &svga->state.hw_clear.viewport, sizeof rect);
- ret = SVGA3D_SetViewport(svga->swc, &rect);
+ ret = SVGA3D_SetViewport(svga->swc, &svga->state.hw_clear.viewport);
}
return ret;
uint shader, uint index,
struct pipe_constant_buffer *cb)
{
+ struct svga_screen *svgascreen = svga_screen(pipe->screen);
struct svga_context *svga = svga_context(pipe);
struct pipe_resource *buf = cb ? cb->buffer : NULL;
-
- if (cb && cb->user_buffer) {
- buf = svga_user_buffer_create(pipe->screen,
- (void *) cb->user_buffer,
- cb->buffer_size,
- PIPE_BIND_CONSTANT_BUFFER);
+ unsigned buffer_size = 0;
+
+ if (cb) {
+ buffer_size = cb->buffer_size;
+ if (cb->user_buffer) {
+ buf = svga_user_buffer_create(pipe->screen,
+ (void *) cb->user_buffer,
+ cb->buffer_size,
+ PIPE_BIND_CONSTANT_BUFFER);
+ }
}
assert(shader < PIPE_SHADER_TYPES);
- assert(index == 0);
+ assert(index < Elements(svga->curr.constbufs[shader]));
+ assert(index < svgascreen->max_const_buffers);
+ (void) svgascreen;
+
+ pipe_resource_reference(&svga->curr.constbufs[shader][index].buffer, buf);
+
+ /* Make sure the constant buffer size to be updated is within the
+ * limit supported by the device.
+ */
+ svga->curr.constbufs[shader][index].buffer_size =
+ MIN2(buffer_size, SVGA_MAX_CONST_BUF_SIZE);
- pipe_resource_reference(&svga->curr.cbufs[shader].buffer, buf);
- svga->curr.cbufs[shader].buffer_size = cb ? cb->buffer_size : 0;
- svga->curr.cbufs[shader].buffer_offset = cb ? cb->buffer_offset : 0;
- svga->curr.cbufs[shader].user_buffer = NULL; /* not used */
+ svga->curr.constbufs[shader][index].buffer_offset = cb ? cb->buffer_offset : 0;
+ svga->curr.constbufs[shader][index].user_buffer = NULL; /* not used */
if (shader == PIPE_SHADER_FRAGMENT)
svga->dirty |= SVGA_NEW_FS_CONST_BUFFER;
- else
+ else if (shader == PIPE_SHADER_VERTEX)
svga->dirty |= SVGA_NEW_VS_CONST_BUFFER;
+ else
+ svga->dirty |= SVGA_NEW_GS_CONST_BUFFER;
+
+ /* update bitmask of dirty const buffers */
+ svga->state.dirty_constbufs[shader] |= (1 << index);
if (cb && cb->user_buffer) {
pipe_resource_reference(&buf, NULL);
*
**********************************************************/
-#include "util/u_inlines.h"
#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
+#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "svga_context.h"
#include "svga_hw_reg.h"
+#include "svga_cmd.h"
static inline unsigned
}
+/**
+ * Define a vgpu10 depth/stencil state object for the given
+ * svga depth/stencil state.
+ *
+ * Allocates an object id from svga->ds_object_id_bm into ds->id and emits
+ * the DefineDepthStencilState command from the fields of \p ds.  Only
+ * valid on a VGPU10 context (asserted).
+ *
+ * NOTE(review): if the command still fails after the flush-and-retry the
+ * function returns without reporting an error, leaving ds->id allocated --
+ * confirm that is acceptable to callers.
+ */
+static void
+define_depth_stencil_state_object(struct svga_context *svga,
+ struct svga_depth_stencil_state *ds)
+{
+ unsigned try;
+
+ assert(svga_have_vgpu10(svga));
+
+ ds->id = util_bitmask_add(svga->ds_object_id_bm);
+
+ /* spot check that these comparison tokens are the same */
+ assert(SVGA3D_COMPARISON_NEVER == SVGA3D_CMP_NEVER);
+ assert(SVGA3D_COMPARISON_LESS == SVGA3D_CMP_LESS);
+ assert(SVGA3D_COMPARISON_NOT_EQUAL == SVGA3D_CMP_NOTEQUAL);
+
+ /* Loop in case command buffer is full and we need to flush and retry */
+ for (try = 0; try < 2; try++) {
+ enum pipe_error ret;
+
+ /* Note: we use the ds->stencil[0].enabled value for both the front
+ * and back-face enables. If single-side stencil is used, we'll have
+ * set the back state the same as the front state.
+ */
+ ret = SVGA3D_vgpu10_DefineDepthStencilState(svga->swc,
+ ds->id,
+ /* depth/Z */
+ ds->zenable,
+ ds->zwriteenable,
+ ds->zfunc,
+ /* Stencil */
+ ds->stencil[0].enabled, /*f|b*/
+ ds->stencil[0].enabled, /*f*/
+ ds->stencil[0].enabled, /*b*/
+ ds->stencil_mask,
+ ds->stencil_writemask,
+ /* front stencil */
+ ds->stencil[0].fail,
+ ds->stencil[0].zfail,
+ ds->stencil[0].pass,
+ ds->stencil[0].func,
+ /* back stencil */
+ ds->stencil[1].fail,
+ ds->stencil[1].zfail,
+ ds->stencil[1].pass,
+ ds->stencil[1].func);
+ if (ret == PIPE_OK)
+ return;
+ svga_context_flush(svga, NULL);
+ }
+}
+
+
static void *
svga_create_depth_stencil_state(struct pipe_context *pipe,
const struct pipe_depth_stencil_alpha_state *templ)
{
+ struct svga_context *svga = svga_context(pipe);
struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state );
/* Don't try to figure out CW/CCW correspondence with
ds->stencil_mask = templ->stencil[0].valuemask & 0xff;
ds->stencil_writemask = templ->stencil[0].writemask & 0xff;
}
+ else {
+ ds->stencil[0].func = SVGA3D_CMP_ALWAYS;
+ ds->stencil[0].fail = SVGA3D_STENCILOP_KEEP;
+ ds->stencil[0].zfail = SVGA3D_STENCILOP_KEEP;
+ ds->stencil[0].pass = SVGA3D_STENCILOP_KEEP;
+ }
ds->stencil[1].enabled = templ->stencil[1].enabled;
if (templ->stencil[1].enabled) {
+ assert(templ->stencil[0].enabled);
+ /* two-sided stencil */
ds->stencil[1].func = svga_translate_compare_func(templ->stencil[1].func);
ds->stencil[1].fail = svga_translate_stencil_op(templ->stencil[1].fail_op);
ds->stencil[1].zfail = svga_translate_stencil_op(templ->stencil[1].zfail_op);
ds->stencil_mask = templ->stencil[1].valuemask & 0xff;
ds->stencil_writemask = templ->stencil[1].writemask & 0xff;
}
+ else {
+ /* back face state is same as front-face state */
+ ds->stencil[1].func = ds->stencil[0].func;
+ ds->stencil[1].fail = ds->stencil[0].fail;
+ ds->stencil[1].zfail = ds->stencil[0].zfail;
+ ds->stencil[1].pass = ds->stencil[0].pass;
+ }
ds->zenable = templ->depth.enabled;
ds->zfunc = svga_translate_compare_func(templ->depth.func);
ds->zwriteenable = templ->depth.writemask;
}
+ else {
+ ds->zfunc = SVGA3D_CMP_ALWAYS;
+ }
ds->alphatestenable = templ->alpha.enabled;
if (ds->alphatestenable) {
ds->alphafunc = svga_translate_compare_func(templ->alpha.func);
ds->alpharef = templ->alpha.ref_value;
}
+ else {
+ ds->alphafunc = SVGA3D_CMP_ALWAYS;
+ }
+
+ if (svga_have_vgpu10(svga)) {
+ define_depth_stencil_state_object(svga, ds);
+ }
return ds;
}
{
struct svga_context *svga = svga_context(pipe);
+ if (svga_have_vgpu10(svga)) {
+ /* flush any previously queued drawing before changing state */
+ svga_hwtnl_flush_retry(svga);
+ }
+
svga->curr.depth = (const struct svga_depth_stencil_state *)depth_stencil;
- svga->dirty |= SVGA_NEW_DEPTH_STENCIL;
+ svga->dirty |= SVGA_NEW_DEPTH_STENCIL_ALPHA;
}
+/**
+ * Delete a depth/stencil state object.
+ *
+ * On a VGPU10 context, queued drawing is flushed, the device object is
+ * destroyed (flushing and retrying once on failure), the cached hw_draw
+ * binding is invalidated if it refers to this object, and the object id
+ * is returned to svga->ds_object_id_bm before the state is freed.
+ */
static void svga_delete_depth_stencil_state(struct pipe_context *pipe,
void *depth_stencil)
{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_depth_stencil_state *ds =
+ (struct svga_depth_stencil_state *) depth_stencil;
+
+ if (svga_have_vgpu10(svga)) {
+ enum pipe_error ret;
+
+ svga_hwtnl_flush_retry(svga);
+
+ assert(ds->id != SVGA3D_INVALID_ID);
+
+ ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id);
+ if (ret != PIPE_OK) {
+ /* flush the command buffer and try once more */
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id);
+ assert(ret == PIPE_OK);
+ }
+
+ /* forget the cached binding if it refers to the destroyed object */
+ if (ds->id == svga->state.hw_draw.depth_stencil_id)
+ svga->state.hw_draw.depth_stencil_id = SVGA3D_INVALID_ID;
+
+ util_bitmask_clear(svga->ds_object_id_bm, ds->id);
+ ds->id = SVGA3D_INVALID_ID;
+ }
+
FREE(depth_stencil);
}
{
struct svga_context *svga = svga_context(pipe);
+ if (svga_have_vgpu10(svga)) {
+ /* flush any previously queued drawing before changing state */
+ svga_hwtnl_flush_retry(svga);
+ }
+
svga->curr.stencil_ref = *stencil_ref;
svga->dirty |= SVGA_NEW_STENCIL_REF;
svga_set_sample_mask(struct pipe_context *pipe,
unsigned sample_mask)
{
+ struct svga_context *svga = svga_context(pipe);
+
+ svga->curr.sample_mask = sample_mask;
+
+ svga->dirty |= SVGA_NEW_BLEND; /* See emit_rss_vgpu10() */
}
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
+#include "util/u_prim_restart.h"
#include "util/u_time.h"
+#include "util/u_upload_mgr.h"
#include "indices/u_indices.h"
#include "svga_hw_reg.h"
#include "svga_context.h"
#include "svga_screen.h"
#include "svga_draw.h"
+#include "svga_shader.h"
#include "svga_state.h"
#include "svga_swtnl.h"
#include "svga_debug.h"
#include "svga_resource_buffer.h"
-
static enum pipe_error
retry_draw_range_elements( struct svga_context *svga,
struct pipe_resource *index_buffer,
unsigned prim,
unsigned start,
unsigned count,
+ unsigned start_instance,
unsigned instance_count,
boolean do_retry )
{
enum pipe_error ret = PIPE_OK;
- svga_hwtnl_set_unfilled( svga->hwtnl,
- svga->curr.rast->hw_unfilled );
-
- svga_hwtnl_set_flatshade( svga->hwtnl,
- svga->curr.rast->templ.flatshade,
- svga->curr.rast->templ.flatshade_first );
+ svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
if (ret != PIPE_OK)
goto retry;
+ /** determine if flatshade is to be used after svga_update_state()
+ * in case the fragment shader is changed.
+ */
+ svga_hwtnl_set_flatshade(svga->hwtnl,
+ svga->curr.rast->templ.flatshade ||
+ svga->state.hw_draw.fs->uses_flat_interp,
+ svga->curr.rast->templ.flatshade_first);
+
ret = svga_hwtnl_draw_range_elements( svga->hwtnl,
index_buffer, index_size, index_bias,
min_index, max_index,
- prim, start, count );
+ prim, start, count,
+ start_instance, instance_count);
if (ret != PIPE_OK)
goto retry;
index_buffer, index_size, index_bias,
min_index, max_index,
prim, start, count,
- instance_count, FALSE );
+ start_instance, instance_count, FALSE );
}
return ret;
static enum pipe_error
retry_draw_arrays( struct svga_context *svga,
- unsigned prim,
- unsigned start,
- unsigned count,
- unsigned instance_count,
+ unsigned prim, unsigned start, unsigned count,
+ unsigned start_instance, unsigned instance_count,
boolean do_retry )
{
enum pipe_error ret;
- svga_hwtnl_set_unfilled( svga->hwtnl,
- svga->curr.rast->hw_unfilled );
-
- svga_hwtnl_set_flatshade( svga->hwtnl,
- svga->curr.rast->templ.flatshade,
- svga->curr.rast->templ.flatshade_first );
+ svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
if (ret != PIPE_OK)
goto retry;
- ret = svga_hwtnl_draw_arrays( svga->hwtnl, prim,
- start, count );
+ /** determine if flatshade is to be used after svga_update_state()
+ * in case the fragment shader is changed.
+ */
+ svga_hwtnl_set_flatshade(svga->hwtnl,
+ svga->curr.rast->templ.flatshade ||
+ svga->state.hw_draw.fs->uses_flat_interp,
+ svga->curr.rast->templ.flatshade_first);
+
+ ret = svga_hwtnl_draw_arrays(svga->hwtnl, prim, start, count,
+ start_instance, instance_count);
if (ret != PIPE_OK)
goto retry;
{
svga_context_flush( svga, NULL );
- return retry_draw_arrays( svga,
- prim,
- start,
- count,
- instance_count,
- FALSE );
+ return retry_draw_arrays(svga, prim, start, count,
+ start_instance, instance_count,
+ FALSE );
}
return ret;
}
+/**
+ * Decide whether primitive restart has to go through the fallback path,
+ * which splits the draw into sub-primitives at each restart index.
+ *
+ * \return TRUE when the fallback is required for this draw
+ */
+static boolean
+need_fallback_prim_restart(const struct svga_context *svga,
+                           const struct pipe_draw_info *info)
+{
+   /* nothing to do unless this is an indexed draw with restart enabled */
+   if (!info->primitive_restart || !info->indexed)
+      return FALSE;
+
+   /* without VGPU10 the fallback is always taken */
+   if (!svga_have_vgpu10(svga))
+      return TRUE;
+
+   if (svga->state.sw.need_swtnl)
+      return FALSE;
+
+   switch (svga->curr.ib.index_size) {
+   case 1:
+      return TRUE; /* no device support for 1-byte indexes */
+   case 2:
+      return info->restart_index != 0xffff;
+   default:
+      return info->restart_index != 0xffffffff;
+   }
+}
+
+
static void
svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
svga->num_draw_calls++; /* for SVGA_QUERY_DRAW_CALLS */
- if (!u_trim_pipe_prim( info->mode, &count ))
+ if (u_reduced_prim(info->mode) == PIPE_PRIM_TRIANGLES &&
+ svga->curr.rast->templ.cull_face == PIPE_FACE_FRONT_AND_BACK)
return;
/*
svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE;
}
+ if (need_fallback_prim_restart(svga, info)) {
+ enum pipe_error r;
+ r = util_draw_vbo_without_prim_restart(pipe, &svga->curr.ib, info);
+ assert(r == PIPE_OK);
+ (void) r;
+ return;
+ }
+
+ if (!u_trim_pipe_prim( info->mode, &count ))
+ return;
+
needed_swtnl = svga->state.sw.need_swtnl;
svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL );
info->max_index,
info->mode,
info->start + offset,
- info->count,
+ count,
+ info->start_instance,
info->instance_count,
TRUE );
}
else {
- ret = retry_draw_arrays( svga,
- info->mode,
- info->start,
- info->count,
- info->instance_count,
- TRUE );
+ ret = retry_draw_arrays(svga, info->mode, info->start, count,
+ info->start_instance, info->instance_count,
+ TRUE);
}
}
#include "draw/draw_context.h"
#include "svga_context.h"
-#include "svga_tgsi.h"
#include "svga_hw_reg.h"
#include "svga_cmd.h"
#include "svga_debug.h"
fs->draw_shader = draw_create_fragment_shader(svga->swtnl.draw, templ);
- if (SVGA_DEBUG & DEBUG_TGSI || 0) {
- debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
- __FUNCTION__, fs->base.id,
- fs->base.info.num_inputs, fs->base.info.num_outputs);
- }
-
return fs;
}
svga_hwtnl_flush_retry(svga);
+ assert(fs->base.parent == NULL);
+
draw_delete_fragment_shader(svga->swtnl.draw, fs->draw_shader);
for (variant = fs->base.variants; variant; variant = tmp) {
tmp = variant->next;
- ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
- (void) ret; /* PIPE_ERROR_ not handled yet */
-
- /*
- * Remove stale references to this variant to ensure a new variant on the
- * same address will be detected as a change.
- */
- if (variant == svga->state.hw_draw.fs)
+ /* Check if deleting currently bound shader */
+ if (variant == svga->state.hw_draw.fs) {
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL);
+ assert(ret == PIPE_OK);
+ }
svga->state.hw_draw.fs = NULL;
+ }
+
+ ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+ assert(ret == PIPE_OK);
+ }
}
FREE((void *)fs->base.tokens);
--- /dev/null
+/**********************************************************
+ * Copyright 2014 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_text.h"
+
+#include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
+#include "svga_shader.h"
+#include "svga_streamout.h"
+
+/**
+ * Create a geometry shader state object from the given template.
+ *
+ * Duplicates and scans the TGSI tokens, creates the matching draw-module
+ * shader, assigns a debug id, records the generic outputs mask and, if
+ * the template declares stream outputs, creates the stream output state.
+ *
+ * \return the new svga_geometry_shader, or NULL if an allocation fails
+ */
+static void *
+svga_create_gs_state(struct pipe_context *pipe,
+                     const struct pipe_shader_state *templ)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_geometry_shader *gs = CALLOC_STRUCT(svga_geometry_shader);
+
+   if (!gs)
+      return NULL;
+
+   gs->base.tokens = tgsi_dup_tokens(templ->tokens);
+   if (!gs->base.tokens) {
+      /* the token copy could not be allocated; the scan below needs it */
+      FREE(gs);
+      return NULL;
+   }
+
+   /* Collect basic info that we'll need later:
+    */
+   tgsi_scan_shader(gs->base.tokens, &gs->base.info);
+
+   gs->draw_shader = draw_create_geometry_shader(svga->swtnl.draw, templ);
+
+   gs->base.id = svga->debug.shader_id++;
+
+   gs->generic_outputs = svga_get_generic_outputs_mask(&gs->base.info);
+
+   /* check for any stream output declarations */
+   if (templ->stream_output.num_outputs) {
+      gs->base.stream_output = svga_create_stream_output(svga, &gs->base,
+                                                         &templ->stream_output);
+   }
+
+   return gs;
+}
+
+
+/**
+ * Make \p shader the user-specified geometry shader of the context and
+ * flag the geometry shader state as dirty.
+ */
+static void
+svga_bind_gs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   svga->curr.user_gs = (struct svga_geometry_shader *) shader;
+   svga->dirty |= SVGA_NEW_GS;
+}
+
+
+/**
+ * Delete a geometry shader together with every shader linked after it.
+ *
+ * If \p shader has a parent, deletion starts from that parent instead.
+ * The base.next chain is then walked and, for each geometry shader, its
+ * stream output state, draw-module shader, hardware variants and token
+ * copy are released.
+ */
+static void
+svga_delete_gs_state(struct pipe_context *pipe, void *shader)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_geometry_shader *gs = (struct svga_geometry_shader *)shader;
+ struct svga_geometry_shader *next_gs;
+ struct svga_shader_variant *variant, *tmp;
+ enum pipe_error ret;
+
+ svga_hwtnl_flush_retry(svga);
+
+ /* Start deletion from the original geometry shader state */
+ if (gs->base.parent != NULL)
+ gs = (struct svga_geometry_shader *)gs->base.parent;
+
+ /* Free the list of geometry shaders */
+ while (gs) {
+ /* fetch the successor first: gs itself is freed at the end of the pass */
+ next_gs = (struct svga_geometry_shader *)gs->base.next;
+
+ if (gs->base.stream_output != NULL)
+ svga_delete_stream_output(svga, gs->base.stream_output);
+
+ draw_delete_geometry_shader(svga->swtnl.draw, gs->draw_shader);
+
+ for (variant = gs->base.variants; variant; variant = tmp) {
+ /* likewise, read the next variant before this one is destroyed */
+ tmp = variant->next;
+
+ /* Check if deleting currently bound shader */
+ if (variant == svga->state.hw_draw.gs) {
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL);
+ if (ret != PIPE_OK) {
+ /* flush the command buffer and try once more */
+ svga_context_flush(svga, NULL);
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL);
+ assert(ret == PIPE_OK);
+ }
+ svga->state.hw_draw.gs = NULL;
+ }
+
+ ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, variant);
+ if (ret != PIPE_OK) {
+ /* flush the command buffer and try once more */
+ svga_context_flush(svga, NULL);
+ ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS,
+ variant);
+ assert(ret == PIPE_OK);
+ }
+ }
+
+ FREE((void *)gs->base.tokens);
+ FREE(gs);
+ gs = next_gs;
+ }
+}
+
+
+/**
+ * Install the geometry shader create/bind/delete hooks on the context.
+ */
+void
+svga_init_gs_functions(struct svga_context *svga)
+{
+   struct pipe_context *pipe = &svga->pipe;
+
+   pipe->create_gs_state = svga_create_gs_state;
+   pipe->bind_gs_state = svga_bind_gs_state;
+   pipe->delete_gs_state = svga_delete_gs_state;
+}
#include "util/u_framebuffer.h"
#include "util/u_inlines.h"
+#include "util/u_pstipple.h"
#include "svga_context.h"
#include "svga_screen.h"
}
-static void svga_set_polygon_stipple( struct pipe_context *pipe,
- const struct pipe_poly_stipple *stipple )
+/**
+ * Set the polygon stipple pattern.
+ *
+ * The previous stipple texture and sampler view are released and new ones
+ * are built from stipple->stipple with the util_pstipple helpers.  The
+ * sampler state is created only once and kept for later calls.  Finally
+ * SVGA_NEW_STIPPLE is raised in svga->dirty.
+ *
+ * \param pipe     the pipe context (an svga_context)
+ * \param stipple  the new stipple pattern
+ */
+static void
+svga_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
{
- /* overridden by the draw module */
+ struct svga_context *svga = svga_context(pipe);
+
+ /* release old texture */
+ pipe_resource_reference(&svga->polygon_stipple.texture, NULL);
+
+ /* release old sampler view */
+ if (svga->polygon_stipple.sampler_view) {
+ pipe->sampler_view_destroy(pipe,
+ &svga->polygon_stipple.sampler_view->base);
+ }
+
+ /* create new stipple texture */
+ svga->polygon_stipple.texture =
+ util_pstipple_create_stipple_texture(pipe, stipple->stipple);
+
+ /* create new sampler view */
+ /* NOTE(review): the texture is passed on without a NULL check; confirm
+ * that util_pstipple_create_sampler_view() tolerates a failed texture
+ * creation.
+ */
+ svga->polygon_stipple.sampler_view =
+ (struct svga_pipe_sampler_view *)
+ util_pstipple_create_sampler_view(pipe,
+ svga->polygon_stipple.texture);
+
+ /* allocate sampler state, if first time */
+ if (!svga->polygon_stipple.sampler) {
+ svga->polygon_stipple.sampler = util_pstipple_create_sampler(pipe);
+ }
+
+ svga->dirty |= SVGA_NEW_STIPPLE;
}
boolean propagate = FALSE;
unsigned i;
+ /* make sure any pending drawing calls are flushed before changing
+ * the framebuffer state
+ */
+ svga_hwtnl_flush_retry(svga);
+
dst->width = fb->width;
dst->height = fb->height;
dst->nr_cbufs = fb->nr_cbufs;
}
if (propagate) {
- /* make sure that drawing calls comes before propagation calls */
- svga_hwtnl_flush_retry( svga );
-
for (i = 0; i < dst->nr_cbufs; i++) {
struct pipe_surface *s = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;
if (dst->cbufs[i] && dst->cbufs[i] != s)
}
}
- /* XXX: Actually the virtual hardware may support rendertargets with
- * different size, depending on the host API and driver, but since we cannot
- * know that make no such assumption here. */
- for(i = 0; i < fb->nr_cbufs; ++i) {
- if (fb->zsbuf && fb->cbufs[i]) {
- assert(fb->zsbuf->width == fb->cbufs[i]->width);
- assert(fb->zsbuf->height == fb->cbufs[i]->height);
+ /* Check that all surfaces are the same size.
+ * Actually, the virtual hardware may support rendertargets with
+ * different sizes, depending on the host API and driver, but since we
+ * cannot know that, only warn about a mismatch here.
+ */
+ {
+ int width = 0, height = 0;
+ if (fb->zsbuf) {
+ width = fb->zsbuf->width;
+ height = fb->zsbuf->height;
+ }
+ for (i = 0; i < fb->nr_cbufs; ++i) {
+ if (fb->cbufs[i]) {
+ if (width && height) {
+ if (fb->cbufs[i]->width != width ||
+ fb->cbufs[i]->height != height) {
+ debug_warning("Mixed-size color and depth/stencil surfaces "
+ "may not work properly");
+ }
+ }
+ else {
+ width = fb->cbufs[i]->width;
+ height = fb->cbufs[i]->height;
+ }
+ }
}
}
/**********************************************************
- * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ * Copyright 2008-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
#include "pipe/p_state.h"
#include "pipe/p_context.h"
+
+#include "util/u_bitmask.h"
#include "util/u_memory.h"
#include "svga_cmd.h"
int dummy;
};
-
struct svga_query {
struct pipe_query base;
unsigned type; /**< PIPE_QUERY_x or SVGA_QUERY_x */
SVGA3dQueryType svga_type; /**< SVGA3D_QUERYTYPE_x or unused */
+ unsigned id; /** Per-context query identifier */
+
+ struct pipe_fence_handle *fence;
+
/** For PIPE_QUERY_OCCLUSION_COUNTER / SVGA3D_QUERYTYPE_OCCLUSION */
+
+ /* For VGPU9 */
struct svga_winsys_buffer *hwbuf;
volatile SVGA3dQueryResult *queryResult;
- struct pipe_fence_handle *fence;
+
+ /** For VGPU10 */
+ struct svga_winsys_gb_query *gb_query;
+ SVGA3dDXQueryFlags flags;
+ unsigned offset; /**< offset to the gb_query memory */
+ struct pipe_query *predicate; /** The associated query that can be used for predicate */
/** For non-GPU SVGA_QUERY_x queries */
uint64_t begin_count, end_count;
boolean wait,
union pipe_query_result *result);
+/**
+ * Create and initialize the result buffer of a VGPU9 query.
+ *
+ * A pinned winsys buffer holding one SVGA3dQueryResult is created, its
+ * totalSize field is filled in and its state is set to
+ * SVGA3D_QUERYSTATE_NEW.
+ *
+ * \param svga  the context
+ * \param sq    the query being defined; hwbuf and queryResult are set
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if the buffer
+ *         cannot be created or mapped.  On failure sq->hwbuf is NULL.
+ */
+static enum pipe_error
+define_query_vgpu9(struct svga_context *svga,
+                   struct svga_query *sq)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+
+   sq->hwbuf = svga_winsys_buffer_create(svga, 1,
+                                         SVGA_BUFFER_USAGE_PINNED,
+                                         sizeof *sq->queryResult);
+   if (!sq->hwbuf)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   sq->queryResult = (SVGA3dQueryResult *)
+      sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE);
+   if (!sq->queryResult) {
+      sws->buffer_destroy(sws, sq->hwbuf);
+      /* Do not leave a dangling handle behind: the query object outlives
+       * this failure and would otherwise destroy the buffer a second time.
+       */
+      sq->hwbuf = NULL;
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   sq->queryResult->totalSize = sizeof *sq->queryResult;
+   sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
+
+   /* We request the buffer to be pinned and assume it is always mapped.
+    * The reason is that we don't want to wait for fences when checking the
+    * query status.
+    */
+   sws->buffer_unmap(sws, sq->hwbuf);
+
+   return PIPE_OK;
+}
+
+/**
+ * Start a VGPU9 query.
+ *
+ * If the previous result of this query is still pending it is waited for
+ * first.  The result state is then reset to NEW, the fence reference is
+ * dropped and the BeginQuery command is emitted, with one retry after a
+ * context flush.
+ *
+ * \return the status of the (possibly retried) BeginQuery command
+ */
+static enum pipe_error
+begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error status;
+
+   if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
+      /* The application doesn't care for the pending query result.
+       * We cannot let go of the existing buffer and just get a new one
+       * because its storage may be reused for other purposes and clobbered
+       * by the host when it determines the query result.  So the only
+       * option here is to wait for the existing query's result -- not a
+       * big deal, given that no sane application would do this.
+       */
+      uint64_t discarded;
+      svga_get_query_result(&svga->pipe, &sq->base, TRUE, (void*)&discarded);
+      assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
+   }
+
+   sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
+   sws->fence_reference(sws, &sq->fence, NULL);
+
+   status = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
+   if (status == PIPE_OK)
+      return status;
+
+   /* Command buffer is probably full: flush and try once more */
+   svga_context_flush(svga, NULL);
+   return SVGA3D_BeginQuery(svga->swc, sq->svga_type);
+}
+
+/**
+ * End a VGPU9 query.
+ *
+ * The result state is marked PENDING before the EndQuery command is
+ * emitted; the command is retried once after a context flush.
+ *
+ * \return the status of the (possibly retried) EndQuery command
+ */
+static enum pipe_error
+end_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
+{
+   enum pipe_error status;
+
+   /* Set to PENDING before sending EndQuery. */
+   sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
+
+   status = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
+   if (status == PIPE_OK)
+      return status;
+
+   svga_context_flush(svga, NULL);
+   return SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
+}
+
+/**
+ * Fetch the result of a VGPU9 query.
+ *
+ * \param wait    if TRUE, block on the query fence while the result is
+ *                still pending
+ * \param result  receives the 32-bit query result widened to 64 bits
+ * \return FALSE if the result is still pending and \p wait is FALSE,
+ *         TRUE otherwise
+ */
+static boolean
+get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
+                       boolean wait, uint64_t *result)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   SVGA3dQueryState cur_state;
+
+   if (sq->fence == NULL) {
+      enum pipe_error err;
+
+      /* The query status won't be updated by the host unless
+       * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted.  Unfortunately this will
+       * cause a synchronous wait on the host.
+       */
+      err = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
+      if (err != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         err = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
+      }
+      assert (err == PIPE_OK);
+      svga_context_flush(svga, &sq->fence);
+      assert(sq->fence);
+   }
+
+   cur_state = sq->queryResult->state;
+   if (cur_state == SVGA3D_QUERYSTATE_PENDING) {
+      if (!wait)
+         return FALSE;
+      sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+      cur_state = sq->queryResult->state;
+   }
+
+   assert(cur_state == SVGA3D_QUERYSTATE_SUCCEEDED ||
+          cur_state == SVGA3D_QUERYSTATE_FAILED);
+
+   *result = (uint64_t) sq->queryResult->result32;
+   return TRUE;
+}
+
+
+/**
+ * VGPU10
+ *
+ * There is one query mob allocated for each context to be shared by all
+ * query types. The mob is used to hold the queries' state and result. Since
+ * each query result type is of different length, to ease the query allocation
+ * management, the mob is divided into memory blocks. Each memory block
+ * will hold queries of the same type. Multiple memory blocks can be allocated
+ * for a particular query type.
+ *
+ * Currently each memory block is of 184 bytes. We support up to 128
+ * memory blocks. The query memory size is arbitrary right now.
+ * Each occlusion query takes about 8 bytes. One memory block can accommodate
+ * 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion
+ * queries. That seems reasonable for now. If we think this limit is
+ * not enough, we can increase the limit or try to grow the mob at runtime.
+ * Note, SVGA device does not impose one mob per context for queries,
+ * we could allocate multiple mobs for queries; however, wddm KMD does not
+ * currently support that.
+ *
+ * Also note that the GL guest driver does not issue any of the
+ * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery.
+ */
+/* Size in bytes of one memory block: two SVGADXQueryResultUnion objects */
+#define SVGA_QUERY_MEM_BLOCK_SIZE (sizeof(SVGADXQueryResultUnion) * 2)
+/* Total size in bytes of the per-context query mob: 128 memory blocks */
+#define SVGA_QUERY_MEM_SIZE (128 * SVGA_QUERY_MEM_BLOCK_SIZE)
+
+/**
+ * Bookkeeping for one memory block of the per-context query mob.
+ * All query slots in a block have the same size (query_size).  The
+ * entries that belong to one query type are chained through 'next'
+ * and anchored in svga->gb_query_map[type].
+ */
+struct svga_qmem_alloc_entry
+{
+ unsigned start_offset; /* byte offset of the block: block_index * SVGA_QUERY_MEM_BLOCK_SIZE */
+ unsigned block_index; /* index of the block in svga->gb_query_alloc_mask */
+ unsigned query_size; /* size in bytes of one query slot in this block */
+ unsigned nquery; /* number of slots currently allocated */
+ struct util_bitmask *alloc_mask; /* one bit per allocated slot */
+ struct svga_qmem_alloc_entry *next; /* next memory block in the same list */
+};
+
+
+/**
+ * Allocate a memory block from the query object memory.
+ *
+ * A fresh block index is taken from svga->gb_query_alloc_mask.  If that
+ * index lies beyond the end of the query memory, the index is given back
+ * and the per-type block lists are searched for an empty block
+ * (nquery == 0) instead.  Such a block is unlinked from its list, its
+ * bookkeeping entry is freed and its block index is handed out again.
+ *
+ * \return -1 if out of memory, else index of the query memory block
+ */
+static int
+allocate_query_block(struct svga_context *svga)
+{
+   int index;
+   unsigned offset;
+   unsigned i;
+
+   /* Find the next available query block */
+   index = util_bitmask_add(svga->gb_query_alloc_mask);
+
+   if (index == UTIL_BITMASK_INVALID_INDEX)
+      return -1;
+
+   offset = index * SVGA_QUERY_MEM_BLOCK_SIZE;
+   if (offset < svga->gb_query_len)
+      return index;
+
+   /* The index is out of range: give it back so that the allocation mask
+    * does not grow on every failed attempt.
+    */
+   util_bitmask_clear(svga->gb_query_alloc_mask, index);
+   index = -1;
+
+   /**
+    * All the memory blocks are allocated, lets see if there is
+    * any empty memory block around that can be freed up.
+    */
+   for (i = 0; i < SVGA_QUERY_MAX && index == -1; i++) {
+      struct svga_qmem_alloc_entry *alloc_entry = svga->gb_query_map[i];
+      struct svga_qmem_alloc_entry *prev_alloc_entry = NULL;
+
+      while (alloc_entry && index == -1) {
+         if (alloc_entry->nquery == 0) {
+            /* This memory block is empty, it can be recycled. */
+            if (prev_alloc_entry) {
+               prev_alloc_entry->next = alloc_entry->next;
+            } else {
+               svga->gb_query_map[i] = alloc_entry->next;
+            }
+            index = alloc_entry->block_index;
+
+            /* The entry is no longer reachable from any list; release it
+             * together with its slot mask.  The caller creates a new entry
+             * for the recycled block.
+             */
+            if (alloc_entry->alloc_mask)
+               util_bitmask_destroy(alloc_entry->alloc_mask);
+            FREE(alloc_entry);
+         } else {
+            prev_alloc_entry = alloc_entry;
+            alloc_entry = alloc_entry->next;
+         }
+      }
+   }
+
+   return index;
+}
+
+/**
+ * Allocate a slot in the specified memory block.
+ * All slots in this memory block are of the same size.
+ *
+ * A slot is only handed out if it fits completely inside the block; a
+ * slot that merely starts inside the block would overlap the following
+ * block.  When no slot is available the bit taken from the allocation
+ * mask is released again.
+ *
+ * \return -1 if out of memory, else index of the query slot
+ */
+static int
+allocate_query_slot(struct svga_context *svga,
+                    struct svga_qmem_alloc_entry *alloc)
+{
+   unsigned slot;
+
+   (void) svga;
+
+   /* Find the next available slot */
+   slot = util_bitmask_add(alloc->alloc_mask);
+
+   if (slot == UTIL_BITMASK_INVALID_INDEX)
+      return -1;
+
+   /* The end of the slot, not just its start, must lie within the block */
+   if ((slot + 1) * alloc->query_size > SVGA_QUERY_MEM_BLOCK_SIZE) {
+      util_bitmask_clear(alloc->alloc_mask, slot);
+      return -1;
+   }
+
+   alloc->nquery++;
+
+   return (int) slot;
+}
+
+/**
+ * Release one slot of a memory block: the block's query count is
+ * decremented and the slot's bit in the allocation mask is cleared.
+ *
+ * An emptied block is intentionally kept; allocate_query_block()
+ * recycles empty blocks once no new block can be allocated.
+ */
+static void
+deallocate_query_slot(struct svga_context *svga,
+                      struct svga_qmem_alloc_entry *alloc,
+                      unsigned index)
+{
+   assert(index != UTIL_BITMASK_INVALID_INDEX);
+
+   --alloc->nquery;
+   util_bitmask_clear(alloc->alloc_mask, index);
+}
+
+/**
+ * Allocate a memory block in the query mob and create the bookkeeping
+ * entry for it.
+ *
+ * \param svga  the context
+ * \param len   size in bytes of one query slot in the new block
+ * \return the new entry (not yet linked into any list), or NULL if no
+ *         block is available or memory allocation fails.  On failure the
+ *         block index is returned to svga->gb_query_alloc_mask.
+ */
+static struct svga_qmem_alloc_entry *
+allocate_query_block_entry(struct svga_context *svga,
+                           unsigned len)
+{
+   struct svga_qmem_alloc_entry *alloc_entry;
+   int block_index;
+
+   block_index = allocate_query_block(svga);
+   if (block_index == -1)
+      return NULL;
+
+   alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);
+   if (alloc_entry == NULL)
+      goto fail_block;
+
+   alloc_entry->alloc_mask = util_bitmask_create();
+   if (alloc_entry->alloc_mask == NULL)
+      goto fail_entry;
+
+   alloc_entry->block_index = block_index;
+   alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE;
+   alloc_entry->nquery = 0;
+   alloc_entry->next = NULL;
+   alloc_entry->query_size = len;
+
+   return alloc_entry;
+
+fail_entry:
+   FREE(alloc_entry);
+fail_block:
+   /* Nothing tracks the block any more, so make its index available again */
+   util_bitmask_clear(svga->gb_query_alloc_mask, block_index);
+   return NULL;
+}
+
+/**
+ * Allocate a memory slot for a query of the specified type.
+ * It will first try the memory block at the head of the list for the
+ * query type. If no memory slot is available there, it will try
+ * to allocate another memory block within the query object memory for
+ * this query type.
+ *
+ * \param svga  the context
+ * \param type  query type, used as index into svga->gb_query_map
+ * \param len   size in bytes of one query slot
+ * \return byte offset of the slot within the query memory, or -1 if no
+ *         slot could be allocated
+ */
+static int
+allocate_query(struct svga_context *svga,
+               SVGA3dQueryType type,
+               unsigned len)
+{
+   struct svga_qmem_alloc_entry *alloc_entry;
+   int slot_index = -1;
+   unsigned offset;
+
+   assert(type < SVGA_QUERY_MAX);
+
+   /* A zero-sized slot would divide by zero when the slot is released,
+    * and a slot larger than a block can never be placed.
+    */
+   if (len == 0 || len > SVGA_QUERY_MEM_BLOCK_SIZE)
+      return -1;
+
+   alloc_entry = svga->gb_query_map[type];
+
+   if (alloc_entry == NULL) {
+      /**
+       * No query memory block has been allocated for this query type,
+       * allocate one now
+       */
+      alloc_entry = allocate_query_block_entry(svga, len);
+      if (alloc_entry == NULL)
+         return -1;
+      svga->gb_query_map[type] = alloc_entry;
+   }
+
+   /* Allocate a slot within the memory block allocated for this query type */
+   slot_index = allocate_query_slot(svga, alloc_entry);
+
+   if (slot_index == -1) {
+      /* This query memory block is full, allocate another one */
+      alloc_entry = allocate_query_block_entry(svga, len);
+      if (alloc_entry == NULL)
+         return -1;
+      alloc_entry->next = svga->gb_query_map[type];
+      svga->gb_query_map[type] = alloc_entry;
+      slot_index = allocate_query_slot(svga, alloc_entry);
+   }
+
+   /* Report the failure instead of computing an offset from -1; an
+    * assert alone does not protect release builds.
+    */
+   assert(slot_index != -1);
+   if (slot_index == -1)
+      return -1;
+
+   offset = slot_index * len + alloc_entry->start_offset;
+
+   return offset;
+}
+
+
+/**
+ * Release the memory slot used by the given query.
+ *
+ * The memory blocks of the query's type are searched for the block whose
+ * range contains sq->offset; the slot index is derived from the offset
+ * within that block.  Nothing happens if no block matches.
+ */
+static void
+deallocate_query(struct svga_context *svga,
+                 struct svga_query *sq)
+{
+   const unsigned offset = sq->offset;
+   struct svga_qmem_alloc_entry *entry;
+
+   for (entry = svga->gb_query_map[sq->svga_type]; entry;
+        entry = entry->next) {
+      const unsigned first = entry->start_offset;
+
+      if (offset < first || offset >= first + SVGA_QUERY_MEM_BLOCK_SIZE)
+         continue;
+
+      /* The slot belongs to this memory block, deallocate it */
+      deallocate_query_slot(svga, entry,
+                            (offset - first) / entry->query_size);
+      break;
+   }
+}
+
+
+/**
+ * Destroy the gb query object and all the related query structures:
+ * every memory block entry with its slot mask, the query object itself
+ * and the block allocation mask.
+ *
+ * The context fields are reset afterwards so that no stale pointer is
+ * left behind, and the function copes with a context on which the
+ * query object was never created.
+ */
+static void
+destroy_gb_query_obj(struct svga_context *svga)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   unsigned i;
+
+   for (i = 0; i < SVGA_QUERY_MAX; i++) {
+      struct svga_qmem_alloc_entry *alloc_entry, *next;
+      alloc_entry = svga->gb_query_map[i];
+      while (alloc_entry) {
+         next = alloc_entry->next;
+         if (alloc_entry->alloc_mask)
+            util_bitmask_destroy(alloc_entry->alloc_mask);
+         FREE(alloc_entry);
+         alloc_entry = next;
+      }
+      svga->gb_query_map[i] = NULL;
+   }
+
+   if (svga->gb_query)
+      sws->query_destroy(sws, svga->gb_query);
+   svga->gb_query = NULL;
+   svga->gb_query_len = 0;
+
+   /* The mask only exists once the query object has been created */
+   if (svga->gb_query_alloc_mask)
+      util_bitmask_destroy(svga->gb_query_alloc_mask);
+   svga->gb_query_alloc_mask = NULL;
+}
+
+/**
+ * Define query and create the gb query object if it is not already created.
+ * There is only one gb query object per context which will be shared by
+ * queries of all types.
+ *
+ * The query id (sq->id) must already have been allocated by the caller;
+ * svga_create_query() does so before calling this function.  Allocating
+ * a second id here would overwrite and leak the first one.
+ *
+ * \param svga       the context
+ * \param sq         the query; svga_type, flags and id must be set
+ * \param resultLen  size in bytes of the result for this query type
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY otherwise.  When
+ *         no slot is held for the query, sq->offset is -1.
+ */
+static enum pipe_error
+define_query_vgpu10(struct svga_context *svga,
+                    struct svga_query *sq, int resultLen)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   int qlen;
+   enum pipe_error ret = PIPE_OK;
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+
+   if (svga->gb_query == NULL) {
+      /* Create a gb query object */
+      svga->gb_query = sws->query_create(sws, SVGA_QUERY_MEM_SIZE);
+      if (!svga->gb_query)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      svga->gb_query_len = SVGA_QUERY_MEM_SIZE;
+      memset (svga->gb_query_map, 0, sizeof(svga->gb_query_map));
+      svga->gb_query_alloc_mask = util_bitmask_create();
+      if (!svga->gb_query_alloc_mask) {
+         /* Without the block mask nothing can be allocated; undo */
+         sws->query_destroy(sws, svga->gb_query);
+         svga->gb_query = NULL;
+         svga->gb_query_len = 0;
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+
+      /* Bind the query object to the context */
+      if (svga->swc->query_bind(svga->swc, svga->gb_query,
+                                SVGA_QUERY_FLAG_SET) != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         svga->swc->query_bind(svga->swc, svga->gb_query,
+                               SVGA_QUERY_FLAG_SET);
+      }
+   }
+
+   sq->gb_query = svga->gb_query;
+
+   /* Find a slot for this query in the gb object */
+   qlen = resultLen + sizeof(SVGA3dQueryState);
+   sq->offset = allocate_query(svga, sq->svga_type, qlen);
+   if (sq->offset == -1)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   SVGA_DBG(DEBUG_QUERY, "   query type=%d qid=0x%x offset=%d\n",
+            sq->svga_type, sq->id, sq->offset);
+
+   /**
+    * Send SVGA3D commands to define the query
+    */
+   ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
+   }
+   if (ret != PIPE_OK) {
+      /* Give the slot back and mark the query as holding none, so that a
+       * later destroy cannot release a slot owned by another query.
+       */
+      deallocate_query(svga, sq);
+      sq->offset = -1;
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
+   }
+   assert(ret == PIPE_OK);
+
+   ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
+   }
+   assert(ret == PIPE_OK);
+
+   return PIPE_OK;
+}
+
+/**
+ * Destroy a VGPU10 query: emit the DestroyQuery command and release the
+ * query's slot in the query memory.
+ *
+ * Like the other query commands in this file, the command is retried
+ * once after a context flush if it cannot be emitted right away.
+ *
+ * \return the status of the (possibly retried) DestroyQuery command
+ */
+static enum pipe_error
+destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
+{
+   enum pipe_error ret;
+
+   ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id);
+   }
+
+   /* Deallocate the memory slot allocated for this query */
+   deallocate_query(svga, sq);
+
+   return ret;
+}
+
+
+/**
+ * Rebind the query object to the context.
+ *
+ * The bind is retried once after a context flush; afterwards the
+ * context's query rebind flag is cleared.
+ */
+static void
+rebind_vgpu10_query(struct svga_context *svga)
+{
+   enum pipe_error status;
+
+   status = svga->swc->query_bind(svga->swc, svga->gb_query,
+                                  SVGA_QUERY_FLAG_REF);
+   if (status != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      svga->swc->query_bind(svga->swc, svga->gb_query,
+                            SVGA_QUERY_FLAG_REF);
+   }
+
+   svga->rebind.flags.query = FALSE;
+}
+
+
+/**
+ * Start a VGPU10 query.
+ *
+ * The fence reference is dropped, the query state in the query memory is
+ * reset to NEW, the query object is rebound if the context requests it
+ * and the BeginQuery command is emitted, with one retry after a flush.
+ *
+ * \return PIPE_ERROR if the query state cannot be initialized, otherwise
+ *         the status of the (possibly retried) BeginQuery command
+ */
+static enum pipe_error
+begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error err;
+
+   sws->fence_reference(sws, &sq->fence, NULL);
+
+   /* Initialize the query state to NEW */
+   if (sws->query_init(sws, sq->gb_query, sq->offset,
+                       SVGA3D_QUERYSTATE_NEW) != 0)
+      return PIPE_ERROR;
+
+   if (svga->rebind.flags.query)
+      rebind_vgpu10_query(svga);
+
+   /* Send the BeginQuery command to the device */
+   err = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
+   if (err == PIPE_OK)
+      return err;
+
+   svga_context_flush(svga, NULL);
+   return SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
+}
+
+/**
+ * End a VGPU10 query.
+ *
+ * The query object is rebound if the context requests it, the EndQuery
+ * command is emitted (one retry after a flush) and the query fence is
+ * waited for.
+ *
+ * \return the status of the (possibly retried) EndQuery command
+ */
+static enum pipe_error
+end_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error err;
+
+   if (svga->rebind.flags.query)
+      rebind_vgpu10_query(svga);
+
+   err = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
+   if (err != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      err = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
+   }
+
+   /* Finish fence is copied here from get_query_result_vgpu10. This helps
+    * with cases where svga_begin_query might be called again before
+    * svga_get_query_result, such as GL_TIME_ELAPSED.
+    */
+   if (sq->fence == NULL)
+      svga_context_flush(svga, &sq->fence);
+
+   sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+
+   return err;
+}
+
+/**
+ * Fetch the result of a VGPU10 query from the query memory.
+ *
+ * \param wait       if TRUE, block on the query fence while the result
+ *                   is still pending and read the result again
+ * \param result     receives the query result
+ * \param resultLen  size in bytes of the result
+ * \return FALSE if the result is still pending and \p wait is FALSE,
+ *         TRUE otherwise
+ */
+static boolean
+get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq,
+                        boolean wait, void *result, int resultLen)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   SVGA3dQueryState status;
+
+   if (svga->rebind.flags.query)
+      rebind_vgpu10_query(svga);
+
+   sws->query_get_result(sws, sq->gb_query, sq->offset, &status,
+                         result, resultLen);
+
+   if (status == SVGA3D_QUERYSTATE_PENDING) {
+      if (!wait)
+         return FALSE;
+
+      sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+      sws->query_get_result(sws, sq->gb_query, sq->offset, &status,
+                            result, resultLen);
+   }
+
+   assert(status == SVGA3D_QUERYSTATE_SUCCEEDED ||
+          status == SVGA3D_QUERYSTATE_FAILED);
+
+   return TRUE;
+}
static struct pipe_query *
svga_create_query(struct pipe_context *pipe,
unsigned query_type,
unsigned index)
{
- struct svga_context *svga = svga_context( pipe );
- struct svga_screen *svgascreen = svga_screen(pipe->screen);
- struct svga_winsys_screen *sws = svgascreen->sws;
+ struct svga_context *svga = svga_context(pipe);
struct svga_query *sq;
- SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+ assert(query_type < SVGA_QUERY_MAX);
sq = CALLOC_STRUCT(svga_query);
if (!sq)
- goto no_sq;
+ goto fail;
+
+ /* Allocate an integer ID for the query */
+ sq->id = util_bitmask_add(svga->query_id_bm);
+ if (sq->id == UTIL_BITMASK_INVALID_INDEX)
+ goto fail;
+
+ SVGA_DBG(DEBUG_QUERY, "%s type=%d sq=0x%x id=%d\n", __FUNCTION__,
+ query_type, sq, sq->id);
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;
+ if (svga_have_vgpu10(svga)) {
+ define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult));
- sq->hwbuf = svga_winsys_buffer_create(svga, 1,
- SVGA_BUFFER_USAGE_PINNED,
- sizeof *sq->queryResult);
- if (!sq->hwbuf) {
- debug_printf("svga: failed to alloc query object!\n");
- goto no_hwbuf;
- }
+ /**
+ * In OpenGL, occlusion counter query can be used in conditional
+ * rendering; however, in DX10, only OCCLUSION_PREDICATE query can
+ * be used for predication. Hence, we need to create an occlusion
+ * predicate query along with the occlusion counter query. So when
+ * the occlusion counter query is used for predication, the associated
+ * query of occlusion predicate type will be used
+ * in the SetPredication command.
+ */
+ sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index);
- sq->queryResult = (SVGA3dQueryResult *)
- sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE);
- if (!sq->queryResult) {
- debug_printf("svga: failed to map query object!\n");
- goto no_query_result;
+ } else {
+ define_query_vgpu9(svga, sq);
}
-
- sq->queryResult->totalSize = sizeof *sq->queryResult;
- sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
-
- /* We request the buffer to be pinned and assume it is always mapped.
- * The reason is that we don't want to wait for fences when checking the
- * query status.
- */
- sws->buffer_unmap(sws, sq->hwbuf);
+ break;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ assert(svga_have_vgpu10(svga));
+ sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE;
+ define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult));
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_SO_STATISTICS:
+ assert(svga_have_vgpu10(svga));
+ sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;
+ define_query_vgpu10(svga, sq,
+ sizeof(SVGADXStreamOutStatisticsQueryResult));
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ assert(svga_have_vgpu10(svga));
+ sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP;
+ define_query_vgpu10(svga, sq,
+ sizeof(SVGADXTimestampQueryResult));
break;
case SVGA_QUERY_DRAW_CALLS:
case SVGA_QUERY_FALLBACKS:
return &sq->base;
-no_query_result:
- sws->buffer_destroy(sws, sq->hwbuf);
-no_hwbuf:
+fail:
FREE(sq);
-no_sq:
return NULL;
}
-
static void
svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
{
- struct svga_screen *svgascreen = svga_screen(pipe->screen);
- struct svga_winsys_screen *sws = svgascreen->sws;
- struct svga_query *sq = svga_query( q );
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+ struct svga_query *sq;
- SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+ if (q == NULL) {
+ return destroy_gb_query_obj(svga);
+ }
+
+ sq = svga_query(q);
+
+ SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
+ sq, sq->id);
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
- sws->buffer_destroy(sws, sq->hwbuf);
- sq->hwbuf = NULL;
+ if (svga_have_vgpu10(svga)) {
+ /* make sure to also destroy any associated predicate query */
+ if (sq->predicate)
+ svga_destroy_query(pipe, sq->predicate);
+ destroy_query_vgpu10(svga, sq);
+ } else {
+ sws->buffer_destroy(sws, sq->hwbuf);
+ }
+ sws->fence_reference(sws, &sq->fence, NULL);
+ break;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ assert(svga_have_vgpu10(svga));
+ destroy_query_vgpu10(svga, sq);
+ sws->fence_reference(sws, &sq->fence, NULL);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_TIMESTAMP:
+ assert(svga_have_vgpu10(svga));
+ destroy_query_vgpu10(svga, sq);
sws->fence_reference(sws, &sq->fence, NULL);
break;
case SVGA_QUERY_DRAW_CALLS:
assert(!"svga: unexpected query type in svga_destroy_query()");
}
+ /* Free the query id */
+ util_bitmask_clear(svga->query_id_bm, sq->id);
+
FREE(sq);
}
static boolean
svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
{
- struct svga_screen *svgascreen = svga_screen(pipe->screen);
- struct svga_winsys_screen *sws = svgascreen->sws;
- struct svga_context *svga = svga_context( pipe );
- struct svga_query *sq = svga_query( q );
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_query *sq = svga_query(q);
enum pipe_error ret;
- SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+ assert(sq);
+ assert(sq->type < SVGA_QUERY_MAX);
+
+ SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
+ sq, sq->id);
/* Need to flush out buffered drawing commands so that they don't
* get counted in the query results.
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
- assert(!svga->sq);
- if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
- /* The application doesn't care for the pending query result.
- * We cannot let go of the existing buffer and just get a new one
- * because its storage may be reused for other purposes and clobbered
- * by the host when it determines the query result. So the only
- * option here is to wait for the existing query's result -- not a
- * big deal, given that no sane application would do this.
- */
- uint64_t result;
- svga_get_query_result(pipe, q, TRUE, (void*)&result);
- assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
- }
-
- sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
- sws->fence_reference(sws, &sq->fence, NULL);
-
- ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
- assert(ret == PIPE_OK);
+ if (svga_have_vgpu10(svga)) {
+ ret = begin_query_vgpu10(svga, sq);
+ /* also need to start the associated occlusion predicate query */
+ if (sq->predicate) {
+ enum pipe_error status;
+ status = begin_query_vgpu10(svga, svga_query(sq->predicate));
+ assert(status == PIPE_OK);
+ (void) status;
+ }
+ } else {
+ ret = begin_query_vgpu9(svga, sq);
}
-
- svga->sq = sq;
+ assert(ret == PIPE_OK);
+ (void) ret;
+ break;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ assert(svga_have_vgpu10(svga));
+ ret = begin_query_vgpu10(svga, sq);
+ assert(ret == PIPE_OK);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_TIMESTAMP:
+ assert(svga_have_vgpu10(svga));
+ ret = begin_query_vgpu10(svga, sq);
+ assert(ret == PIPE_OK);
break;
case SVGA_QUERY_DRAW_CALLS:
sq->begin_count = svga->num_draw_calls;
default:
assert(!"unexpected query type in svga_begin_query()");
}
+
+ svga->sq[sq->type] = sq;
+
return true;
}
static void
svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
{
- struct svga_context *svga = svga_context( pipe );
- struct svga_query *sq = svga_query( q );
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_query *sq = svga_query(q);
enum pipe_error ret;
- SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+ assert(sq);
+ assert(sq->type < SVGA_QUERY_MAX);
+
+ SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
+ sq, sq->id);
+
+ if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq)
+ svga_begin_query(pipe, q);
svga_hwtnl_flush_retry(svga);
+ assert(svga->sq[sq->type] == sq);
+
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
- assert(svga->sq == sq);
-
- /* Set to PENDING before sending EndQuery. */
- sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
-
- ret = SVGA3D_EndQuery( svga->swc, sq->svga_type, sq->hwbuf);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_EndQuery( svga->swc, sq->svga_type, sq->hwbuf);
- assert(ret == PIPE_OK);
+ if (svga_have_vgpu10(svga)) {
+ ret = end_query_vgpu10(svga, sq);
+ /* also need to end the associated occlusion predicate query */
+ if (sq->predicate) {
+ enum pipe_error status;
+ status = end_query_vgpu10(svga, svga_query(sq->predicate));
+ assert(status == PIPE_OK);
+ (void) status;
+ }
+ } else {
+ ret = end_query_vgpu9(svga, sq);
}
-
+ assert(ret == PIPE_OK);
+ (void) ret;
/* TODO: Delay flushing. We don't really need to flush here, just ensure
* that there is one flush before svga_get_query_result attempts to get
* the result.
*/
svga_context_flush(svga, NULL);
-
- svga->sq = NULL;
+ break;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ assert(svga_have_vgpu10(svga));
+ ret = end_query_vgpu10(svga, sq);
+ assert(ret == PIPE_OK);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_TIMESTAMP:
+ assert(svga_have_vgpu10(svga));
+ ret = end_query_vgpu10(svga, sq);
+ assert(ret == PIPE_OK);
break;
case SVGA_QUERY_DRAW_CALLS:
sq->end_count = svga->num_draw_calls;
default:
assert(!"unexpected query type in svga_end_query()");
}
+ svga->sq[sq->type] = NULL;
}
boolean wait,
union pipe_query_result *vresult)
{
- struct svga_context *svga = svga_context( pipe );
- struct svga_screen *svgascreen = svga_screen( pipe->screen );
- struct svga_winsys_screen *sws = svgascreen->sws;
- struct svga_query *sq = svga_query( q );
- SVGA3dQueryState state;
- uint64_t *result = (uint64_t *) vresult;
+ struct svga_screen *svgascreen = svga_screen(pipe->screen);
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_query *sq = svga_query(q);
+ uint64_t *result = (uint64_t *)vresult;
+ boolean ret = TRUE;
+
+ assert(sq);
- SVGA_DBG(DEBUG_QUERY, "%s wait: %d\n", __FUNCTION__);
+ SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d wait: %d\n",
+ __FUNCTION__, sq, sq->id, wait);
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
- /* The query status won't be updated by the host unless
- * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause
- * a synchronous wait on the host.
- */
- if (!sq->fence) {
- enum pipe_error ret;
-
- ret = SVGA3D_WaitForQuery( svga->swc, sq->svga_type, sq->hwbuf);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_WaitForQuery( svga->swc, sq->svga_type, sq->hwbuf);
- assert(ret == PIPE_OK);
- }
-
- svga_context_flush(svga, &sq->fence);
-
- assert(sq->fence);
+ if (svga_have_vgpu10(svga)) {
+ SVGADXOcclusionQueryResult occResult;
+ ret = get_query_result_vgpu10(svga, sq, wait,
+ (void *)&occResult, sizeof(occResult));
+ *result = (uint64_t)occResult.samplesRendered;
+ } else {
+ ret = get_query_result_vgpu9(svga, sq, wait, (uint64_t *)result);
}
+ break;
+ case PIPE_QUERY_OCCLUSION_PREDICATE: {
+ SVGADXOcclusionPredicateQueryResult occResult;
+ assert(svga_have_vgpu10(svga));
+ ret = get_query_result_vgpu10(svga, sq, wait,
+ (void *)&occResult, sizeof(occResult));
+ vresult->b = occResult.anySamplesRendered != 0;
+ break;
+ }
+ case PIPE_QUERY_SO_STATISTICS: {
+ SVGADXStreamOutStatisticsQueryResult sResult;
+ struct pipe_query_data_so_statistics *pResult =
+ (struct pipe_query_data_so_statistics *)vresult;
- state = sq->queryResult->state;
- if (state == SVGA3D_QUERYSTATE_PENDING) {
- if (!wait)
- return FALSE;
- sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
- state = sq->queryResult->state;
- }
+ assert(svga_have_vgpu10(svga));
+ ret = get_query_result_vgpu10(svga, sq, wait,
+ (void *)&sResult, sizeof(sResult));
+ pResult->num_primitives_written = sResult.numPrimitivesWritten;
+ pResult->primitives_storage_needed = sResult.numPrimitivesRequired;
+ break;
+ }
+ case PIPE_QUERY_TIMESTAMP: {
+ SVGADXTimestampQueryResult sResult;
+
+ assert(svga_have_vgpu10(svga));
+ ret = get_query_result_vgpu10(svga, sq, wait,
+ (void *)&sResult, sizeof(sResult));
+ *result = (uint64_t)sResult.timestamp;
+ break;
+ }
+ case PIPE_QUERY_PRIMITIVES_GENERATED: {
+ SVGADXStreamOutStatisticsQueryResult sResult;
- assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||
- state == SVGA3D_QUERYSTATE_FAILED);
+ assert(svga_have_vgpu10(svga));
+ ret = get_query_result_vgpu10(svga, sq, wait,
+ (void *)&sResult, sizeof sResult);
+ *result = (uint64_t)sResult.numPrimitivesRequired;
+ break;
+ }
+ case PIPE_QUERY_PRIMITIVES_EMITTED: {
+ SVGADXStreamOutStatisticsQueryResult sResult;
- *result = (uint64_t) sq->queryResult->result32;
+ assert(svga_have_vgpu10(svga));
+ ret = get_query_result_vgpu10(svga, sq, wait,
+ (void *)&sResult, sizeof sResult);
+ *result = (uint64_t)sResult.numPrimitivesWritten;
break;
+ }
case SVGA_QUERY_DRAW_CALLS:
/* fall-through */
case SVGA_QUERY_FALLBACKS:
assert(!"unexpected query type in svga_get_query_result");
}
- SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, (unsigned)*result);
+ SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, *((uint64_t *)vresult));
- return TRUE;
+ return ret;
+}
+
+/**
+ * pipe_context::render_condition hook (asserted to be VGPU10 only).
+ *
+ * Selects the query whose result predicates subsequent rendering and
+ * forwards it, together with \p condition, to the SetPredication command.
+ * A NULL query is translated to SVGA3D_INVALID_ID (presumably this turns
+ * predication off on the device -- confirm against the device spec).
+ */
+static void
+svga_render_condition(struct pipe_context *pipe, struct pipe_query *q,
+ boolean condition, uint mode)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+ struct svga_query *sq = svga_query(q);
+ SVGA3dQueryId queryId;
+ enum pipe_error ret;
+
+ SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+
+ assert(svga_have_vgpu10(svga));
+ if (sq == NULL) {
+ queryId = SVGA3D_INVALID_ID;
+ }
+ else {
+ /* Only occlusion and occlusion-predicate queries are expected here */
+ assert(sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION ||
+ sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE);
+
+ if (sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION) {
+ assert(sq->predicate);
+ /**
+ * For conditional rendering, make sure to use the associated
+ * predicate query.
+ */
+ sq = svga_query(sq->predicate);
+ }
+ queryId = sq->id;
+
+ /* The two WAIT modes block on the query's fence, if it has one,
+ * before the predicate is set.
+ */
+ if ((mode == PIPE_RENDER_COND_WAIT ||
+ mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) {
+ sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+ }
+ }
+
+ /* Emit the command; on failure flush the context and try once more.
+ * NOTE(review): the result of the second attempt is not checked.
+ */
+ ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
+ (uint32) condition);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
+ (uint32) condition);
+ }
+}
+
+
+/*
+ * This function is a workaround because we lack the ability to query
+ * the renderer's time synchronously.
+ *
+ * A temporary PIPE_QUERY_TIMESTAMP query is created, begun, ended, read
+ * back with wait=TRUE and destroyed again.
+ *
+ * Returns the value reported for the query, or 0 when the query could not
+ * be created or its result could not be obtained.
+ */
+static uint64_t
+svga_get_timestamp(struct pipe_context *pipe)
+{
+   struct pipe_query *q = svga_create_query(pipe, PIPE_QUERY_TIMESTAMP, 0);
+   union pipe_query_result result;
+
+   /* Never hand uninitialized stack contents back to the caller. */
+   result.u64 = 0;
+
+   /* svga_end_query() asserts on and dereferences the query, so a failed
+    * creation must be caught here.
+    */
+   if (!q)
+      return 0;
+
+   svga_begin_query(pipe, q);
+   svga_end_query(pipe, q);
+
+   /* svga_get_query_result() reports failure through its return value;
+    * do not trust whatever was stored in that case.
+    */
+   if (!svga_get_query_result(pipe, q, TRUE, &result))
+      result.u64 = 0;
+
+   svga_destroy_query(pipe, q);
+
+   return result.u64;
}
svga->pipe.begin_query = svga_begin_query;
svga->pipe.end_query = svga_end_query;
svga->pipe.get_query_result = svga_get_query_result;
+ svga->pipe.render_condition = svga_render_condition;
+ svga->pipe.get_timestamp = svga_get_timestamp;
}
*
**********************************************************/
+#include "pipe/p_defines.h"
#include "draw/draw_context.h"
+#include "util/u_bitmask.h"
#include "util/u_inlines.h"
-#include "pipe/p_defines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "svga_cmd.h"
#include "svga_context.h"
+#include "svga_hw_reg.h"
#include "svga_screen.h"
-#include "svga_hw_reg.h"
/* Hardware frontwinding is always set up as SVGA3D_FRONTWINDING_CW.
*/
}
+/**
+ * Map a gallium PIPE_POLYGON_MODE_x value to the corresponding
+ * SVGA3D_FILLMODE_x value.  Unknown values trip an assertion and are
+ * treated as SVGA3D_FILLMODE_FILL.
+ */
+static unsigned
+translate_fill_mode(unsigned fill)
+{
+   if (fill == PIPE_POLYGON_MODE_POINT) {
+      return SVGA3D_FILLMODE_POINT;
+   }
+   if (fill == PIPE_POLYGON_MODE_LINE) {
+      return SVGA3D_FILLMODE_LINE;
+   }
+   if (fill != PIPE_POLYGON_MODE_FILL) {
+      assert(!"Bad fill mode");
+   }
+   return SVGA3D_FILLMODE_FILL;
+}
+
+
+/**
+ * Map a gallium PIPE_FACE_x cull selection to the corresponding
+ * SVGA3D_CULL_x value.  Unknown values trip an assertion and are treated
+ * as SVGA3D_CULL_NONE.
+ */
+static unsigned
+translate_cull_mode(unsigned cull)
+{
+   unsigned svga_cull = SVGA3D_CULL_NONE;
+
+   switch (cull) {
+   case PIPE_FACE_FRONT:
+      svga_cull = SVGA3D_CULL_FRONT;
+      break;
+   case PIPE_FACE_BACK:
+      svga_cull = SVGA3D_CULL_BACK;
+      break;
+   case PIPE_FACE_NONE:
+   case PIPE_FACE_FRONT_AND_BACK:
+      /* NOTE: for FRONT_AND_BACK we simply no-op polygon drawing in
+       * svga_draw_vbo(), so nothing is culled by the device.
+       */
+      break;
+   default:
+      assert(!"Bad cull mode");
+      break;
+   }
+
+   return svga_cull;
+}
+
+
+/**
+ * Allocate an id for the rasterizer state and define the matching
+ * rasterizer state object on the device from rast->templ.
+ * The only caller visible in this patch invokes it when
+ * svga_have_vgpu10() is true.
+ */
+static void
+define_rasterizer_object(struct svga_context *svga,
+ struct svga_rasterizer_state *rast)
+{
+ unsigned fill_mode = translate_fill_mode(rast->templ.fill_front);
+ unsigned cull_mode = translate_cull_mode(rast->templ.cull_face);
+ /* NOTE(review): offset_units is converted to int here -- confirm that
+ * dropping any fractional part is intended.
+ */
+ int depth_bias = rast->templ.offset_units;
+ float slope_scaled_depth_bias = rast->templ.offset_scale;
+ float depth_bias_clamp = 0.0; /* XXX fix me */
+ unsigned try;
+ /* A non-positive line width is replaced by 1.0 */
+ const float line_width = rast->templ.line_width > 0.0f ?
+ rast->templ.line_width : 1.0f;
+ /* Stipple factor/pattern are zeroed when line stippling is disabled */
+ const uint8 line_factor = rast->templ.line_stipple_enable ?
+ rast->templ.line_stipple_factor : 0;
+ const uint16 line_pattern = rast->templ.line_stipple_enable ?
+ rast->templ.line_stipple_pattern : 0;
+
+ /* NOTE(review): the returned id is not checked for an allocation
+ * failure value.
+ */
+ rast->id = util_bitmask_add(svga->rast_object_id_bm);
+
+ if (rast->templ.fill_front != rast->templ.fill_back) {
+ /* The VGPU10 device can't handle different front/back fill modes.
+ * We'll handle that with a swtnl/draw fallback. But we need to
+ * make sure we always fill triangles in that case.
+ */
+ fill_mode = SVGA3D_FILLMODE_FILL;
+ }
+
+ /* Try at most twice, flushing the context after a failed attempt.
+ * If the second attempt fails too, the function returns without
+ * reporting the error.
+ */
+ for (try = 0; try < 2; try++) {
+ enum pipe_error ret =
+ SVGA3D_vgpu10_DefineRasterizerState(svga->swc,
+ rast->id,
+ fill_mode,
+ cull_mode,
+ rast->templ.front_ccw,
+ depth_bias,
+ depth_bias_clamp,
+ slope_scaled_depth_bias,
+ rast->templ.depth_clip,
+ rast->templ.scissor,
+ rast->templ.multisample,
+ rast->templ.line_smooth,
+ line_width,
+ rast->templ.line_stipple_enable,
+ line_factor,
+ line_pattern,
+ !rast->templ.flatshade_first);
+ if (ret == PIPE_OK)
+ return;
+ svga_context_flush(svga, NULL);
+ }
+}
+
+
static void *
svga_create_rasterizer_state(struct pipe_context *pipe,
const struct pipe_rasterizer_state *templ)
rast->antialiasedlineenable = templ->line_smooth;
rast->lastpixel = templ->line_last_pixel;
rast->pointsprite = templ->sprite_coord_enable != 0x0;
- rast->pointsize = templ->point_size;
- rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+
+ if (templ->point_smooth) {
+ /* For smooth points we need to generate fragments for at least
+ * a 2x2 region. Otherwise the quad we draw may be too small and
+ * we may generate no fragments at all.
+ */
+ rast->pointsize = MAX2(2.0f, templ->point_size);
+ }
+ else {
+ rast->pointsize = templ->point_size;
+ }
+
+ rast->hw_fillmode = PIPE_POLYGON_MODE_FILL;
/* Use swtnl + decomposition implement these:
*/
- if (templ->poly_stipple_enable) {
- rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
- rast->need_pipeline_tris_str = "poly stipple";
- }
- if (screen->maxLineWidth > 1.0F) {
+ if (templ->line_width <= screen->maxLineWidth) {
/* pass line width to device */
rast->linewidth = MAX2(1.0F, templ->line_width);
}
}
}
- if (templ->point_smooth) {
+ if (!svga_have_vgpu10(svga) && templ->point_smooth) {
rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS;
rast->need_pipeline_points_str = "smooth points";
}
rast->depthbias = templ->offset_units;
}
- rast->hw_unfilled = fill;
+ rast->hw_fillmode = fill;
}
if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) {
/* Turn off stuff which will get done in the draw module:
*/
- rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+ rast->hw_fillmode = PIPE_POLYGON_MODE_FILL;
rast->slopescaledepthbias = 0;
rast->depthbias = 0;
}
debug_printf(" tris: %s \n", rast->need_pipeline_tris_str);
}
+ if (svga_have_vgpu10(svga)) {
+ define_rasterizer_object(svga, rast);
+ }
+
return rast;
}
struct svga_context *svga = svga_context(pipe);
struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state;
-
- draw_set_rasterizer_state(svga->swtnl.draw, raster ? &raster->templ : NULL,
- state);
svga->curr.rast = raster;
svga->dirty |= SVGA_NEW_RAST;
+
+ if (raster && raster->templ.poly_stipple_enable) {
+ svga->dirty |= SVGA_NEW_STIPPLE;
+ }
}
-static void svga_delete_rasterizer_state(struct pipe_context *pipe,
- void *raster)
+static void
+svga_delete_rasterizer_state(struct pipe_context *pipe, void *state)
{
- FREE(raster);
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_rasterizer_state *raster =
+ (struct svga_rasterizer_state *) state;
+
+ if (svga_have_vgpu10(svga)) {
+ enum pipe_error ret =
+ SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id);
+ }
+
+ if (raster->id == svga->state.hw_draw.rasterizer_id)
+ svga->state.hw_draw.rasterizer_id = SVGA3D_INVALID_ID;
+
+ util_bitmask_clear(svga->rast_object_id_bm, raster->id);
+ }
+
+ FREE(state);
}
*
**********************************************************/
-#include "util/u_inlines.h"
#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
#include "util/u_format.h"
+#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_parse.h"
#include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
#include "svga_resource_texture.h"
+#include "svga_surface.h"
+#include "svga_sampler_view.h"
-#include "svga_debug.h"
static inline unsigned
translate_wrap_mode(unsigned wrap)
}
}
+
+/**
+ * Map a gallium PIPE_FUNC_x comparison function to the corresponding
+ * SVGA3D_COMPARISON_x value.  Unknown values trip an assertion and are
+ * treated as SVGA3D_COMPARISON_ALWAYS.
+ */
+static uint8
+translate_comparison_func(unsigned func)
+{
+   uint8 cmp = SVGA3D_COMPARISON_ALWAYS;
+
+   switch (func) {
+   case PIPE_FUNC_NEVER:
+      cmp = SVGA3D_COMPARISON_NEVER;
+      break;
+   case PIPE_FUNC_LESS:
+      cmp = SVGA3D_COMPARISON_LESS;
+      break;
+   case PIPE_FUNC_LEQUAL:
+      cmp = SVGA3D_COMPARISON_LESS_EQUAL;
+      break;
+   case PIPE_FUNC_EQUAL:
+      cmp = SVGA3D_COMPARISON_EQUAL;
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      cmp = SVGA3D_COMPARISON_NOT_EQUAL;
+      break;
+   case PIPE_FUNC_GEQUAL:
+      cmp = SVGA3D_COMPARISON_GREATER_EQUAL;
+      break;
+   case PIPE_FUNC_GREATER:
+      cmp = SVGA3D_COMPARISON_GREATER;
+      break;
+   case PIPE_FUNC_ALWAYS:
+      break;
+   default:
+      assert(!"Invalid comparison function");
+      break;
+   }
+
+   return cmp;
+}
+
+
+/**
+ * Build the vgpu10 SVGA3dFilter bitmask from the gallium filtering state.
+ *
+ * Each of the three filter arguments contributes its LINEAR bit when it
+ * equals PIPE_TEX_FILTER_LINEAR; \p anisotropic and \p compare add their
+ * respective flag bits.  Note that the caller in this file passes the
+ * sampler's mip filter as the first argument (it controls the MIP bit).
+ */
+static SVGA3dFilter
+translate_filter_mode(unsigned img_filter,
+                      unsigned min_filter,
+                      unsigned mag_filter,
+                      boolean anisotropic,
+                      boolean compare)
+{
+   const SVGA3dFilter mip_bit =
+      (img_filter == PIPE_TEX_FILTER_LINEAR) ? SVGA3D_FILTER_MIP_LINEAR : 0;
+   const SVGA3dFilter min_bit =
+      (min_filter == PIPE_TEX_FILTER_LINEAR) ? SVGA3D_FILTER_MIN_LINEAR : 0;
+   const SVGA3dFilter mag_bit =
+      (mag_filter == PIPE_TEX_FILTER_LINEAR) ? SVGA3D_FILTER_MAG_LINEAR : 0;
+   const SVGA3dFilter aniso_bit = anisotropic ? SVGA3D_FILTER_ANISOTROPIC : 0;
+   const SVGA3dFilter compare_bit = compare ? SVGA3D_FILTER_COMPARE : 0;
+
+   return mip_bit | min_bit | mag_bit | aniso_bit | compare_bit;
+}
+
+
+/**
+ * Define a vgpu10 sampler state.
+ *
+ * Allocates an id for \p ss and emits a DefineSamplerState command built
+ * from the already-translated fields of \p ss and the gallium template
+ * \p ps.  Nothing is returned; if the command cannot be emitted even
+ * after a flush, the failure is not reported to the caller.
+ */
+static void
+define_sampler_state_object(struct svga_context *svga,
+ struct svga_sampler_state *ss,
+ const struct pipe_sampler_state *ps)
+{
+ uint8_t max_aniso = (uint8_t) 255; /* XXX fix me */
+ boolean anisotropic;
+ uint8 compare_func;
+ SVGA3dFilter filter;
+ SVGA3dRGBAFloat bcolor;
+ unsigned try;
+ float min_lod, max_lod;
+
+ assert(svga_have_vgpu10(svga));
+
+ /* Anisotropic filtering is requested for any level above 1.0 */
+ anisotropic = ss->aniso_level > 1.0f;
+
+ /* The mip filter is passed first: it selects the MIP_LINEAR bit */
+ filter = translate_filter_mode(ps->min_mip_filter,
+ ps->min_img_filter,
+ ps->mag_img_filter,
+ anisotropic,
+ ss->compare_mode);
+
+ compare_func = translate_comparison_func(ss->compare_func);
+
+ COPY_4V(bcolor.value, ps->border_color.f);
+
+ /* NOTE(review): the returned id is not checked for an allocation
+ * failure value.
+ */
+ ss->id = util_bitmask_add(svga->sampler_object_id_bm);
+
+ assert(ps->min_lod <= ps->max_lod);
+
+ if (ps->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+ /* just use the base level image */
+ min_lod = max_lod = 0.0f;
+ }
+ else {
+ min_lod = ps->min_lod;
+ max_lod = ps->max_lod;
+ }
+
+ /* Loop in case command buffer is full and we need to flush and retry */
+ for (try = 0; try < 2; try++) {
+ enum pipe_error ret =
+ SVGA3D_vgpu10_DefineSamplerState(svga->swc,
+ ss->id,
+ filter,
+ ss->addressu,
+ ss->addressv,
+ ss->addressw,
+ ss->lod_bias, /* float */
+ max_aniso,
+ compare_func,
+ bcolor,
+ min_lod, /* float */
+ max_lod); /* float */
+ if (ret == PIPE_OK)
+ return;
+ svga_context_flush(svga, NULL);
+ }
+}
+
+
static void *
svga_create_sampler_state(struct pipe_context *pipe,
const struct pipe_sampler_state *sampler)
}
}
+ if (svga_have_vgpu10(svga)) {
+ define_sampler_state_object(svga, cso, sampler);
+ }
+
SVGA_DBG(DEBUG_VIEWS, "min %u, view(min %u, max %u) lod, mipfilter %s\n",
cso->min_lod, cso->view_min_lod, cso->view_max_lod,
cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING");
assert(shader < PIPE_SHADER_TYPES);
assert(start + num <= PIPE_MAX_SAMPLERS);
- /* we only support fragment shader samplers at this time */
- if (shader != PIPE_SHADER_FRAGMENT)
+ /* Pre-VGPU10 only supports FS textures */
+ if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
return;
for (i = 0; i < num; i++)
- svga->curr.sampler[start + i] = samplers[i];
+ svga->curr.sampler[shader][start + i] = samplers[i];
/* find highest non-null sampler[] entry */
{
- unsigned j = MAX2(svga->curr.num_samplers, start + num);
- while (j > 0 && svga->curr.sampler[j - 1] == NULL)
+ unsigned j = MAX2(svga->curr.num_samplers[shader], start + num);
+ while (j > 0 && svga->curr.sampler[shader][j - 1] == NULL)
j--;
- svga->curr.num_samplers = j;
+ svga->curr.num_samplers[shader] = j;
}
svga->dirty |= SVGA_NEW_SAMPLER;
static void svga_delete_sampler_state(struct pipe_context *pipe,
void *sampler)
{
+ struct svga_sampler_state *ss = (struct svga_sampler_state *) sampler;
+ struct svga_context *svga = svga_context(pipe);
+
+ if (svga_have_vgpu10(svga)) {
+ enum pipe_error ret;
+
+ svga_hwtnl_flush_retry(svga);
+
+ ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id);
+ }
+ util_bitmask_clear(svga->sampler_object_id_bm, ss->id);
+ }
+
FREE(sampler);
}
struct pipe_resource *texture,
const struct pipe_sampler_view *templ)
{
- struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
-
- if (view) {
- *view = *templ;
- view->reference.count = 1;
- view->texture = NULL;
- pipe_resource_reference(&view->texture, texture);
- view->context = pipe;
+ struct svga_pipe_sampler_view *sv = CALLOC_STRUCT(svga_pipe_sampler_view);
+
+ if (!sv) {
+ return NULL;
}
- return view;
+ sv->base = *templ;
+ sv->base.reference.count = 1;
+ sv->base.texture = NULL;
+ pipe_resource_reference(&sv->base.texture, texture);
+
+ sv->base.context = pipe;
+ sv->id = SVGA3D_INVALID_ID;
+
+ return &sv->base;
}
svga_sampler_view_destroy(struct pipe_context *pipe,
struct pipe_sampler_view *view)
{
- pipe_resource_reference(&view->texture, NULL);
- FREE(view);
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_pipe_sampler_view *sv = svga_pipe_sampler_view(view);
+
+ if (svga_have_vgpu10(svga) && sv->id != SVGA3D_INVALID_ID) {
+ if (view->context != pipe) {
+ /* The SVGA3D device will generate an error (and on Linux, cause
+ * us to abort) if we try to destroy a shader resource view from
+ * a context other than the one it was created with. Skip the
+ * SVGA3D_vgpu10_DestroyShaderResourceView() and leak the sampler
+ * view for now. This should only sometimes happen when a shared
+ * texture is deleted.
+ */
+ _debug_printf("context mismatch in %s\n", __func__);
+ }
+ else {
+ enum pipe_error ret;
+
+ svga_hwtnl_flush_retry(svga); /* XXX is this needed? */
+
+ ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
+ }
+ util_bitmask_clear(svga->sampler_view_id_bm, sv->id);
+ }
+ }
+
+ pipe_resource_reference(&sv->base.texture, NULL);
+
+ FREE(sv);
}
static void
uint i;
assert(shader < PIPE_SHADER_TYPES);
- assert(start + num <= Elements(svga->curr.sampler_views));
+ assert(start + num <= Elements(svga->curr.sampler_views[shader]));
- /* we only support fragment shader sampler views at this time */
- if (shader != PIPE_SHADER_FRAGMENT)
+ /* Pre-VGPU10 only supports FS textures */
+ if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
return;
for (i = 0; i < num; i++) {
- if (svga->curr.sampler_views[start + i] != views[i]) {
+ if (svga->curr.sampler_views[shader][start + i] != views[i]) {
/* Note: we're using pipe_sampler_view_release() here to work around
* a possible crash when the old view belongs to another context that
* was already destroyed.
*/
- pipe_sampler_view_release(pipe, &svga->curr.sampler_views[start + i]);
- pipe_sampler_view_reference(&svga->curr.sampler_views[start + i],
+ pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][start + i]);
+ pipe_sampler_view_reference(&svga->curr.sampler_views[shader][start + i],
views[i]);
}
/* find highest non-null sampler_views[] entry */
{
- unsigned j = MAX2(svga->curr.num_sampler_views, start + num);
- while (j > 0 && svga->curr.sampler_views[j - 1] == NULL)
+ unsigned j = MAX2(svga->curr.num_sampler_views[shader], start + num);
+ while (j > 0 && svga->curr.sampler_views[shader][j - 1] == NULL)
j--;
- svga->curr.num_sampler_views = j;
+ svga->curr.num_sampler_views[shader] = j;
}
svga->dirty |= SVGA_NEW_TEXTURE_BINDING;
svga->dirty |= SVGA_NEW_TEXTURE_FLAGS;
svga->curr.tex_flags.flag_1d = flag_1d;
svga->curr.tex_flags.flag_srgb = flag_srgb;
- }
+ }
+
+ /* Check if any of the sampler view resources collide with the framebuffer
+ * color buffers or depth stencil resource. If so, enable the NEW_FRAME_BUFFER
+ * dirty bit so that emit_framebuffer can be invoked to create backed view
+ * for the conflicted surface view.
+ */
+ for (i = 0; i < svga->curr.framebuffer.nr_cbufs; i++) {
+ if (svga->curr.framebuffer.cbufs[i]) {
+ struct svga_surface *s = svga_surface(svga->curr.framebuffer.cbufs[i]);
+ if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) {
+ svga->dirty |= SVGA_NEW_FRAME_BUFFER;
+ break;
+ }
+ }
+ }
+
+ if (svga->curr.framebuffer.zsbuf) {
+ struct svga_surface *s = svga_surface(svga->curr.framebuffer.zsbuf);
+ if (s) {
+ if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) {
+ svga->dirty |= SVGA_NEW_FRAME_BUFFER;
+ }
+ }
+ }
}
--- /dev/null
+/**********************************************************
+ * Copyright 2014 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+
+#include "svga_cmd.h"
+#include "svga_context.h"
+#include "svga_resource_buffer.h"
+#include "svga_shader.h"
+#include "svga_debug.h"
+#include "svga_streamout.h"
+
+/**
+ * Driver-private stream output target.  It currently only wraps the
+ * gallium base object; svga_stream_output_target() casts a
+ * pipe_stream_output_target pointer straight to this type, so \c base
+ * has to remain the first member.
+ */
+struct svga_stream_output_target {
+ struct pipe_stream_output_target base;
+};
+
+/** Downcast a gallium stream output target to the svga-private type. */
+static inline struct svga_stream_output_target *
+svga_stream_output_target(struct pipe_stream_output_target *s)
+{
+   struct svga_stream_output_target *sot =
+      (struct svga_stream_output_target *) s;
+
+   return sot;
+}
+
+/**
+ * Create a stream output object describing which outputs of \p shader
+ * are written to which stream output buffers, and define it on the device.
+ *
+ * \param svga    the context
+ * \param shader  shader whose output semantics are consulted
+ * \param info    gallium stream output description
+ * \return the new object, or NULL when the device is not VGPU10, when no
+ *         id or memory is available, or when the define command cannot be
+ *         emitted even after flushing the command buffer
+ */
+struct svga_stream_output *
+svga_create_stream_output(struct svga_context *svga,
+                          struct svga_shader *shader,
+                          const struct pipe_stream_output_info *info)
+{
+   struct svga_stream_output *streamout;
+   SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS];
+   unsigned strides[SVGA3D_DX_MAX_SOTARGETS];
+   unsigned i;
+   enum pipe_error ret;
+   unsigned id;
+
+   assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS);
+
+   /* Gallium utility creates shaders with stream output.
+    * For non-DX10, just return NULL.
+    */
+   if (!svga_have_vgpu10(svga))
+      return NULL;
+
+   assert(info->num_outputs <= SVGA3D_MAX_STREAMOUT_DECLS);
+
+   /* Allocate an integer ID for the stream output */
+   id = util_bitmask_add(svga->stream_output_id_bm);
+   if (id == UTIL_BITMASK_INVALID_INDEX) {
+      return NULL;
+   }
+
+   /* Allocate the streamout data structure */
+   streamout = CALLOC_STRUCT(svga_stream_output);
+
+   if (streamout == NULL) {
+      /* Give the id back, otherwise it would stay reserved forever */
+      util_bitmask_clear(svga->stream_output_id_bm, id);
+      return NULL;
+   }
+
+   streamout->info = *info;
+   streamout->id = id;
+   streamout->pos_out_index = -1;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s, num_outputs=%d id=%d\n", __FUNCTION__,
+            info->num_outputs, id);
+
+   /* init whole decls and stride arrays to zero to avoid garbage values */
+   memset(decls, 0, sizeof(decls));
+   memset(strides, 0, sizeof(strides));
+
+   for (i = 0; i < info->num_outputs; i++) {
+      unsigned reg_idx = info->output[i].register_index;
+      unsigned buf_idx = info->output[i].output_buffer;
+      const unsigned sem_name = shader->info.output_semantic_name[reg_idx];
+
+      /* buf_idx indexes both info->stride[] and strides[], so it has to
+       * be strictly below both array sizes.
+       */
+      assert(buf_idx < PIPE_MAX_SO_BUFFERS);
+      assert(buf_idx < SVGA3D_DX_MAX_SOTARGETS);
+
+      if (sem_name == TGSI_SEMANTIC_POSITION) {
+         /**
+          * Check if streaming out POSITION. If so, replace the
+          * register index with the index for NON_ADJUSTED POSITION.
+          */
+         decls[i].registerIndex = shader->info.num_outputs;
+
+         /* Save this output index, so we can tell later if this stream output
+          * includes an output of a vertex position
+          */
+         streamout->pos_out_index = i;
+      }
+      else if (sem_name == TGSI_SEMANTIC_CLIPDIST) {
+         /**
+          * Use the shadow copy for clip distance because
+          * CLIPDIST instruction is only emitted for enabled clip planes.
+          * It's valid to write to ClipDistance variable for non-enabled
+          * clip planes.
+          */
+         decls[i].registerIndex = shader->info.num_outputs + 1 +
+                                  shader->info.output_semantic_index[reg_idx];
+      }
+      else {
+         decls[i].registerIndex = reg_idx;
+      }
+
+      decls[i].outputSlot = buf_idx;
+      /* num_components consecutive bits starting at start_component */
+      decls[i].registerMask =
+         ((1 << info->output[i].num_components) - 1)
+            << info->output[i].start_component;
+
+      SVGA_DBG(DEBUG_STREAMOUT, "%d slot=%d regIdx=%d regMask=0x%x\n",
+               i, decls[i].outputSlot, decls[i].registerIndex,
+               decls[i].registerMask);
+
+      /* info->stride[] is scaled by sizeof(float) for the device */
+      strides[buf_idx] = info->stride[buf_idx] * sizeof(float);
+   }
+
+   /* Emit the define command; flush and retry once if it does not fit */
+   ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id,
+                                          info->num_outputs,
+                                          strides,
+                                          decls);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id,
+                                             info->num_outputs,
+                                             strides,
+                                             decls);
+      if (ret != PIPE_OK) {
+         util_bitmask_clear(svga->stream_output_id_bm, id);
+         FREE(streamout);
+         streamout = NULL;
+      }
+   }
+   return streamout;
+}
+
+/**
+ * Make \p streamout the current stream output object on the device.
+ * A NULL \p streamout binds SVGA3D_INVALID_ID.
+ *
+ * The command is only emitted when \p streamout differs from the cached
+ * svga->current_so.  Nothing is done on devices without VGPU10.
+ *
+ * \return PIPE_OK, or the error of the second attempt if the command
+ *         could not be emitted even after a flush
+ */
+enum pipe_error
+svga_set_stream_output(struct svga_context *svga,
+                       struct svga_stream_output *streamout)
+{
+   enum pipe_error ret = PIPE_OK;
+   unsigned id = streamout ? streamout->id : SVGA3D_INVALID_ID;
+
+   if (!svga_have_vgpu10(svga)) {
+      return PIPE_OK;
+   }
+
+   /* Pointers must be printed with %p; %x expects an unsigned int */
+   SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=%p id=%d\n", __FUNCTION__,
+            (void *) streamout, id);
+
+   if (svga->current_so != streamout) {
+      /* Save current SO state */
+      svga->current_so = streamout;
+
+      ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id);
+      }
+   }
+
+   return ret;
+}
+
+/**
+ * Destroy a stream output object on the device, release its id and free
+ * the structure.  \p streamout must not be NULL and the device must be
+ * VGPU10 (both are asserted).
+ */
+void
+svga_delete_stream_output(struct svga_context *svga,
+                          struct svga_stream_output *streamout)
+{
+   enum pipe_error ret;
+
+   /* Pointers must be printed with %p; %x expects an unsigned int */
+   SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=%p\n", __FUNCTION__,
+            (void *) streamout);
+
+   assert(svga_have_vgpu10(svga));
+   assert(streamout != NULL);
+
+   /* Flush and retry once if the command does not fit.  The result of
+    * the retry is not acted upon: the id and memory are released anyway.
+    */
+   ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id);
+   }
+   (void) ret;
+
+   /* Release the ID */
+   util_bitmask_clear(svga->stream_output_id_bm, streamout->id);
+
+   /* Free streamout structure */
+   FREE(streamout);
+}
+
+/**
+ * pipe_context::create_stream_output_target hook.
+ *
+ * Allocates a target that holds a reference on \p buffer and records the
+ * byte range [buffer_offset, buffer_offset + buffer_size) requested by
+ * the caller.  Returns NULL on allocation failure.
+ */
+static struct pipe_stream_output_target *
+svga_create_stream_output_target(struct pipe_context *pipe,
+                                 struct pipe_resource *buffer,
+                                 unsigned buffer_offset,
+                                 unsigned buffer_size)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_stream_output_target *sot;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s offset=%d size=%d\n", __FUNCTION__,
+            buffer_offset, buffer_size);
+
+   assert(svga_have_vgpu10(svga));
+   (void) svga;
+
+   sot = CALLOC_STRUCT(svga_stream_output_target);
+   if (!sot)
+      return NULL;
+
+   pipe_reference_init(&sot->base.reference, 1);
+   /* Stores the pointer and takes the reference in one step, so no
+    * separate assignment of base.buffer is needed.
+    */
+   pipe_resource_reference(&sot->base.buffer, buffer);
+   sot->base.context = pipe;
+   sot->base.buffer_offset = buffer_offset;
+   sot->base.buffer_size = buffer_size;
+
+   return &sot->base;
+}
+
+/**
+ * pipe_context::stream_output_target_destroy hook: drop the buffer
+ * reference held by the target and free the target itself.
+ */
+static void
+svga_destroy_stream_output_target(struct pipe_context *pipe,
+                                  struct pipe_stream_output_target *target)
+{
+   struct svga_stream_output_target *doomed =
+      svga_stream_output_target(target);
+   struct pipe_resource **buffer_ref = &doomed->base.buffer;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s\n", __FUNCTION__);
+
+   pipe_resource_reference(buffer_ref, NULL);
+   FREE(doomed);
+}
+
+/**
+ * pipe_context::set_stream_output_targets hook (asserted VGPU10 only).
+ *
+ * Marks the buffers of the previously bound targets dirty, records the
+ * new targets in svga->so_targets / svga->so_surfaces, clears the slots
+ * that are no longer used and emits one SetSOTargets command covering
+ * both the new and the previously bound slots.
+ *
+ * \param num_targets  number of entries in \p targets
+ * \param targets      targets to bind
+ * \param offsets      not used by this implementation
+ */
+static void
+svga_set_stream_output_targets(struct pipe_context *pipe,
+                               unsigned num_targets,
+                               struct pipe_stream_output_target **targets,
+                               const unsigned *offsets)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS];
+   enum pipe_error ret;
+   unsigned i;
+   unsigned num_so_targets;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__,
+            num_targets);
+
+   assert(svga_have_vgpu10(svga));
+
+   /* Slots that are only being unbound are still handed to the device
+    * command below, so make sure they never contain stack garbage.
+    */
+   memset(soBindings, 0, sizeof(soBindings));
+
+   /* Mark the streamout buffers as dirty so that we'll issue readbacks
+    * before mapping.
+    */
+   for (i = 0; i < svga->num_so_targets; i++) {
+      struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer);
+      sbuf->dirty = TRUE;
+   }
+
+   assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS);
+
+   for (i = 0; i < num_targets; i++) {
+      struct svga_stream_output_target *sot
+         = svga_stream_output_target(targets[i]);
+      struct svga_buffer *sbuf = svga_buffer(sot->base.buffer);
+      const unsigned buf_width = sot->base.buffer->width0;
+      const unsigned buf_offset = sot->base.buffer_offset;
+      unsigned avail;
+      unsigned size;
+
+      assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT);
+      (void) sbuf;
+
+      svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer);
+      svga->so_targets[i] = &sot->base;
+      soBindings[i].offset = buf_offset;
+
+      /* The size cannot extend beyond the end of the buffer. Clamp it.
+       * An offset past the end leaves no room at all; computing the
+       * difference unconditionally would wrap around to a huge value.
+       */
+      avail = buf_offset < buf_width ? buf_width - buf_offset : 0;
+      size = MIN2(sot->base.buffer_size, avail);
+
+      soBindings[i].sizeInBytes = size;
+   }
+
+   /* unbind any previously bound stream output buffers */
+   for (; i < svga->num_so_targets; i++) {
+      svga->so_surfaces[i] = NULL;
+      svga->so_targets[i] = NULL;
+   }
+
+   /* Cover the old slots as well so that stale bindings get replaced */
+   num_so_targets = MAX2(svga->num_so_targets, num_targets);
+   ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
+                                    soBindings, svga->so_surfaces);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
+                                       soBindings, svga->so_surfaces);
+   }
+
+   svga->num_so_targets = num_targets;
+}
+
+/**
+ * Plug the stream output target create/destroy/set hooks of this file
+ * into the context's pipe_context function table.
+ */
+void
+svga_init_stream_output_functions(struct svga_context *svga)
+{
+   struct pipe_context *pipe = &svga->pipe;
+
+   pipe->set_stream_output_targets = svga_set_stream_output_targets;
+   pipe->stream_output_target_destroy = svga_destroy_stream_output_target;
+   pipe->create_stream_output_target = svga_create_stream_output_target;
+}
*
**********************************************************/
+#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
+#include "util/u_format.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
-#include "pipe/p_defines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_transfer.h"
#include "tgsi/tgsi_parse.h"
-#include "svga_screen.h"
-#include "svga_resource_buffer.h"
#include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_format.h"
+#include "svga_resource_buffer.h"
+#include "svga_screen.h"
static void svga_set_vertex_buffers(struct pipe_context *pipe,
{
struct svga_context *svga = svga_context(pipe);
- if (ib) {
- pipe_resource_reference(&svga->curr.ib.buffer, ib->buffer);
- memcpy(&svga->curr.ib, ib, sizeof(svga->curr.ib));
- }
- else {
- pipe_resource_reference(&svga->curr.ib.buffer, NULL);
- memset(&svga->curr.ib, 0, sizeof(svga->curr.ib));
- }
+ util_set_index_buffer(&svga->curr.ib, ib);
+}
- /* TODO make this more like a state */
+
+/**
+ * Does the given vertex attrib format need range adjustment in the VS?
+ * Range adjustment scales and biases values from [0,1] to [-1,1].
+ * This lets us avoid the swtnl path.
+ *
+ * Returns TRUE only for PIPE_FORMAT_R8G8B8_SNORM and FALSE for every
+ * other format.
+ */
+static boolean
+attrib_needs_range_adjustment(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return TRUE;
+ default:
+ return FALSE;
+ }
}
/**
- * Given a gallium vertex element format, return the corresponding SVGA3D
- * format. Return SVGA3D_DECLTYPE_MAX for unsupported gallium formats.
+ * Given a gallium vertex element format, return the corresponding
+ * SVGA3dDeclType.
*/
static SVGA3dDeclType
-translate_vertex_format(enum pipe_format format)
+translate_vertex_format_to_decltype(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_R32_FLOAT: return SVGA3D_DECLTYPE_FLOAT1;
case PIPE_FORMAT_R16G16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_2;
case PIPE_FORMAT_R16G16B16A16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_4;
- /* See attrib_needs_adjustment() and attrib_needs_w_to_1() below */
+ /* See attrib_needs_range_adjustment() above and the VF_W_TO_1 checks below */
case PIPE_FORMAT_R8G8B8_SNORM: return SVGA3D_DECLTYPE_UBYTE4N;
- /* See attrib_needs_w_to_1() below */
+ /* See attrib_needs_w_to_1() above */
case PIPE_FORMAT_R16G16B16_SNORM: return SVGA3D_DECLTYPE_SHORT4N;
case PIPE_FORMAT_R16G16B16_UNORM: return SVGA3D_DECLTYPE_USHORT4N;
case PIPE_FORMAT_R8G8B8_UNORM: return SVGA3D_DECLTYPE_UBYTE4N;
}
-/**
- * Does the given vertex attrib format need range adjustment in the VS?
- * Range adjustment scales and biases values from [0,1] to [-1,1].
- * This lets us avoid the swtnl path.
- */
-static boolean
-attrib_needs_range_adjustment(enum pipe_format format)
+/**
+ * Build and define the input element layout object for a vertex elements
+ * state. Only valid when VGPU10 is available (asserted below).
+ *
+ * For each vertex element this fills in one SVGA3dInputElementDesc, stores
+ * the SVGA3dDeclType in velems->decl_type[], and sets the per-attribute bits
+ * in the velems adjustment bitmasks according to the flags returned by
+ * svga_translate_vertex_format_vgpu10(). An id for the layout is then
+ * taken from svga->input_element_object_id_bm and stored in velems->id
+ * before the DefineElementLayout command is issued.
+ *
+ * \param svga the context used to allocate the id and emit the command
+ * \param velems the state to translate; count and velem[] must be set
+ */
+static void
+define_input_element_object(struct svga_context *svga,
+ struct svga_velems_state *velems)
{
- switch (format) {
- case PIPE_FORMAT_R8G8B8_SNORM:
- return TRUE;
- default:
- return FALSE;
+ SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS];
+ enum pipe_error ret;
+ unsigned i;
+
+ assert(velems->count <= PIPE_MAX_ATTRIBS);
+ assert(svga_have_vgpu10(svga));
+
+ for (i = 0; i < velems->count; i++) {
+ const struct pipe_vertex_element *elem = velems->velem + i;
+ SVGA3dSurfaceFormat svga_format;
+ unsigned vf_flags;
+
+ svga_translate_vertex_format_vgpu10(elem->src_format,
+ &svga_format, &vf_flags);
+
+ velems->decl_type[i] =
+ translate_vertex_format_to_decltype(elem->src_format);
+ elements[i].inputSlot = elem->vertex_buffer_index;
+ elements[i].alignedByteOffset = elem->src_offset;
+ elements[i].format = svga_format;
+
+ /* A non-zero instance divisor selects per-instance stepping */
+ if (elem->instance_divisor) {
+ elements[i].inputSlotClass = SVGA3D_INPUT_PER_INSTANCE_DATA;
+ elements[i].instanceDataStepRate = elem->instance_divisor;
+ }
+ else {
+ elements[i].inputSlotClass = SVGA3D_INPUT_PER_VERTEX_DATA;
+ elements[i].instanceDataStepRate = 0;
+ }
+ elements[i].inputRegister = i;
+
+ /* No usable SVGA format was found for this element */
+ if (elements[i].format == SVGA3D_FORMAT_INVALID) {
+ velems->need_swvfetch = TRUE;
+ }
+
+ if (util_format_is_pure_integer(elem->src_format)) {
+ velems->attrib_is_pure_int |= (1 << i);
+ }
+
+ /* Record the translation flags as one bit per attribute index */
+ if (vf_flags & VF_W_TO_1) {
+ velems->adjust_attrib_w_1 |= (1 << i);
+ }
+
+ if (vf_flags & VF_U_TO_F_CAST) {
+ velems->adjust_attrib_utof |= (1 << i);
+ }
+ else if (vf_flags & VF_I_TO_F_CAST) {
+ velems->adjust_attrib_itof |= (1 << i);
+ }
+
+ if (vf_flags & VF_BGRA) {
+ velems->attrib_is_bgra |= (1 << i);
+ }
+
+ if (vf_flags & VF_PUINT_TO_SNORM) {
+ velems->attrib_puint_to_snorm |= (1 << i);
+ }
+ else if (vf_flags & VF_PUINT_TO_USCALED) {
+ velems->attrib_puint_to_uscaled |= (1 << i);
+ }
+ else if (vf_flags & VF_PUINT_TO_SSCALED) {
+ velems->attrib_puint_to_sscaled |= (1 << i);
+ }
+ }
+
+ velems->id = util_bitmask_add(svga->input_element_object_id_bm);
+
+ /* If the first attempt fails, flush the context and try once more */
+ ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count,
+ velems->id, elements);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count,
+ velems->id, elements);
+ assert(ret == PIPE_OK);
}
}
/**
- * Does the given vertex attrib format need to have the W component set
- * to one in the VS?
+ * Translate the vertex element types to SVGA3dDeclType and check
+ * for VS-based vertex attribute adjustments.
+ *
+ * Used only when VGPU10 is not available (asserted below). Sets
+ * velems->need_swvfetch when an element has no SVGA3dDeclType, and sets
+ * the per-attribute bits in adjust_attrib_range and adjust_attrib_w_1.
 */
-static boolean
-attrib_needs_w_to_1(enum pipe_format format)
+static void
+translate_vertex_decls(struct svga_context *svga,
+ struct svga_velems_state *velems)
{
- switch (format) {
- case PIPE_FORMAT_R8G8B8_SNORM:
- case PIPE_FORMAT_R8G8B8_UNORM:
- case PIPE_FORMAT_R16G16B16_SNORM:
- case PIPE_FORMAT_R16G16B16_UNORM:
- return TRUE;
- default:
- return FALSE;
+ unsigned i;
+
+ assert(!svga_have_vgpu10(svga));
+
+ for (i = 0; i < velems->count; i++) {
+ const enum pipe_format f = velems->velem[i].src_format;
+ SVGA3dSurfaceFormat svga_format;
+ unsigned vf_flags;
+
+ /* Only vf_flags is consumed in this loop; svga_format is not read */
+ svga_translate_vertex_format_vgpu10(f, &svga_format, &vf_flags);
+
+ velems->decl_type[i] = translate_vertex_format_to_decltype(f);
+ if (velems->decl_type[i] == SVGA3D_DECLTYPE_MAX) {
+ /* Unsupported format - use software fetch */
+ velems->need_swvfetch = TRUE;
+ }
+
+ /* Check for VS-based adjustments */
+ if (attrib_needs_range_adjustment(f)) {
+ velems->adjust_attrib_range |= (1 << i);
+ }
+
+ if (vf_flags & VF_W_TO_1) {
+ velems->adjust_attrib_w_1 |= (1 << i);
+ }
}
}
unsigned count,
const struct pipe_vertex_element *attribs)
{
+ struct svga_context *svga = svga_context(pipe);
struct svga_velems_state *velems;
+
assert(count <= PIPE_MAX_ATTRIBS);
velems = (struct svga_velems_state *) MALLOC(sizeof(struct svga_velems_state));
if (velems) {
- unsigned i;
-
velems->count = count;
memcpy(velems->velem, attribs, sizeof(*attribs) * count);
velems->need_swvfetch = FALSE;
velems->adjust_attrib_range = 0x0;
+ velems->attrib_is_pure_int = 0x0;
velems->adjust_attrib_w_1 = 0x0;
-
- /* Translate Gallium vertex format to SVGA3dDeclType */
- for (i = 0; i < count; i++) {
- enum pipe_format f = attribs[i].src_format;
- velems->decl_type[i] = translate_vertex_format(f);
- if (velems->decl_type[i] == SVGA3D_DECLTYPE_MAX) {
- /* Unsupported format - use software fetch */
- velems->need_swvfetch = TRUE;
- break;
- }
-
- if (attrib_needs_range_adjustment(f)) {
- velems->adjust_attrib_range |= (1 << i);
- }
- if (attrib_needs_w_to_1(f)) {
- velems->adjust_attrib_w_1 |= (1 << i);
- }
+ velems->adjust_attrib_itof = 0x0;
+ velems->adjust_attrib_utof = 0x0;
+ velems->attrib_is_bgra = 0x0;
+ velems->attrib_puint_to_snorm = 0x0;
+ velems->attrib_puint_to_uscaled = 0x0;
+ velems->attrib_puint_to_sscaled = 0x0;
+
+ if (svga_have_vgpu10(svga)) {
+ define_input_element_object(svga, velems);
+ }
+ else {
+ translate_vertex_decls(svga, velems);
}
}
return velems;
}
-static void svga_bind_vertex_elements_state(struct pipe_context *pipe,
- void *velems)
+
+/**
+ * Make the given vertex elements state the context's current one and
+ * flag SVGA_NEW_VELEMENT as dirty.
+ */
+static void
+svga_bind_vertex_elements_state(struct pipe_context *pipe, void *state)
{
struct svga_context *svga = svga_context(pipe);
- struct svga_velems_state *svga_velems = (struct svga_velems_state *) velems;
+ struct svga_velems_state *velems = (struct svga_velems_state *) state;
- svga->curr.velems = svga_velems;
+ svga->curr.velems = velems;
svga->dirty |= SVGA_NEW_VELEMENT;
}
-static void svga_delete_vertex_elements_state(struct pipe_context *pipe,
- void *velems)
+
+/**
+ * Destroy a vertex elements state object and free its memory.
+ *
+ * With VGPU10 the element layout is also destroyed on the device, the
+ * cached hw_draw.layout_id is invalidated if it refers to this object, and
+ * the id is released back to svga->input_element_object_id_bm.
+ */
+static void
+svga_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_velems_state *velems = (struct svga_velems_state *) state;
+
+ if (svga_have_vgpu10(svga)) {
+ enum pipe_error ret;
+
+ svga_hwtnl_flush_retry(svga);
+
+ /* If the first attempt fails, flush the context and try once more */
+ ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id);
+ assert(ret == PIPE_OK);
+ }
+
+ /* Drop the cached layout id so the destroyed id is not treated as bound */
+ if (velems->id == svga->state.hw_draw.layout_id)
+ svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID;
+
+ util_bitmask_clear(svga->input_element_object_id_bm, velems->id);
+ velems->id = SVGA3D_INVALID_ID;
+ }
+
FREE(velems);
}
svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state;
svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state;
}
-
-
#include "tgsi/tgsi_text.h"
#include "svga_context.h"
-#include "svga_tgsi.h"
#include "svga_hw_reg.h"
#include "svga_cmd.h"
#include "svga_debug.h"
#include "svga_shader.h"
+#include "svga_streamout.h"
/**
{
struct svga_context *svga = svga_context(pipe);
struct svga_vertex_shader *vs = CALLOC_STRUCT(svga_vertex_shader);
+
if (!vs)
return NULL;
vs->base.id = svga->debug.shader_id++;
- if (SVGA_DEBUG & DEBUG_TGSI || 0) {
- debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
- __FUNCTION__, vs->base.id,
- vs->base.info.num_inputs, vs->base.info.num_outputs);
+ vs->generic_outputs = svga_get_generic_outputs_mask(&vs->base.info);
+
+ /* check for any stream output declarations */
+ if (templ->stream_output.num_outputs) {
+ vs->base.stream_output = svga_create_stream_output(svga, &vs->base,
+ &templ->stream_output);
}
return vs;
struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader;
struct svga_context *svga = svga_context(pipe);
+ if (vs == svga->curr.vs)
+ return;
+
+ /* If the currently bound vertex shader has a generated geometry shader,
+ * then unbind the geometry shader before binding a new vertex shader.
+ * We need to unbind the geometry shader here because there is no
+ * pipe_shader associated with the generated geometry shader.
+ */
+ if (svga->curr.vs != NULL && svga->curr.vs->gs != NULL)
+ svga->pipe.bind_gs_state(&svga->pipe, NULL);
+
svga->curr.vs = vs;
svga->dirty |= SVGA_NEW_VS;
}
svga_hwtnl_flush_retry(svga);
+ assert(vs->base.parent == NULL);
+
+ /* Check if there is a generated geometry shader to go with this
+ * vertex shader. If there is, then delete the geometry shader as well.
+ */
+ if (vs->gs != NULL) {
+ svga->pipe.delete_gs_state(&svga->pipe, vs->gs);
+ }
+
+ if (vs->base.stream_output != NULL)
+ svga_delete_stream_output(svga, vs->base.stream_output);
+
draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader);
for (variant = vs->base.variants; variant; variant = tmp) {
tmp = variant->next;
- ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
- (void) ret; /* PIPE_ERROR_ not handled yet */
-
- /*
- * Remove stale references to this variant to ensure a new variant on the
- * same address will be detected as a change.
- */
- if (variant == svga->state.hw_draw.vs)
+ /* Check if deleting currently bound shader */
+ if (variant == svga->state.hw_draw.vs) {
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL);
+ assert(ret == PIPE_OK);
+ }
svga->state.hw_draw.vs = NULL;
+ }
+
+ ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+ assert(ret == PIPE_OK);
+ }
}
FREE((void *)vs->base.tokens);
struct svga_winsys_screen *sws = svgascreen->sws;
SVGA3dSurfaceFormat format;
SVGA3dSize base_level_size;
- uint32 numFaces;
uint32 numMipLevels;
+ uint32 arraySize;
if (res->target == PIPE_BUFFER) {
format = SVGA3D_BUFFER;
base_level_size.width = res->width0;
base_level_size.height = 1;
base_level_size.depth = 1;
- numFaces = 1;
numMipLevels = 1;
+ arraySize = 1;
} else {
+ if (res->target == PIPE_TEXTURE_CUBE)
+ assert(res->array_size == 6);
+
format = svga_translate_format(svgascreen, res->format, res->bind);
if (format == SVGA3D_FORMAT_INVALID)
return FALSE;
base_level_size.width = res->width0;
base_level_size.height = res->height0;
base_level_size.depth = res->depth0;
- numFaces = (res->target == PIPE_TEXTURE_CUBE) ? 6 : 1;
numMipLevels = res->last_level + 1;
+ arraySize = res->array_size;
}
return sws->surface_can_create(sws, format, base_level_size,
- numFaces, numMipLevels);
+ arraySize, numMipLevels);
}
static inline boolean
svga_buffer_needs_hw_storage(unsigned usage)
{
- return usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER);
+ /* TRUE if any of the vertex, index, sampler-view or stream-output bind
+ * flags is set in 'usage'; the '!= 0' yields a plain boolean value.
+ */
+ return (usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT)) != 0;
}
transfer->usage = usage;
transfer->box = *box;
+ if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) {
+ /* Only need to test for vgpu10 since only vgpu10 features (streamout,
+ * buffer copy) can modify buffers on the device.
+ */
+ if (svga_have_vgpu10(svga)) {
+ enum pipe_error ret;
+ assert(sbuf->handle);
+ ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
+ assert(ret == PIPE_OK);
+ }
+
+ svga_context_finish(svga);
+
+ sbuf->dirty = FALSE;
+ }
+ }
+
if (usage & PIPE_TRANSFER_WRITE) {
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
/*
sbuf->b.vtbl = &svga_buffer_vtbl;
pipe_reference_init(&sbuf->b.b.reference, 1);
sbuf->b.b.screen = screen;
+ sbuf->bind_flags = template->bind;
+
+ if (template->bind & PIPE_BIND_CONSTANT_BUFFER) {
+ /* Constant buffers can only have the PIPE_BIND_CONSTANT_BUFFER
+ * flag set.
+ */
+ if (ss->sws->have_vgpu10) {
+ sbuf->bind_flags = PIPE_BIND_CONSTANT_BUFFER;
+
+ /* Constant buffer size needs to be in multiples of 16. */
+ sbuf->b.b.width0 = align(sbuf->b.b.width0, 16);
+ }
+ }
if(svga_buffer_needs_hw_storage(template->bind)) {
+
+ /* If the buffer will be used for vertex/index/stream data, set all
+ * the flags so that the buffer will be accepted for all those uses.
+ * Note that the PIPE_BIND_ flags we get from the state tracker are
+ * just a hint about how the buffer may be used. An OpenGL buffer
+ * object may be used for many different things.
+ */
+ if (!(template->bind & PIPE_BIND_CONSTANT_BUFFER)) {
+ /* Not a constant buffer. The buffer may be used for vertex data,
+ * indexes or stream-out.
+ */
+ sbuf->bind_flags |= (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_INDEX_BUFFER);
+ if (ss->sws->have_vgpu10)
+ sbuf->bind_flags |= PIPE_BIND_STREAM_OUTPUT;
+ }
+
if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
goto error2;
}
else {
- sbuf->swbuf = align_malloc(template->width0, 64);
+ sbuf->swbuf = align_malloc(sbuf->b.b.width0, 64);
if(!sbuf->swbuf)
goto error2;
}
debug_reference(&sbuf->b.b.reference,
(debug_reference_descriptor)debug_describe_resource, 0);
- sbuf->size = util_resource_size(template);
+ sbuf->size = util_resource_size(&sbuf->b.b);
ss->total_resource_bytes += sbuf->size;
return &sbuf->b.b;
sbuf->b.b.depth0 = 1;
sbuf->b.b.array_size = 1;
+ sbuf->bind_flags = bind;
sbuf->swbuf = ptr;
sbuf->user = TRUE;
{
struct u_resource b;
+ /** This is a superset of b.b.bind */
+ unsigned bind_flags;
+
/**
* Regular (non DMA'able) memory.
*
struct list_head head;
unsigned size; /**< Approximate size in bytes */
+
+ boolean dirty; /**< Need to do a readback before mapping? */
};
sbuf->key.flags = 0;
sbuf->key.format = SVGA3D_BUFFER;
- if (sbuf->b.b.bind & PIPE_BIND_VERTEX_BUFFER)
+ if (sbuf->bind_flags & PIPE_BIND_VERTEX_BUFFER) {
sbuf->key.flags |= SVGA3D_SURFACE_HINT_VERTEXBUFFER;
- if (sbuf->b.b.bind & PIPE_BIND_INDEX_BUFFER)
+ sbuf->key.flags |= SVGA3D_SURFACE_BIND_VERTEX_BUFFER;
+ }
+ if (sbuf->bind_flags & PIPE_BIND_INDEX_BUFFER) {
sbuf->key.flags |= SVGA3D_SURFACE_HINT_INDEXBUFFER;
+ sbuf->key.flags |= SVGA3D_SURFACE_BIND_INDEX_BUFFER;
+ }
+ if (sbuf->bind_flags & PIPE_BIND_CONSTANT_BUFFER)
+ sbuf->key.flags |= SVGA3D_SURFACE_BIND_CONSTANT_BUFFER;
+
+ if (sbuf->bind_flags & PIPE_BIND_STREAM_OUTPUT)
+ sbuf->key.flags |= SVGA3D_SURFACE_BIND_STREAM_OUTPUT;
+
+ if (sbuf->bind_flags & PIPE_BIND_SAMPLER_VIEW)
+ sbuf->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE;
sbuf->key.size.width = sbuf->b.b.width0;
sbuf->key.size.height = 1;
sbuf->key.numFaces = 1;
sbuf->key.numMipLevels = 1;
sbuf->key.cachable = 1;
+ sbuf->key.arraySize = 1;
SVGA_DBG(DEBUG_DMA, "surface_create for buffer sz %d\n", sbuf->b.b.width0);
- sbuf->handle = svga_screen_surface_create(ss, &sbuf->key);
+ sbuf->handle = svga_screen_surface_create(ss, sbuf->b.b.bind,
+ sbuf->b.b.usage, &sbuf->key);
if (!sbuf->handle)
return PIPE_ERROR_OUT_OF_MEMORY;
struct svga_buffer *sbuf)
{
struct svga_winsys_context *swc = svga->swc;
- SVGA3dCmdUpdateGBImage *cmd;
- struct svga_3d_update_gb_image *ccmd = NULL;
+ SVGA3dCmdUpdateGBImage *update_cmd;
+ struct svga_3d_update_gb_image *whole_update_cmd = NULL;
uint32 numBoxes = sbuf->map.num_ranges;
struct pipe_resource *dummy;
unsigned int i;
if (sbuf->dma.flags.discard) {
struct svga_3d_invalidate_gb_image *cicmd = NULL;
- SVGA3dCmdInvalidateGBImage *icmd;
+ SVGA3dCmdInvalidateGBImage *invalidate_cmd;
+ const unsigned total_commands_size =
+ sizeof(*invalidate_cmd) + numBoxes * sizeof(*whole_update_cmd);
/* Allocate FIFO space for one INVALIDATE_GB_IMAGE command followed by
* 'numBoxes' UPDATE_GB_IMAGE commands. Allocate all at once rather
* than with separate commands because we need to properly deal with
* filling the command buffer.
*/
- icmd = SVGA3D_FIFOReserve(swc,
- SVGA_3D_CMD_INVALIDATE_GB_IMAGE,
- sizeof *icmd + numBoxes * sizeof *ccmd,
- 2);
- if (!icmd)
+ invalidate_cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_INVALIDATE_GB_IMAGE,
+ total_commands_size, 1 + numBoxes);
+ if (!invalidate_cmd)
return PIPE_ERROR_OUT_OF_MEMORY;
- cicmd = container_of(icmd, cicmd, body);
- cicmd->header.size = sizeof *icmd;
- swc->surface_relocation(swc, &icmd->image.sid, NULL, sbuf->handle,
+ cicmd = container_of(invalidate_cmd, cicmd, body);
+ cicmd->header.size = sizeof(*invalidate_cmd);
+ swc->surface_relocation(swc, &invalidate_cmd->image.sid, NULL, sbuf->handle,
(SVGA_RELOC_WRITE |
SVGA_RELOC_INTERNAL |
SVGA_RELOC_DMA));
- icmd->image.face = 0;
- icmd->image.mipmap = 0;
+ invalidate_cmd->image.face = 0;
+ invalidate_cmd->image.mipmap = 0;
+ /* whole_update_cmd points to an SVGA3dCmdHeader followed by the
+ * SVGA3dCmdUpdateGBImage command body.
+ */
+ whole_update_cmd = (struct svga_3d_update_gb_image *) &invalidate_cmd[1];
/* initialize the first UPDATE_GB_IMAGE command */
- ccmd = (struct svga_3d_update_gb_image *) &icmd[1];
- ccmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
- cmd = &ccmd->body;
+ whole_update_cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
+ update_cmd = &whole_update_cmd->body;
} else {
/* Allocate FIFO space for 'numBoxes' UPDATE_GB_IMAGE commands */
- cmd = SVGA3D_FIFOReserve(swc,
- SVGA_3D_CMD_UPDATE_GB_IMAGE,
- sizeof *cmd + (numBoxes - 1) * sizeof *ccmd,
- 1);
- if (!cmd)
+ const unsigned total_commands_size =
+ sizeof(*update_cmd) + (numBoxes - 1) * sizeof(*whole_update_cmd);
+
+ update_cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_UPDATE_GB_IMAGE,
+ total_commands_size, numBoxes);
+ if (!update_cmd)
return PIPE_ERROR_OUT_OF_MEMORY;
- ccmd = container_of(cmd, ccmd, body);
+ /* whole_update_cmd points to an SVGA3dCmdHeader followed by the
+ * SVGA3dCmdUpdateGBImage command body.
+ */
+ whole_update_cmd = container_of(update_cmd, whole_update_cmd, body);
}
/* Init the first UPDATE_GB_IMAGE command */
- ccmd->header.size = sizeof *cmd;
- swc->surface_relocation(swc, &cmd->image.sid, NULL, sbuf->handle,
+ whole_update_cmd->header.size = sizeof(*update_cmd);
+ swc->surface_relocation(swc, &update_cmd->image.sid, NULL, sbuf->handle,
SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
- cmd->image.face = 0;
- cmd->image.mipmap = 0;
+ update_cmd->image.face = 0;
+ update_cmd->image.mipmap = 0;
/* Save pointer to the first UPDATE_GB_IMAGE command so that we can
* fill in the box info below.
*/
- sbuf->dma.updates = ccmd;
+ sbuf->dma.updates = whole_update_cmd;
/*
- * Copy the relocation info, face and mipmap to all
- * subsequent commands. NOTE: For winsyses that actually
- * patch the image.sid member at flush time, this will fail
- * miserably. For those we need to add as many relocations
- * as there are copy boxes.
+ * Copy the face, mipmap, etc. info to all subsequent commands.
+ * Also do the surface relocation for each subsequent command.
*/
-
for (i = 1; i < numBoxes; ++i) {
- memcpy(++ccmd, sbuf->dma.updates, sizeof *ccmd);
+ whole_update_cmd++;
+ memcpy(whole_update_cmd, sbuf->dma.updates, sizeof(*whole_update_cmd));
+
+ swc->surface_relocation(swc, &whole_update_cmd->body.image.sid, NULL,
+ sbuf->handle,
+ SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
}
/* Increment reference count */
#include "svga_debug.h"
-/* XXX: This isn't a real hardware flag, but just a hack for kernel to
- * know about primary surfaces. Find a better way to accomplish this.
- */
-#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
-
-
static void
svga_transfer_dma_band(struct svga_context *svga,
struct svga_transfer *st,
unsigned y, unsigned h, unsigned srcy,
SVGA3dSurfaceDMAFlags flags)
{
- struct svga_texture *texture = svga_texture(st->base.resource);
+ struct svga_texture *texture = svga_texture(st->base.resource);
SVGA3dCopyBox box;
enum pipe_error ret;
-
+
assert(!st->use_direct_map);
box.x = st->base.box.x;
box.srcy = srcy;
box.srcz = 0;
- if (st->base.resource->target == PIPE_TEXTURE_CUBE) {
- st->face = st->base.box.z;
- box.z = 0;
- }
- else
- st->face = 0;
-
- SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n",
- transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from",
- texture->handle,
- st->face,
- st->base.box.x,
- y,
- box.z,
- st->base.box.x + st->base.box.width,
- y + h,
- box.z + 1,
- util_format_get_blocksize(texture->b.b.format) * 8 /
- (util_format_get_blockwidth(texture->b.b.format)*util_format_get_blockheight(texture->b.b.format)));
+ SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - "
+ "(%u, %u, %u), %ubpp\n",
+ transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from",
+ texture->handle,
+ st->slice,
+ st->base.box.x,
+ y,
+ box.z,
+ st->base.box.x + st->base.box.width,
+ y + h,
+ box.z + 1,
+ util_format_get_blocksize(texture->b.b.format) * 8 /
+ (util_format_get_blockwidth(texture->b.b.format)
+ * util_format_get_blockheight(texture->b.b.format)));
ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags);
- if(ret != PIPE_OK) {
+ if (ret != PIPE_OK) {
svga_context_flush(svga, NULL);
ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags);
assert(ret == PIPE_OK);
SVGA3dTransferType transfer,
SVGA3dSurfaceDMAFlags flags)
{
- struct svga_texture *texture = svga_texture(st->base.resource);
+ struct svga_texture *texture = svga_texture(st->base.resource);
struct svga_screen *screen = svga_screen(texture->b.b.screen);
struct svga_winsys_screen *sws = screen->sws;
struct pipe_fence_handle *fence = NULL;
*/
svga_surfaces_flush( svga );
- if(!st->swbuf) {
+ if (!st->swbuf) {
/* Do the DMA transfer in a single go */
-
svga_transfer_dma_band(svga, st, transfer,
st->base.box.y, st->base.box.height, 0,
flags);
- if(transfer == SVGA3D_READ_HOST_VRAM) {
+ if (transfer == SVGA3D_READ_HOST_VRAM) {
svga_context_flush(svga, &fence);
sws->fence_finish(sws, fence, 0);
sws->fence_reference(sws, &fence, NULL);
}
else {
int y, h, srcy;
- unsigned blockheight = util_format_get_blockheight(st->base.resource->format);
+ unsigned blockheight =
+ util_format_get_blockheight(st->base.resource->format);
+
h = st->hw_nblocksy * blockheight;
srcy = 0;
- for(y = 0; y < st->base.box.height; y += h) {
+
+ for (y = 0; y < st->base.box.height; y += h) {
unsigned offset, length;
void *hw, *sw;
offset = y * st->base.stride / blockheight;
length = h * st->base.stride / blockheight;
- sw = (uint8_t *)st->swbuf + offset;
+ sw = (uint8_t *) st->swbuf + offset;
if (transfer == SVGA3D_WRITE_HOST_VRAM) {
unsigned usage = PIPE_TRANSFER_WRITE;
* Prevent the texture contents to be discarded on the next band
* upload.
*/
-
flags.discard = FALSE;
- if(transfer == SVGA3D_READ_HOST_VRAM) {
+ if (transfer == SVGA3D_READ_HOST_VRAM) {
svga_context_flush(svga, &fence);
sws->fence_finish(sws, fence, 0);
hw = sws->buffer_map(sws, st->hwbuf, PIPE_TRANSFER_READ);
assert(hw);
- if(hw) {
+ if (hw) {
memcpy(sw, hw, length);
sws->buffer_unmap(sws, st->hwbuf);
}
}
-static boolean
+static boolean
svga_texture_get_handle(struct pipe_screen *screen,
- struct pipe_resource *texture,
- struct winsys_handle *whandle)
+ struct pipe_resource *texture,
+ struct winsys_handle *whandle)
{
struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen);
unsigned stride;
assert(svga_texture(texture)->key.cachable == 0);
svga_texture(texture)->key.cachable = 0;
+
stride = util_format_get_nblocksx(texture->format, texture->width0) *
util_format_get_blocksize(texture->format);
- return sws->surface_get_handle(sws, svga_texture(texture)->handle, stride, whandle);
+
+ return sws->surface_get_handle(sws, svga_texture(texture)->handle,
+ stride, whandle);
}
ss->total_resource_bytes -= tex->size;
+ FREE(tex->defined);
FREE(tex->rendered_to);
FREE(tex);
}
}
+/**
+ * Issue a ReadbackGBImage command for one slice/mip level of a surface.
+ * If the first attempt fails, the context is flushed and the command is
+ * issued once more.
+ *
+ * \return the result of the last attempt
+ */
+static enum pipe_error
+readback_image_vgpu9(struct svga_context *svga,
+ struct svga_winsys_surface *surf,
+ unsigned slice,
+ unsigned level)
+{
+ enum pipe_error ret;
+
+ ret = SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level);
+ }
+ return ret;
+}
+
+
+/**
+ * Issue a ReadbackSubResource command for one slice/mip level of a surface.
+ * The subresource index is computed as slice * numMipLevels + level.
+ * If the first attempt fails, the context is flushed and the command is
+ * issued once more.
+ *
+ * \return the result of the last attempt
+ */
+static enum pipe_error
+readback_image_vgpu10(struct svga_context *svga,
+ struct svga_winsys_surface *surf,
+ unsigned slice,
+ unsigned level,
+ unsigned numMipLevels)
+{
+ enum pipe_error ret;
+ unsigned subResource;
+
+ subResource = slice * numMipLevels + level;
+ ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource);
+ }
+ return ret;
+}
+
-/* XXX: Still implementing this as if it was a screen function, but
- * can now modify it to queue transfers on the context.
- */
static void *
svga_texture_transfer_map(struct pipe_context *pipe,
struct pipe_resource *texture,
struct svga_context *svga = svga_context(pipe);
struct svga_screen *ss = svga_screen(pipe->screen);
struct svga_winsys_screen *sws = ss->sws;
+ struct svga_texture *tex = svga_texture(texture);
struct svga_transfer *st;
unsigned nblocksx, nblocksy;
boolean use_direct_map = svga_have_gb_objects(svga) &&
}
pipe_resource_reference(&st->base.resource, texture);
+
st->base.level = level;
st->base.usage = usage;
st->base.box = *box;
st->base.stride = nblocksx*util_format_get_blocksize(texture->format);
st->base.layer_stride = st->base.stride * nblocksy;
+ switch (tex->b.b.target) {
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_1D_ARRAY:
+ st->slice = st->base.box.z;
+ st->base.box.z = 0; /* so we don't apply double offsets below */
+ break;
+ default:
+ st->slice = 0;
+ break;
+ }
+
if (!use_direct_map) {
/* Use a DMA buffer */
st->hw_nblocksy = nblocksy;
- st->hwbuf = svga_winsys_buffer_create(svga,
- 1,
- 0,
- st->hw_nblocksy * st->base.stride * d);
+ st->hwbuf = svga_winsys_buffer_create(svga, 1, 0,
+ st->hw_nblocksy * st->base.stride * d);
while(!st->hwbuf && (st->hw_nblocksy /= 2)) {
- st->hwbuf = svga_winsys_buffer_create(svga,
- 1,
- 0,
- st->hw_nblocksy * st->base.stride * d);
+ st->hwbuf = svga_winsys_buffer_create(svga, 1, 0,
+ st->hw_nblocksy * st->base.stride * d);
}
if (!st->hwbuf) {
return NULL;
}
- if(st->hw_nblocksy < nblocksy) {
- /* We couldn't allocate a hardware buffer big enough for the transfer,
+ if (st->hw_nblocksy < nblocksy) {
+ /* We couldn't allocate a hardware buffer big enough for the transfer,
* so allocate regular malloc memory instead */
if (0) {
debug_printf("%s: failed to allocate %u KB of DMA, "
}
} else {
struct pipe_transfer *transfer = &st->base;
- struct svga_texture *tex = svga_texture(transfer->resource);
struct svga_winsys_surface *surf = tex->handle;
- unsigned face;
-
- assert(surf);
- if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
- face = transfer->box.z;
- } else {
- face = 0;
+ if (!surf) {
+ FREE(st);
+ return NULL;
}
if (need_tex_readback(transfer)) {
- SVGA3dBox box;
enum pipe_error ret;
- box.x = transfer->box.x;
- box.y = transfer->box.y;
- box.w = transfer->box.width;
- box.h = transfer->box.height;
- box.d = transfer->box.depth;
- if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
- box.z = 0;
- }
- else {
- box.z = transfer->box.z;
- }
-
- (void) box; /* not used at this time */
-
svga_surfaces_flush(svga);
- ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level);
+ if (svga_have_vgpu10(svga)) {
+ ret = readback_image_vgpu10(svga, surf, st->slice, transfer->level,
+ tex->b.b.last_level + 1);
+ } else {
+ ret = readback_image_vgpu9(svga, surf, st->slice, transfer->level);
+ }
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level);
- assert(ret == PIPE_OK);
- }
+ assert(ret == PIPE_OK);
+ (void) ret;
svga_context_flush(svga, NULL);
* Note: if PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE were specified
* we could potentially clear the flag for all faces/layers/mips.
*/
- svga_clear_texture_rendered_to(tex, face, transfer->level);
+ svga_clear_texture_rendered_to(tex, st->slice, transfer->level);
}
else {
assert(transfer->usage & PIPE_TRANSFER_WRITE);
return sws->buffer_map(sws, st->hwbuf, usage);
}
else {
- struct svga_screen *screen = svga_screen(svga->pipe.screen);
- SVGA3dSurfaceFormat format;
SVGA3dSize baseLevelSize;
struct svga_texture *tex = svga_texture(texture);
struct svga_winsys_surface *surf = tex->handle;
uint8_t *map;
boolean retry;
- unsigned face, offset, mip_width, mip_height;
- unsigned xoffset = box->x;
- unsigned yoffset = box->y;
- unsigned zoffset = box->z;
+ unsigned offset, mip_width, mip_height;
+ unsigned xoffset = st->base.box.x;
+ unsigned yoffset = st->base.box.y;
+ unsigned zoffset = st->base.box.z;
map = svga->swc->surface_map(svga->swc, surf, usage, &retry);
if (map == NULL && retry) {
/**
* Compute the offset to the specific texture slice in the buffer.
*/
- if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
- face = zoffset;
- zoffset = 0;
- } else {
- face = 0;
- }
-
- format = svga_translate_format(screen, tex->b.b.format, 0);
baseLevelSize.width = tex->b.b.width0;
baseLevelSize.height = tex->b.b.height0;
baseLevelSize.depth = tex->b.b.depth0;
- offset = svga3dsurface_get_image_offset(format, baseLevelSize,
+ offset = svga3dsurface_get_image_offset(tex->key.format, baseLevelSize,
tex->b.b.last_level + 1, /* numMips */
- face, level);
+ st->slice, level);
if (level > 0) {
assert(offset > 0);
}
mip_width = u_minify(tex->b.b.width0, level);
mip_height = u_minify(tex->b.b.height0, level);
- offset += svga3dsurface_get_pixel_offset(format, mip_width, mip_height,
+ offset += svga3dsurface_get_pixel_offset(tex->key.format,
+ mip_width, mip_height,
xoffset, yoffset, zoffset);
return (void *) (map + offset);
}
-/* XXX: Still implementing this as if it was a screen function, but
- * can now modify it to queue transfers on the context.
- */
+/**
+ * Issue an UpdateGBImage command for the given box of one slice/mip level.
+ * If the first attempt fails, the context is flushed and the command is
+ * issued once more.
+ *
+ * \return the result of the last attempt
+ */
+static enum pipe_error
+update_image_vgpu9(struct svga_context *svga,
+ struct svga_winsys_surface *surf,
+ const SVGA3dBox *box,
+ unsigned slice,
+ unsigned level)
+{
+ enum pipe_error ret;
+
+ ret = SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level);
+ }
+ return ret;
+}
+
+
+/**
+ * Issue an UpdateSubResource command for the given box of one slice/mip
+ * level. The subresource index is computed as slice * numMipLevels + level.
+ * If the first attempt fails, the context is flushed and the command is
+ * issued once more.
+ *
+ * \return the result of the last attempt
+ */
+static enum pipe_error
+update_image_vgpu10(struct svga_context *svga,
+ struct svga_winsys_surface *surf,
+ const SVGA3dBox *box,
+ unsigned slice,
+ unsigned level,
+ unsigned numMipLevels)
+{
+ enum pipe_error ret;
+ unsigned subResource;
+
+ subResource = slice * numMipLevels + level;
+ ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource);
+ }
+ return ret;
+}
+
+
static void
svga_texture_transfer_unmap(struct pipe_context *pipe,
struct pipe_transfer *transfer)
} else if (transfer->usage & PIPE_TRANSFER_WRITE) {
struct svga_winsys_surface *surf =
svga_texture(transfer->resource)->handle;
- unsigned face;
SVGA3dBox box;
enum pipe_error ret;
assert(svga_have_gb_objects(svga));
/* update the effected region */
- if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
- face = transfer->box.z;
- } else {
- face = 0;
- }
-
box.x = transfer->box.x;
box.y = transfer->box.y;
- if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+ switch (tex->b.b.target) {
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_2D_ARRAY:
box.z = 0;
- }
- else {
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ box.y = box.z = 0;
+ break;
+ default:
box.z = transfer->box.z;
+ break;
}
box.w = transfer->box.width;
box.h = transfer->box.height;
box.x, box.y, box.z,
box.w, box.h, box.d);
- ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level);
- assert(ret == PIPE_OK);
+ if (svga_have_vgpu10(svga)) {
+ ret = update_image_vgpu10(svga, surf, &box, st->slice, transfer->level,
+ tex->b.b.last_level + 1);
+ } else {
+ ret = update_image_vgpu9(svga, surf, &box, st->slice, transfer->level);
}
+
+ assert(ret == PIPE_OK);
+ (void) ret;
}
ss->texture_timestamp++;
svga_age_texture_view(tex, transfer->level);
if (transfer->resource->target == PIPE_TEXTURE_CUBE)
- svga_define_texture_level(tex, transfer->box.z, transfer->level);
+ svga_define_texture_level(tex, st->slice, transfer->level);
else
svga_define_texture_level(tex, 0, transfer->level);
}
-struct u_resource_vtbl svga_texture_vtbl =
+/**
+ * Report whether the given format stores depth values.
+ * Looks up the format's description and passes it to
+ * util_format_has_depth().
+ */
+static inline boolean
+format_has_depth(enum pipe_format format)
+{
+   return util_format_has_depth(util_format_description(format));
+}
+
+
+struct u_resource_vtbl svga_texture_vtbl =
{
svga_texture_get_handle, /* get_handle */
svga_texture_destroy, /* resource_destroy */
const struct pipe_resource *template)
{
struct svga_screen *svgascreen = svga_screen(screen);
- struct svga_texture *tex = CALLOC_STRUCT(svga_texture);
+ struct svga_texture *tex;
+ unsigned bindings = template->bind;
- if (!tex)
- goto error1;
+ assert(template->last_level < SVGA_MAX_TEXTURE_LEVELS);
+ if (template->last_level >= SVGA_MAX_TEXTURE_LEVELS) {
+ return NULL;
+ }
+
+ tex = CALLOC_STRUCT(svga_texture);
+ if (!tex) {
+ return NULL;
+ }
+
+ tex->defined = CALLOC(template->depth0 * template->array_size,
+ sizeof(tex->defined[0]));
+ if (!tex->defined) {
+ FREE(tex);
+ return NULL;
+ }
+
+ tex->rendered_to = CALLOC(template->depth0 * template->array_size,
+ sizeof(tex->rendered_to[0]));
+ if (!tex->rendered_to) {
+ FREE(tex->defined);
+ FREE(tex);
+ return NULL;
+ }
tex->b.b = *template;
tex->b.vtbl = &svga_texture_vtbl;
pipe_reference_init(&tex->b.b.reference, 1);
tex->b.b.screen = screen;
- assert(template->last_level < SVGA_MAX_TEXTURE_LEVELS);
- if(template->last_level >= SVGA_MAX_TEXTURE_LEVELS)
- goto error2;
-
tex->key.flags = 0;
tex->key.size.width = template->width0;
tex->key.size.height = template->height0;
tex->key.size.depth = template->depth0;
+ tex->key.arraySize = 1;
+ tex->key.numFaces = 1;
+ tex->key.sampleCount = template->nr_samples;
- if(template->target == PIPE_TEXTURE_CUBE) {
- tex->key.flags |= SVGA3D_SURFACE_CUBEMAP;
- tex->key.numFaces = 6;
- }
- else {
- tex->key.numFaces = 1;
+ if (template->nr_samples > 1) {
+ tex->key.flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS;
}
- if (template->target == PIPE_TEXTURE_3D) {
- tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+ if (svgascreen->sws->have_vgpu10) {
+ switch (template->target) {
+ case PIPE_TEXTURE_1D:
+ tex->key.flags |= SVGA3D_SURFACE_1D;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ tex->key.flags |= SVGA3D_SURFACE_1D;
+ /* fall-through */
+ case PIPE_TEXTURE_2D_ARRAY:
+ tex->key.flags |= SVGA3D_SURFACE_ARRAY;
+ tex->key.arraySize = template->array_size;
+ break;
+ case PIPE_TEXTURE_3D:
+ tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ tex->key.flags |= (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_ARRAY);
+ tex->key.numFaces = 6;
+ break;
+ default:
+ break;
+ }
+ }
+ else {
+ switch (template->target) {
+ case PIPE_TEXTURE_3D:
+ tex->key.flags |= SVGA3D_SURFACE_VOLUME;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ tex->key.flags |= SVGA3D_SURFACE_CUBEMAP;
+ tex->key.numFaces = 6;
+ break;
+ default:
+ break;
+ }
}
tex->key.cachable = 1;
- if (template->bind & PIPE_BIND_SAMPLER_VIEW)
+ if (bindings & PIPE_BIND_SAMPLER_VIEW) {
tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE;
+ tex->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE;
+
+ if (!(bindings & PIPE_BIND_RENDER_TARGET)) {
+ /* Also check if the format is renderable */
+ if (screen->is_format_supported(screen, template->format,
+ template->target,
+ template->nr_samples,
+ PIPE_BIND_RENDER_TARGET)) {
+ bindings |= PIPE_BIND_RENDER_TARGET;
+ }
+ }
+ }
- if (template->bind & PIPE_BIND_DISPLAY_TARGET) {
+ if (bindings & PIPE_BIND_DISPLAY_TARGET) {
tex->key.cachable = 0;
}
- if (template->bind & PIPE_BIND_SHARED) {
+ if (bindings & PIPE_BIND_SHARED) {
tex->key.cachable = 0;
}
- if (template->bind & (PIPE_BIND_SCANOUT |
- PIPE_BIND_CURSOR)) {
- tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT;
+ if (bindings & (PIPE_BIND_SCANOUT | PIPE_BIND_CURSOR)) {
+ tex->key.scanout = 1;
tex->key.cachable = 0;
}
- /*
+ /*
* Note: Previously we never passed the
* SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot
* know beforehand whether a texture will be used as a rendertarget or not
* (XA for example) uses it accurately and certain device versions
* relies on it in certain situations to render correctly.
*/
- if((template->bind & PIPE_BIND_RENDER_TARGET) &&
- !util_format_is_s3tc(template->format))
+ if ((bindings & PIPE_BIND_RENDER_TARGET) &&
+ !util_format_is_s3tc(template->format)) {
tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET;
-
- if(template->bind & PIPE_BIND_DEPTH_STENCIL)
+ tex->key.flags |= SVGA3D_SURFACE_BIND_RENDER_TARGET;
+ }
+
+ if (bindings & PIPE_BIND_DEPTH_STENCIL) {
tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
-
+ tex->key.flags |= SVGA3D_SURFACE_BIND_DEPTH_STENCIL;
+ }
+
tex->key.numMipLevels = template->last_level + 1;
-
- tex->key.format = svga_translate_format(svgascreen, template->format, template->bind);
- if(tex->key.format == SVGA3D_FORMAT_INVALID)
- goto error2;
+
+ tex->key.format = svga_translate_format(svgascreen, template->format,
+ bindings);
+ if (tex->key.format == SVGA3D_FORMAT_INVALID) {
+ FREE(tex->defined);
+ FREE(tex->rendered_to);
+ FREE(tex);
+ return NULL;
+ }
+
+ /* Use typeless formats for sRGB and depth resources. Typeless
+ * formats can be reinterpreted as other formats. For example,
+ * SVGA3D_R8G8B8A8_UNORM_TYPELESS can be interpreted as
+ * SVGA3D_R8G8B8A8_UNORM_SRGB or SVGA3D_R8G8B8A8_UNORM.
+ */
+ if (svgascreen->sws->have_vgpu10 &&
+ (util_format_is_srgb(template->format) ||
+ format_has_depth(template->format))) {
+ SVGA3dSurfaceFormat typeless = svga_typeless_format(tex->key.format);
+ if (0) {
+ debug_printf("Convert resource type %s -> %s (bind 0x%x)\n",
+ svga_format_name(tex->key.format),
+ svga_format_name(typeless),
+ bindings);
+ }
+ tex->key.format = typeless;
+ }
SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle);
- tex->handle = svga_screen_surface_create(svgascreen, &tex->key);
- if (!tex->handle)
- goto error2;
+ tex->handle = svga_screen_surface_create(svgascreen, bindings,
+ tex->b.b.usage, &tex->key);
+ if (!tex->handle) {
+ FREE(tex->defined);
+ FREE(tex->rendered_to);
+ FREE(tex);
+ return NULL;
+ }
SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle);
tex->size = util_resource_size(template);
svgascreen->total_resource_bytes += tex->size;
- tex->rendered_to = CALLOC(template->depth0 * template->array_size,
- sizeof(tex->rendered_to[0]));
- if (!tex->rendered_to)
- goto error2;
-
return &tex->b.b;
-
-error2:
- FREE(tex->rendered_to);
- FREE(tex);
-error1:
- return NULL;
}
if (!srf)
return NULL;
- if (svga_translate_format(svga_screen(screen), template->format, template->bind) != format) {
- unsigned f1 = svga_translate_format(svga_screen(screen), template->format, template->bind);
+ if (svga_translate_format(svga_screen(screen), template->format,
+ template->bind) != format) {
+ unsigned f1 = svga_translate_format(svga_screen(screen),
+ template->format, template->bind);
unsigned f2 = format;
- /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */
- if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) ||
+ /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up.
+ */
+ if (f1 == SVGA3D_B8G8R8A8_UNORM)
+ f1 = SVGA3D_A8R8G8B8;
+ if (f1 == SVGA3D_B8G8R8X8_UNORM)
+ f1 = SVGA3D_X8R8G8B8;
+
+ if ( !( (f1 == f2) ||
+ (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) ||
+ (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_B8G8R8X8_UNORM) ||
(f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) ||
+ (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_B8G8R8A8_UNORM) ||
(f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ||
(f1 == SVGA3D_Z_DF24 && f2 == SVGA3D_Z_D24S8_INT) ) ) {
- debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2);
+ debug_printf("%s wrong format %s != %s\n", __FUNCTION__,
+ svga_format_name(f1), svga_format_name(f2));
return NULL;
}
}
if (!tex)
return NULL;
+ tex->defined = CALLOC(template->depth0 * template->array_size,
+ sizeof(tex->defined[0]));
+ if (!tex->defined) {
+ FREE(tex);
+ return NULL;
+ }
+
tex->b.b = *template;
tex->b.vtbl = &svga_texture_vtbl;
pipe_reference_init(&tex->b.b.reference, 1);
SVGA_DBG(DEBUG_DMA, "wrap surface sid %p\n", srf);
tex->key.cachable = 0;
+ tex->key.format = format;
tex->handle = srf;
tex->rendered_to = CALLOC(1, sizeof(tex->rendered_to[0]));
+ tex->imported = TRUE;
return &tex->b.b;
}
{
struct u_resource b;
- boolean defined[6][SVGA_MAX_TEXTURE_LEVELS];
+ ushort *defined;
struct svga_sampler_view *cached_view;
*/
struct svga_winsys_surface *handle;
+ /**
+ * Whether the host side surface is imported and not created by this
+ * driver.
+ */
+ boolean imported;
+
unsigned size; /**< Approximate size in bytes */
/** array indexed by cube face or 3D/array slice, one bit per mipmap level */
{
struct pipe_transfer base;
- unsigned face;
+ unsigned slice; /**< array slice or cube face */
struct svga_winsys_buffer *hwbuf;
}
-/**
- * Mark the given texture face/level as being defined.
- */
-static inline void
-svga_define_texture_level(struct svga_texture *tex,
- unsigned face,unsigned level)
-{
- assert(face < Elements(tex->defined));
- assert(level < Elements(tex->defined[0]));
- tex->defined[face][level] = TRUE;
-}
-
-
-static inline bool
-svga_is_texture_level_defined(const struct svga_texture *tex,
- unsigned face, unsigned level)
-{
- assert(face < Elements(tex->defined));
- assert(level < Elements(tex->defined[0]));
- return tex->defined[face][level];
-}
-
-
/** For debugging, check that face and level are legal */
static inline void
check_face_level(const struct svga_texture *tex,
}
+/**
+ * Record that the given texture face/level is defined by setting the
+ * level's bit in the bitmask stored for that face.
+ */
+static inline void
+svga_define_texture_level(struct svga_texture *tex,
+                          unsigned face, unsigned level)
+{
+   int level_bit;
+
+   /* Validate face/level before they are used to index or shift. */
+   check_face_level(tex, face, level);
+
+   level_bit = 1 << level;
+   tex->defined[face] |= level_bit;
+}
+
+
+/**
+ * Query whether the given texture face/level has been marked as defined,
+ * i.e. whether the level's bit is set in the bitmask stored for that face.
+ */
+static inline bool
+svga_is_texture_level_defined(const struct svga_texture *tex,
+                              unsigned face, unsigned level)
+{
+   int level_bit;
+   int masked;
+
+   /* Validate face/level before they are used to index or shift. */
+   check_face_level(tex, face, level);
+
+   level_bit = 1 << level;
+   masked = tex->defined[face] & level_bit;
+   return masked != 0;
+}
+
+
static inline void
svga_set_texture_rendered_to(struct svga_texture *tex,
unsigned face, unsigned level)
assert(pt);
assert(min_lod <= max_lod);
assert(max_lod <= pt->last_level);
-
+ assert(!svga_have_vgpu10(svga));
/* Is a view needed */
{
pt->last_level);
sv->age = tex->age;
- sv->handle = svga_texture_view_surface(svga, tex, flags, format,
+ sv->handle = svga_texture_view_surface(svga, tex,
+ PIPE_BIND_SAMPLER_VIEW,
+ flags, format,
min_lod,
max_lod - min_lod + 1,
- -1, -1,
+ -1, 1, -1,
&sv->key);
if (!sv->handle) {
unsigned k;
assert(svga);
+ assert(!svga_have_vgpu10(svga));
if (v->handle == tex->handle)
return;
struct pipe_screen;
struct svga_context;
struct svga_winsys_surface;
+struct svga_surface;
enum SVGA3dSurfaceFormat;
*ptr = v;
}
-
+boolean
+svga_check_sampler_view_resource_collision(struct svga_context *svga,
+ struct svga_winsys_surface *res,
+ unsigned shader);
#endif
#include "svga_context.h"
#include "svga_format.h"
#include "svga_screen.h"
+#include "svga_tgsi.h"
#include "svga_resource_texture.h"
#include "svga_resource.h"
#include "svga_debug.h"
#include "svga3d_shaderdefs.h"
+#include "VGPU10ShaderTokens.h"
+/* NOTE: this constant may get moved into a svga3d*.h header file */
+#define SVGA3D_DX_MAX_RESOURCE_SIZE (128 * 1024 * 1024)
#ifdef DEBUG
int SVGA_DEBUG = 0;
static const struct debug_named_value svga_debug_flags[] = {
- { "dma", DEBUG_DMA, NULL },
- { "tgsi", DEBUG_TGSI, NULL },
- { "pipe", DEBUG_PIPE, NULL },
- { "state", DEBUG_STATE, NULL },
- { "screen", DEBUG_SCREEN, NULL },
- { "tex", DEBUG_TEX, NULL },
- { "swtnl", DEBUG_SWTNL, NULL },
- { "const", DEBUG_CONSTS, NULL },
- { "viewport", DEBUG_VIEWPORT, NULL },
- { "views", DEBUG_VIEWS, NULL },
- { "perf", DEBUG_PERF, NULL },
- { "flush", DEBUG_FLUSH, NULL },
- { "sync", DEBUG_SYNC, NULL },
- { "cache", DEBUG_CACHE, NULL },
+ { "dma", DEBUG_DMA, NULL },
+ { "tgsi", DEBUG_TGSI, NULL },
+ { "pipe", DEBUG_PIPE, NULL },
+ { "state", DEBUG_STATE, NULL },
+ { "screen", DEBUG_SCREEN, NULL },
+ { "tex", DEBUG_TEX, NULL },
+ { "swtnl", DEBUG_SWTNL, NULL },
+ { "const", DEBUG_CONSTS, NULL },
+ { "viewport", DEBUG_VIEWPORT, NULL },
+ { "views", DEBUG_VIEWS, NULL },
+ { "perf", DEBUG_PERF, NULL },
+ { "flush", DEBUG_FLUSH, NULL },
+ { "sync", DEBUG_SYNC, NULL },
+ { "cache", DEBUG_CACHE, NULL },
+ { "streamout", DEBUG_STREAMOUT, NULL },
+ { "query", DEBUG_QUERY, NULL },
DEBUG_NAMED_VALUE_END
};
#endif
*/
build = "build: DEBUG;";
mutex = "mutex: " PIPE_ATOMIC ";";
-#ifdef HAVE_LLVM
- llvm = "LLVM;";
-#endif
#else
build = "build: RELEASE;";
#endif
+#ifdef HAVE_LLVM
+ llvm = "LLVM;";
+#endif
util_snprintf(name, sizeof(name), "SVGA3D; %s %s %s", build, mutex, llvm);
return name;
}
+/**
+ * Helper for querying a float-valued device cap.
+ *
+ * \return the float member of the query result when the winsys get_cap()
+ *         call succeeds, otherwise defaultVal
+ */
+static float
+get_float_cap(struct svga_winsys_screen *sws, unsigned cap, float defaultVal)
+{
+   SVGA3dDevCapResult devcap;
+
+   if (!sws->get_cap(sws, cap, &devcap)) {
+      return defaultVal;
+   }
+   return devcap.f;
+}
+
+
+/**
+ * Helper for querying an unsigned-integer-valued device cap.
+ *
+ * \return the unsigned member of the query result when the winsys
+ *         get_cap() call succeeds, otherwise defaultVal
+ */
+static unsigned
+get_uint_cap(struct svga_winsys_screen *sws, unsigned cap, unsigned defaultVal)
+{
+   SVGA3dDevCapResult devcap;
+
+   return sws->get_cap(sws, cap, &devcap) ? devcap.u : defaultVal;
+}
+
+
+/**
+ * Helper for querying a boolean-valued device cap.
+ *
+ * \return the boolean member of the query result when the winsys
+ *         get_cap() call succeeds, otherwise defaultVal
+ */
+static boolean
+get_bool_cap(struct svga_winsys_screen *sws, unsigned cap, boolean defaultVal)
+{
+   SVGA3dDevCapResult devcap;
+   boolean value = defaultVal;
+
+   if (sws->get_cap(sws, cap, &devcap)) {
+      value = devcap.b;
+   }
+   return value;
+}
static float
{
struct svga_screen *svgascreen = svga_screen(screen);
struct svga_winsys_screen *sws = svgascreen->sws;
- SVGA3dDevCapResult result;
switch (param) {
case PIPE_CAPF_MAX_LINE_WIDTH:
return svgascreen->maxPointSize;
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
- if(!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY, &result))
- return 4.0f;
- return (float) result.u;
+ return (float) get_uint_cap(sws, SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY, 4);
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
return 15.0;
+
case PIPE_CAPF_GUARD_BAND_LEFT:
case PIPE_CAPF_GUARD_BAND_TOP:
case PIPE_CAPF_GUARD_BAND_RIGHT:
case PIPE_CAP_TWO_SIDED_STENCIL:
return 1;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
- return 0;
+ /*
+ * "In virtually every OpenGL implementation and hardware,
+ * GL_MAX_DUAL_SOURCE_DRAW_BUFFERS is 1"
+ * http://www.opengl.org/wiki/Blending
+ */
+ return sws->have_vgpu10 ? 1 : 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 1;
case PIPE_CAP_POINT_SPRITE:
return 1;
case PIPE_CAP_QUERY_TIME_ELAPSED:
return 0;
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ return sws->have_vgpu10;
case PIPE_CAP_TEXTURE_SHADOW_MAP:
return 1;
case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_USER_CONSTANT_BUFFERS:
return 1;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
- return 16;
+ return 256;
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
{
return MIN2(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS),
12 /* 2048x2048 */);
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return sws->have_vgpu10 ? SVGA3D_MAX_SURFACE_ARRAYSIZE : 0;
+
case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */
return 1;
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
return 1;
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
- return 0;
+ return sws->have_vgpu10;
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
return 0;
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
- return 1;
+ return !sws->have_vgpu10;
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
return 1; /* The color outputs of vertex shaders are not clamped */
return 1; /* expected for GL_ARB_framebuffer_object */
case PIPE_CAP_GLSL_FEATURE_LEVEL:
- return 120;
+ return sws->have_vgpu10 ? 330 : 120;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return 0;
case PIPE_CAP_SM3:
return 1;
- /* Unsupported features */
- case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
- case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_DEPTH_CLIP_DISABLE:
- case PIPE_CAP_SEAMLESS_CUBE_MAP:
- case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_INDEP_BLEND_ENABLE:
- case PIPE_CAP_INDEP_BLEND_FUNC:
- case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
- case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_TGSI_INSTANCEID:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
- case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ case PIPE_CAP_FAKE_SW_MSAA:
+ return sws->have_vgpu10;
+
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ return sws->have_vgpu10 ? SVGA3D_DX_MAX_SOTARGETS : 0;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ return sws->have_vgpu10 ? 4 : 0;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ return sws->have_vgpu10 ? SVGA3D_MAX_STREAMOUT_DECLS : 0;
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ return 0;
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ return svgascreen->ms_samples ? 1 : 0;
+
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ return SVGA3D_DX_MAX_RESOURCE_SIZE;
+
case PIPE_CAP_MIN_TEXEL_OFFSET:
+ return sws->have_vgpu10 ? VGPU10_MIN_TEXEL_FETCH_OFFSET : 0;
case PIPE_CAP_MAX_TEXEL_OFFSET:
+ return sws->have_vgpu10 ? VGPU10_MAX_TEXEL_FETCH_OFFSET : 0;
+
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
- case PIPE_CAP_CONDITIONAL_RENDER:
- case PIPE_CAP_TEXTURE_BARRIER:
- case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
- case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
- case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ return 0;
+
case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+ return sws->have_vgpu10 ? 256 : 0;
case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+ return sws->have_vgpu10 ? 1024 : 0;
+
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ return 1; /* may be a sw fallback, depending on restart index */
+
+ /* Unsupported features */
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_MAX_VERTEX_STREAMS:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
- case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_START_INSTANCE:
- case PIPE_CAP_QUERY_TIMESTAMP:
- case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_CUBE_MAP_ARRAY:
- case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
- case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
- case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
return 0;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return 64;
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
- return 1;
+ return 1; /* need 4-byte alignment for all offsets and strides */
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
return 2048;
case PIPE_CAP_MAX_VIEWPORTS:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
return 0;
}
return 0;
}
-static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
+
+static int
+vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
+ enum pipe_shader_cap param)
{
struct svga_screen *svgascreen = svga_screen(screen);
struct svga_winsys_screen *sws = svgascreen->sws;
- SVGA3dDevCapResult result;
+ unsigned val;
+
+ assert(!sws->have_vgpu10);
switch (shader)
{
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return 1;
case PIPE_SHADER_CAP_MAX_TEMPS:
- if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, &result))
- return 32;
- return MIN2(result.u, SVGA3D_TEMPREG_MAX);
+ val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, 32);
+ return MIN2(val, SVGA3D_TEMPREG_MAX);
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
/*
* Although PS 3.0 has some addressing abilities it can only represent
{
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
- if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS, &result))
- return 512;
- return result.u;
+ return get_uint_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS,
+ 512);
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
/* XXX: until we have vertex texture support */
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return 1;
case PIPE_SHADER_CAP_MAX_TEMPS:
- if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, &result))
- return 32;
- return MIN2(result.u, SVGA3D_TEMPREG_MAX);
+ val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, 32);
+ return MIN2(val, SVGA3D_TEMPREG_MAX);
case PIPE_SHADER_CAP_MAX_PREDS:
return 1;
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
}
+/**
+ * Return the value of a shader cap when running on a VGPU10 device.
+ *
+ * Only vertex, geometry and fragment shaders are handled; every cap of any
+ * other shader type is reported as 0.  All values come from compile-time
+ * limits or from the screen's max_const_buffers field; the device itself
+ * is not queried here.
+ *
+ * \param screen  the pipe screen (must wrap an svga_screen with VGPU10)
+ * \param shader  PIPE_SHADER_x stage being asked about
+ * \param param   the PIPE_SHADER_CAP_x being queried
+ * \return the cap value, or 0 for unsupported/unknown caps
+ */
+static int
+vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader,
+                        enum pipe_shader_cap param)
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+
+   assert(sws->have_vgpu10);
+   (void) sws; /* silence unused var warnings in non-debug builds */
+
+   /* Only VS, GS, FS supported */
+   switch (shader) {
+   case PIPE_SHADER_VERTEX:
+   case PIPE_SHADER_GEOMETRY:
+   case PIPE_SHADER_FRAGMENT:
+      break;
+   default:
+      return 0;
+   }
+
+   /* NOTE: we do not query the device for any caps/limits at this time */
+
+   /* Generally the same limits for vertex, geometry and fragment shaders */
+   switch (param) {
+   /* Instruction count limits */
+   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+      return 64 * 1024;
+
+   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+      return 64;
+
+   /* Input/output register limits depend on the shader stage */
+   case PIPE_SHADER_CAP_MAX_INPUTS:
+      switch (shader) {
+      case PIPE_SHADER_FRAGMENT:
+         return VGPU10_MAX_FS_INPUTS;
+      case PIPE_SHADER_GEOMETRY:
+         return VGPU10_MAX_GS_INPUTS;
+      default:
+         return VGPU10_MAX_VS_INPUTS;
+      }
+   case PIPE_SHADER_CAP_MAX_OUTPUTS:
+      switch (shader) {
+      case PIPE_SHADER_FRAGMENT:
+         return VGPU10_MAX_FS_OUTPUTS;
+      case PIPE_SHADER_GEOMETRY:
+         return VGPU10_MAX_GS_OUTPUTS;
+      default:
+         return VGPU10_MAX_VS_OUTPUTS;
+      }
+
+   /* Constant buffers and temporaries */
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+      return VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT * sizeof(float[4]);
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+      return svgascreen->max_const_buffers;
+   case PIPE_SHADER_CAP_MAX_TEMPS:
+      return VGPU10_MAX_TEMPS;
+
+   /* Indirect addressing */
+   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+      return TRUE; /* XXX verify */
+
+   /* Supported TGSI features */
+   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+   case PIPE_SHADER_CAP_SUBROUTINES:
+   case PIPE_SHADER_CAP_INTEGERS:
+      return TRUE;
+
+   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+      return SVGA3D_DX_MAX_SAMPLERS;
+
+   case PIPE_SHADER_CAP_PREFERRED_IR:
+      return PIPE_SHADER_IR_TGSI;
+
+   /* Caps reported as 0: fall out of the switch to the common return */
+   case PIPE_SHADER_CAP_MAX_PREDS:
+   case PIPE_SHADER_CAP_DOUBLES:
+   case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+      break;
+
+   default:
+      debug_printf("Unexpected vgpu10 shader query %u\n", param);
+      break;
+   }
+
+   return 0;
+}
+
+
+/**
+ * Implement pipe_screen::get_shader_param().
+ * Dispatches to the VGPU10 or VGPU9 variant depending on whether the
+ * winsys screen reports have_vgpu10.
+ */
+static int
+svga_get_shader_param(struct pipe_screen *screen, unsigned shader,
+                      enum pipe_shader_cap param)
+{
+   struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+
+   return sws->have_vgpu10
+      ? vgpu10_get_shader_param(screen, shader, param)
+      : vgpu9_get_shader_param(screen, shader, param);
+}
+
+
/**
- * Implemnt pipe_screen::is_format_supported().
+ * Implement pipe_screen::is_format_supported().
* \param bindings bitmask of PIPE_BIND_x flags
*/
static boolean
assert(bindings);
if (sample_count > 1) {
- return FALSE;
+ /* In ms_samples, if bit N is set it means that we support
+ * multisample with N+1 samples per pixel.
+ */
+ if ((ss->ms_samples & (1 << (sample_count - 1))) == 0) {
+ return FALSE;
+ }
}
svga_format = svga_translate_format(ss, format, bindings);
return FALSE;
}
+ /* we don't support sRGB rendering into display targets */
+ if (util_format_is_srgb(format) && (bindings & PIPE_BIND_DISPLAY_TARGET)) {
+ return FALSE;
+ }
+
+ /*
+ * For VGPU10 vertex formats, skip querying host capabilities
+ */
+
+ if (ss->sws->have_vgpu10 && (bindings & PIPE_BIND_VERTEX_BUFFER)) {
+ SVGA3dSurfaceFormat svga_format;
+ unsigned flags;
+ svga_translate_vertex_format_vgpu10(format, &svga_format, &flags);
+ return svga_format != SVGA3D_FORMAT_INVALID;
+ }
+
/*
* Override host capabilities, so that we end up with the same
* visuals for all virtual hardware implementations.
case SVGA3D_R5G6B5:
break;
+ /* VGPU10 formats */
+ case SVGA3D_B8G8R8A8_UNORM:
+ case SVGA3D_B8G8R8X8_UNORM:
+ case SVGA3D_B5G6R5_UNORM:
+ break;
+
/* Often unsupported/problematic. This means we end up with the same
* visuals for all virtual hardware implementations.
*/
svga_get_format_cap(ss, svga_format, &caps);
+ if (bindings & PIPE_BIND_RENDER_TARGET) {
+ /* Check that the color surface is blendable, unless it's an
+ * integer format.
+ */
+ if (!svga_format_is_integer(svga_format) &&
+ (caps.value & SVGA3DFORMAT_OP_NOALPHABLEND)) {
+ return FALSE;
+ }
+ }
+
mask.value = 0;
if (bindings & PIPE_BIND_RENDER_TARGET) {
- mask.offscreenRenderTarget = 1;
+ mask.value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET;
}
if (bindings & PIPE_BIND_DEPTH_STENCIL) {
- mask.zStencil = 1;
+ mask.value |= SVGA3DFORMAT_OP_ZSTENCIL;
}
if (bindings & PIPE_BIND_SAMPLER_VIEW) {
- mask.texture = 1;
+ mask.value |= SVGA3DFORMAT_OP_TEXTURE;
}
if (target == PIPE_TEXTURE_CUBE) {
- mask.cubeTexture = 1;
+ mask.value |= SVGA3DFORMAT_OP_CUBETEXTURE;
}
- if (target == PIPE_TEXTURE_3D) {
- mask.volumeTexture = 1;
+ else if (target == PIPE_TEXTURE_3D) {
+ mask.value |= SVGA3DFORMAT_OP_VOLUMETEXTURE;
}
return (caps.value & mask.value) == mask.value;
{
struct svga_screen *svgascreen;
struct pipe_screen *screen;
- SVGA3dDevCapResult result;
- boolean use_vs30, use_ps30;
#ifdef DEBUG
SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 );
screen->get_param = svga_get_param;
screen->get_shader_param = svga_get_shader_param;
screen->get_paramf = svga_get_paramf;
+ screen->get_timestamp = NULL;
screen->is_format_supported = svga_is_format_supported;
screen->context_create = svga_context_create;
screen->fence_reference = svga_fence_reference;
svgascreen->hw_version = SVGA3D_HWVERSION_WS65_B1;
}
- use_ps30 =
- sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) &&
- result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE;
-
- use_vs30 =
- sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) &&
- result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE;
-
- /* we require Shader model 3.0 or later */
- if (!use_ps30 || !use_vs30)
- goto error2;
-
/*
* The D16, D24X8, and D24S8 formats always do an implicit shadow compare
* when sampled from, where as the DF16, DF24, and D24S8_INT do not. So
/* Query device caps
*/
- if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, &result))
- svgascreen->haveLineStipple = FALSE;
- else
- svgascreen->haveLineStipple = result.u;
+ if (sws->have_vgpu10) {
+ svgascreen->haveProvokingVertex
+ = get_bool_cap(sws, SVGA3D_DEVCAP_DX_PROVOKING_VERTEX, FALSE);
+ svgascreen->haveLineSmooth = TRUE;
+ svgascreen->maxPointSize = 80.0F;
+ svgascreen->max_color_buffers = SVGA3D_DX_MAX_RENDER_TARGETS;
+
+ /* Multisample samples per pixel */
+ svgascreen->ms_samples =
+ get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0);
+
+ /* Maximum number of constant buffers */
+ svgascreen->max_const_buffers =
+ get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1);
+ assert(svgascreen->max_const_buffers <= SVGA_MAX_CONST_BUFS);
+ }
+ else {
+ /* VGPU9 */
+ unsigned vs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION,
+ SVGA3DVSVERSION_NONE);
+ unsigned fs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION,
+ SVGA3DPSVERSION_NONE);
+
+ /* we require Shader model 3.0 or later */
+ if (fs_ver < SVGA3DPSVERSION_30 || vs_ver < SVGA3DVSVERSION_30) {
+ goto error2;
+ }
- if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_AA, &result))
- svgascreen->haveLineSmooth = FALSE;
- else
- svgascreen->haveLineSmooth = result.u;
+ svgascreen->haveProvokingVertex = FALSE;
- if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, &result))
- svgascreen->maxLineWidth = 1.0F;
- else
- svgascreen->maxLineWidth = result.f;
+ svgascreen->haveLineSmooth =
+ get_bool_cap(sws, SVGA3D_DEVCAP_LINE_AA, FALSE);
- if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, &result))
- svgascreen->maxLineWidthAA = 1.0F;
- else
- svgascreen->maxLineWidthAA = result.f;
+ svgascreen->maxPointSize =
+ get_float_cap(sws, SVGA3D_DEVCAP_MAX_POINT_SIZE, 1.0f);
+ /* Keep this to a reasonable size to avoid failures in conform/pntaa.c */
+ svgascreen->maxPointSize = MIN2(svgascreen->maxPointSize, 80.0f);
+
+ /* The SVGA3D device always supports 4 targets at this time, regardless
+ * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return.
+ */
+ svgascreen->max_color_buffers = 4;
+
+ /* Only support one constant buffer
+ */
+ svgascreen->max_const_buffers = 1;
- if (0)
+ /* No multisampling */
+ svgascreen->ms_samples = 0;
+ }
+
+ /* common VGPU9 / VGPU10 caps */
+ svgascreen->haveLineStipple =
+ get_bool_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, FALSE);
+
+ svgascreen->maxLineWidth =
+ get_float_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, 1.0f);
+
+ svgascreen->maxLineWidthAA =
+ get_float_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, 1.0f);
+
+ if (0) {
+ debug_printf("svga: haveProvokingVertex %u\n",
+ svgascreen->haveProvokingVertex);
debug_printf("svga: haveLineStip %u "
"haveLineSmooth %u maxLineWidth %f\n",
svgascreen->haveLineStipple, svgascreen->haveLineSmooth,
svgascreen->maxLineWidth);
-
- if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_POINT_SIZE, &result)) {
- svgascreen->maxPointSize = 1.0F;
- } else {
- /* Keep this to a reasonable size to avoid failures in
- * conform/pntaa.c:
- */
- svgascreen->maxPointSize = MIN2(result.f, 80.0f);
+ debug_printf("svga: maxPointSize %g\n", svgascreen->maxPointSize);
}
- /* The SVGA3D device always supports 4 targets at this time, regardless
- * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return.
- */
- svgascreen->max_color_buffers = 4;
-
pipe_mutex_init(svgascreen->tex_mutex);
pipe_mutex_init(svgascreen->swc_mutex);
-/**********************************************************
+ /**********************************************************
* Copyright 2008-2009 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
SVGA3dHardwareVersion hw_version;
/** Device caps */
+ boolean haveProvokingVertex;
boolean haveLineStipple, haveLineSmooth;
float maxLineWidth, maxLineWidthAA;
float maxPointSize;
unsigned max_color_buffers;
+ unsigned max_const_buffers;
+ unsigned ms_samples;
struct {
boolean force_level_surface_view;
/* which formats to translate depth formats into */
struct {
enum SVGA3dSurfaceFormat z16;
+
/* note gallium order */
enum SVGA3dSurfaceFormat x8z24;
enum SVGA3dSurfaceFormat s8z24;
assert(entry->handle);
+ /* If the key matches and the fence is signalled (the surface is no
+ * longer needed) the lookup was successful. We found a surface that
+ * can be reused.
+ * We unlink the surface from the cache entry and we add the entry to
+ * the 'empty' list.
+ */
if (memcmp(&entry->key, key, sizeof *key) == 0 &&
- sws->fence_signalled(sws, entry->fence, 0) == 0) {
+ sws->fence_signalled(sws, entry->fence, 0) == 0) {
unsigned surf_size;
assert(sws->surface_is_flushed(sws, entry->handle));
handle = entry->handle; /* Reference is transfered here. */
entry->handle = NULL;
+ /* Remove from hash table */
LIST_DEL(&entry->bucket_head);
+ /* remove from LRU list */
LIST_DEL(&entry->head);
+ /* Add the cache entry (but not the surface!) to the empty list */
LIST_ADD(&entry->head, &cache->empty);
/* update the cache size */
/**
- * Transfers a handle reference.
+ * Add a surface to the cache. This is done when the driver deletes
+ * the surface. Note: transfers a handle reference.
*/
static void
svga_screen_cache_add(struct svga_screen *svgascreen,
struct svga_host_surface_cache_entry *entry = NULL;
struct svga_winsys_surface *handle = *p_handle;
unsigned surf_size;
-
+
assert(key->cachable);
if (!handle)
return;
-
+
surf_size = surface_size(key);
*p_handle = NULL;
pipe_mutex_lock(cache->mutex);
-
+
if (surf_size >= SVGA_HOST_SURFACE_CACHE_BYTES) {
/* this surface is too large to cache, just free it */
sws->surface_reference(sws, &handle, NULL);
}
if (!LIST_IS_EMPTY(&cache->empty)) {
- /* use the first empty entry */
+ /* An empty entry has no surface associated with it.
+ * Use the first empty entry.
+ */
entry = LIST_ENTRY(struct svga_host_surface_cache_entry,
cache->empty.next, head);
+ /* Remove from LRU list */
LIST_DEL(&entry->head);
}
else if (!LIST_IS_EMPTY(&cache->unused)) {
sws->surface_reference(sws, &entry->handle, NULL);
+ /* Remove from hash table */
LIST_DEL(&entry->bucket_head);
+ /* Remove from LRU list */
LIST_DEL(&entry->head);
}
if (entry) {
+ assert(entry->handle == NULL);
entry->handle = handle;
memcpy(&entry->key, key, sizeof entry->key);
pipe_mutex_lock(cache->mutex);
+ /* Loop over entries in the validated list */
curr = cache->validated.next;
next = curr->next;
while (curr != &cache->validated) {
assert(entry->handle);
if (sws->surface_is_flushed(sws, entry->handle)) {
+ /* remove entry from LRU list */
LIST_DEL(&entry->head);
svgascreen->sws->fence_reference(svgascreen->sws, &entry->fence, fence);
+ /* Add entry to the unused list */
LIST_ADD(&entry->head, &cache->unused);
+ /* Add entry to the hash table bucket */
bucket = svga_screen_cache_bucket(&entry->key);
LIST_ADD(&entry->bucket_head, &cache->bucket[bucket]);
}
* Allocate a new host-side surface. If the surface is marked as cachable,
* first try re-using a surface in the cache of freed surfaces. Otherwise,
* allocate a new surface.
+ * \param bind_flags bitmask of PIPE_BIND_x flags
+ * \param usage one of PIPE_USAGE_x values
*/
struct svga_winsys_surface *
svga_screen_surface_create(struct svga_screen *svgascreen,
+ unsigned bind_flags, unsigned usage,
struct svga_host_surface_cache_key *key)
{
struct svga_winsys_screen *sws = svgascreen->sws;
boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable;
SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
- "%s sz %dx%dx%d mips %d faces %d cachable %d\n",
+ "%s sz %dx%dx%d mips %d faces %d arraySize %d cachable %d\n",
__FUNCTION__,
key->size.width,
key->size.height,
key->size.depth,
key->numMipLevels,
key->numFaces,
+ key->arraySize,
key->cachable);
if (cachable) {
if (key->format == SVGA3D_BUFFER) {
+ SVGA3dSurfaceFlags hint_flag;
+
/* For buffers, round the buffer size up to the nearest power
* of two to increase the probability of cache hits. Keep
* texture surface dimensions unchanged.
while (size < key->size.width)
size <<= 1;
key->size.width = size;
- /* Since we're reusing buffers we're effectively transforming all
- * of them into dynamic buffers.
- *
- * It would be nice to not cache long lived static buffers. But there
- * is no way to detect the long lived from short lived ones yet. A
- * good heuristic would be buffer size.
- */
- key->flags &= ~SVGA3D_SURFACE_HINT_STATIC;
- key->flags |= SVGA3D_SURFACE_HINT_DYNAMIC;
+
+ /* Determine whether the buffer is static or dynamic.
+ * This is a bit of a heuristic which can be tuned as needed.
+ */
+ if (usage == PIPE_USAGE_DEFAULT ||
+ usage == PIPE_USAGE_IMMUTABLE) {
+ hint_flag = SVGA3D_SURFACE_HINT_STATIC;
+ }
+ else if (bind_flags & PIPE_BIND_INDEX_BUFFER) {
+ /* Index buffers don't change too often. Mark them as static.
+ */
+ hint_flag = SVGA3D_SURFACE_HINT_STATIC;
+ }
+ else {
+ /* Since we're reusing buffers we're effectively transforming all
+ * of them into dynamic buffers.
+ *
+ * It would be nice to not cache long lived static buffers. But there
+ * is no way to detect the long lived from short lived ones yet. A
+ * good heuristic would be buffer size.
+ */
+ hint_flag = SVGA3D_SURFACE_HINT_DYNAMIC;
+ }
+
+ key->flags &= ~(SVGA3D_SURFACE_HINT_STATIC |
+ SVGA3D_SURFACE_HINT_DYNAMIC);
+ key->flags |= hint_flag;
}
handle = svga_screen_cache_lookup(svgascreen, key);
key->size.width);
else
SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
- "reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle,
+ "reuse sid %p sz %dx%dx%d mips %d faces %d arraySize %d\n", handle,
key->size.width,
key->size.height,
key->size.depth,
key->numMipLevels,
- key->numFaces);
+ key->numFaces,
+ key->arraySize);
}
}
if (!handle) {
+ unsigned usage = 0;
+
+ if (!key->cachable)
+ usage |= SVGA_SURFACE_USAGE_SHARED;
+ if (key->scanout)
+ usage |= SVGA_SURFACE_USAGE_SCANOUT;
+
handle = sws->surface_create(sws,
key->flags,
key->format,
- key->cachable ?
- 0 : SVGA_SURFACE_USAGE_SHARED,
+ usage,
key->size,
- key->numFaces,
- key->numMipLevels);
+ key->numFaces * key->arraySize,
+ key->numMipLevels,
+ key->sampleCount);
if (handle)
SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
" CREATE sid %p sz %dx%dx%d\n",
SVGA3dSurfaceFlags flags;
SVGA3dSurfaceFormat format;
SVGA3dSize size;
- uint32_t numFaces:24;
- uint32_t numMipLevels:7;
+ uint32_t numFaces:3;
+ uint32_t arraySize:16;
+ uint32_t numMipLevels:6;
uint32_t cachable:1; /* False if this is a shared surface */
+ uint32_t sampleCount:5;
+ uint32_t scanout:1;
};
struct svga_winsys_surface *
svga_screen_surface_create(struct svga_screen *svgascreen,
+ unsigned bind_flags, unsigned usage,
struct svga_host_surface_cache_key *key);
void
#include "util/u_memory.h"
#include "svga_context.h"
#include "svga_cmd.h"
+#include "svga_format.h"
#include "svga_shader.h"
+/**
+ * This bit isn't really used anywhere. It only serves to help
+ * generate a unique "signature" for the vertex shader output bitmask.
+ * Shader input/output signatures are used to resolve shader linking
+ * issues.
+ */
+#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)
+
+
+/**
+ * Build a 64-bit mask of the GENERIC inputs listed in the shader info.
+ * Bit i of the result is set when some input has semantic name
+ * TGSI_SEMANTIC_GENERIC and semantic index i.
+ *
+ * \param info  scanned TGSI shader info
+ * \return bitmask of the GENERIC[i] inputs found in \p info
+ */
+uint64_t
+svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
+{
+   uint64_t generics = 0x0;
+   unsigned slot;
+
+   for (slot = 0; slot < info->num_inputs; ++slot) {
+      unsigned sem_index;
+
+      if (info->input_semantic_name[slot] != TGSI_SEMANTIC_GENERIC)
+         continue;
+
+      sem_index = info->input_semantic_index[slot];
+      /* the index must be representable as a bit of the mask */
+      assert(sem_index < sizeof(generics) * 8);
+      generics |= ((uint64_t) 1) << sem_index;
+   }
+
+   return generics;
+}
+
+
+/**
+ * Build a 64-bit mask of the outputs listed in the shader info.
+ * Bit i is set for every output with semantic GENERIC[i]; an output with
+ * the FOG semantic sets FOG_GENERIC_BIT.  Other semantics are ignored.
+ *
+ * \param info  scanned TGSI shader info
+ * \return bitmask of the outputs found in \p info
+ */
+uint64_t
+svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
+{
+   uint64_t written = 0x0;
+   unsigned slot;
+
+   for (slot = 0; slot < info->num_outputs; ++slot) {
+      const unsigned sem_name = info->output_semantic_name[slot];
+
+      if (sem_name == TGSI_SEMANTIC_GENERIC) {
+         const unsigned sem_index = info->output_semantic_index[slot];
+
+         /* the index must be representable as a bit of the mask */
+         assert(sem_index < sizeof(written) * 8);
+         written |= ((uint64_t) 1) << sem_index;
+      }
+      else if (sem_name == TGSI_SEMANTIC_FOG) {
+         written |= FOG_GENERIC_BIT;
+      }
+   }
+
+   return written;
+}
+
+
+
+/**
+ * Given a mask of used generic variables (as returned by the mask
+ * functions above), fill in a table which maps those indexes to small
+ * integers.
+ *
+ * Every table entry is first reset to -1.  Then, walking the set bits of
+ * \p generics_mask from least to most significant, each used index is
+ * given the next small integer, starting at 1 because texcoord[0] is
+ * reserved.
+ * Example: if generics_mask = binary(1010), GENERIC[1] and GENERIC[3] are
+ * used and the table will contain:
+ *    table[1] = 1;
+ *    table[3] = 2;
+ * All other entries stay -1; svga_remap_generic_index() assigns a value
+ * to such an entry the first time it is looked up.
+ */
+void
+svga_remap_generics(uint64_t generics_mask,
+                    int8_t remap_table[MAX_GENERIC_VARYING])
+{
+   /* Note texcoord[0] is reserved so start at 1 */
+   unsigned next_slot = 1;
+
+   /* int8_t entries: filling every byte with 0xff stores -1 everywhere */
+   memset(remap_table, -1, MAX_GENERIC_VARYING * sizeof(remap_table[0]));
+
+   /* visit the set bits, lowest first */
+   for (; generics_mask != 0; generics_mask &= generics_mask - 1) {
+      const unsigned lowest = ffsll(generics_mask) - 1;
+
+      remap_table[lowest] = next_slot;
+      next_slot++;
+   }
+}
+
+
+/**
+ * Map a TGSI generic varying index to a small integer through the generic
+ * remap table.
+ *
+ * If the table has no valid value for the index (the entry is -1) it
+ * means the fragment shader doesn't use that VS output.  In that case
+ * the entry is set to one more than the largest value currently in the
+ * table and that value is returned.  Alternately, we could cull the VS
+ * instructions that write to the register, or replace the register with
+ * a dummy temp register.
+ * XXX TODO: we should do one of the latter as it would save precious
+ * texcoord registers.
+ *
+ * \param remap_table    table to consult; may be updated
+ * \param generic_index  TGSI GENERIC[] index to translate
+ * \return the small integer assigned to \p generic_index
+ */
+int
+svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
+                         int generic_index)
+{
+   int highest = 0;
+   int slot;
+
+   assert(generic_index < MAX_GENERIC_VARYING);
+
+   /* just don't return a random/garbage value */
+   if (generic_index >= MAX_GENERIC_VARYING)
+      generic_index = MAX_GENERIC_VARYING - 1;
+
+   /* Already mapped: nothing to allocate. */
+   if (remap_table[generic_index] != -1)
+      return remap_table[generic_index];
+
+   /* This is a VS output that has no matching PS input.  Find a
+    * free index: one past the largest value handed out so far.
+    */
+   for (slot = 0; slot < MAX_GENERIC_VARYING; slot++) {
+      if (remap_table[slot] > highest)
+         highest = remap_table[slot];
+   }
+   remap_table[generic_index] = highest + 1;
+
+   return remap_table[generic_index];
+}
+
+
+/**
+ * Fill in the shader-type-independent part of a compile key from the
+ * context's current state.  This is basically the texture-related state:
+ * for every bound sampler view of the given shader stage the matching
+ * key->tex[] entry receives the texture target, MSAA flag, unnormalized
+ * coordinate info, swizzles and return type.
+ *
+ * \param svga    the context whose current state is read
+ * \param shader  shader stage index into the svga->curr sampler arrays
+ * \param key     the key to update; num_unnormalized_coords is only
+ *                incremented here, never reset
+ */
+void
+svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
+                            struct svga_compile_key *key)
+{
+   unsigned unit;
+   unsigned next_wh_idx = 0;
+
+   assert(shader < Elements(svga->curr.num_sampler_views));
+
+   for (unit = 0; unit < svga->curr.num_sampler_views[shader]; unit++) {
+      const struct pipe_sampler_view *view =
+         svga->curr.sampler_views[shader][unit];
+      unsigned target;
+
+      if (!view)
+         continue;
+
+      assert(svga->curr.sampler[shader][unit]);
+      assert(view->texture);
+      assert(view->texture->target < (1 << 4)); /* texture_target:4 */
+
+      /* 1D/2D array textures with one slice are treated as non-arrays
+       * by the SVGA3D device.  Convert the texture type here so that
+       * we emit the right TEX/SAMPLE instruction in the shader.
+       */
+      target = view->texture->target;
+      if (view->texture->array_size == 1) {
+         if (target == PIPE_TEXTURE_1D_ARRAY)
+            target = PIPE_TEXTURE_1D;
+         else if (target == PIPE_TEXTURE_2D_ARRAY)
+            target = PIPE_TEXTURE_2D;
+      }
+      key->tex[unit].texture_target = target;
+
+      key->tex[unit].texture_msaa = view->texture->nr_samples > 1;
+
+      if (!svga->curr.sampler[shader][unit]->normalized_coords) {
+         assert(next_wh_idx < (1 << 5));  /* width_height_idx:5 bitfield */
+         key->tex[unit].width_height_idx = next_wh_idx;
+         next_wh_idx++;
+         key->tex[unit].unnormalized = TRUE;
+         key->num_unnormalized_coords++;
+      }
+
+      key->tex[unit].swizzle_r = view->swizzle_r;
+      key->tex[unit].swizzle_g = view->swizzle_g;
+      key->tex[unit].swizzle_b = view->swizzle_b;
+      key->tex[unit].swizzle_a = view->swizzle_a;
+
+      key->tex[unit].return_type = svga_get_texture_datatype(view->format);
+   }
+
+   key->num_textures = svga->curr.num_sampler_views[shader];
+}
+
+
+/**
+ * Walk the shader's list of compiled variants looking for one whose
+ * compile key equals \p key (per svga_compile_keys_equal()).
+ * \return the matching variant, or NULL if there is none
+ */
+struct svga_shader_variant *
+svga_search_shader_key(const struct svga_shader *shader,
+                       const struct svga_compile_key *key)
+{
+   struct svga_shader_variant *candidate;
+
+   assert(key);
+
+   candidate = shader->variants;
+   while (candidate) {
+      if (svga_compile_keys_equal(key, &candidate->key))
+         return candidate;
+      candidate = candidate->next;
+   }
+
+   return NULL;
+}
+
+/**
+ * Starting at \p pshader, walk the 'next' chain looking for a shader
+ * whose token key is bytewise equal to \p key.
+ * \return the matching shader, or NULL if there is none
+ */
+struct svga_shader *
+svga_search_shader_token_key(struct svga_shader *pshader,
+                             const struct svga_token_key *key)
+{
+   struct svga_shader *candidate;
+
+   assert(key);
+
+   candidate = pshader;
+   while (candidate) {
+      if (!memcmp(key, &candidate->token_key, sizeof(struct svga_token_key)))
+         return candidate;
+      candidate = candidate->next;
+   }
+
+   return NULL;
+}
+
+/**
+ * Helper function to define a gb shader for non-vgpu10 device.
+ *
+ * \param svga     the context
+ * \param type     the SVGA3D shader type
+ * \param variant  supplies the tokens; receives the new gb_shader
+ * \param codeLen  size of the variant's tokens, in bytes
+ * \return PIPE_ERROR_OUT_OF_MEMORY if the shader object could not be
+ *         created, otherwise the result of SVGA3D_BindGBShader().
+ *         variant->gb_shader is left set if the bind command fails.
+ */
+static enum pipe_error
+define_gb_shader_vgpu9(struct svga_context *svga,
+                       SVGA3dShaderType type,
+                       struct svga_shader_variant *variant,
+                       unsigned codeLen)
+{
+   struct svga_winsys_screen *winsys = svga_screen(svga->pipe.screen)->sws;
+
+   /**
+    * Create gb memory for the shader and upload the shader code.
+    * Kernel module will allocate an id for the shader and issue
+    * the DefineGBShader command.
+    */
+   variant->gb_shader = winsys->shader_create(winsys, type,
+                                              variant->tokens, codeLen);
+   if (variant->gb_shader == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   return SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
+}
+
+/**
+ * Helper function to define a gb shader for vgpu10 device.
+ *
+ * Allocates a shader ID, creates the gb shader object and emits the
+ * define + bind commands.  If any step fails, whatever was set up is
+ * undone: variant->gb_shader ends up NULL and variant->id ends up
+ * UTIL_BITMASK_INVALID_INDEX.
+ *
+ * \param svga     the context
+ * \param type     the SVGA3D shader type
+ * \param variant  supplies the tokens; receives the id and gb_shader
+ * \param codeLen  size of the variant's tokens, in bytes
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY on any failure
+ */
+static enum pipe_error
+define_gb_shader_vgpu10(struct svga_context *svga,
+                        SVGA3dShaderType type,
+                        struct svga_shader_variant *variant,
+                        unsigned codeLen)
+{
+   struct svga_winsys_context *swc = svga->swc;
+
+   /**
+    * Shaders in VGPU10 enabled device reside in the device COTable.
+    * SVGA driver will allocate an integer ID for the shader and
+    * issue DXDefineShader and DXBindShader commands.
+    */
+   variant->id = util_bitmask_add(svga->shader_id_bm);
+   if (variant->id == UTIL_BITMASK_INVALID_INDEX)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* Create gb memory for the shader and upload the shader code */
+   variant->gb_shader = swc->shader_create(swc,
+                                           variant->id, type,
+                                           variant->tokens, codeLen);
+
+   if (variant->gb_shader) {
+      /**
+       * Since we don't want to do any flush within state emission to avoid
+       * partial state in a command buffer, it's important to make sure that
+       * there is enough room to send both the DXDefineShader & DXBindShader
+       * commands in the same command buffer.  So let's send both
+       * commands in one command reservation.  If it fails, we'll undo
+       * the shader creation and return an error.
+       */
+      if (SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
+                                            variant->id, type,
+                                            codeLen) == PIPE_OK)
+         return PIPE_OK;
+
+      /* The commands could not be emitted: drop the shader object. */
+      swc->shader_destroy(swc, variant->gb_shader);
+      variant->gb_shader = NULL;
+   }
+
+   /* Free the shader ID */
+   assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
+   util_bitmask_clear(svga->shader_id_bm, variant->id);
+   variant->id = UTIL_BITMASK_INVALID_INDEX;
+
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
/**
* Issue the SVGA3D commands to define a new shader.
- * \param result contains the shader tokens, etc. The result->id field will
- * be set here.
+ * \param variant contains the shader tokens, etc. The variant->id field will
+ * be set here.
*/
enum pipe_error
svga_define_shader(struct svga_context *svga,
struct svga_shader_variant *variant)
{
unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
+ enum pipe_error ret;
- if (svga_have_gb_objects(svga)) {
- struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
- enum pipe_error ret;
-
- variant->gb_shader = sws->shader_create(sws, type,
- variant->tokens, codeLen);
- if (!variant->gb_shader)
- return PIPE_ERROR_OUT_OF_MEMORY;
-
- ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
- if (ret != PIPE_OK) {
- sws->shader_destroy(sws, variant->gb_shader);
- variant->gb_shader = NULL;
- }
+ variant->id = UTIL_BITMASK_INVALID_INDEX;
- return ret;
+ if (svga_have_gb_objects(svga)) {
+ if (svga_have_vgpu10(svga))
+ return define_gb_shader_vgpu10(svga, type, variant, codeLen);
+ else
+ return define_gb_shader_vgpu9(svga, type, variant, codeLen);
}
else {
- enum pipe_error ret;
-
/* Allocate an integer ID for the shader */
variant->id = util_bitmask_add(svga->shader_id_bm);
if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
util_bitmask_clear(svga->shader_id_bm, variant->id);
variant->id = UTIL_BITMASK_INVALID_INDEX;
- return ret;
}
}
- return PIPE_OK;
+ return ret;
}
+/**
+ * Issue the SVGA3D commands to set/bind a shader.
+ * \param svga     the context
+ * \param type     shader stage; must be SVGA3D_SHADERTYPE_VS, _GS or _PS
+ * \param variant  the shader variant to bind.  May be NULL, in which case
+ *                 SVGA3D_INVALID_ID / a NULL gb shader is passed on.
+ * \return the result of the underlying set-shader command
+ */
+enum pipe_error
+svga_set_shader(struct svga_context *svga,
+                SVGA3dShaderType type,
+                struct svga_shader_variant *variant)
+{
+   const unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;
+   struct svga_winsys_gb_shader *gbshader;
+
+   assert(type == SVGA3D_SHADERTYPE_VS ||
+          type == SVGA3D_SHADERTYPE_GS ||
+          type == SVGA3D_SHADERTYPE_PS);
+
+   /* Without guest-backed objects the shader is identified by ID only. */
+   if (!svga_have_gb_objects(svga))
+      return SVGA3D_SetShader(svga->swc, type, id);
+
+   gbshader = variant ? variant->gb_shader : NULL;
+
+   if (svga_have_vgpu10(svga))
+      return SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
+
+   return SVGA3D_SetGBShader(svga->swc, type, gbshader);
+}
+
enum pipe_error
svga_destroy_shader_variant(struct svga_context *svga,
{
enum pipe_error ret = PIPE_OK;
- if (svga_have_gb_objects(svga)) {
- struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
-
- sws->shader_destroy(sws, variant->gb_shader);
+ if (svga_have_gb_objects(svga) && variant->gb_shader) {
+ if (svga_have_vgpu10(svga)) {
+ struct svga_winsys_context *swc = svga->swc;
+ swc->shader_destroy(swc, variant->gb_shader);
+ ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
+ if (ret != PIPE_OK) {
+ /* flush and try again */
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
+ }
+ util_bitmask_clear(svga->shader_id_bm, variant->id);
+ }
+ else {
+ struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+ sws->shader_destroy(sws, variant->gb_shader);
+ }
variant->gb_shader = NULL;
- goto end;
}
-
- /* first try */
- if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
- ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
-
- if (ret != PIPE_OK) {
- /* flush and try again */
- svga_context_flush(svga, NULL);
-
+ else {
+ if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
- assert(ret == PIPE_OK);
+ if (ret != PIPE_OK) {
+ /* flush and try again */
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
+ assert(ret == PIPE_OK);
+ }
+ util_bitmask_clear(svga->shader_id_bm, variant->id);
}
-
- util_bitmask_clear(svga->shader_id_bm, variant->id);
}
-end:
FREE((unsigned *)variant->tokens);
FREE(variant);
return ret;
}
+
+/*
+ * Rebind shaders.
+ * Called at the beginning of every new command buffer to ensure that
+ * shaders are properly paged-in.  Instead of sending the SetShader
+ * command, this function sends a private allocation command to
+ * page in a shader.  This avoids emitting redundant state to the device
+ * just to page in a resource.
+ *
+ * The VS, GS and FS are handled in that order.  A stage's rebind flag is
+ * cleared once the stage has been processed; if a rebind fails its error
+ * is returned at once and the flags of that stage and the later ones are
+ * left untouched.
+ */
+enum pipe_error
+svga_rebind_shaders(struct svga_context *svga)
+{
+   struct svga_winsys_context *swc = svga->swc;
+   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+
+   /**
+    * If the underlying winsys layer does not need resource rebinding,
+    * just clear the rebind flags and return.
+    */
+   if (swc->resource_rebind == NULL) {
+      svga->rebind.flags.vs = 0;
+      svga->rebind.flags.gs = 0;
+      svga->rebind.flags.fs = 0;
+
+      return PIPE_OK;
+   }
+
+   /* vertex shader */
+   if (svga->rebind.flags.vs) {
+      if (hw->vs && hw->vs->gb_shader) {
+         ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader,
+                                    SVGA_RELOC_READ);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+      svga->rebind.flags.vs = 0;
+   }
+
+   /* geometry shader */
+   if (svga->rebind.flags.gs) {
+      if (hw->gs && hw->gs->gb_shader) {
+         ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader,
+                                    SVGA_RELOC_READ);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+      svga->rebind.flags.gs = 0;
+   }
+
+   /* fragment shader */
+   if (svga->rebind.flags.fs) {
+      if (hw->fs && hw->fs->gb_shader) {
+         ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader,
+                                    SVGA_RELOC_READ);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+      svga->rebind.flags.fs = 0;
+   }
+
+   return PIPE_OK;
+}
#define SVGA_SHADER_H
#include "svga3d_reg.h"
+#include "svga_context.h"
+#include "svga_streamout.h"
-struct svga_shader_variant;
+
+/**
+ * We use a 64-bit mask to keep track of the generic indexes.
+ * This is the maximum semantic index for a TGSI GENERIC[i] register.
+ */
+#define MAX_GENERIC_VARYING 64
+
+
+struct svga_context;
+
+
+/**
+ * The parameters a shader variant is generated from (stored in
+ * svga_shader_variant::key).
+ *
+ * Two keys are compared by svga_compile_keys_equal(), which memcmp()s the
+ * bytes from the start of this struct up to tex[num_textures].  Field
+ * order and size therefore matter, and tex[] must stay the last member.
+ * NOTE(review): a bytewise compare also covers padding and unused bits,
+ * so keys are presumably zero-initialized before being filled in --
+ * confirm against the callers.
+ */
+struct svga_compile_key
+{
+ /* vertex shader only */
+ struct {
+ uint64_t fs_generic_inputs;
+ unsigned passthrough:1;
+ unsigned need_prescale:1;
+ unsigned undo_viewport:1;
+ unsigned allow_psiz:1;
+ /** The following are all 32-bit bitmasks (per VS input) */
+ unsigned adjust_attrib_range;
+ unsigned attrib_is_pure_int;
+ unsigned adjust_attrib_w_1;
+ unsigned adjust_attrib_itof;
+ unsigned adjust_attrib_utof;
+ unsigned attrib_is_bgra;
+ unsigned attrib_puint_to_snorm;
+ unsigned attrib_puint_to_uscaled;
+ unsigned attrib_puint_to_sscaled;
+ } vs;
+
+ /* geometry shader only */
+ struct {
+ uint64_t vs_generic_outputs;
+ unsigned need_prescale:1;
+ unsigned writes_psize:1;
+ unsigned wide_point:1;
+ } gs;
+
+ /* fragment shader only */
+ struct {
+ uint64_t vs_generic_outputs;
+ uint64_t gs_generic_outputs;
+ unsigned light_twoside:1;
+ unsigned front_ccw:1;
+ unsigned white_fragments:1;
+ unsigned flatshade:1;
+ unsigned pstipple:1;
+ unsigned alpha_func:4; /**< SVGA3D_CMP_x */
+ unsigned write_color0_to_n_cbufs:4;
+ unsigned aa_point:1;
+ int aa_point_coord_index;
+ float alpha_ref;
+ } fs;
+
+ /* any shader type */
+ int8_t generic_remap_table[MAX_GENERIC_VARYING];
+ /* num_textures, num_unnormalized_coords and the tex[] entries below are
+ * filled in by svga_init_shader_key_common().
+ */
+ unsigned num_textures:8;
+ unsigned num_unnormalized_coords:8;
+ unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES;
+ unsigned sprite_origin_lower_left:1;
+ unsigned sprite_coord_enable;
+ struct {
+ unsigned compare_mode:1;
+ unsigned compare_func:3;
+ unsigned unnormalized:1;
+ unsigned width_height_idx:5; /**< texture unit */
+ unsigned texture_target:4; /**< PIPE_TEXTURE_x */
+ unsigned texture_msaa:1; /**< A multisample texture? */
+ unsigned sprite_texgen:1;
+ unsigned swizzle_r:3;
+ unsigned swizzle_g:3;
+ unsigned swizzle_b:3;
+ unsigned swizzle_a:3;
+ unsigned return_type:3; /**< TGSI_RETURN_TYPE_x */
+ } tex[PIPE_MAX_SAMPLERS];
+ /* Note: svga_compile_keys_equal() depends on the variable-size
+ * tex[] array being at the end of this structure.
+ */
+};
+
+/* A key for a variant of token string of a shader.
+ * svga_search_shader_token_key() compares these keys with memcmp() over
+ * the whole struct.  Only geometry-shader fields are defined.
+ */
+struct svga_token_key {
+ struct {
+ unsigned sprite_coord_enable:24;
+ unsigned sprite_origin_upper_left:1;
+ unsigned point_pos_stream_out:1;
+ unsigned writes_psize:1;
+ unsigned aa_point:1;
+ } gs;
+};
+
+/**
+ * A single TGSI shader may be compiled into different variants of
+ * SVGA3D shaders depending on the compile key. Each user shader
+ * will have a linked list of these variants (svga_shader::variants,
+ * chained through the 'next' field and searched by
+ * svga_search_shader_key()).
+ */
+struct svga_shader_variant
+{
+ /* NOTE(review): presumably the shader this variant was compiled from --
+ * confirm where it is assigned.
+ */
+ const struct svga_shader *shader;
+
+ /** Parameters used to generate this variant */
+ struct svga_compile_key key;
+
+ /* Compiled shader tokens:
+ */
+ const unsigned *tokens;
+ unsigned nr_tokens;
+
+ /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE,
+ * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY.
+ * Allocated from svga->shader_id_bm; UTIL_BITMASK_INVALID_INDEX when
+ * no ID is assigned.
+ */
+ unsigned id;
+
+ /** Start of extra constants (number of float[4] constants) */
+ unsigned extra_const_start;
+
+ /* GB object buffer containing the bytecode */
+ struct svga_winsys_gb_shader *gb_shader;
+
+ boolean uses_flat_interp; /**< TRUE if flat interpolation qualifier is
+ * applied to any of the varyings.
+ */
+
+ /** For FS-based polygon stipple */
+ unsigned pstipple_sampler_unit;
+
+ /** Next variant */
+ struct svga_shader_variant *next;
+};
+
+
+/**
+ * State common to all shader stages; embedded as the 'base' member of
+ * svga_fragment_shader, svga_vertex_shader and svga_geometry_shader.
+ */
+struct svga_shader
+{
+ const struct tgsi_token *tokens;
+ struct svga_token_key token_key; /* token key for the token string */
+ struct tgsi_shader_info info;
+
+ /* List of shaders with tokens derived from the same token string
+ * (walked by svga_search_shader_token_key()).
+ */
+ struct svga_shader *next;
+ struct svga_shader *parent; /* shader with the original token string */
+
+ /* Non-NULL when the shader has stream output; see
+ * svga_have_vs_streamout() / svga_have_gs_streamout().
+ */
+ struct svga_stream_output *stream_output;
+
+ /** Head of linked list of compiled variants */
+ struct svga_shader_variant *variants;
+
+ unsigned id; /**< for debugging only */
+};
+
+
+/** Fragment shader: the common svga_shader state plus FS-only data. */
+struct svga_fragment_shader
+{
+ struct svga_shader base;
+
+ struct draw_fragment_shader *draw_shader;
+
+ /** Mask of which generic varying variables are read by this shader */
+ uint64_t generic_inputs;
+
+ /** Table mapping original TGSI generic indexes to low integers */
+ int8_t generic_remap_table[MAX_GENERIC_VARYING];
+};
+
+
+/** Vertex shader: the common svga_shader state plus VS-only data. */
+struct svga_vertex_shader
+{
+ struct svga_shader base;
+
+ struct draw_vertex_shader *draw_shader;
+
+ /** Mask of which generic varying variables are written by this shader */
+ uint64_t generic_outputs;
+
+ /** Generated geometry shader that goes with this vertex shader */
+ struct svga_geometry_shader *gs;
+};
+
+
+/** Geometry shader: the common svga_shader state plus GS-only data. */
+struct svga_geometry_shader
+{
+ struct svga_shader base;
+
+ struct draw_geometry_shader *draw_shader;
+
+ /** Table mapping original TGSI generic indexes to low integers */
+ int8_t generic_remap_table[MAX_GENERIC_VARYING];
+ /* NOTE(review): presumably the mask of generic varyings written by this
+ * shader, as in svga_vertex_shader -- confirm.
+ */
+ uint64_t generic_outputs;
+
+ unsigned aa_point_coord_index; /* generic index for aa point coord */
+
+ unsigned wide_point:1; /* set if the shader emulates wide point */
+};
+
+
+/**
+ * Compare two compile keys bytewise.
+ * Only the leading part of the key is compared: everything from the
+ * start of the struct up to (not including) tex[a->num_textures].  This
+ * relies on tex[] being the last member of svga_compile_key.
+ * \return non-zero if the compared bytes are identical
+ */
+static inline boolean
+svga_compile_keys_equal(const struct svga_compile_key *a,
+                        const struct svga_compile_key *b)
+{
+   const char *key_start = (const char *) a;
+   const char *key_end = (const char *) &a->tex[a->num_textures];
+   unsigned key_size = key_end - key_start;
+
+   return memcmp(a, b, key_size) == 0;
+}
+
+
+uint64_t
+svga_get_generic_inputs_mask(const struct tgsi_shader_info *info);
+
+uint64_t
+svga_get_generic_outputs_mask(const struct tgsi_shader_info *info);
+
+void
+svga_remap_generics(uint64_t generics_mask,
+ int8_t remap_table[MAX_GENERIC_VARYING]);
+
+int
+svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
+ int generic_index);
+
+void
+svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
+ struct svga_compile_key *key);
+
+struct svga_shader_variant *
+svga_search_shader_key(const struct svga_shader *shader,
+ const struct svga_compile_key *key);
+
+struct svga_shader *
+svga_search_shader_token_key(struct svga_shader *shader,
+ const struct svga_token_key *key);
enum pipe_error
svga_define_shader(struct svga_context *svga,
struct svga_shader_variant *variant);
enum pipe_error
+svga_set_shader(struct svga_context *svga,
+ SVGA3dShaderType type,
+ struct svga_shader_variant *variant);
+
+enum pipe_error
svga_destroy_shader_variant(struct svga_context *svga,
SVGA3dShaderType type,
struct svga_shader_variant *variant);
+enum pipe_error
+svga_rebind_shaders(struct svga_context *svga);
/**
* Check if a shader's bytecode exceeds the device limits.
}
+/**
+ * Translate a PIPE_SHADER_* stage into the matching SVGA3D_SHADERTYPE_*.
+ * Vertex, geometry and fragment stages are supported; anything else
+ * asserts and falls back to SVGA3D_SHADERTYPE_VS.
+ */
+static inline SVGA3dShaderType
+svga_shader_type(unsigned shader)
+{
+   if (shader == PIPE_SHADER_VERTEX)
+      return SVGA3D_SHADERTYPE_VS;
+
+   if (shader == PIPE_SHADER_GEOMETRY)
+      return SVGA3D_SHADERTYPE_GS;
+
+   if (shader == PIPE_SHADER_FRAGMENT)
+      return SVGA3D_SHADERTYPE_PS;
+
+   assert(!"Invalid shader type");
+   return SVGA3D_SHADERTYPE_VS;
+}
+
+
+/**
+ * Report whether a vertex shader is currently bound and that shader has
+ * a stream output object attached.
+ */
+static inline boolean
+svga_have_vs_streamout(const struct svga_context *svga)
+{
+   if (svga->curr.vs == NULL)
+      return FALSE;
+
+   return svga->curr.vs->base.stream_output != NULL;
+}
+
+
+/**
+ * Report whether a geometry shader is currently bound and that shader
+ * has a stream output object attached.
+ */
+static inline boolean
+svga_have_gs_streamout(const struct svga_context *svga)
+{
+   if (svga->curr.gs == NULL)
+      return FALSE;
+
+   return svga->curr.gs->base.stream_output != NULL;
+}
+
+
#endif /* SVGA_SHADER_H */
*
**********************************************************/
+#include "util/u_bitmask.h"
#include "util/u_debug.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
*/
static const struct svga_tracked_state *hw_draw_state[] =
{
+ &svga_need_tgsi_transform,
&svga_hw_fs,
+ &svga_hw_gs,
&svga_hw_vs,
&svga_hw_rss,
- &svga_hw_tss,
- &svga_hw_tss_binding,
+ &svga_hw_sampler, /* VGPU10 */
+ &svga_hw_sampler_bindings, /* VGPU10 */
+ &svga_hw_tss, /* pre-VGPU10 */
+ &svga_hw_tss_binding, /* pre-VGPU10 */
&svga_hw_clip_planes,
&svga_hw_vdecl,
&svga_hw_fs_constants,
+ &svga_hw_gs_constants,
&svga_hw_vs_constants,
NULL
};
*/
enum pipe_error svga_emit_initial_state( struct svga_context *svga )
{
- SVGA3dRenderState *rs;
- unsigned count = 0;
- const unsigned COUNT = 2;
- enum pipe_error ret;
-
- ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT );
- if (ret != PIPE_OK)
+   if (svga_have_vgpu10(svga)) {
+      /* VGPU10: define a default rasterizer state object and bind it. */
+      SVGA3dRasterizerStateId id = util_bitmask_add(svga->rast_object_id_bm);
+      enum pipe_error ret;
+
+      /* No rasterizer object ID could be allocated: fail here rather
+       * than define and bind an object with an invalid ID.
+       */
+      if (id == UTIL_BITMASK_INVALID_INDEX)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+
+      /* XXX preliminary code */
+      ret = SVGA3D_vgpu10_DefineRasterizerState(svga->swc,
+                                             id,
+                                             SVGA3D_FILLMODE_FILL,
+                                             SVGA3D_CULL_NONE,
+                                             1, /* frontCounterClockwise */
+                                             0, /* depthBias */
+                                             0.0f, /* depthBiasClamp */
+                                             0.0f, /* slopeScaledDepthBiasClamp */
+                                             0, /* depthClampEnable */
+                                             0, /* scissorEnable */
+                                             0, /* multisampleEnable */
+                                             0, /* aalineEnable */
+                                             1.0f, /* lineWidth */
+                                             0, /* lineStippleEnable */
+                                             0, /* lineStippleFactor */
+                                             0, /* lineStipplePattern */
+                                             0); /* provokingVertexLast */
+      if (ret != PIPE_OK) {
+         /* The state object was never defined, so give its ID back and
+          * report the error.  An assert() alone is compiled out of
+          * release builds and would let an undefined object be bound.
+          */
+         util_bitmask_clear(svga->rast_object_id_bm, id);
+         return ret;
+      }
+
+      ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, id);
       return ret;
+   }
+ else {
+ SVGA3dRenderState *rs;
+ unsigned count = 0;
+ const unsigned COUNT = 2;
+ enum pipe_error ret;
- /* Always use D3D style coordinate space as this is the only one
- * which is implemented on all backends.
- */
- EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, SVGA3D_COORDINATE_LEFTHANDED );
- EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW );
-
- assert( COUNT == count );
- SVGA_FIFOCommitAll( svga->swc );
+ ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT );
+ if (ret != PIPE_OK)
+ return ret;
- return PIPE_OK;
+ /* Always use D3D style coordinate space as this is the only one
+ * which is implemented on all backends.
+ */
+ EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE,
+ SVGA3D_COORDINATE_LEFTHANDED );
+ EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW );
+
+ assert( COUNT == count );
+ SVGA_FIFOCommitAll( svga->swc );
+
+ return PIPE_OK;
+ }
}
/* HW_DRAW
*/
+extern struct svga_tracked_state svga_need_tgsi_transform;
extern struct svga_tracked_state svga_hw_vs;
extern struct svga_tracked_state svga_hw_fs;
+extern struct svga_tracked_state svga_hw_gs;
extern struct svga_tracked_state svga_hw_rss;
+extern struct svga_tracked_state svga_hw_pstipple;
+extern struct svga_tracked_state svga_hw_sampler;
+extern struct svga_tracked_state svga_hw_sampler_bindings;
extern struct svga_tracked_state svga_hw_tss;
extern struct svga_tracked_state svga_hw_tss_binding;
extern struct svga_tracked_state svga_hw_clip_planes;
extern struct svga_tracked_state svga_hw_vdecl;
extern struct svga_tracked_state svga_hw_fs_constants;
+extern struct svga_tracked_state svga_hw_gs_constants;
extern struct svga_tracked_state svga_hw_vs_constants;
/* SWTNL_DRAW
enum pipe_error svga_reemit_framebuffer_bindings( struct svga_context *svga );
+enum pipe_error svga_rebind_framebuffer_bindings( struct svga_context *svga );
+
enum pipe_error svga_reemit_tss_bindings( struct svga_context *svga );
enum pipe_error svga_reemit_vs_bindings(struct svga_context *svga);
enum pipe_error svga_reemit_fs_bindings(struct svga_context *svga);
+enum pipe_error svga_reemit_gs_bindings(struct svga_context *svga);
+
#endif
+
/**********************************************************
* Copyright 2008-2009 VMware, Inc. All rights reserved.
*
*
**********************************************************/
+#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "pipe/p_defines.h"
+#include "util/u_upload_mgr.h"
#include "svga_screen.h"
#include "svga_context.h"
#include "svga_tgsi.h"
#include "svga_debug.h"
#include "svga_resource_buffer.h"
+#include "svga_shader.h"
#include "svga_hw_reg.h"
/** Guest-backed surface constant buffers must be this size */
#define GB_CONSTBUF_SIZE (SVGA3D_CONSTREG_MAX)
+
/**
- * Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_*
+ * Emit any extra shader-type-independent shader constants into the buffer
+ * pointed to by 'dest'.
+ *
+ * For every bound sampler view of the given shader stage, in sampler order:
+ *  - if the variant key marks the texture as unnormalized, one float[4]
+ *    holding (1/width, 1/height, 1, 1) is written;
+ *  - if the texture target is PIPE_BUFFER, one slot holding
+ *    (width0 / bytes-per-element, 1, 1, 1) as 32-bit unsigned integers
+ *    is written.
+ * Both kinds of entries are appended through the same write cursor so that
+ * they occupy consecutive, non-overlapping slots and 'count' always equals
+ * the index of the next free slot.
+ *
+ * \param svga     context supplying the currently bound sampler views
+ * \param variant  shader variant whose key describes the textures in use
+ * \param shader   PIPE_SHADER_x value used to index the sampler views
+ * \param dest     output buffer; the caller must provide enough room
 * \return number of float[4] constants put into the 'dest' buffer
*/
static unsigned
-svga_shader_type(unsigned shader)
+svga_get_extra_constants_common(struct svga_context *svga,
+                                const struct svga_shader_variant *variant,
+                                unsigned shader, float *dest)
{
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- return SVGA3D_SHADERTYPE_VS;
- case PIPE_SHADER_FRAGMENT:
- return SVGA3D_SHADERTYPE_PS;
- default:
- assert(!"Unexpected shader type");
- return SVGA3D_SHADERTYPE_VS;
+   unsigned i;
+   unsigned count = 0;
+
+   for (i = 0; i < variant->key.num_textures; i++) {
+      struct pipe_sampler_view *sv = svga->curr.sampler_views[shader][i];
+      if (sv) {
+         struct pipe_resource *tex = sv->texture;
+         /* Scaling factors needed for handling unnormalized texture coordinates
+          * for texture rectangles.
+          */
+         if (variant->key.tex[i].unnormalized) {
+            /* debug/sanity check */
+            assert(variant->key.tex[i].width_height_idx == count);
+
+            *dest++ = 1.0 / (float)tex->width0;
+            *dest++ = 1.0 / (float)tex->height0;
+            *dest++ = 1.0;
+            *dest++ = 1.0;
+
+            count++;
+         }
+
+         /* Store the sizes for texture buffers.
+          * The integer view is derived from the current write position
+          * (not from the start of the buffer) so that this slot never
+          * aliases a scale-factor slot written above or for another
+          * sampler.
+          */
+         if (tex->target == PIPE_BUFFER) {
+            uint32_t *dest_u = (uint32_t *) dest; /* uint view of this slot */
+            unsigned bytes_per_element = util_format_get_blocksize(sv->format);
+
+            dest_u[0] = tex->width0 / bytes_per_element;
+            dest_u[1] = 1;
+            dest_u[2] = 1;
+            dest_u[3] = 1;
+            dest += 4;
+
+            count++;
+         }
+      }
}
+
+   return count;
}
/**
* Emit any extra fragment shader constants into the buffer pointed
* to by 'dest'.
- * In particular, these would be the scaling factors needed for handling
- * unnormalized texture coordinates for texture rectangles.
* \return number of float[4] constants put into the dest buffer
*/
static unsigned
svga_get_extra_fs_constants(struct svga_context *svga, float *dest)
{
const struct svga_shader_variant *variant = svga->state.hw_draw.fs;
- const struct svga_fs_compile_key *key = &variant->key.fkey;
unsigned count = 0;
- /* SVGA_NEW_VS_VARIANT
- */
- if (key->num_unnormalized_coords) {
- unsigned i;
+ count += svga_get_extra_constants_common(svga, variant,
+ PIPE_SHADER_FRAGMENT, dest);
- for (i = 0; i < key->num_textures; i++) {
- if (key->tex[i].unnormalized) {
- struct pipe_resource *tex = svga->curr.sampler_views[i]->texture;
+ assert(count <= MAX_EXTRA_CONSTS);
- /* debug/sanity check */
- assert(key->tex[i].width_height_idx == count);
+ return count;
+}
- *dest++ = 1.0 / (float)tex->width0;
- *dest++ = 1.0 / (float)tex->height0;
- *dest++ = 1.0;
- *dest++ = 1.0;
+/**
+ * Emit extra constants needed for prescale computation into the
+ * buffer pointed to by '*dest'. The updated buffer pointer
+ * will be returned in 'dest'.
+ *
+ * Two float[4] values are copied, in this order: the prescale scale
+ * vector, then the prescale translate vector, both taken from
+ * svga->state.hw_clear.prescale. '*dest' is advanced by 8 floats in total.
+ *
+ * \return number of float[4] constants written (always 2)
+ */
+static unsigned
+svga_get_prescale_constants(struct svga_context *svga, float **dest)
+{
+ memcpy(*dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float));
+ *dest += 4;
- count++;
- }
- }
- }
+ memcpy(*dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float));
+ *dest += 4;
- assert(count <= MAX_EXTRA_CONSTS);
+ return 2;
+}
- return count;
+/**
+ * Write the single float[4] constant used for point sprite emulation
+ * at '*dest' and advance '*dest' past it.
+ *
+ * Layout: x = 1 / (2 * viewport scale[0]), y = 1 / (2 * viewport scale[1]),
+ * z = current rasterizer point size, w = the screen's maxPointSize.
+ *
+ * \return number of float[4] constants written (always 1)
+ */
+static unsigned
+svga_get_pt_sprite_constants(struct svga_context *svga, float **dest)
+{
+   float *out = *dest;
+   struct svga_screen *ss = svga_screen(svga->pipe.screen);
+
+   out[0] = 1.0 / (svga->curr.viewport.scale[0] * 2);
+   out[1] = 1.0 / (svga->curr.viewport.scale[1] * 2);
+   out[2] = svga->curr.rast->pointsize;
+   out[3] = ss->maxPointSize;
+
+   *dest = out + 4;
+   return 1;
}
+/**
+ * Append the coefficients of every enabled user-defined clip plane to the
+ * buffer at '*dest', one float[4] per plane, advancing '*dest' as it goes.
+ * The set of planes comes from the variant key's clip_plane_enable mask.
+ * Nothing is written unless the context is a VGPU10 context.
+ *
+ * \return number of float[4] constants written
+ */
+static unsigned
+svga_get_clip_plane_constants(struct svga_context *svga,
+                              const struct svga_shader_variant *variant,
+                              float **dest)
+{
+   unsigned remaining;
+   unsigned emitted = 0;
+
+   /* SVGA_NEW_CLIP */
+   if (!svga_have_vgpu10(svga))
+      return 0;
+
+   /* walk the enable mask, consuming one set bit per iteration */
+   for (remaining = variant->key.clip_plane_enable; remaining; emitted += 1) {
+      int plane = u_bit_scan(&remaining);
+      COPY_4V(*dest, svga->curr.clip.ucp[plane]);
+      *dest += 4;
+   }
+
+   return emitted;
+}
/**
* Emit any extra vertex shader constants into the buffer pointed
svga_get_extra_vs_constants(struct svga_context *svga, float *dest)
{
const struct svga_shader_variant *variant = svga->state.hw_draw.vs;
- const struct svga_vs_compile_key *key = &variant->key.vkey;
unsigned count = 0;
/* SVGA_NEW_VS_VARIANT
*/
- if (key->need_prescale) {
- memcpy(dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float));
- dest += 4;
+ if (variant->key.vs.need_prescale) {
+ count += svga_get_prescale_constants(svga, &dest);
+ }
- memcpy(dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float));
+ if (variant->key.vs.undo_viewport) {
+ /* Used to convert window coords back to NDC coords */
+ dest[0] = 1.0f / svga->curr.viewport.scale[0];
+ dest[1] = 1.0f / svga->curr.viewport.scale[1];
+ dest[2] = -svga->curr.viewport.translate[0];
+ dest[3] = -svga->curr.viewport.translate[1];
dest += 4;
-
- count = 2;
+ count += 1;
}
+ /* SVGA_NEW_CLIP */
+ count += svga_get_clip_plane_constants(svga, variant, &dest);
+
+ /* common constants */
+ count += svga_get_extra_constants_common(svga, variant,
+ PIPE_SHADER_VERTEX, dest);
+
assert(count <= MAX_EXTRA_CONSTS);
return count;
}
+/**
+ * Emit any extra geometry shader constants into the buffer pointed
+ * to by 'dest'.
+ *
+ * The constants are appended in a fixed order: point sprite constants
+ * (only if the variant key has gs.wide_point set), prescale constants
+ * (only if gs.need_prescale is set), user clip planes, then the
+ * shader-type-independent constants.
+ *
+ * The current GS variant (svga->state.hw_draw.gs) is dereferenced without
+ * a NULL check, so the caller must only call this with a GS variant bound.
+ *
+ * \return number of float[4] constants put into the dest buffer; asserted
+ * to be no larger than MAX_EXTRA_CONSTS
+ */
+static unsigned
+svga_get_extra_gs_constants(struct svga_context *svga, float *dest)
+{
+ const struct svga_shader_variant *variant = svga->state.hw_draw.gs;
+ unsigned count = 0;
+
+ /* SVGA_NEW_GS_VARIANT
+ */
+
+ /* Constants for point sprite
+ * These are used in the transformed gs that supports point sprite.
+ * They need to be added before the prescale constants.
+ */
+ if (variant->key.gs.wide_point) {
+ count += svga_get_pt_sprite_constants(svga, &dest);
+ }
+
+ if (variant->key.gs.need_prescale) {
+ count += svga_get_prescale_constants(svga, &dest);
+ }
+
+ /* SVGA_NEW_CLIP */
+ count += svga_get_clip_plane_constants(svga, variant, &dest);
+
+ /* common constants */
+ count += svga_get_extra_constants_common(svga, variant,
+ PIPE_SHADER_GEOMETRY, dest);
+
+ assert(count <= MAX_EXTRA_CONSTS);
+ return count;
+}
/**
* Check and emit one shader constant register.
assert(shader < PIPE_SHADER_TYPES);
assert(i < SVGA3D_CONSTREG_MAX);
+ assert(!svga_have_vgpu10(svga));
if (memcmp(svga->state.hw_draw.cb[shader][i], value,
4 * sizeof(float)) != 0) {
unsigned i, j;
enum pipe_error ret;
+ assert(shader == PIPE_SHADER_VERTEX ||
+ shader == PIPE_SHADER_FRAGMENT);
+ assert(!svga_have_vgpu10(svga));
+
#ifdef DEBUG
if (offset + count > SVGA3D_CONSTREG_MAX) {
debug_printf("svga: too many constants (offset %u + count %u = %u (max = %u))\n",
/**
* Emit all the constants in a constant buffer for a shader stage.
+ * On VGPU10, emit_consts_vgpu10 is used instead.
*/
static enum pipe_error
-emit_consts(struct svga_context *svga, unsigned shader)
+emit_consts_vgpu9(struct svga_context *svga, unsigned shader)
{
+ const struct pipe_constant_buffer *cbuf;
struct svga_screen *ss = svga_screen(svga->pipe.screen);
struct pipe_transfer *transfer = NULL;
unsigned count;
const unsigned offset = 0;
assert(shader < PIPE_SHADER_TYPES);
+ assert(!svga_have_vgpu10(svga));
+ /* Only one constant buffer per shader is supported before VGPU10.
+ * This is only an approximate check against that.
+ */
+ assert(svga->curr.constbufs[shader][1].buffer == NULL);
- if (svga->curr.cbufs[shader].buffer == NULL)
- goto done;
+ cbuf = &svga->curr.constbufs[shader][0];
- data = (const float (*)[4])pipe_buffer_map(&svga->pipe,
- svga->curr.cbufs[shader].buffer,
- PIPE_TRANSFER_READ,
- &transfer);
- if (data == NULL) {
- ret = PIPE_ERROR_OUT_OF_MEMORY;
- goto done;
- }
+ if (svga->curr.constbufs[shader][0].buffer) {
+ /* emit user-provided constants */
+ data = (const float (*)[4])
+ pipe_buffer_map(&svga->pipe, svga->curr.constbufs[shader][0].buffer,
+ PIPE_TRANSFER_READ, &transfer);
+ if (data == NULL) {
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
- /* sanity check */
- assert(svga->curr.cbufs[shader].buffer->width0 >=
- svga->curr.cbufs[shader].buffer_size);
+ /* sanity check */
+ assert(cbuf->buffer->width0 >=
+ cbuf->buffer_size);
- /* Use/apply the constant buffer size and offsets here */
- count = svga->curr.cbufs[shader].buffer_size / (4 * sizeof(float));
- data += svga->curr.cbufs[shader].buffer_offset / (4 * sizeof(float));
+ /* Use/apply the constant buffer size and offsets here */
+ count = cbuf->buffer_size / (4 * sizeof(float));
+ data += cbuf->buffer_offset / (4 * sizeof(float));
+
+ if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
+ ret = emit_const_range( svga, shader, offset, count, data );
+ }
+ else {
+ for (i = 0; i < count; i++) {
+ ret = emit_const( svga, shader, offset + i, data[i] );
+ if (ret != PIPE_OK) {
+ break;
+ }
+ }
+ }
+
+ pipe_buffer_unmap(&svga->pipe, transfer);
- if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
- ret = emit_const_range( svga, shader, offset, count, data );
if (ret != PIPE_OK) {
- goto done;
+ return ret;
}
- } else {
- for (i = 0; i < count; i++) {
- ret = emit_const( svga, shader, offset + i, data[i] );
- if (ret != PIPE_OK) {
- goto done;
+ }
+
+ /* emit extra shader constants */
+ {
+ const struct svga_shader_variant *variant = NULL;
+ unsigned offset;
+ float extras[MAX_EXTRA_CONSTS][4];
+ unsigned count, i;
+
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ variant = svga->state.hw_draw.vs;
+ count = svga_get_extra_vs_constants(svga, (float *) extras);
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ variant = svga->state.hw_draw.fs;
+ count = svga_get_extra_fs_constants(svga, (float *) extras);
+ break;
+ default:
+ assert(!"Unexpected shader type");
+ count = 0;
+ }
+
+ assert(variant);
+ offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
+ assert(count <= Elements(extras));
+
+ if (count > 0) {
+ if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
+ ret = emit_const_range(svga, shader, offset, count,
+ (const float (*) [4])extras);
+ }
+ else {
+ for (i = 0; i < count; i++) {
+ ret = emit_const(svga, shader, offset + i, extras[i]);
+ if (ret != PIPE_OK)
+ return ret;
+ }
}
}
}
-done:
- if (data)
- pipe_buffer_unmap(&svga->pipe, transfer);
+ return ret;
+}
+
+
+/**
+ * Build and bind constant buffer 0 for the given shader stage (VGPU10 path).
+ *
+ * The user-provided constants in svga->curr.constbufs[shader][0] and the
+ * driver-generated "extra" constants for the stage are copied into a
+ * freshly allocated range of the svga->const0_upload upload buffer, and
+ * that range is bound as constant buffer index 0 with
+ * SVGA3D_vgpu10_SetSingleConstantBuffer().
+ *
+ * \param svga    the context
+ * \param shader  PIPE_SHADER_VERTEX, PIPE_SHADER_GEOMETRY or
+ *                PIPE_SHADER_FRAGMENT
+ * \return PIPE_OK on success or when there is nothing to emit,
+ *         PIPE_ERROR_OUT_OF_MEMORY if mapping, allocation or getting the
+ *         buffer handle fails, otherwise the error from the bind command
+ */
+static enum pipe_error
+emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
+{
+ const struct pipe_constant_buffer *cbuf;
+ struct pipe_resource *dst_buffer = NULL;
+ enum pipe_error ret = PIPE_OK;
+ struct pipe_transfer *src_transfer;
+ struct svga_winsys_surface *dst_handle;
+ float extras[MAX_EXTRA_CONSTS][4];
+ unsigned extra_count, extra_size, extra_offset;
+ unsigned new_buf_size;
+ void *src_map = NULL, *dst_map;
+ unsigned offset;
+ const struct svga_shader_variant *variant;
+
+ assert(shader == PIPE_SHADER_VERTEX ||
+ shader == PIPE_SHADER_GEOMETRY ||
+ shader == PIPE_SHADER_FRAGMENT);
+
+ cbuf = &svga->curr.constbufs[shader][0];
+
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ variant = svga->state.hw_draw.vs;
+ extra_count = svga_get_extra_vs_constants(svga, (float *) extras);
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ variant = svga->state.hw_draw.fs;
+ extra_count = svga_get_extra_fs_constants(svga, (float *) extras);
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ variant = svga->state.hw_draw.gs;
+ extra_count = svga_get_extra_gs_constants(svga, (float *) extras);
+ break;
+ default:
+ assert(!"Unexpected shader type");
+ /* Don't return an error code since we don't want to keep re-trying
+ * this function and getting stuck in an infinite loop.
+ */
+ return PIPE_OK;
+ }
+
+ assert(variant);
+
+ /* Compute extra constants size and offset in bytes */
+ extra_size = extra_count * 4 * sizeof(float);
+ extra_offset = 4 * sizeof(float) * variant->extra_const_start;
+
+ if (cbuf->buffer_size + extra_size == 0)
+ return PIPE_OK; /* nothing to do */
+
+ /* Typically, the cbuf->buffer here is a user-space buffer so mapping
+ * it is really cheap. If we ever get real HW buffers for constants
+ * we should avoid mapping and instead use a ResourceCopy command.
+ */
+ if (cbuf->buffer_size > 0) {
+ src_map = pipe_buffer_map_range(&svga->pipe, cbuf->buffer,
+ cbuf->buffer_offset, cbuf->buffer_size,
+ PIPE_TRANSFER_READ, &src_transfer);
+ assert(src_map);
+ if (!src_map) {
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+ }
+
+ /* The new/dest buffer's size must be large enough to hold the original,
+ * user-specified constants, plus the extra constants.
+ * The size of the original constant buffer _should_ agree with what the
+ * shader is expecting, but it might not (it's not enforced anywhere by
+ * gallium).
+ */
+ new_buf_size = MAX2(cbuf->buffer_size, extra_offset) + extra_size;
+
+ /* According to the DX10 spec, the constant buffer size must be
+ * in multiples of 16.
+ */
+ new_buf_size = align(new_buf_size, 16);
+
+ u_upload_alloc(svga->const0_upload, 0, new_buf_size, &offset,
+ &dst_buffer, &dst_map);
+ if (!dst_map) {
+ /* release the source mapping before bailing out */
+ if (src_map)
+ pipe_buffer_unmap(&svga->pipe, src_transfer);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
+ /* user constants go at the start of the new buffer */
+ if (src_map) {
+ memcpy(dst_map, src_map, cbuf->buffer_size);
+ pipe_buffer_unmap(&svga->pipe, src_transfer);
+ }
+
+ /* extra constants go at the byte offset derived from the variant's
+ * extra_const_start
+ */
+ if (extra_size) {
+ assert(extra_offset + extra_size <= new_buf_size);
+ memcpy((char *) dst_map + extra_offset, extras, extra_size);
+ }
+ u_upload_unmap(svga->const0_upload);
+
+ /* Issue the SetSingleConstantBuffer command */
+ dst_handle = svga_buffer_handle(svga, dst_buffer);
+ if (!dst_handle) {
+ pipe_resource_reference(&dst_buffer, NULL);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
+ assert(new_buf_size % 16 == 0);
+ ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
+ 0, /* index */
+ svga_shader_type(shader),
+ dst_handle,
+ offset,
+ new_buf_size);
+
+ if (ret != PIPE_OK) {
+ pipe_resource_reference(&dst_buffer, NULL);
+ return ret;
+ }
+
+ /* Save this const buffer until it's replaced in the future.
+ * Otherwise, all references to the buffer will go away after the
+ * command buffer is submitted, it'll get recycled and we will have
+ * incorrect constant buffer bindings.
+ */
+ pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], dst_buffer);
+
+ svga->state.hw_draw.default_constbuf_size[shader] = new_buf_size;
+
+ /* drop the local reference; hw_draw.constbuf[shader] keeps its own */
+ pipe_resource_reference(&dst_buffer, NULL);
return ret;
}
static enum pipe_error
+emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
+{
+ enum pipe_error ret;
+ unsigned dirty_constbufs;
+ unsigned enabled_constbufs;
+ /*
+ * Overview: emits all constant buffers of the given shader stage on
+ * VGPU10. Buffer 0 is rebuilt and bound by emit_constbuf_vgpu10();
+ * every other buffer whose bit is set in
+ * svga->state.dirty_constbufs[shader] is then bound (or unbound when
+ * no buffer is set) with SVGA3D_vgpu10_SetSingleConstantBuffer().
+ * Returns the first error encountered, PIPE_OK otherwise.
+ */
+ /* Emit 0th constant buffer (with extra constants) */
+ ret = emit_constbuf_vgpu10(svga, shader);
+ if (ret != PIPE_OK) {
+ return ret;
+ }
+
+ /* bit 0 is always recorded as enabled */
+ enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u;
+
+ /* Emit other constant buffers (UBOs) */
+ dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u;
+
+ while (dirty_constbufs) {
+ unsigned index = u_bit_scan(&dirty_constbufs);
+ unsigned offset = svga->curr.constbufs[shader][index].buffer_offset;
+ unsigned size = svga->curr.constbufs[shader][index].buffer_size;
+ struct svga_buffer *buffer =
+ svga_buffer(svga->curr.constbufs[shader][index].buffer);
+ struct svga_winsys_surface *handle;
+
+ if (buffer) {
+ /* NOTE(review): the result of svga_buffer_handle() is not checked here */
+ handle = svga_buffer_handle(svga, &buffer->b.b);
+ enabled_constbufs |= 1 << index;
+ }
+ else {
+ /* no buffer bound at this index: emit a NULL binding */
+ handle = NULL;
+ enabled_constbufs &= ~(1 << index);
+ assert(offset == 0);
+ assert(size == 0);
+ }
+
+ assert(size % 16 == 0);
+ ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
+ index,
+ svga_shader_type(shader),
+ handle,
+ offset,
+ size);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+
+ /* only on full success: record the new enabled mask and clear the dirty bits */
+ svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs;
+ svga->state.dirty_constbufs[shader] = 0;
+
+ return ret;
+}
+
+static enum pipe_error
emit_fs_consts(struct svga_context *svga, unsigned dirty)
{
- struct svga_screen *ss = svga_screen(svga->pipe.screen);
const struct svga_shader_variant *variant = svga->state.hw_draw.fs;
enum pipe_error ret = PIPE_OK;
/* SVGA_NEW_FS_CONST_BUFFER
*/
- ret = emit_consts( svga, PIPE_SHADER_FRAGMENT );
- if (ret != PIPE_OK)
- return ret;
-
- /* emit extra shader constants */
- {
- unsigned offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
- float extras[MAX_EXTRA_CONSTS][4];
- unsigned count, i;
-
- count = svga_get_extra_fs_constants(svga, (float *) extras);
-
- if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
- ret = emit_const_range(svga, PIPE_SHADER_FRAGMENT, offset, count,
- (const float (*) [4])extras);
- } else {
- for (i = 0; i < count; i++) {
- ret = emit_const(svga, PIPE_SHADER_FRAGMENT, offset + i, extras[i]);
- if (ret != PIPE_OK)
- return ret;
- }
- }
+ if (svga_have_vgpu10(svga)) {
+ ret = emit_consts_vgpu10(svga, PIPE_SHADER_FRAGMENT);
+ }
+ else {
+ ret = emit_consts_vgpu9(svga, PIPE_SHADER_FRAGMENT);
}
return ret;
static enum pipe_error
emit_vs_consts(struct svga_context *svga, unsigned dirty)
{
- struct svga_screen *ss = svga_screen(svga->pipe.screen);
const struct svga_shader_variant *variant = svga->state.hw_draw.vs;
enum pipe_error ret = PIPE_OK;
/* SVGA_NEW_VS_CONST_BUFFER
*/
- ret = emit_consts( svga, PIPE_SHADER_VERTEX );
- if (ret != PIPE_OK)
- return ret;
-
- /* emit extra shader constants */
- {
- unsigned offset = variant->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
- float extras[MAX_EXTRA_CONSTS][4];
- unsigned count, i;
-
- count = svga_get_extra_vs_constants(svga, (float *) extras);
- assert(count <= Elements(extras));
-
- if (ss->hw_version >= SVGA3D_HWVERSION_WS8_B1) {
- ret = emit_const_range(svga, PIPE_SHADER_VERTEX, offset, count,
- (const float (*) [4]) extras);
- } else {
- for (i = 0; i < count; i++) {
- ret = emit_const(svga, PIPE_SHADER_VERTEX, offset + i, extras[i]);
- if (ret != PIPE_OK)
- return ret;
- }
- }
+ if (svga_have_vgpu10(svga)) {
+ ret = emit_consts_vgpu10(svga, PIPE_SHADER_VERTEX);
+ }
+ else {
+ ret = emit_consts_vgpu9(svga, PIPE_SHADER_VERTEX);
}
return ret;
SVGA_NEW_VS_VARIANT),
emit_vs_consts
};
+
+
+/**
+ * Tracked-state callback that emits the geometry shader constants.
+ *
+ * Nothing is emitted when no GS variant is bound or when the context is
+ * not a VGPU10 context; in both cases PIPE_OK is returned.
+ *
+ * \param dirty  mask of the state flags that triggered this update
+ * \return PIPE_OK, or the error from emit_consts_vgpu10()
+ */
+static enum pipe_error
+emit_gs_consts(struct svga_context *svga, unsigned dirty)
+{
+   const struct svga_shader_variant *gs = svga->state.hw_draw.gs;
+
+   /* SVGA_NEW_GS_VARIANT
+    */
+   if (!gs)
+      return PIPE_OK;
+
+   /* SVGA_NEW_GS_CONST_BUFFER
+    */
+   if (!svga_have_vgpu10(svga))
+      return PIPE_OK;
+
+   /**
+    * If only the rasterizer state has changed and the current geometry
+    * shader does not emit wide points, then there is no reason to
+    * re-emit the GS constants, so skip it.
+    */
+   if (!gs->key.gs.wide_point && dirty == SVGA_NEW_RAST)
+      return PIPE_OK;
+
+   return emit_consts_vgpu10(svga, PIPE_SHADER_GEOMETRY);
+}
+
+
+struct svga_tracked_state svga_hw_gs_constants =
+{
+ "hw gs params", /* presumably the entry's debug name -- confirm against struct svga_tracked_state */
+ (SVGA_NEW_GS_CONST_BUFFER |
+ SVGA_NEW_RAST | /* emit_gs_consts() skips re-emission when this is the only dirty flag and the GS has no wide points */
+ SVGA_NEW_GS_VARIANT),
+ emit_gs_consts /* update callback for this entry */
+};
#include "util/u_inlines.h"
#include "pipe/p_defines.h"
#include "util/u_math.h"
+#include "util/u_format.h"
#include "svga_context.h"
#include "svga_state.h"
#include "svga_cmd.h"
#include "svga_debug.h"
#include "svga_screen.h"
+#include "svga_surface.h"
/*
#define MAX_RT_PER_BATCH 8
-/***********************************************************************
- * Hardware state update
- */
-
static enum pipe_error
-emit_framebuffer( struct svga_context *svga,
- unsigned dirty )
+emit_fb_vgpu9(struct svga_context *svga)
{
struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
- boolean reemit = svga->rebind.rendertargets;
+ boolean reemit = svga->rebind.flags.rendertargets;
unsigned i;
enum pipe_error ret;
+ assert(!svga_have_vgpu10(svga));
+
/*
* We need to reemit non-null surface bindings, even when they are not
* dirty, to ensure that the resources are paged in.
*/
for (i = 0; i < svgascreen->max_color_buffers; i++) {
- if (curr->cbufs[i] != hw->cbufs[i] ||
- (reemit && hw->cbufs[i])) {
+ if ((curr->cbufs[i] != hw->cbufs[i]) || (reemit && hw->cbufs[i])) {
if (svga->curr.nr_fbs++ > MAX_RT_PER_BATCH)
return PIPE_ERROR_OUT_OF_MEMORY;
}
}
- if (curr->zsbuf != hw->zsbuf ||
- (reemit && hw->zsbuf)) {
+ if ((curr->zsbuf != hw->zsbuf) || (reemit && hw->zsbuf)) {
ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, curr->zsbuf);
if (ret != PIPE_OK)
return ret;
if (curr->zsbuf &&
- curr->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
+ util_format_is_depth_and_stencil(curr->zsbuf->format)) {
ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL,
curr->zsbuf);
if (ret != PIPE_OK)
pipe_surface_reference(&hw->zsbuf, curr->zsbuf);
}
- svga->rebind.rendertargets = FALSE;
-
return PIPE_OK;
}
* Called at the beginning of every new command buffer to ensure that
* non-dirty rendertargets are properly paged-in.
*/
-enum pipe_error
-svga_reemit_framebuffer_bindings(struct svga_context *svga)
+static enum pipe_error
+svga_reemit_framebuffer_bindings_vgpu9(struct svga_context *svga)
{
struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
unsigned i;
enum pipe_error ret;
- assert(svga->rebind.rendertargets);
+ assert(!svga_have_vgpu10(svga));
for (i = 0; i < svgascreen->max_color_buffers; i++) {
if (hw->cbufs[i]) {
}
if (hw->zsbuf &&
- hw->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
+ util_format_is_depth_and_stencil(hw->zsbuf->format)) {
ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, hw->zsbuf);
if (ret != PIPE_OK) {
return ret;
}
}
- svga->rebind.rendertargets = FALSE;
+ return PIPE_OK;
+}
+
+
+/**
+ * Emit the current framebuffer bindings on VGPU10.
+ *
+ * A surface view is validated for every bound color buffer and for the
+ * depth/stencil buffer, the views are sent with
+ * SVGA3D_vgpu10_SetRenderTargets(), and the hardware framebuffer state
+ * (svga->state.hw_clear.framebuffer) is then updated to reference the
+ * current surfaces. This function is also used by
+ * svga_reemit_framebuffer_bindings() for the VGPU10 case.
+ *
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if a surface view
+ *         cannot be validated, otherwise the error from the command
+ */
+static enum pipe_error
+emit_fb_vgpu10(struct svga_context *svga)
+{
+ const struct svga_screen *ss = svga_screen(svga->pipe.screen);
+ struct pipe_surface *rtv[SVGA3D_MAX_RENDER_TARGETS];
+ struct pipe_surface *dsv;
+ struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
+ struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+ const unsigned num_color = MAX2(curr->nr_cbufs, hw->nr_cbufs);
+ unsigned i;
+ enum pipe_error ret;
+
+ assert(svga_have_vgpu10(svga));
+
+ /* Setup render targets array. Note that we loop over the max of the
+ * number of previously bound buffers and the new buffers to unbind
+ * any previously bound buffers when the new number of buffers is less
+ * than the old number of buffers.
+ */
+ for (i = 0; i < num_color; i++) {
+ if (curr->cbufs[i]) {
+ rtv[i] = svga_validate_surface_view(svga,
+ svga_surface(curr->cbufs[i]));
+ if (rtv[i] == NULL) {
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
+ assert(svga_surface(rtv[i])->view_id != SVGA3D_INVALID_ID);
+ }
+ else {
+ rtv[i] = NULL;
+ }
+ }
+
+ /* Setup depth stencil view */
+ if (curr->zsbuf) {
+ dsv = svga_validate_surface_view(svga, svga_surface(curr->zsbuf));
+ if (dsv == NULL) {
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+ }
+ else {
+ dsv = NULL;
+ }
+
+ ret = SVGA3D_vgpu10_SetRenderTargets(svga->swc, num_color, rtv, dsv);
+ if (ret != PIPE_OK)
+ return ret;
+
+ /* the command was emitted: bring the recorded hw color buffers in
+ * line with the current ones
+ */
+ for (i = 0; i < ss->max_color_buffers; i++) {
+ if (hw->cbufs[i] != curr->cbufs[i]) {
+ /* propagate the backed view surface before unbinding it */
+ if (hw->cbufs[i] && svga_surface(hw->cbufs[i])->backed) {
+ svga_propagate_surface(svga,
+ &svga_surface(hw->cbufs[i])->backed->base);
+ }
+ pipe_surface_reference(&hw->cbufs[i], curr->cbufs[i]);
+ }
+ }
+ hw->nr_cbufs = curr->nr_cbufs;
+
+ /* same bookkeeping for the depth/stencil buffer */
+ if (hw->zsbuf != curr->zsbuf) {
+ /* propagate the backed view surface before unbinding it */
+ if (hw->zsbuf && svga_surface(hw->zsbuf)->backed) {
+ svga_propagate_surface(svga, &svga_surface(hw->zsbuf)->backed->base);
+ }
+ pipe_surface_reference(&hw->zsbuf, curr->zsbuf);
+ }
+
+ return ret;
+}
+
+
+/**
+ * Tracked-state callback for the framebuffer: dispatches to the VGPU10 or
+ * the pre-VGPU10 emit function depending on the context.
+ * The 'dirty' mask is not used.
+ */
+static enum pipe_error
+emit_framebuffer(struct svga_context *svga, unsigned dirty)
+{
+   return svga_have_vgpu10(svga) ? emit_fb_vgpu10(svga)
+                                 : emit_fb_vgpu9(svga);
+}
+
+
+/*
+ * Re-emit the rendertarget bindings.
+ *
+ * Called at the beginning of every new command buffer to ensure that
+ * non-dirty rendertargets are properly paged-in.
+ *
+ * The caller must have the rendertargets rebind flag set (asserted).
+ * On VGPU10 this goes through emit_fb_vgpu10(); otherwise the
+ * pre-VGPU10 re-emit helper is used. The rebind flag is cleared in
+ * either case, whatever the result.
+ *
+ * Returns the error code of the emit function that was used.
+ */
+enum pipe_error
+svga_reemit_framebuffer_bindings(struct svga_context *svga)
+{
+   enum pipe_error status;
+
+   assert(svga->rebind.flags.rendertargets);
+
+   status = svga_have_vgpu10(svga)
+      ? emit_fb_vgpu10(svga)
+      : svga_reemit_framebuffer_bindings_vgpu9(svga);
+
+   svga->rebind.flags.rendertargets = FALSE;
+
+   return status;
+}
+
+
+/*
+ * Send a private allocation command to page in rendertargets resource.
+ *
+ * VGPU10 only (asserted). Does nothing unless the rendertargets rebind
+ * flag is set. For every color buffer and the depth/stencil buffer
+ * recorded in the hardware framebuffer state, the winsys context's
+ * resource_rebind() hook is called on the surface handle with
+ * SVGA_RELOC_WRITE. The rebind flag is cleared only when every rebind
+ * succeeded; on the first failure that error is returned and the flag
+ * stays set.
+ */
+enum pipe_error
+svga_rebind_framebuffer_bindings(struct svga_context *svga)
+{
+ const struct svga_screen *ss = svga_screen(svga->pipe.screen);
+ struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+ unsigned i;
+ enum pipe_error ret;
+
+ assert(svga_have_vgpu10(svga));
+
+ if (!svga->rebind.flags.rendertargets)
+ return PIPE_OK;
+
+ for (i = 0; i < ss->max_color_buffers; i++) {
+ if (hw->cbufs[i]) {
+ ret = svga->swc->resource_rebind(svga->swc,
+ svga_surface(hw->cbufs[i])->handle,
+ NULL,
+ SVGA_RELOC_WRITE);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ }
+
+ if (hw->zsbuf) {
+ ret = svga->swc->resource_rebind(svga->swc,
+ svga_surface(hw->zsbuf)->handle,
+ NULL,
+ SVGA_RELOC_WRITE);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+
+ svga->rebind.flags.rendertargets = 0;
return PIPE_OK;
}
float fy = flip * viewport->scale[1] * -1.0f + viewport->translate[1];
float fw = viewport->scale[0] * 2.0f;
float fh = flip * viewport->scale[1] * 2.0f;
+ boolean emit_vgpu10_viewport = FALSE;
memset( &prescale, 0, sizeof(prescale) );
prescale.translate[1] = 0;
prescale.translate[2] = 0;
prescale.translate[3] = 0;
- prescale.enabled = TRUE;
+
+ /* Enable prescale to adjust vertex positions to match
+ VGPU10 convention only if rasterization is enabled.
+ */
+ if (svga->curr.rast->templ.rasterizer_discard) {
+ degenerate = TRUE;
+ goto out;
+ } else {
+ prescale.enabled = TRUE;
+ }
if (fw < 0) {
prescale.scale[0] *= -1.0f;
}
if (fh < 0.0) {
- prescale.translate[1] = fh - 1.0f + fy * 2.0f;
+ if (svga_have_vgpu10(svga)) {
+ /* floating point viewport params below */
+ prescale.translate[1] = fh + fy * 2.0f;
+ }
+ else {
+ /* integer viewport params below */
+ prescale.translate[1] = fh - 1.0f + fy * 2.0f;
+ }
fh = -fh;
fy -= fh;
prescale.scale[1] = -1.0f;
float adjust_x = 0.0;
float adjust_y = 0.0;
- switch (svga->curr.reduced_prim) {
- case PIPE_PRIM_POINTS:
- adjust_x = -0.375;
- adjust_y = -0.75;
- break;
- case PIPE_PRIM_LINES:
- adjust_x = -0.5;
- adjust_y = 0;
- break;
- case PIPE_PRIM_TRIANGLES:
- adjust_x = -0.5;
- adjust_y = -0.5;
- break;
+ if (svga_have_vgpu10(svga)) {
+ /* Normally, we don't have to do any sub-pixel coordinate
+ * adjustments for VGPU10. But when we draw wide points with
+ * a GS we need an X adjustment in order to be conformant.
+ */
+ if (svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
+ svga->curr.rast->pointsize > 1.0f) {
+ adjust_x = 0.5;
+ }
+ }
+ else {
+ switch (svga->curr.reduced_prim) {
+ case PIPE_PRIM_POINTS:
+ adjust_x = -0.375;
+ adjust_y = -0.75;
+ break;
+ case PIPE_PRIM_LINES:
+ adjust_x = -0.5;
+ adjust_y = 0;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ adjust_x = -0.5;
+ adjust_y = -0.5;
+ break;
+ }
}
if (invertY)
prescale.scale[2] = -prescale.scale[2];
}
+ /* If zmin is less than 0, clamp zmin to 0 and adjust the prescale.
+ * zmin can be set to -1 when viewport->scale[2] is set to 1 and
+ * viewport->translate[2] is set to 0 in the blit code.
+ */
+ if (range_min < 0.0f) {
+ range_min = -0.5f * viewport->scale[2] + 0.5f + viewport->translate[2];
+ range_max = 0.5f * viewport->scale[2] + 0.5f + viewport->translate[2];
+ prescale.scale[2] *= 2.0f;
+ prescale.translate[2] -= 0.5f;
+ }
+
if (prescale.enabled) {
float H[2];
float J[2];
prescale.enabled = FALSE;
}
- if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
- ret = SVGA3D_SetViewport(svga->swc, &rect);
- if(ret != PIPE_OK)
- return ret;
+ if (!svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) {
+ if (svga_have_vgpu10(svga)) {
+ emit_vgpu10_viewport = TRUE;
+ }
+ else {
+ ret = SVGA3D_SetViewport(svga->swc, &rect);
+ if (ret != PIPE_OK)
+ return ret;
- memcpy(&svga->state.hw_clear.viewport, &rect, sizeof(rect));
- assert(sizeof(rect) == sizeof(svga->state.hw_clear.viewport));
+ svga->state.hw_clear.viewport = rect;
+ }
}
if (svga->state.hw_clear.depthrange.zmin != range_min ||
- svga->state.hw_clear.depthrange.zmax != range_max) {
- ret = SVGA3D_SetZRange(svga->swc, range_min, range_max );
- if(ret != PIPE_OK)
+ svga->state.hw_clear.depthrange.zmax != range_max)
+ {
+ if (svga_have_vgpu10(svga)) {
+ emit_vgpu10_viewport = TRUE;
+ }
+ else {
+ ret = SVGA3D_SetZRange(svga->swc, range_min, range_max );
+ if (ret != PIPE_OK)
+ return ret;
+
+ svga->state.hw_clear.depthrange.zmin = range_min;
+ svga->state.hw_clear.depthrange.zmax = range_max;
+ }
+ }
+
+ if (emit_vgpu10_viewport) {
+ SVGA3dViewport vp;
+ vp.x = (float) rect.x;
+ vp.y = (float) rect.y;
+ vp.width = (float) rect.w;
+ vp.height = (float) rect.h;
+ vp.minDepth = range_min;
+ vp.maxDepth = range_max;
+ ret = SVGA3D_vgpu10_SetViewports(svga->swc, 1, &vp);
+ if (ret != PIPE_OK)
return ret;
+ svga->state.hw_clear.viewport = rect;
+
svga->state.hw_clear.depthrange.zmin = range_min;
svga->state.hw_clear.depthrange.zmax = range_max;
}
unsigned dirty )
{
const struct pipe_scissor_state *scissor = &svga->curr.scissor;
- SVGA3dRect rect;
- rect.x = scissor->minx;
- rect.y = scissor->miny;
- rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */
- rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */
+ if (svga_have_vgpu10(svga)) {
+ SVGASignedRect rect;
+
+ rect.left = scissor->minx;
+ rect.top = scissor->miny;
+ rect.right = scissor->maxx;
+ rect.bottom = scissor->maxy;
+
+ return SVGA3D_vgpu10_SetScissorRects(svga->swc, 1, &rect);
+ }
+ else {
+ SVGA3dRect rect;
- return SVGA3D_SetScissorRect(svga->swc, &rect);
+ rect.x = scissor->minx;
+ rect.y = scissor->miny;
+ rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */
+ rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */
+
+ return SVGA3D_SetScissorRect(svga->swc, &rect);
+ }
}
plane[2] = 2.0f * c;
plane[3] = d - c;
- ret = SVGA3D_SetClipPlane(svga->swc, i, plane);
- if(ret != PIPE_OK)
- return ret;
+ if (svga_have_vgpu10(svga)) {
+ //debug_printf("XXX emit DX10 clip plane\n");
+ ret = PIPE_OK;
+ }
+ else {
+ ret = SVGA3D_SetClipPlane(svga->swc, i, plane);
+ if (ret != PIPE_OK)
+ return ret;
+ }
}
return PIPE_OK;
#include "svga_shader.h"
#include "svga_resource_texture.h"
#include "svga_tgsi.h"
+#include "svga_format.h"
#include "svga_hw_reg.h"
-static inline int
-compare_fs_keys(const struct svga_fs_compile_key *a,
- const struct svga_fs_compile_key *b)
-{
- unsigned keysize_a = svga_fs_key_size( a );
- unsigned keysize_b = svga_fs_key_size( b );
-
- if (keysize_a != keysize_b) {
- return (int)(keysize_a - keysize_b);
- }
- return memcmp( a, b, keysize_a );
-}
-
-
-/** Search for a fragment shader variant */
-static struct svga_shader_variant *
-search_fs_key(const struct svga_fragment_shader *fs,
- const struct svga_fs_compile_key *key)
-{
- struct svga_shader_variant *variant = fs->base.variants;
-
- assert(key);
-
- for ( ; variant; variant = variant->next) {
- if (compare_fs_keys( key, &variant->key.fkey ) == 0)
- return variant;
- }
-
- return NULL;
-}
-
-
/**
* If we fail to compile a fragment shader (because it uses too many
* registers, for example) we'll use a dummy/fallback shader that
}
+/**
+ * Translate the fragment shader's TGSI tokens into a shader variant for
+ * the given compile key.  The VGPU10 translator is used when
+ * svga_have_vgpu10() is true for this context, the VGPU9 translator
+ * otherwise.  The translator's result is returned unchanged.
+ */
+static struct svga_shader_variant *
+translate_fragment_program(struct svga_context *svga,
+                           const struct svga_fragment_shader *fs,
+                           const struct svga_compile_key *key)
+{
+   if (!svga_have_vgpu10(svga))
+      return svga_tgsi_vgpu9_translate(&fs->base, key, PIPE_SHADER_FRAGMENT);
+
+   return svga_tgsi_vgpu10_translate(svga, &fs->base, key,
+                                     PIPE_SHADER_FRAGMENT);
+}
+
+
/**
* Replace the given shader's instruction with a simple constant-color
* shader. We use this when normal shader translation fails.
*/
static struct svga_shader_variant *
-get_compiled_dummy_shader(struct svga_fragment_shader *fs,
- const struct svga_fs_compile_key *key)
+get_compiled_dummy_shader(struct svga_context *svga,
+ struct svga_fragment_shader *fs,
+ const struct svga_compile_key *key)
{
const struct tgsi_token *dummy = get_dummy_fragment_shader();
struct svga_shader_variant *variant;
FREE((void *) fs->base.tokens);
fs->base.tokens = dummy;
- variant = svga_translate_fragment_program(fs, key);
+ variant = translate_fragment_program(svga, fs, key);
return variant;
}
static enum pipe_error
compile_fs(struct svga_context *svga,
struct svga_fragment_shader *fs,
- const struct svga_fs_compile_key *key,
+ const struct svga_compile_key *key,
struct svga_shader_variant **out_variant)
{
struct svga_shader_variant *variant;
enum pipe_error ret = PIPE_ERROR;
- variant = svga_translate_fragment_program( fs, key );
+ variant = translate_fragment_program(svga, fs, key);
if (variant == NULL) {
debug_printf("Failed to compile fragment shader,"
" using dummy shader instead.\n");
- variant = get_compiled_dummy_shader(fs, key);
- if (!variant) {
- ret = PIPE_ERROR;
- goto fail;
- }
+ variant = get_compiled_dummy_shader(svga, fs, key);
}
-
- if (svga_shader_too_large(svga, variant)) {
+ else if (svga_shader_too_large(svga, variant)) {
/* too big, use dummy shader */
- debug_printf("Shader too large (%lu bytes),"
+ debug_printf("Shader too large (%u bytes),"
" using dummy shader instead.\n",
- (unsigned long ) variant->nr_tokens * sizeof(variant->tokens[0]));
- variant = get_compiled_dummy_shader(fs, key);
- if (!variant) {
- ret = PIPE_ERROR;
- goto fail;
- }
+ (unsigned) (variant->nr_tokens
+ * sizeof(variant->tokens[0])));
+ /* Free the too-large variant */
+ svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+ /* Use simple pass-through shader instead */
+ variant = get_compiled_dummy_shader(svga, fs, key);
+ }
+
+ if (!variant) {
+ return PIPE_ERROR;
}
ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_PS, variant);
- if (ret != PIPE_OK)
- goto fail;
+ if (ret != PIPE_OK) {
+ svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+ return ret;
+ }
*out_variant = variant;
- /* insert variants at head of linked list */
+ /* insert variant at head of linked list */
variant->next = fs->base.variants;
fs->base.variants = variant;
return PIPE_OK;
-
-fail:
- if (variant) {
- svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
- }
- return ret;
}
static enum pipe_error
make_fs_key(const struct svga_context *svga,
struct svga_fragment_shader *fs,
- struct svga_fs_compile_key *key)
+ struct svga_compile_key *key)
{
+ const unsigned shader = PIPE_SHADER_FRAGMENT;
unsigned i;
- int idx = 0;
memset(key, 0, sizeof *key);
+ memcpy(key->generic_remap_table, fs->generic_remap_table,
+ sizeof(fs->generic_remap_table));
+
+ /* SVGA_NEW_GS, SVGA_NEW_VS
+ */
+ if (svga->curr.gs) {
+ key->fs.gs_generic_outputs = svga->curr.gs->generic_outputs;
+ } else {
+ key->fs.vs_generic_outputs = svga->curr.vs->generic_outputs;
+ }
+
/* Only need fragment shader fixup for twoside lighting if doing
* hwtnl. Otherwise the draw module does the whole job for us.
*
* SVGA_NEW_SWTNL
*/
if (!svga->state.sw.need_swtnl) {
- /* SVGA_NEW_RAST
+ /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE
*/
- key->light_twoside = svga->curr.rast->templ.light_twoside;
- key->front_ccw = svga->curr.rast->templ.front_ccw;
+ key->fs.light_twoside = svga->curr.rast->templ.light_twoside;
+ key->fs.front_ccw = svga->curr.rast->templ.front_ccw;
+ key->fs.pstipple = (svga->curr.rast->templ.poly_stipple_enable &&
+ svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES);
+ key->fs.aa_point = (svga->curr.rast->templ.point_smooth &&
+ svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
+ (svga->curr.rast->pointsize > 1.0 ||
+ svga->curr.vs->base.info.writes_psize));
+ if (key->fs.aa_point) {
+ assert(svga->curr.gs != NULL);
+ assert(svga->curr.gs->aa_point_coord_index != -1);
+ key->fs.aa_point_coord_index = svga->curr.gs->aa_point_coord_index;
+ }
}
/* The blend workaround for simulating logicop xor behaviour
* SVGA_NEW_BLEND
*/
if (svga->curr.blend->need_white_fragments) {
- key->white_fragments = 1;
+ key->fs.white_fragments = 1;
}
#ifdef DEBUG
*/
{
static boolean warned = FALSE;
- unsigned i, n = MAX2(svga->curr.num_sampler_views,
- svga->curr.num_samplers);
+ unsigned i, n = MAX2(svga->curr.num_sampler_views[shader],
+ svga->curr.num_samplers[shader]);
/* Only warn once to prevent too much debug output */
if (!warned) {
- if (svga->curr.num_sampler_views != svga->curr.num_samplers) {
+ if (svga->curr.num_sampler_views[shader] !=
+ svga->curr.num_samplers[shader]) {
debug_printf("svga: mismatched number of sampler views (%u) "
"vs. samplers (%u)\n",
- svga->curr.num_sampler_views,
- svga->curr.num_samplers);
+ svga->curr.num_sampler_views[shader],
+ svga->curr.num_samplers[shader]);
}
for (i = 0; i < n; i++) {
- if ((svga->curr.sampler_views[i] == NULL) !=
- (svga->curr.sampler[i] == NULL))
+ if ((svga->curr.sampler_views[shader][i] == NULL) !=
+ (svga->curr.sampler[shader][i] == NULL))
debug_printf("sampler_view[%u] = %p but sampler[%u] = %p\n",
- i, svga->curr.sampler_views[i],
- i, svga->curr.sampler[i]);
+ i, svga->curr.sampler_views[shader][i],
+ i, svga->curr.sampler[shader][i]);
}
warned = TRUE;
}
*
* SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
*/
- for (i = 0; i < svga->curr.num_sampler_views; i++) {
- if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) {
- assert(svga->curr.sampler_views[i]->texture);
- key->tex[i].texture_target = svga->curr.sampler_views[i]->texture->target;
- if (!svga->curr.sampler[i]->normalized_coords) {
- key->tex[i].width_height_idx = idx++;
- key->tex[i].unnormalized = TRUE;
- ++key->num_unnormalized_coords;
- }
-
- key->tex[i].swizzle_r = svga->curr.sampler_views[i]->swizzle_r;
- key->tex[i].swizzle_g = svga->curr.sampler_views[i]->swizzle_g;
- key->tex[i].swizzle_b = svga->curr.sampler_views[i]->swizzle_b;
- key->tex[i].swizzle_a = svga->curr.sampler_views[i]->swizzle_a;
- }
- }
- key->num_textures = svga->curr.num_sampler_views;
-
- idx = 0;
- for (i = 0; i < svga->curr.num_samplers; ++i) {
- if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) {
- struct pipe_resource *tex = svga->curr.sampler_views[i]->texture;
- struct svga_texture *stex = svga_texture(tex);
- SVGA3dSurfaceFormat format = stex->key.format;
-
- if (format == SVGA3D_Z_D16 ||
- format == SVGA3D_Z_D24X8 ||
- format == SVGA3D_Z_D24S8) {
- /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8,
- * or SVGA3D_Z_D24S8 surface, we'll automatically get
- * shadow comparison. But we only get LEQUAL mode.
- * Set TEX_COMPARE_NONE here so we don't emit the extra FS
- * code for shadow comparison.
- */
- key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE;
- key->tex[i].compare_func = PIPE_FUNC_NEVER;
- /* These depth formats _only_ support comparison mode and
- * not ordinary sampling so warn if the later is expected.
- */
- if (svga->curr.sampler[i]->compare_mode !=
- PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- debug_warn_once("Unsupported shadow compare mode");
- }
- /* The only supported comparison mode is LEQUAL */
- if (svga->curr.sampler[i]->compare_func != PIPE_FUNC_LEQUAL) {
- debug_warn_once("Unsupported shadow compare function");
+ svga_init_shader_key_common(svga, shader, key);
+
+ for (i = 0; i < svga->curr.num_samplers[shader]; ++i) {
+ struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
+ const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
+ if (view) {
+ struct pipe_resource *tex = view->texture;
+ if (tex->target != PIPE_BUFFER) {
+ struct svga_texture *stex = svga_texture(tex);
+ SVGA3dSurfaceFormat format = stex->key.format;
+
+ if (!svga_have_vgpu10(svga) &&
+ (format == SVGA3D_Z_D16 ||
+ format == SVGA3D_Z_D24X8 ||
+ format == SVGA3D_Z_D24S8)) {
+ /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8,
+ * or SVGA3D_Z_D24S8 surface, we'll automatically get
+ * shadow comparison. But we only get LEQUAL mode.
+ * Set TEX_COMPARE_NONE here so we don't emit the extra FS
+ * code for shadow comparison.
+ */
+ key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE;
+ key->tex[i].compare_func = PIPE_FUNC_NEVER;
+ /* These depth formats _only_ support comparison mode and
+ * not ordinary sampling so warn if the latter is expected.
+ */
+ if (sampler->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ debug_warn_once("Unsupported shadow compare mode");
+ }
+ /* The shader translation code can emit code to
+ * handle ALWAYS and NEVER compare functions
+ */
+ else if (sampler->compare_func == PIPE_FUNC_ALWAYS ||
+ sampler->compare_func == PIPE_FUNC_NEVER) {
+ key->tex[i].compare_mode = sampler->compare_mode;
+ key->tex[i].compare_func = sampler->compare_func;
+ }
+ else if (sampler->compare_func != PIPE_FUNC_LEQUAL) {
+ debug_warn_once("Unsupported shadow compare function");
+ }
+ }
+ else {
+ /* For other texture formats, just use the compare func/mode
+ * as-is. Should be no-ops for color textures. For depth
+ * textures, we do not get automatic depth compare. We have
+ * to do it ourselves in the shader. And we don't get PCF.
+ */
+ key->tex[i].compare_mode = sampler->compare_mode;
+ key->tex[i].compare_func = sampler->compare_func;
}
- }
- else {
- /* For other texture formats, just use the compare func/mode
- * as-is. Should be no-ops for color textures. For depth
- * textures, we do not get automatic depth compare. We have
- * to do it ourselves in the shader. And we don't get PCF.
- */
- key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode;
- key->tex[i].compare_func = svga->curr.sampler[i]->compare_func;
}
}
}
/* sprite coord gen state */
- for (i = 0; i < svga->curr.num_samplers; ++i) {
+ for (i = 0; i < svga->curr.num_samplers[shader]; ++i) {
key->tex[i].sprite_texgen =
svga->curr.rast->templ.sprite_coord_enable & (1 << i);
}
key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode
== PIPE_SPRITE_COORD_LOWER_LEFT);
+ key->fs.flatshade = svga->curr.rast->templ.flatshade;
+
+ /* SVGA_NEW_DEPTH_STENCIL_ALPHA */
+ if (svga_have_vgpu10(svga)) {
+ /* Alpha testing is not supported in integer-valued render targets. */
+ if (svga_has_any_integer_cbufs(svga)) {
+ key->fs.alpha_func = SVGA3D_CMP_ALWAYS;
+ key->fs.alpha_ref = 0;
+ }
+ else {
+ key->fs.alpha_func = svga->curr.depth->alphafunc;
+ key->fs.alpha_ref = svga->curr.depth->alpharef;
+ }
+ }
+
/* SVGA_NEW_FRAME_BUFFER */
if (fs->base.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
/* Replicate color0 output to N colorbuffers */
- key->write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
+ key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
}
return PIPE_OK;
{
enum pipe_error ret;
- assert(svga->rebind.fs);
+ assert(svga->rebind.flags.fs);
assert(svga_have_gb_objects(svga));
if (!svga->state.hw_draw.fs)
return PIPE_OK;
- ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
- svga->state.hw_draw.fs->gb_shader);
+ if (!svga_need_to_rebind_resources(svga)) {
+ ret = svga->swc->resource_rebind(svga->swc, NULL,
+ svga->state.hw_draw.fs->gb_shader,
+ SVGA_RELOC_READ);
+ goto out;
+ }
+
+ if (svga_have_vgpu10(svga))
+ ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS,
+ svga->state.hw_draw.fs->gb_shader,
+ svga->state.hw_draw.fs->id);
+ else
+ ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
+ svga->state.hw_draw.fs->gb_shader);
+
+ out:
if (ret != PIPE_OK)
return ret;
- svga->rebind.fs = FALSE;
+ svga->rebind.flags.fs = FALSE;
return PIPE_OK;
}
struct svga_shader_variant *variant = NULL;
enum pipe_error ret = PIPE_OK;
struct svga_fragment_shader *fs = svga->curr.fs;
- struct svga_fs_compile_key key;
+ struct svga_compile_key key;
/* SVGA_NEW_BLEND
* SVGA_NEW_TEXTURE_BINDING
* SVGA_NEW_NEED_SWTNL
* SVGA_NEW_SAMPLER
* SVGA_NEW_FRAME_BUFFER
+ * SVGA_NEW_DEPTH_STENCIL_ALPHA
+ * SVGA_NEW_VS
*/
- ret = make_fs_key( svga, fs, &key );
+ ret = make_fs_key(svga, fs, &key);
if (ret != PIPE_OK)
return ret;
- variant = search_fs_key( fs, &key );
+ variant = svga_search_shader_key(&fs->base, &key);
if (!variant) {
- ret = compile_fs( svga, fs, &key, &variant );
+ ret = compile_fs(svga, fs, &key, &variant);
if (ret != PIPE_OK)
return ret;
}
assert(variant);
if (variant != svga->state.hw_draw.fs) {
- if (svga_have_gb_objects(svga)) {
- ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
- variant->gb_shader);
- if (ret != PIPE_OK)
- return ret;
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, variant);
+ if (ret != PIPE_OK)
+ return ret;
- svga->rebind.fs = FALSE;
- }
- else {
- ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, variant->id);
- if (ret != PIPE_OK)
- return ret;
- }
+ svga->rebind.flags.fs = FALSE;
svga->dirty |= SVGA_NEW_FS_VARIANT;
- svga->state.hw_draw.fs = variant;
+ svga->state.hw_draw.fs = variant;
}
return PIPE_OK;
{
"fragment shader (hwtnl)",
(SVGA_NEW_FS |
+ SVGA_NEW_GS |
+ SVGA_NEW_VS |
SVGA_NEW_TEXTURE_BINDING |
SVGA_NEW_NEED_SWTNL |
SVGA_NEW_RAST |
+ SVGA_NEW_REDUCED_PRIMITIVE |
SVGA_NEW_SAMPLER |
SVGA_NEW_FRAME_BUFFER |
+ SVGA_NEW_DEPTH_STENCIL_ALPHA |
SVGA_NEW_BLEND),
emit_hw_fs
};
--- /dev/null
+/**********************************************************
+ * Copyright 2014 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "translate/translate.h"
+#include "tgsi/tgsi_ureg.h"
+
+#include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_shader.h"
+#include "svga_tgsi.h"
+#include "svga_streamout.h"
+#include "svga_format.h"
+
+/**
+ * If we fail to compile a geometry shader we'll use a dummy/fallback shader
+ * that simply emits the incoming vertices.
+ *
+ * The fallback is not implemented yet: this function always returns NULL,
+ * so a caller asking for the dummy shader gets no tokens back.
+ */
+static const struct tgsi_token *
+get_dummy_geometry_shader(void)
+{
+ //XXX: placeholder -- no pass-through geometry shader is built here yet
+ return NULL;
+}
+
+
+/**
+ * Translate the geometry shader's TGSI tokens into a shader variant for
+ * the given compile key.  The VGPU10 translator is used when
+ * svga_have_vgpu10() is true for this context, the VGPU9 translator
+ * otherwise.  The translator's result is returned unchanged.
+ */
+static struct svga_shader_variant *
+translate_geometry_program(struct svga_context *svga,
+                           const struct svga_geometry_shader *gs,
+                           const struct svga_compile_key *key)
+{
+   if (!svga_have_vgpu10(svga))
+      return svga_tgsi_vgpu9_translate(&gs->base, key, PIPE_SHADER_GEOMETRY);
+
+   return svga_tgsi_vgpu10_translate(svga, &gs->base, key,
+                                     PIPE_SHADER_GEOMETRY);
+}
+
+
+/**
+ * Translate the geometry shader's TGSI tokens into an svga shader variant
+ * for the given key and define that variant with svga_define_shader().
+ *
+ * If the first translation fails, the shader's tokens are freed and
+ * replaced by the dummy geometry shader and translation is tried once
+ * more.
+ *
+ * On success the new variant is stored in *out_variant and PIPE_OK is
+ * returned; *out_variant is not written on failure.  Failure codes are
+ * PIPE_ERROR_OUT_OF_MEMORY when no dummy shader is available, PIPE_ERROR
+ * when the dummy shader cannot be translated either, or whatever
+ * svga_define_shader() reported (the variant is destroyed in that case).
+ */
+static enum pipe_error
+compile_gs(struct svga_context *svga,
+           struct svga_geometry_shader *gs,
+           const struct svga_compile_key *key,
+           struct svga_shader_variant **out_variant)
+{
+   struct svga_shader_variant *shader;
+   enum pipe_error status;
+
+   shader = translate_geometry_program(svga, gs, key);
+   if (!shader) {
+      /* some problem during translation, try the dummy shader */
+      const struct tgsi_token *fallback = get_dummy_geometry_shader();
+
+      if (fallback == NULL)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+
+      debug_printf("Failed to compile geometry shader, using dummy shader instead.\n");
+      FREE((void *) gs->base.tokens);
+      gs->base.tokens = fallback;
+
+      shader = translate_geometry_program(svga, gs, key);
+      if (!shader)
+         return PIPE_ERROR;
+   }
+
+   status = svga_define_shader(svga, SVGA3D_SHADERTYPE_GS, shader);
+   if (status == PIPE_OK) {
+      *out_variant = shader;
+      return PIPE_OK;
+   }
+
+   /* The device rejected the shader: release the translated variant. */
+   svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_GS, shader);
+   return status;
+}
+
+
+/**
+ * Fill in the compile key for the current geometry shader from the
+ * current context state.
+ *
+ * The key is zeroed first, then populated from svga->curr.gs, the current
+ * vertex shader's generic outputs, the prescale enable flag and the
+ * current rasterizer template.  Dereferences svga->curr.gs, svga->curr.vs
+ * and svga->curr.rast without checking them for NULL.
+ */
+static void
+make_gs_key(struct svga_context *svga, struct svga_compile_key *key)
+{
+ struct svga_geometry_shader *gs = svga->curr.gs;
+
+ /* Start from an all-zero key; any field not assigned below stays 0. */
+ memset(key, 0, sizeof *key);
+
+ /*
+ * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
+ */
+ svga_init_shader_key_common(svga, PIPE_SHADER_GEOMETRY, key);
+
+ memcpy(key->generic_remap_table, gs->generic_remap_table,
+ sizeof(gs->generic_remap_table));
+
+ /* NOTE(review): assumes a vertex shader is always bound here -- confirm. */
+ key->gs.vs_generic_outputs = svga->curr.vs->generic_outputs;
+
+ key->gs.need_prescale = svga->state.hw_clear.prescale.enabled;
+
+ key->gs.writes_psize = gs->base.info.writes_psize;
+ key->gs.wide_point = gs->wide_point;
+ key->sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable;
+ key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode
+ == PIPE_SPRITE_COORD_LOWER_LEFT);
+
+ /* SVGA_NEW_RAST */
+ key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable;
+}
+
+
+/**
+ * svga_reemit_gs_bindings - Reemit the geometry shader bindings
+ *
+ * Asserts that the GS rebind flag is set and that the device has both
+ * guest-backed objects and VGPU10.  If resources do not need to be
+ * rebound, only the bound shader's guest-backed object is rebound for
+ * reading; otherwise the geometry shader is set again (with a NULL
+ * object and SVGA3D_INVALID_ID when no variant is currently bound).
+ *
+ * Returns PIPE_OK and clears the GS rebind flag on success; on failure
+ * the error is returned and the flag is left set.
+ */
+enum pipe_error
+svga_reemit_gs_bindings(struct svga_context *svga)
+{
+   struct svga_winsys_gb_shader *gbshader = NULL;
+   SVGA3dShaderId shaderId = SVGA3D_INVALID_ID;
+   enum pipe_error ret;
+
+   assert(svga->rebind.flags.gs);
+   assert(svga_have_gb_objects(svga));
+
+   /* Geometry Shader is only supported in vgpu10 */
+   assert(svga_have_vgpu10(svga));
+
+   if (svga->state.hw_draw.gs != NULL) {
+      gbshader = svga->state.hw_draw.gs->gb_shader;
+      shaderId = svga->state.hw_draw.gs->id;
+   }
+
+   if (svga_need_to_rebind_resources(svga)) {
+      ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_GS,
+                                    gbshader, shaderId);
+   }
+   else {
+      ret = svga->swc->resource_rebind(svga->swc, NULL, gbshader,
+                                       SVGA_RELOC_READ);
+   }
+
+   if (ret == PIPE_OK)
+      svga->rebind.flags.gs = FALSE;
+
+   return ret;
+}
+
+/**
+ * Bring the hardware geometry shader binding in line with current state.
+ *
+ * With no current geometry shader, a previously bound variant (if any)
+ * is unbound.  Otherwise the stream output state is selected, a variant
+ * matching the current compile key is looked up -- and compiled and
+ * inserted at the head of the shader's variant list if none exists --
+ * and that variant is bound if it differs from svga->state.hw_draw.gs.
+ *
+ * svga->state.hw_draw.gs is updated only after the corresponding bind or
+ * unbind command succeeded, so a failed update can be attempted again.
+ *
+ * Returns PIPE_OK on success, otherwise the error from svga_set_shader()
+ * or compile_gs().  The dirty argument is not used.
+ */
+static enum pipe_error
+emit_hw_gs(struct svga_context *svga, unsigned dirty)
+{
+   struct svga_shader_variant *variant;
+   struct svga_geometry_shader *gs = svga->curr.gs;
+   enum pipe_error ret = PIPE_OK;
+   struct svga_compile_key key;
+
+   /* If there's a user-defined GS, we should have a pointer to a derived
+    * GS.  This should have been resolved in update_tgsi_transform().
+    */
+   assert(!svga->curr.user_gs || svga->curr.gs);
+
+   if (gs == NULL) {
+      if (svga->state.hw_draw.gs != NULL) {
+         /* The previous geometry shader is no longer active, so it
+          * needs to be unbound.
+          */
+         ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL);
+         if (ret != PIPE_OK)
+            return ret;
+
+         /* Forget the old binding only once the unbind went through;
+          * clearing it on failure would make a later attempt skip the
+          * unbind and leave the stale shader bound.
+          */
+         svga->state.hw_draw.gs = NULL;
+      }
+      return PIPE_OK;
+   }
+
+   /* If there is stream output info for this geometry shader, then use
+    * it instead of the one from the vertex shader.
+    */
+   if (svga_have_gs_streamout(svga)) {
+      svga_set_stream_output(svga, gs->base.stream_output);
+   }
+   else if (!svga_have_vs_streamout(svga)) {
+      /* turn off stream out */
+      svga_set_stream_output(svga, NULL);
+   }
+
+   /* SVGA_NEW_NEED_SWTNL */
+   if (svga->state.sw.need_swtnl && !svga_have_vgpu10(svga)) {
+      /* No geometry shader is needed */
+      variant = NULL;
+   }
+   else {
+      make_gs_key(svga, &key);
+
+      /* See if we already have a GS variant that matches the key */
+      variant = svga_search_shader_key(&gs->base, &key);
+
+      if (!variant) {
+         ret = compile_gs(svga, gs, &key, &variant);
+         if (ret != PIPE_OK)
+            return ret;
+
+         /* insert the new variant at head of linked list */
+         assert(variant);
+         variant->next = gs->base.variants;
+         gs->base.variants = variant;
+      }
+   }
+
+   if (variant != svga->state.hw_draw.gs) {
+      /* Bind the new variant */
+      ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, variant);
+      if (ret != PIPE_OK)
+         return ret;
+
+      svga->rebind.flags.gs = FALSE;
+      svga->dirty |= SVGA_NEW_GS_VARIANT;
+      svga->state.hw_draw.gs = variant;
+   }
+
+   return PIPE_OK;
+}
+
+/*
+ * Tracked-state entry for the hardware geometry shader: a label, a mask
+ * of SVGA_NEW_* flags and the emit_hw_gs() callback.
+ * NOTE(review): presumably emit_hw_gs() is run whenever one of the listed
+ * flags is dirty -- confirm against the svga_tracked_state users.
+ */
+struct svga_tracked_state svga_hw_gs =
+{
+ "geometry shader (hwtnl)",
+ (SVGA_NEW_VS |
+ SVGA_NEW_FS |
+ SVGA_NEW_GS |
+ SVGA_NEW_TEXTURE_BINDING |
+ SVGA_NEW_SAMPLER |
+ SVGA_NEW_RAST |
+ SVGA_NEW_NEED_SWTNL),
+ emit_hw_gs
+};
#include "util/u_inlines.h"
#include "pipe/p_state.h"
#include "svga_context.h"
+#include "svga_shader.h"
#include "svga_state.h"
#include "svga_debug.h"
#include "svga_hw_reg.h"
unsigned generic_inputs =
svga->curr.fs ? svga->curr.fs->generic_inputs : 0;
- if (sprite_coord_gen &&
+ if (!svga_have_vgpu10(svga) && sprite_coord_gen &&
(generic_inputs & ~sprite_coord_gen)) {
/* The fragment shader is using some generic inputs that are
* not being replaced by auto-generated point/sprite coords (and
*
**********************************************************/
+#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
-#include "pipe/p_defines.h"
#include "util/u_math.h"
+#include "util/u_memory.h"
#include "svga_context.h"
#include "svga_screen.h"
#include "svga_state.h"
#include "svga_cmd.h"
+#include "svga_format.h"
+#include "svga_shader.h"
struct rs_queue {
* the "to" state.
*/
static enum pipe_error
-emit_rss(struct svga_context *svga, unsigned dirty)
+emit_rss_vgpu9(struct svga_context *svga, unsigned dirty)
{
struct svga_screen *screen = svga_screen(svga->pipe.screen);
struct rs_queue queue;
queue.rs_count = 0;
- if (dirty & SVGA_NEW_BLEND) {
+ if (dirty & (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR)) {
const struct svga_blend_state *curr = svga->curr.blend;
EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail );
EMIT_RS( svga, color, BLENDCOLOR, fail );
}
- if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) {
+ if (dirty & (SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_RAST)) {
const struct svga_depth_stencil_state *curr = svga->curr.depth;
const struct svga_rasterizer_state *rast = svga->curr.rast;
return PIPE_ERROR_OUT_OF_MEMORY;
}
+/** Returns a non-culling rasterizer state object to be used with
+ * point sprite.
+ *
+ * The object is created on first use and cached in
+ * svga->rasterizer_no_cull[], indexed by the current rasterizer's
+ * point_smooth setting, so at most one object per setting is created
+ * here.  The template is zeroed and then given flatshade, front_ccw,
+ * the current point_smooth value and the current half_pixel_center,
+ * bottom_edge_rule and clip_halfz values; every other field stays 0.
+ *
+ * NOTE(review): the result of create_rasterizer_state() is cached and
+ * returned without a NULL check -- confirm callers can cope with NULL.
+ */
+static struct svga_rasterizer_state *
+get_no_cull_rasterizer_state(struct svga_context *svga)
+{
+ const struct svga_rasterizer_state *r = svga->curr.rast;
+ unsigned int aa_point = r->templ.point_smooth;
+
+ if (!svga->rasterizer_no_cull[aa_point]) {
+ struct pipe_rasterizer_state rast;
+
+ /* Zeroed template: the cull setting is left at 0
+ * (presumably "cull nothing" -- confirm against the PIPE_FACE_* values).
+ */
+ memset(&rast, 0, sizeof(rast));
+ rast.flatshade = 1;
+ rast.front_ccw = 1;
+ rast.point_smooth = r->templ.point_smooth;
+
+ /* All rasterizer states have the same half_pixel_center,
+ * bottom_edge_rule and clip_halfz values since they are
+ * constant for a context. If we ever implement
+ * GL_ARB_clip_control, the clip_halfz field would have to be observed.
+ */
+ rast.half_pixel_center = r->templ.half_pixel_center;
+ rast.bottom_edge_rule = r->templ.bottom_edge_rule;
+ rast.clip_halfz = r->templ.clip_halfz;
+
+ svga->rasterizer_no_cull[aa_point] =
+ svga->pipe.create_rasterizer_state(&svga->pipe, &rast);
+ }
+ return svga->rasterizer_no_cull[aa_point];
+}
+
+/**
+ * Emit blend, depth/stencil and rasterizer state for VGPU10.
+ *
+ * For each of the three state groups selected by the dirty flags, the
+ * value to bind is compared with the copy remembered in
+ * svga->state.hw_draw and a Set*State command is issued only when it
+ * differs.  The remembered copy is updated only after the command
+ * succeeded.
+ *
+ * Returns PIPE_OK, or the error from the first command that failed.
+ */
+static enum pipe_error
+emit_rss_vgpu10(struct svga_context *svga, unsigned dirty)
+{
+ enum pipe_error ret = PIPE_OK;
+
+ /* NOTE(review): presumably flushes queued hwtnl drawing before the state
+ * below changes -- confirm against svga_hwtnl_flush_retry().
+ */
+ svga_hwtnl_flush_retry(svga);
+
+ if (dirty & (SVGA_NEW_BLEND | SVGA_NEW_BLEND_COLOR)) {
+ const struct svga_blend_state *curr;
+ float blend_factor[4];
+
+ if (svga_has_any_integer_cbufs(svga)) {
+ /* Blending is not supported in integer-valued render targets. */
+ curr = svga->noop_blend;
+ blend_factor[0] =
+ blend_factor[1] =
+ blend_factor[2] =
+ blend_factor[3] = 0;
+ }
+ else {
+ curr = svga->curr.blend;
+
+ if (curr->blend_color_alpha) {
+ /* Use the blend color's alpha for all four factors */
+ blend_factor[0] =
+ blend_factor[1] =
+ blend_factor[2] =
+ blend_factor[3] = svga->curr.blend_color.color[3];
+ }
+ else {
+ blend_factor[0] = svga->curr.blend_color.color[0];
+ blend_factor[1] = svga->curr.blend_color.color[1];
+ blend_factor[2] = svga->curr.blend_color.color[2];
+ blend_factor[3] = svga->curr.blend_color.color[3];
+ }
+ }
+
+ /* Set/bind the blend state object */
+ if (svga->state.hw_draw.blend_id != curr->id ||
+ svga->state.hw_draw.blend_factor[0] != blend_factor[0] ||
+ svga->state.hw_draw.blend_factor[1] != blend_factor[1] ||
+ svga->state.hw_draw.blend_factor[2] != blend_factor[2] ||
+ svga->state.hw_draw.blend_factor[3] != blend_factor[3] ||
+ svga->state.hw_draw.blend_sample_mask != svga->curr.sample_mask) {
+ ret = SVGA3D_vgpu10_SetBlendState(svga->swc, curr->id,
+ blend_factor,
+ svga->curr.sample_mask);
+ if (ret != PIPE_OK)
+ return ret;
+
+ /* Remember what was emitted so identical state is skipped next time */
+ svga->state.hw_draw.blend_id = curr->id;
+ svga->state.hw_draw.blend_factor[0] = blend_factor[0];
+ svga->state.hw_draw.blend_factor[1] = blend_factor[1];
+ svga->state.hw_draw.blend_factor[2] = blend_factor[2];
+ svga->state.hw_draw.blend_factor[3] = blend_factor[3];
+ svga->state.hw_draw.blend_sample_mask = svga->curr.sample_mask;
+ }
+ }
+
+ if (dirty & (SVGA_NEW_DEPTH_STENCIL_ALPHA | SVGA_NEW_STENCIL_REF)) {
+ const struct svga_depth_stencil_state *curr = svga->curr.depth;
+ /* Only the first stencil reference value is passed to the device */
+ unsigned curr_ref = svga->curr.stencil_ref.ref_value[0];
+
+ if (curr->id != svga->state.hw_draw.depth_stencil_id ||
+ curr_ref != svga->state.hw_draw.stencil_ref) {
+ /* Set/bind the depth/stencil state object */
+ ret = SVGA3D_vgpu10_SetDepthStencilState(svga->swc, curr->id,
+ curr_ref);
+ if (ret != PIPE_OK)
+ return ret;
+
+ svga->state.hw_draw.depth_stencil_id = curr->id;
+ svga->state.hw_draw.stencil_ref = curr_ref;
+ }
+ }
+
+ if (dirty & (SVGA_NEW_REDUCED_PRIMITIVE | SVGA_NEW_RAST)) {
+ const struct svga_rasterizer_state *rast;
+
+ if (svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
+ svga->curr.gs && svga->curr.gs->wide_point) {
+
+ /* If we are drawing a point sprite, we will need to
+ * bind a non-culling rasterizer state object
+ */
+ rast = get_no_cull_rasterizer_state(svga);
+ }
+ else {
+ rast = svga->curr.rast;
+ }
+
+ /* rast is dereferenced here without a NULL check */
+ if (svga->state.hw_draw.rasterizer_id != rast->id) {
+ /* Set/bind the rasterizer state object */
+ ret = SVGA3D_vgpu10_SetRasterizerState(svga->swc, rast->id);
+ if (ret != PIPE_OK)
+ return ret;
+ svga->state.hw_draw.rasterizer_id = rast->id;
+ }
+ }
+ return PIPE_OK;
+}
+
+
+/**
+ * Emit render state, using the VGPU10 path when svga_have_vgpu10() is
+ * true for this context and the VGPU9 path otherwise.  Returns the
+ * result of the selected path.
+ */
+static enum pipe_error
+emit_rss(struct svga_context *svga, unsigned dirty)
+{
+   return svga_have_vgpu10(svga) ? emit_rss_vgpu10(svga, dirty)
+                                 : emit_rss_vgpu9(svga, dirty);
+}
+
struct svga_tracked_state svga_hw_rss =
{
(SVGA_NEW_BLEND |
SVGA_NEW_BLEND_COLOR |
- SVGA_NEW_DEPTH_STENCIL |
+ SVGA_NEW_DEPTH_STENCIL_ALPHA |
SVGA_NEW_STENCIL_REF |
SVGA_NEW_RAST |
SVGA_NEW_FRAME_BUFFER |
- SVGA_NEW_NEED_PIPELINE),
+ SVGA_NEW_NEED_PIPELINE |
+ SVGA_NEW_REDUCED_PRIMITIVE),
emit_rss
};
--- /dev/null
+/*
+ * Copyright 2013 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+/**
+ * VGPU10 sampler and sampler view functions.
+ */
+
+
+#include "pipe/p_defines.h"
+#include "util/u_bitmask.h"
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_cmd.h"
+#include "svga_context.h"
+#include "svga_format.h"
+#include "svga_resource_buffer.h"
+#include "svga_resource_texture.h"
+#include "svga_shader.h"
+#include "svga_state.h"
+#include "svga_sampler_view.h"
+
+
+/**
+ * Return the winsys surface handle of a resource: the svga_buffer handle
+ * when the target is PIPE_BUFFER, the svga_texture handle otherwise.
+ */
+static inline struct svga_winsys_surface *
+svga_resource_handle(struct pipe_resource *res)
+{
+   const boolean is_buffer = (res->target == PIPE_BUFFER);
+
+   return is_buffer ? svga_buffer(res)->handle : svga_texture(res)->handle;
+}
+
+
+/**
+ * Report whether a surface is referenced by one of the sampler views
+ * currently bound for the given shader stage.
+ *
+ * Returns TRUE as soon as a non-NULL view whose resource handle equals
+ * 'res' is found among the first num_sampler_views[shader] slots.
+ * Returns FALSE otherwise, and always FALSE when the screen's
+ * debug.no_surface_view flag is set.
+ */
+boolean
+svga_check_sampler_view_resource_collision(struct svga_context *svga,
+                                           struct svga_winsys_surface *res,
+                                           unsigned shader)
+{
+   const struct svga_screen *ss = svga_screen(svga->pipe.screen);
+   unsigned unit = 0;
+
+   if (ss->debug.no_surface_view)
+      return FALSE;
+
+   while (unit < svga->curr.num_sampler_views[shader]) {
+      struct svga_pipe_sampler_view *view =
+         svga_pipe_sampler_view(svga->curr.sampler_views[shader][unit]);
+
+      if (view != NULL && svga_resource_handle(view->base.texture) == res)
+         return TRUE;
+
+      unit++;
+   }
+
+   return FALSE;
+}
+
+
+/**
+ * Create a DX ShaderResourceView for the given pipe_sampler_view,
+ * if needed.
+ *
+ * A view whose id is already valid is left untouched.  Otherwise an id is
+ * taken from svga->sampler_view_id_bm and the view is defined on the
+ * device from the view's format, the resource target and the view's
+ * level/layer (or, for buffers, element) range.
+ *
+ * Returns PIPE_OK on success or when nothing had to be done,
+ * PIPE_ERROR_OUT_OF_MEMORY when no view id could be allocated, and
+ * otherwise the error from SVGA3D_vgpu10_DefineShaderResourceView().
+ * On any failure sv->id is left as SVGA3D_INVALID_ID.
+ */
+static enum pipe_error
+svga_validate_pipe_sampler_view(struct svga_context *svga,
+                                struct svga_pipe_sampler_view *sv)
+{
+   enum pipe_error ret = PIPE_OK;
+
+   if (sv->id == SVGA3D_INVALID_ID) {
+      struct svga_screen *ss = svga_screen(svga->pipe.screen);
+      struct pipe_resource *texture = sv->base.texture;
+      struct svga_winsys_surface *surface = svga_resource_handle(texture);
+      SVGA3dSurfaceFormat format;
+      SVGA3dResourceType resourceDim;
+      SVGA3dShaderResourceViewDesc viewDesc;
+      unsigned id;
+
+      format = svga_translate_format(ss, sv->base.format,
+                                     PIPE_BIND_SAMPLER_VIEW);
+      assert(format != SVGA3D_FORMAT_INVALID);
+
+      if (texture->target == PIPE_BUFFER) {
+         viewDesc.buffer.firstElement = sv->base.u.buf.first_element;
+         viewDesc.buffer.numElements = (sv->base.u.buf.last_element -
+                                        sv->base.u.buf.first_element + 1);
+      }
+      else {
+         viewDesc.tex.mostDetailedMip = sv->base.u.tex.first_level;
+         viewDesc.tex.firstArraySlice = sv->base.u.tex.first_layer;
+         viewDesc.tex.mipLevels = (sv->base.u.tex.last_level -
+                                   sv->base.u.tex.first_level + 1);
+      }
+
+      /* arraySize in viewDesc specifies the number of array slices in a
+       * texture array. For 3D texture, last_layer in
+       * pipe_sampler_view specifies the last slice of the texture
+       * which is different from the last slice in a texture array,
+       * hence we need to set arraySize to 1 explicitly.
+       */
+      viewDesc.tex.arraySize =
+         (texture->target == PIPE_TEXTURE_3D ||
+          texture->target == PIPE_BUFFER) ? 1 :
+            (sv->base.u.tex.last_layer - sv->base.u.tex.first_layer + 1);
+
+      switch (texture->target) {
+      case PIPE_BUFFER:
+         resourceDim = SVGA3D_RESOURCE_BUFFER;
+         break;
+      case PIPE_TEXTURE_1D:
+      case PIPE_TEXTURE_1D_ARRAY:
+         resourceDim = SVGA3D_RESOURCE_TEXTURE1D;
+         break;
+      case PIPE_TEXTURE_RECT:
+      case PIPE_TEXTURE_2D:
+      case PIPE_TEXTURE_2D_ARRAY:
+         resourceDim = SVGA3D_RESOURCE_TEXTURE2D;
+         break;
+      case PIPE_TEXTURE_3D:
+         resourceDim = SVGA3D_RESOURCE_TEXTURE3D;
+         break;
+      case PIPE_TEXTURE_CUBE:
+      case PIPE_TEXTURE_CUBE_ARRAY:
+         resourceDim = SVGA3D_RESOURCE_TEXTURECUBE;
+         break;
+
+      default:
+         assert(!"Unexpected texture type");
+         resourceDim = SVGA3D_RESOURCE_TEXTURE2D;
+         break;
+      }
+
+      /* util_bitmask_add() reports failure to grow the bitmask with
+       * UTIL_BITMASK_INVALID_INDEX.  Bail out before that value is stored
+       * in the view or sent to the device as a view id.
+       */
+      id = util_bitmask_add(svga->sampler_view_id_bm);
+      if (id == UTIL_BITMASK_INVALID_INDEX) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+      sv->id = id;
+
+      ret = SVGA3D_vgpu10_DefineShaderResourceView(svga->swc,
+                                                   sv->id,
+                                                   surface,
+                                                   format,
+                                                   resourceDim,
+                                                   &viewDesc);
+      if (ret != PIPE_OK) {
+         /* Give the id back so a later retry starts from a clean state */
+         util_bitmask_clear(svga->sampler_view_id_bm, sv->id);
+         sv->id = SVGA3D_INVALID_ID;
+      }
+   }
+
+   return ret;
+}
+
+/**
+ * Emit the shader resource (sampler view) bindings for the vertex through
+ * geometry shader stages.  Returns PIPE_OK without doing anything when
+ * svga_have_vgpu10() is false.
+ *
+ * For each stage the current sampler views are validated with
+ * svga_validate_pipe_sampler_view() and their ids and surfaces collected;
+ * NULL views and all slots past the current count are filled with
+ * SVGA3D_INVALID_ID / NULL.  For the fragment stage with polygon stipple
+ * enabled, the stipple view overrides the slot named by
+ * hw_draw.fs->pstipple_sampler_unit.  A single
+ * SVGA3D_vgpu10_SetShaderResources() call per stage then covers
+ * MAX2(previously emitted count, new count) slots, and the new count is
+ * recorded in hw_draw.num_sampler_views[].
+ *
+ * The 'dirty' argument is not read.  The first non-PIPE_OK result from
+ * validation or from the set-resources command is returned immediately;
+ * a missing polygon stipple view makes the function return PIPE_OK early.
+ */
+static enum pipe_error
+update_sampler_resources(struct svga_context *svga, unsigned dirty)
+{
+ enum pipe_error ret = PIPE_OK;
+ unsigned shader;
+
+ if (!svga_have_vgpu10(svga))
+ return PIPE_OK;
+
+ for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+ SVGA3dShaderResourceViewId ids[PIPE_MAX_SAMPLERS];
+ struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS];
+ unsigned count;
+ unsigned nviews;
+ unsigned i;
+
+ count = svga->curr.num_sampler_views[shader];
+ for (i = 0; i < count; i++) {
+ struct svga_pipe_sampler_view *sv =
+ svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);
+ struct svga_winsys_surface *surface;
+
+ if (sv) {
+ surface = svga_resource_handle(sv->base.texture);
+
+ ret = svga_validate_pipe_sampler_view(svga, sv);
+ if (ret != PIPE_OK)
+ return ret;
+
+ assert(sv->id != SVGA3D_INVALID_ID);
+ ids[i] = sv->id;
+ }
+ else {
+ surface = NULL;
+ ids[i] = SVGA3D_INVALID_ID;
+ }
+ surfaces[i] = surface;
+ }
+
+ for (; i < Elements(ids); i++) { /* mark every remaining slot as unbound */
+ ids[i] = SVGA3D_INVALID_ID;
+ surfaces[i] = NULL;
+ }
+
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ /* Handle polygon stipple sampler view */
+ if (svga->curr.rast->templ.poly_stipple_enable) {
+ const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+ struct svga_pipe_sampler_view *sv =
+ svga->polygon_stipple.sampler_view;
+
+ assert(sv);
+ if (!sv) {
+ return PIPE_OK; /* probably out of memory */
+ }
+
+ ret = svga_validate_pipe_sampler_view(svga, sv);
+ if (ret != PIPE_OK)
+ return ret;
+
+ ids[unit] = sv->id; /* NOTE(review): assumes unit < PIPE_MAX_SAMPLERS -- confirm */
+ surfaces[unit] = svga_resource_handle(sv->base.texture);
+ count = MAX2(count, unit+1);
+ }
+ }
+
+ /* Number of ShaderResources that need to be modified. This includes
+ * the ones that need to be unbound, i.e. slots emitted previously that
+ * are beyond the new count.
+ */
+ nviews = MAX2(svga->state.hw_draw.num_sampler_views[shader], count);
+ if (nviews > 0) {
+ ret = SVGA3D_vgpu10_SetShaderResources(svga->swc,
+ svga_shader_type(shader),
+ 0, /* startView */
+ nviews,
+ ids,
+ surfaces);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+
+ /* Number of sampler views enabled in the device */
+ svga->state.hw_draw.num_sampler_views[shader] = count;
+ }
+
+ return ret;
+}
+
+
+/**
+ * Tracked-state entry pairing update_sampler_resources() with the
+ * SVGA_NEW_TEXTURE_BINDING and SVGA_NEW_STIPPLE dirty flags.
+ */
+struct svga_tracked_state svga_hw_sampler_bindings = {
+   "shader resources emit",
+   (SVGA_NEW_TEXTURE_BINDING |
+    SVGA_NEW_STIPPLE),
+   update_sampler_resources
+};
+
+
+
+/**
+ * Bind the current sampler state objects of the vertex through geometry
+ * shader stages, then the polygon stipple sampler if stippling is enabled.
+ * Returns PIPE_OK without doing anything when svga_have_vgpu10() is false.
+ *
+ * Stages with no samplers are skipped; NULL sampler slots are sent as
+ * SVGA3D_INVALID_ID.  The first failing SetSamplers command ends the
+ * function with its error code.  The 'dirty' argument is not read.
+ */
+static enum pipe_error
+update_samplers(struct svga_context *svga, unsigned dirty )
+{
+   unsigned stage;
+
+   if (!svga_have_vgpu10(svga))
+      return PIPE_OK;
+
+   for (stage = PIPE_SHADER_VERTEX; stage <= PIPE_SHADER_GEOMETRY; stage++) {
+      const unsigned num = svga->curr.num_samplers[stage];
+      SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS];
+      enum pipe_error err;
+      unsigned slot;
+
+      if (num == 0)
+         continue;
+
+      for (slot = 0; slot < num; slot++) {
+         if (svga->curr.sampler[stage][slot] == NULL) {
+            ids[slot] = SVGA3D_INVALID_ID;
+            continue;
+         }
+
+         ids[slot] = svga->curr.sampler[stage][slot]->id;
+         assert(ids[slot] != SVGA3D_INVALID_ID);
+      }
+
+      err = SVGA3D_vgpu10_SetSamplers(svga->swc,
+                                      num,
+                                      0, /* start */
+                                      svga_shader_type(stage), /* type */
+                                      ids);
+      if (err != PIPE_OK)
+         return err;
+   }
+
+   /* Nothing more to do unless polygon stipple is in use */
+   if (!svga->curr.rast->templ.poly_stipple_enable)
+      return PIPE_OK;
+
+   {
+      const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+      struct svga_sampler_state *stipple = svga->polygon_stipple.sampler;
+
+      assert(stipple);
+      if (!stipple)
+         return PIPE_OK; /* probably out of memory */
+
+      /* Bind the stipple sampler at its dedicated fragment shader unit */
+      return SVGA3D_vgpu10_SetSamplers(svga->swc,
+                                       1, /* count */
+                                       unit, /* start */
+                                       SVGA3D_SHADERTYPE_PS,
+                                       &stipple->id);
+   }
+}
+
+
+/**
+ * Tracked-state entry pairing update_samplers() with the sampler,
+ * texture-flags and polygon-stipple dirty flags.
+ */
+struct svga_tracked_state svga_hw_sampler = {
+   "texture sampler emit",
+   (SVGA_NEW_SAMPLER | SVGA_NEW_TEXTURE_FLAGS | SVGA_NEW_STIPPLE),
+   update_samplers
+};
--- /dev/null
+/**********************************************************
+ * Copyright 2014 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "util/u_simple_shaders.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_point_sprite.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "svga_context.h"
+#include "svga_shader.h"
+#include "svga_tgsi.h"
+
+
+/**
+ * Install 'gs' as the derived current geometry shader and raise the
+ * SVGA_NEW_GS dirty flag.  The user-specified GS state is left alone.
+ */
+static void
+bind_gs_state(struct svga_context *svga,
+              struct svga_geometry_shader *gs)
+{
+   svga->dirty |= SVGA_NEW_GS;
+   svga->curr.gs = gs;
+}
+
+
+/**
+ * emulate_point_sprite searches the shader variants list to see if there is
+ * a shader variant with a token string that matches the emulation
+ * requirement. If there isn't, then it will use the tgsi utility
+ * tgsi_add_point_sprite to transform the original token string to support
+ * point sprite. A new geometry shader state will be created with the
+ * transformed token string and added to the shader variants list of the
+ * original geometry shader. The new geometry shader state will then be
+ * bound as the current geometry shader.
+ *
+ * 'shader' is the original geometry shader (may be NULL, in which case no
+ * variant lookup or list insertion takes place) and 'tokens' is the token
+ * string to transform (must not be NULL).
+ *
+ * Returns the base of the geometry shader that was bound, or NULL when the
+ * token transformation or the shader creation failed; in that case the
+ * current binding is not changed.
+ */
+static struct svga_shader *
+emulate_point_sprite(struct svga_context *svga,
+                     struct svga_shader *shader,
+                     const struct tgsi_token *tokens)
+{
+   struct svga_token_key key;
+   struct svga_geometry_shader *orig_gs = (struct svga_geometry_shader *)shader;
+   struct svga_geometry_shader *gs = NULL;
+   struct svga_stream_output *streamout = NULL;
+   int pos_out_index = -1;
+   int aa_point_coord_index = -1;
+
+   assert(tokens != NULL);
+
+   /* Create a token key */
+   memset(&key, 0, sizeof key);
+   key.gs.writes_psize = 1;
+   key.gs.sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable;
+
+   key.gs.sprite_origin_upper_left =
+      !(svga->curr.rast->templ.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT);
+
+   key.gs.aa_point = svga->curr.rast->templ.point_smooth;
+
+   if (orig_gs != NULL) {
+
+      /* Check if the original geometry shader has stream output and
+       * if position is one of the outputs.
+       */
+      streamout = orig_gs->base.stream_output;
+      if (streamout != NULL) {
+         pos_out_index = streamout->pos_out_index;
+         key.gs.point_pos_stream_out = pos_out_index != -1;
+      }
+
+      /* Search the shader lists to see if there is a variant that matches
+       * this token key.
+       */
+      gs = (struct svga_geometry_shader *)
+              svga_search_shader_token_key(&orig_gs->base, &key);
+   }
+
+   /* If there isn't, then call the tgsi utility tgsi_add_point_sprite
+    * to transform the original tokens to support point sprite.
+    * Flip the sprite origin as SVGA3D device only supports an
+    * upper-left origin.
+    */
+   if (!gs) {
+      struct pipe_shader_state templ;
+      struct tgsi_token *new_tokens;
+
+      new_tokens = tgsi_add_point_sprite(tokens,
+                                         key.gs.sprite_coord_enable,
+                                         key.gs.sprite_origin_upper_left,
+                                         key.gs.point_pos_stream_out,
+                                         key.gs.aa_point ?
+                                            &aa_point_coord_index : NULL);
+
+      if (new_tokens == NULL) {
+         /* if no new tokens are generated for whatever reason, just return */
+         return NULL;
+      }
+
+      if (0) {
+         debug_printf("Before tgsi_add_point_sprite ---------------\n");
+         tgsi_dump(tokens, 0);
+         debug_printf("After tgsi_add_point_sprite --------------\n");
+         tgsi_dump(new_tokens, 0);
+      }
+
+      /* Start from an all-zero template so that create_gs_state never
+       * sees indeterminate fields (this also leaves
+       * stream_output.num_outputs at 0 when there is no stream output).
+       */
+      memset(&templ, 0, sizeof templ);
+      templ.tokens = new_tokens;
+
+      if (streamout != NULL) {
+         templ.stream_output = streamout->info;
+         /* The tgsi_add_point_sprite utility adds an extra output
+          * for the original point position for stream output purpose.
+          * We need to replace the position output register index in the
+          * stream output declaration with the new register index.
+          */
+         if (pos_out_index != -1) {
+            assert(orig_gs != NULL);
+            templ.stream_output.output[pos_out_index].register_index =
+               orig_gs->base.info.num_outputs;
+         }
+      }
+
+      /* Create a new geometry shader state with the new tokens */
+      gs = svga->pipe.create_gs_state(&svga->pipe, &templ);
+
+      /* Don't need the token string anymore. There is a local copy
+       * in the shader state.
+       */
+      FREE(new_tokens);
+
+      if (!gs) {
+         return NULL;
+      }
+
+      gs->wide_point = TRUE;
+      gs->aa_point_coord_index = aa_point_coord_index;
+      gs->base.token_key = key;
+      /* Only take the member address when there really is an original GS */
+      gs->base.parent = orig_gs ? &orig_gs->base : NULL;
+      gs->base.next = NULL;
+
+      /* Add the new geometry shader to the head of the shader list
+       * pointed to by the original geometry shader.
+       */
+      if (orig_gs != NULL) {
+         gs->base.next = orig_gs->base.next;
+         orig_gs->base.next = &gs->base;
+      }
+   }
+
+   /* Bind the new geometry shader state */
+   bind_gs_state(svga, gs);
+
+   return &gs->base;
+}
+
+/**
+ * Generate a geometry shader that emits a wide point by drawing a quad.
+ * This function first creates a passthrough geometry shader (unless the
+ * current vertex shader already has a geometry shader attached) and then
+ * calls emulate_point_sprite() to transform the geometry shader to
+ * support point sprite.
+ *
+ * Returns the base of the resulting geometry shader, or NULL if either
+ * the passthrough shader or the point sprite variant could not be created.
+ *
+ * NOTE(review): when emulate_point_sprite() fails right after a
+ * passthrough shader was created here, nothing in this function releases
+ * that passthrough shader -- confirm whether it is reclaimed elsewhere.
+ */
+static struct svga_shader *
+add_point_sprite_shader(struct svga_context *svga)
+{
+   struct svga_vertex_shader *vs = svga->curr.vs;
+   struct svga_geometry_shader *orig_gs = vs->gs;
+   struct svga_geometry_shader *new_gs;
+   const struct tgsi_token *tokens;
+
+   if (orig_gs == NULL) {
+
+      /* If this is the first time adding a geometry shader to this
+       * vertex shader to support point sprite, then create
+       * a passthrough geometry shader first.
+       */
+      orig_gs = (struct svga_geometry_shader *)
+                   util_make_geometry_passthrough_shader(
+                      &svga->pipe, vs->base.info.num_outputs,
+                      vs->base.info.output_semantic_name,
+                      vs->base.info.output_semantic_index);
+
+      if (orig_gs == NULL)
+         return NULL;
+   }
+   else {
+      /* Always transform starting from the original (parent) shader */
+      if (orig_gs->base.parent)
+         orig_gs = (struct svga_geometry_shader *)orig_gs->base.parent;
+   }
+   tokens = orig_gs->base.tokens;
+
+   /* Call emulate_point_sprite to find or create a transformed
+    * geometry shader for supporting point sprite.
+    */
+   new_gs = (struct svga_geometry_shader *)
+               emulate_point_sprite(svga, &orig_gs->base, tokens);
+
+   /* If this is the first time creating a geometry shader to
+    * support vertex point size, then add the new geometry shader
+    * to the vertex shader.
+    */
+   if (vs->gs == NULL) {
+      vs->gs = new_gs;
+   }
+
+   /* emulate_point_sprite() may fail; do not form a member address
+    * through a NULL pointer in that case.
+    */
+   return new_gs ? &new_gs->base : NULL;
+}
+
+/**
+ * update_tgsi_transform provides a hook to transform a shader if needed.
+ *
+ * Returns PIPE_OK without doing anything when svga_have_vgpu10() is
+ * false.  When the reduced primitive is POINTS, a user geometry shader
+ * that writes point size (or is flagged wide_point) is replaced by its
+ * point sprite variant; with no user geometry shader, a point sprite
+ * geometry shader is generated if the rasterizer point size is above 1.0
+ * or the vertex shader writes point size.  In every other case the
+ * user's geometry shader is bound.
+ *
+ * The results of the helper calls are deliberately ignored and the
+ * function always returns PIPE_OK; the 'dirty' argument is not read.
+ */
+static enum pipe_error
+update_tgsi_transform(struct svga_context *svga, unsigned dirty)
+{
+   struct svga_geometry_shader *gs = svga->curr.user_gs; /* current gs */
+   struct svga_vertex_shader *vs = svga->curr.vs; /* currently bound vs */
+
+   if (!svga_have_vgpu10(svga))
+      return PIPE_OK;
+
+   if (svga->curr.reduced_prim == PIPE_PRIM_POINTS) {
+      /* If the current prim type is POINTS and the current geometry shader
+       * emits wide points, transform the shader to emulate wide points using
+       * quads.
+       */
+      if (gs != NULL && (gs->base.info.writes_psize || gs->wide_point)) {
+         struct svga_shader *orig_gs =
+            gs->base.parent ? gs->base.parent : &gs->base;
+
+         (void) emulate_point_sprite(svga, orig_gs, orig_gs->tokens);
+      }
+
+      /* If there is not an active geometry shader and the current vertex
+       * shader emits wide point then create a new geometry shader to emulate
+       * wide point.
+       */
+      else if (gs == NULL &&
+               (svga->curr.rast->pointsize > 1.0 ||
+                vs->base.info.writes_psize)) {
+         (void) add_point_sprite_shader(svga);
+      }
+      else {
+         /* use the user's GS */
+         bind_gs_state(svga, svga->curr.user_gs);
+      }
+   }
+   else if (svga->curr.gs != svga->curr.user_gs) {
+      /* If current primitive type is not POINTS, then make sure
+       * we don't bind to any of the generated geometry shader
+       */
+      bind_gs_state(svga, svga->curr.user_gs);
+   }
+
+   return PIPE_OK;
+}
+
+/**
+ * Tracked-state entry pairing update_tgsi_transform() with the shader,
+ * rasterizer and reduced-primitive dirty flags.
+ */
+struct svga_tracked_state svga_need_tgsi_transform =
+{
+   "transform shader for optimization",
+   (SVGA_NEW_REDUCED_PRIMITIVE |
+    SVGA_NEW_RAST |
+    SVGA_NEW_VS |
+    SVGA_NEW_GS |
+    SVGA_NEW_FS),
+   update_tgsi_transform
+};
#include "svga_sampler_view.h"
#include "svga_winsys.h"
#include "svga_context.h"
+#include "svga_shader.h"
#include "svga_state.h"
#include "svga_cmd.h"
+/**
+ * Called when tearing down a context to free resources and samplers.
+ *
+ * Walks every entry of svga->state.hw_draw.views and, for each one,
+ * drops the svga sampler view reference, releases the fragment-stage
+ * current sampler view with the same index, drops the texture reference
+ * and marks the entry dirty.
+ */
void svga_cleanup_tss_binding(struct svga_context *svga)
{
+ const unsigned shader = PIPE_SHADER_FRAGMENT;
unsigned i;
- unsigned count = MAX2( svga->curr.num_sampler_views,
- svga->state.hw_draw.num_views );
- for (i = 0; i < count; i++) {
+ for (i = 0; i < Elements(svga->state.hw_draw.views); i++) {
struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
-
- svga_sampler_view_reference(&view->v, NULL);
- pipe_sampler_view_release(&svga->pipe, &svga->curr.sampler_views[i]);
- pipe_resource_reference( &view->texture, NULL );
-
- view->dirty = 1;
+ if (view) { /* NOTE(review): view is the address of an array element, so this test looks always true */
+ svga_sampler_view_reference(&view->v, NULL);
+ pipe_sampler_view_release(&svga->pipe,
+ &svga->curr.sampler_views[shader][i]);
+ pipe_resource_reference(&view->texture, NULL);
+ view->dirty = TRUE;
+ }
}
}
};
+/**
+ * Update the texture binding for one texture unit.
+ *
+ * The min/max LOD range is derived from the sampler state 's' and the
+ * sampler view 'sv': with mipfilter SVGA3D_TEX_FILTER_NONE both are the
+ * view's first_level, otherwise view_min_lod/view_max_lod are offset by
+ * first_level and clamped to the smaller of the view's and the texture's
+ * last_level.  If either 's' or 'sv' is NULL the unit is treated as
+ * having no texture and a 0..0 range.
+ *
+ * When the texture or LOD range differs from what 'view' caches, the old
+ * svga sampler view is released, the cached texture reference is
+ * replaced, the entry is marked dirty and, for a non-NULL texture, a new
+ * view is obtained from svga_get_tex_sampler_view().
+ *
+ * The unit is appended to 'queue' when the entry is dirty, or when
+ * 'reemit' is set and the entry has a view.  A clean entry with a view is
+ * passed to svga_validate_sampler_view() instead.
+ *
+ * NOTE(review): queue->bind[] is written without a bounds check here;
+ * presumably the caller guarantees enough room -- confirm.
+ */
+static void
+emit_tex_binding_unit(struct svga_context *svga,
+ unsigned unit,
+ const struct svga_sampler_state *s,
+ const struct pipe_sampler_view *sv,
+ struct svga_hw_view_state *view,
+ boolean reemit,
+ struct bind_queue *queue)
+{
+ struct pipe_resource *texture = NULL;
+ unsigned last_level, min_lod, max_lod;
+
+ /* get min max lod */
+ if (sv && s) {
+ if (s->mipfilter == SVGA3D_TEX_FILTER_NONE) {
+ /* just use the base level image */
+ min_lod = max_lod = sv->u.tex.first_level;
+ }
+ else {
+ last_level = MIN2(sv->u.tex.last_level, sv->texture->last_level);
+ min_lod = s->view_min_lod + sv->u.tex.first_level;
+ min_lod = MIN2(min_lod, last_level);
+ max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level, last_level);
+ }
+ texture = sv->texture;
+ }
+ else {
+ min_lod = 0;
+ max_lod = 0;
+ }
+
+ if (view->texture != texture ||
+ view->min_lod != min_lod ||
+ view->max_lod != max_lod) {
+
+ svga_sampler_view_reference(&view->v, NULL);
+ pipe_resource_reference( &view->texture, texture );
+
+ view->dirty = TRUE;
+ view->min_lod = min_lod;
+ view->max_lod = max_lod;
+
+ if (texture) {
+ view->v = svga_get_tex_sampler_view(&svga->pipe,
+ texture,
+ min_lod,
+ max_lod);
+ }
+ }
+
+ /*
+ * We need to reemit non-null texture bindings, even when they are not
+ * dirty, to ensure that the resources are paged in.
+ */
+ if (view->dirty || (reemit && view->v)) {
+ queue->bind[queue->bind_count].unit = unit;
+ queue->bind[queue->bind_count].view = view;
+ queue->bind_count++;
+ }
+
+ if (!view->dirty && view->v) {
+ svga_validate_sampler_view(svga, view->v);
+ }
+}
+
+
static enum pipe_error
update_tss_binding(struct svga_context *svga,
unsigned dirty )
{
- boolean reemit = svga->rebind.texture_samplers;
+ const unsigned shader = PIPE_SHADER_FRAGMENT;
+ boolean reemit = svga->rebind.flags.texture_samplers;
unsigned i;
- unsigned count = MAX2( svga->curr.num_sampler_views,
+ unsigned count = MAX2( svga->curr.num_sampler_views[shader],
svga->state.hw_draw.num_views );
- unsigned min_lod;
- unsigned max_lod;
struct bind_queue queue;
+ if (svga_have_vgpu10(svga))
+ return PIPE_OK;
+
queue.bind_count = 0;
for (i = 0; i < count; i++) {
- const struct svga_sampler_state *s = svga->curr.sampler[i];
- struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
- struct pipe_resource *texture = NULL;
- struct pipe_sampler_view *sv = svga->curr.sampler_views[i];
-
- /* get min max lod */
- if (sv && s) {
- min_lod = MAX2(0, (s->view_min_lod + sv->u.tex.first_level));
- max_lod = MIN2(s->view_max_lod + sv->u.tex.first_level,
- sv->texture->last_level);
- texture = sv->texture;
- } else {
- min_lod = 0;
- max_lod = 0;
- }
-
- if (view->texture != texture ||
- view->min_lod != min_lod ||
- view->max_lod != max_lod) {
-
- svga_sampler_view_reference(&view->v, NULL);
- pipe_resource_reference( &view->texture, texture );
-
- view->dirty = TRUE;
- view->min_lod = min_lod;
- view->max_lod = max_lod;
-
- if (texture)
- view->v = svga_get_tex_sampler_view(&svga->pipe,
- texture,
- min_lod,
- max_lod);
- }
-
- /*
- * We need to reemit non-null texture bindings, even when they are not
- * dirty, to ensure that the resources are paged in.
- */
-
- if (view->dirty ||
- (reemit && view->v)) {
- queue.bind[queue.bind_count].unit = i;
- queue.bind[queue.bind_count].view = view;
- queue.bind_count++;
- }
- if (!view->dirty && view->v) {
- svga_validate_sampler_view(svga, view->v);
- }
+ emit_tex_binding_unit(svga, i,
+ svga->curr.sampler[shader][i],
+ svga->curr.sampler_views[shader][i],
+ &svga->state.hw_draw.views[i],
+ reemit,
+ &queue);
}
- svga->state.hw_draw.num_views = svga->curr.num_sampler_views;
+ svga->state.hw_draw.num_views = svga->curr.num_sampler_views[shader];
+
+ /* Polygon stipple */
+ if (svga->curr.rast->templ.poly_stipple_enable) {
+ const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+ emit_tex_binding_unit(svga, unit,
+ svga->polygon_stipple.sampler,
+ &svga->polygon_stipple.sampler_view->base,
+ &svga->state.hw_draw.views[unit],
+ reemit,
+ &queue);
+ }
if (queue.bind_count) {
SVGA3dTextureState *ts;
SVGA_FIFOCommitAll( svga->swc );
}
- svga->rebind.texture_samplers = FALSE;
+ svga->rebind.flags.texture_samplers = FALSE;
return PIPE_OK;
enum pipe_error ret;
struct bind_queue queue;
- assert(svga->rebind.texture_samplers);
+ assert(!svga_have_vgpu10(svga));
+ assert(svga->rebind.flags.texture_samplers);
queue.bind_count = 0;
}
}
+ /* Polygon stipple */
+ if (svga->curr.rast->templ.poly_stipple_enable) {
+ const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+ struct svga_hw_view_state *view = &svga->state.hw_draw.views[unit];
+
+ if (view->v) {
+ queue.bind[queue.bind_count].unit = unit;
+ queue.bind[queue.bind_count].view = view;
+ queue.bind_count++;
+ }
+ }
+
if (queue.bind_count) {
SVGA3dTextureState *ts;
SVGA_FIFOCommitAll(svga->swc);
}
- svga->rebind.texture_samplers = FALSE;
+ svga->rebind.flags.texture_samplers = FALSE;
return PIPE_OK;
}
struct svga_tracked_state svga_hw_tss_binding = {
"texture binding emit",
SVGA_NEW_TEXTURE_BINDING |
+ SVGA_NEW_STIPPLE |
SVGA_NEW_SAMPLER,
update_tss_binding
};
};
-#define EMIT_TS(svga, unit, val, token, fail) \
+/**
+ * Append one texture-state entry (stage, name, value) to the queue.
+ * Running past the end of q->ts[] is only caught by the assert, so the
+ * caller has to leave room.
+ */
+static inline void
+svga_queue_tss( struct ts_queue *q,
+                unsigned unit,
+                unsigned tss,
+                unsigned value )
+{
+   const unsigned slot = q->ts_count;
+
+   assert(slot < sizeof(q->ts)/sizeof(q->ts[0]));
+
+   q->ts[slot].stage = unit;
+   q->ts[slot].name = tss;
+   q->ts[slot].value = value;
+
+   q->ts_count = slot + 1;
+}
+
+
+#define EMIT_TS(svga, unit, val, token) \
do { \
assert(unit < Elements(svga->state.hw_draw.ts)); \
assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit])); \
if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \
- svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val ); \
+ svga_queue_tss( queue, unit, SVGA3D_TS_##token, val ); \
svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \
} \
} while (0)
-#define EMIT_TS_FLOAT(svga, unit, fvalue, token, fail) \
+#define EMIT_TS_FLOAT(svga, unit, fvalue, token) \
do { \
unsigned val = fui(fvalue); \
assert(unit < Elements(svga->state.hw_draw.ts)); \
assert(SVGA3D_TS_##token < Elements(svga->state.hw_draw.ts[unit])); \
if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \
- svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val ); \
+ svga_queue_tss( queue, unit, SVGA3D_TS_##token, val ); \
svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \
} \
} while (0)
-static inline void
-svga_queue_tss( struct ts_queue *q,
- unsigned unit,
- unsigned tss,
- unsigned value )
+/**
+ * Emit texture sampler state (tss) for one texture unit.
+ *
+ * Each EMIT_TS / EMIT_TS_FLOAT compares the value against the cached
+ * svga->state.hw_draw.ts[unit][...] entry and only queues (and caches)
+ * values that changed.  ADDRESSV is forced to SVGA3D_TEX_ADDRESS_WRAP
+ * when the unit's bit is set in tex_flags.flag_1d, and GAMMA is 2.2 when
+ * the unit's bit is set in tex_flags.flag_srgb, 1.0 otherwise.
+ *
+ * 'state' is dereferenced unconditionally and must not be NULL.
+ */
+static void
+emit_tss_unit(struct svga_context *svga, unsigned unit,
+ const struct svga_sampler_state *state,
+ struct ts_queue *queue)
{
- assert(q->ts_count < sizeof(q->ts)/sizeof(q->ts[0]));
- q->ts[q->ts_count].stage = unit;
- q->ts[q->ts_count].name = tss;
- q->ts[q->ts_count].value = value;
- q->ts_count++;
+ EMIT_TS(svga, unit, state->mipfilter, MIPFILTER);
+ EMIT_TS(svga, unit, state->min_lod, TEXTURE_MIPMAP_LEVEL);
+ EMIT_TS(svga, unit, state->magfilter, MAGFILTER);
+ EMIT_TS(svga, unit, state->minfilter, MINFILTER);
+ EMIT_TS(svga, unit, state->aniso_level, TEXTURE_ANISOTROPIC_LEVEL);
+ EMIT_TS_FLOAT(svga, unit, state->lod_bias, TEXTURE_LOD_BIAS);
+ EMIT_TS(svga, unit, state->addressu, ADDRESSU);
+ EMIT_TS(svga, unit, state->addressw, ADDRESSW);
+ EMIT_TS(svga, unit, state->bordercolor, BORDERCOLOR);
+ // TEXCOORDINDEX -- hopefully not needed
+
+ if (svga->curr.tex_flags.flag_1d & (1 << unit))
+ EMIT_TS(svga, unit, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV);
+ else
+ EMIT_TS(svga, unit, state->addressv, ADDRESSV);
+
+ if (svga->curr.tex_flags.flag_srgb & (1 << unit))
+ EMIT_TS_FLOAT(svga, unit, 2.2f, GAMMA);
+ else
+ EMIT_TS_FLOAT(svga, unit, 1.0f, GAMMA);
}
-
static enum pipe_error
update_tss(struct svga_context *svga,
unsigned dirty )
{
+ const unsigned shader = PIPE_SHADER_FRAGMENT;
unsigned i;
struct ts_queue queue;
- queue.ts_count = 0;
- for (i = 0; i < svga->curr.num_samplers; i++) {
- if (svga->curr.sampler[i]) {
- const struct svga_sampler_state *curr = svga->curr.sampler[i];
-
- EMIT_TS(svga, i, curr->mipfilter, MIPFILTER, fail);
- EMIT_TS(svga, i, curr->min_lod, TEXTURE_MIPMAP_LEVEL, fail);
- EMIT_TS(svga, i, curr->magfilter, MAGFILTER, fail);
- EMIT_TS(svga, i, curr->minfilter, MINFILTER, fail);
- EMIT_TS(svga, i, curr->aniso_level, TEXTURE_ANISOTROPIC_LEVEL, fail);
- EMIT_TS_FLOAT(svga, i, curr->lod_bias, TEXTURE_LOD_BIAS, fail);
- EMIT_TS(svga, i, curr->addressu, ADDRESSU, fail);
- EMIT_TS(svga, i, curr->addressw, ADDRESSW, fail);
- EMIT_TS(svga, i, curr->bordercolor, BORDERCOLOR, fail);
- // TEXCOORDINDEX -- hopefully not needed
-
- if (svga->curr.tex_flags.flag_1d & (1 << i)) {
- EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail);
- }
- else
- EMIT_TS(svga, i, curr->addressv, ADDRESSV, fail);
-
- if (svga->curr.tex_flags.flag_srgb & (1 << i))
- EMIT_TS_FLOAT(svga, i, 2.2f, GAMMA, fail);
- else
- EMIT_TS_FLOAT(svga, i, 1.0f, GAMMA, fail);
+ if (svga_have_vgpu10(svga))
+ return PIPE_OK;
+ queue.ts_count = 0;
+ for (i = 0; i < svga->curr.num_samplers[shader]; i++) {
+ if (svga->curr.sampler[shader][i]) {
+ const struct svga_sampler_state *curr = svga->curr.sampler[shader][i];
+ emit_tss_unit(svga, i, curr, &queue);
}
}
+
+ /* polygon stipple sampler */
+ if (svga->curr.rast->templ.poly_stipple_enable) {
+ emit_tss_unit(svga,
+ svga->state.hw_draw.fs->pstipple_sampler_unit,
+ svga->polygon_stipple.sampler,
+ &queue);
+ }
if (queue.ts_count) {
SVGA3dTextureState *ts;
struct svga_tracked_state svga_hw_tss = {
"texture state emit",
(SVGA_NEW_SAMPLER |
+ SVGA_NEW_STIPPLE |
SVGA_NEW_TEXTURE_FLAGS),
update_tss
};
#include "svga_draw.h"
#include "svga_tgsi.h"
#include "svga_screen.h"
+#include "svga_shader.h"
#include "svga_resource_buffer.h"
#include "svga_hw_reg.h"
emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
{
const struct pipe_vertex_element *ve = svga->curr.velems->velem;
+ SVGA3dVertexDecl decls[SVGA3D_INPUTREG_MAX];
+ unsigned buffer_indexes[SVGA3D_INPUTREG_MAX];
unsigned i;
unsigned neg_bias = 0;
assert(svga->curr.velems->count >=
svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
- /* specify number of vertex element declarations to come */
- svga_hwtnl_reset_vdecl( svga->hwtnl,
- svga->curr.velems->count );
-
/**
* We can't set the VDECL offset to something negative, so we
* must calculate a common negative additional index bias, and modify
for (i = 0; i < svga->curr.velems->count; i++) {
const struct pipe_vertex_buffer *vb =
&svga->curr.vb[ve[i].vertex_buffer_index];
- const struct svga_buffer *buffer;
+ struct svga_buffer *buffer;
unsigned int offset = vb->buffer_offset + ve[i].src_offset;
+ unsigned tmp_neg_bias = 0;
if (!vb->buffer)
continue;
buffer = svga_buffer(vb->buffer);
if (buffer->uploaded.start > offset) {
- unsigned tmp_neg_bias = buffer->uploaded.start - offset;
+ tmp_neg_bias = buffer->uploaded.start - offset;
if (vb->stride)
tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride;
neg_bias = MAX2(neg_bias, tmp_neg_bias);
const struct pipe_vertex_buffer *vb =
&svga->curr.vb[ve[i].vertex_buffer_index];
unsigned usage, index;
- const struct svga_buffer *buffer;
- SVGA3dVertexDecl decl;
+ struct svga_buffer *buffer;
if (!vb->buffer)
continue;
/* SVGA_NEW_VELEMENT
*/
- decl.identity.type = svga->curr.velems->decl_type[i];
- decl.identity.method = SVGA3D_DECLMETHOD_DEFAULT;
- decl.identity.usage = usage;
- decl.identity.usageIndex = index;
- decl.array.stride = vb->stride;
+ decls[i].identity.type = svga->curr.velems->decl_type[i];
+ decls[i].identity.method = SVGA3D_DECLMETHOD_DEFAULT;
+ decls[i].identity.usage = usage;
+ decls[i].identity.usageIndex = index;
+ decls[i].array.stride = vb->stride;
/* Compensate for partially uploaded vbo, and
* for the negative index bias.
*/
- decl.array.offset = (vb->buffer_offset
+ decls[i].array.offset = (vb->buffer_offset
+ ve[i].src_offset
+ neg_bias * vb->stride
- buffer->uploaded.start);
- assert(decl.array.offset >= 0);
+ assert(decls[i].array.offset >= 0);
+
+ buffer_indexes[i] = ve[i].vertex_buffer_index;
- svga_hwtnl_vdecl( svga->hwtnl,
- i,
- &decl,
- buffer->uploaded.buffer ? buffer->uploaded.buffer :
- vb->buffer );
+ assert(!buffer->uploaded.buffer);
}
+ svga_hwtnl_vertex_decls(svga->hwtnl,
+ svga->curr.velems->count,
+ decls,
+ buffer_indexes,
+ svga->curr.velems->id);
+
+ svga_hwtnl_vertex_buffers(svga->hwtnl,
+ svga->curr.num_vertex_buffers,
+ svga->curr.vb);
+
svga_hwtnl_set_index_bias( svga->hwtnl, -(int) neg_bias );
return PIPE_OK;
}
#include "util/u_inlines.h"
#include "pipe/p_defines.h"
-#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_bitmask.h"
#include "svga_hw_reg.h"
-static inline int
-compare_vs_keys(const struct svga_vs_compile_key *a,
- const struct svga_vs_compile_key *b)
-{
- unsigned keysize = svga_vs_key_size( a );
- return memcmp( a, b, keysize );
-}
-
-
-/** Search for a vertex shader variant */
-static struct svga_shader_variant *
-search_vs_key(const struct svga_vertex_shader *vs,
- const struct svga_vs_compile_key *key)
-{
- struct svga_shader_variant *variant = vs->base.variants;
-
- assert(key);
-
- for ( ; variant; variant = variant->next) {
- if (compare_vs_keys( key, &variant->key.vkey ) == 0)
- return variant;
- }
-
- return NULL;
-}
-
-
/**
* If we fail to compile a vertex shader we'll use a dummy/fallback shader
* that simply emits a (0,0,0,1) vertex position.
}
+/**
+ * Translate a vertex shader into a shader variant for the given compile key.
+ * The VGPU10 translator is used when svga_have_vgpu10() is true for the
+ * context; otherwise the VGPU9 translator is used.  The translator's result
+ * is returned unchanged.
+ */
+static struct svga_shader_variant *
+translate_vertex_program(struct svga_context *svga,
+                         const struct svga_vertex_shader *vs,
+                         const struct svga_compile_key *key)
+{
+   if (!svga_have_vgpu10(svga))
+      return svga_tgsi_vgpu9_translate(&vs->base, key, PIPE_SHADER_VERTEX);
+
+   return svga_tgsi_vgpu10_translate(svga, &vs->base, key,
+                                     PIPE_SHADER_VERTEX);
+}
+
+
/**
* Replace the given shader's instruction with a simple / dummy shader.
* We use this when normal shader translation fails.
*/
static struct svga_shader_variant *
-get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs,
- const struct svga_vs_compile_key *key)
+get_compiled_dummy_vertex_shader(struct svga_context *svga,
+ struct svga_vertex_shader *vs,
+ const struct svga_compile_key *key)
{
const struct tgsi_token *dummy = get_dummy_vertex_shader();
struct svga_shader_variant *variant;
FREE((void *) vs->base.tokens);
vs->base.tokens = dummy;
- variant = svga_translate_vertex_program(vs, key);
+ variant = translate_vertex_program(svga, vs, key);
return variant;
}
static enum pipe_error
compile_vs(struct svga_context *svga,
struct svga_vertex_shader *vs,
- const struct svga_vs_compile_key *key,
+ const struct svga_compile_key *key,
struct svga_shader_variant **out_variant)
{
struct svga_shader_variant *variant;
enum pipe_error ret = PIPE_ERROR;
- variant = svga_translate_vertex_program( vs, key );
+ variant = translate_vertex_program(svga, vs, key);
if (variant == NULL) {
- /* some problem during translation, try the dummy shader */
- variant = get_compiled_dummy_vertex_shader(vs, key);
- if (!variant) {
- ret = PIPE_ERROR;
- goto fail;
- }
+ debug_printf("Failed to compile vertex shader,"
+ " using dummy shader instead.\n");
+ variant = get_compiled_dummy_vertex_shader(svga, vs, key);
}
-
- if (svga_shader_too_large(svga, variant)) {
+ else if (svga_shader_too_large(svga, variant)) {
/* too big, use dummy shader */
- debug_printf("Shader too large (%lu bytes),"
+ debug_printf("Shader too large (%u bytes),"
" using dummy shader instead.\n",
- (unsigned long ) variant->nr_tokens
- * sizeof(variant->tokens[0]));
- variant = get_compiled_dummy_vertex_shader(vs, key);
- if (!variant) {
- ret = PIPE_ERROR;
- goto fail;
- }
+ (unsigned) (variant->nr_tokens
+ * sizeof(variant->tokens[0])));
+ /* Free the too-large variant */
+ svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+ /* Use simple pass-through shader instead */
+ variant = get_compiled_dummy_vertex_shader(svga, vs, key);
+ }
+
+ if (!variant) {
+ return PIPE_ERROR;
}
ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_VS, variant);
- if (ret != PIPE_OK)
- goto fail;
+ if (ret != PIPE_OK) {
+ svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+ return ret;
+ }
*out_variant = variant;
- /* insert variants at head of linked list */
- variant->next = vs->base.variants;
- vs->base.variants = variant;
-
return PIPE_OK;
-
-fail:
- if (variant) {
- svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
- }
- return ret;
}
+
+/**
+ * Fill in the compile key used to look up / create the vertex shader
+ * variant for the current state.
+ *
+ * The key is zeroed first.  When software TNL is needed on a VGPU10
+ * context only the passthrough and undo_viewport flags are set and the
+ * function returns early; otherwise the VS-specific fields are copied
+ * from the rasterizer, fragment shader and vertex element state, the
+ * common shader key fields are initialized and the clip plane enable
+ * mask is recorded.
+ *
+ * \param svga  context whose current state is sampled
+ * \param key   key to fill in (overwritten)
+ */
/* SVGA_NEW_PRESCALE, SVGA_NEW_RAST, SVGA_NEW_FS
*/
static void
-make_vs_key(struct svga_context *svga, struct svga_vs_compile_key *key)
+make_vs_key(struct svga_context *svga, struct svga_compile_key *key)
{
+ const unsigned shader = PIPE_SHADER_VERTEX;
+
memset(key, 0, sizeof *key);
- key->need_prescale = svga->state.hw_clear.prescale.enabled;
- key->allow_psiz = svga->curr.rast->templ.point_size_per_vertex;
+
+ if (svga->state.sw.need_swtnl && svga_have_vgpu10(svga)) {
+ /* Set both of these flags, to match compile_passthrough_vs() */
+ key->vs.passthrough = 1;
+ key->vs.undo_viewport = 1;
+ return;
+ }
+
+ /* Prescale is only requested from the VS when no geometry shader is bound */
+ key->vs.need_prescale = svga->state.hw_clear.prescale.enabled &&
+ (svga->curr.gs == NULL);
+ key->vs.allow_psiz = svga->curr.rast->templ.point_size_per_vertex;
/* SVGA_NEW_FS */
- key->fs_generic_inputs = svga->curr.fs->generic_inputs;
+ key->vs.fs_generic_inputs = svga->curr.fs->generic_inputs;
+
+ /* Build the generic remap table from the FS generic input mask */
+ svga_remap_generics(key->vs.fs_generic_inputs, key->generic_remap_table);
/* SVGA_NEW_VELEMENT */
- key->adjust_attrib_range = svga->curr.velems->adjust_attrib_range;
- key->adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1;
+ key->vs.adjust_attrib_range = svga->curr.velems->adjust_attrib_range;
+ key->vs.adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1;
+ key->vs.attrib_is_pure_int = svga->curr.velems->attrib_is_pure_int;
+ key->vs.adjust_attrib_itof = svga->curr.velems->adjust_attrib_itof;
+ key->vs.adjust_attrib_utof = svga->curr.velems->adjust_attrib_utof;
+ key->vs.attrib_is_bgra = svga->curr.velems->attrib_is_bgra;
+ key->vs.attrib_puint_to_snorm = svga->curr.velems->attrib_puint_to_snorm;
+ key->vs.attrib_puint_to_uscaled = svga->curr.velems->attrib_puint_to_uscaled;
+ key->vs.attrib_puint_to_sscaled = svga->curr.velems->attrib_puint_to_sscaled;
+
+ /* SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */
+ svga_init_shader_key_common(svga, shader, key);
+
+ /* SVGA_NEW_RAST */
+ key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable;
}
svga_reemit_vs_bindings(struct svga_context *svga)
{
enum pipe_error ret;
- struct svga_winsys_gb_shader *gbshader =
- svga->state.hw_draw.vs ? svga->state.hw_draw.vs->gb_shader : NULL;
+ struct svga_winsys_gb_shader *gbshader = NULL;
+ SVGA3dShaderId shaderId = SVGA3D_INVALID_ID;
- assert(svga->rebind.vs);
+ assert(svga->rebind.flags.vs);
assert(svga_have_gb_objects(svga));
- ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader);
+ if (svga->state.hw_draw.vs) {
+ gbshader = svga->state.hw_draw.vs->gb_shader;
+ shaderId = svga->state.hw_draw.vs->id;
+ }
+
+ if (!svga_need_to_rebind_resources(svga)) {
+ ret = svga->swc->resource_rebind(svga->swc, NULL, gbshader,
+ SVGA_RELOC_READ);
+ goto out;
+ }
+
+ if (svga_have_vgpu10(svga))
+ ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS,
+ gbshader, shaderId);
+ else
+ ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader);
+
+ out:
+ if (ret != PIPE_OK)
+ return ret;
+
+ svga->rebind.flags.vs = FALSE;
+ return PIPE_OK;
+}
+
+
+/**
+ * Build and compile a pass-through vertex shader for use with the 'draw'
+ * module.
+ *
+ * The current vertex shader is already executed by the 'draw' module, so
+ * we just need to generate a simple vertex shader to pass through all
+ * those VS outputs that will be consumed by the fragment shader.
+ *
+ * \param svga         the context; must be a VGPU10 context (asserted)
+ * \param vs           not referenced by this function
+ * \param fs           fragment shader whose COLOR/GENERIC/FOG inputs are
+ *                     passed through; must be non-NULL (asserted)
+ * \param out_variant  receives the compiled variant on success
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if the ureg program
+ *         cannot be created, otherwise the error returned by compile_vs()
+ */
+static enum pipe_error
+compile_passthrough_vs(struct svga_context *svga,
+                       struct svga_vertex_shader *vs,
+                       struct svga_fragment_shader *fs,
+                       struct svga_shader_variant **out_variant)
+{
+   struct svga_shader_variant *variant = NULL;
+   unsigned num_inputs;
+   unsigned i;
+   unsigned num_elements;
+   struct svga_vertex_shader new_vs;
+   struct ureg_src src[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_dst dst[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_program *ureg;
+   unsigned num_tokens;
+   struct svga_compile_key key;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+   assert(fs);
+
+   num_inputs = fs->base.info.num_inputs;
+
+   ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!ureg)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* draw will always add position */
+   dst[0] = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+   src[0] = ureg_DECL_vs_input(ureg, 0);
+   num_elements = 1;
+
+   /**
+    * swtnl backend redefines the input layout based on the
+    * fragment shader's inputs. So we only need to passthrough
+    * those inputs that will be consumed by the fragment shader.
+    * Note: DX10 requires the number of vertex elements
+    * specified in the input layout to be no less than the
+    * number of inputs to the vertex shader.
+    */
+   for (i = 0; i < num_inputs; i++) {
+      switch (fs->base.info.input_semantic_name[i]) {
+      case TGSI_SEMANTIC_COLOR:
+      case TGSI_SEMANTIC_GENERIC:
+      case TGSI_SEMANTIC_FOG:
+         dst[num_elements] = ureg_DECL_output(ureg,
+                                fs->base.info.input_semantic_name[i],
+                                fs->base.info.input_semantic_index[i]);
+         src[num_elements] = ureg_DECL_vs_input(ureg, num_elements);
+         num_elements++;
+         break;
+      default:
+         break;
+      }
+   }
+
+   /* One MOV per declared output: output[i] = input[i] */
+   for (i = 0; i < num_elements; i++) {
+      ureg_MOV(ureg, dst[i], src[i]);
+   }
+
+   ureg_END(ureg);
+
+   memset(&new_vs, 0, sizeof(new_vs));
+   new_vs.base.tokens = ureg_get_tokens(ureg, &num_tokens);
+   tgsi_scan_shader(new_vs.base.tokens, &new_vs.base.info);
+
+   memset(&key, 0, sizeof(key));
+   key.vs.undo_viewport = 1;
+
+   ret = compile_vs(svga, &new_vs, &key, &variant);
+
+   /* The temporary tokens and the ureg program are not needed once
+    * compile_vs() has returned.  Release them before looking at the
+    * result so that a failed compile does not leak them.
+    */
+   ureg_free_tokens(new_vs.base.tokens);
+   ureg_destroy(ureg);
+
    if (ret != PIPE_OK)
       return ret;
-   svga->rebind.vs = FALSE;
+   /* Overwrite the variant key to indicate it's a pass-through VS */
+   memset(&variant->key, 0, sizeof(variant->key));
+   variant->key.vs.passthrough = 1;
+   variant->key.vs.undo_viewport = 1;
+
+   *out_variant = variant;
+
    return PIPE_OK;
 }
static enum pipe_error
emit_hw_vs(struct svga_context *svga, unsigned dirty)
{
- struct svga_shader_variant *variant = NULL;
+ struct svga_shader_variant *variant;
+ struct svga_vertex_shader *vs = svga->curr.vs;
+ struct svga_fragment_shader *fs = svga->curr.fs;
enum pipe_error ret = PIPE_OK;
+ struct svga_compile_key key;
+
+ /* If there is an active geometry shader, and it has stream output
+ * defined, then we will skip the stream output from the vertex shader
+ */
+ if (!svga_have_gs_streamout(svga)) {
+ /* No GS stream out */
+ if (svga_have_vs_streamout(svga)) {
+ /* Set VS stream out */
+ svga_set_stream_output(svga, vs->base.stream_output);
+ }
+ else {
+ /* turn off stream out */
+ svga_set_stream_output(svga, NULL);
+ }
+ }
/* SVGA_NEW_NEED_SWTNL */
- if (!svga->state.sw.need_swtnl) {
- struct svga_vertex_shader *vs = svga->curr.vs;
- struct svga_vs_compile_key key;
+ if (svga->state.sw.need_swtnl && !svga_have_vgpu10(svga)) {
+ /* No vertex shader is needed */
+ variant = NULL;
+ }
+ else {
+ make_vs_key(svga, &key);
- make_vs_key( svga, &key );
+ /* See if we already have a VS variant that matches the key */
+ variant = svga_search_shader_key(&vs->base, &key);
- variant = search_vs_key( vs, &key );
if (!variant) {
- ret = compile_vs( svga, vs, &key, &variant );
+ /* Create VS variant now */
+ if (key.vs.passthrough) {
+ ret = compile_passthrough_vs(svga, vs, fs, &variant);
+ }
+ else {
+ ret = compile_vs(svga, vs, &key, &variant);
+ }
if (ret != PIPE_OK)
return ret;
- }
- assert(variant);
+ /* insert the new variant at head of linked list */
+ assert(variant);
+ variant->next = vs->base.variants;
+ vs->base.variants = variant;
+ }
}
if (variant != svga->state.hw_draw.vs) {
- if (svga_have_gb_objects(svga)) {
- struct svga_winsys_gb_shader *gbshader =
- variant ? variant->gb_shader : NULL;
- ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_VS, gbshader);
- if (ret != PIPE_OK)
- return ret;
-
- svga->rebind.vs = FALSE;
- }
- else {
- unsigned id = variant ? variant->id : SVGA_ID_INVALID;
- ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS, id);
+ /* Bind the new variant */
+ if (variant) {
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, variant);
if (ret != PIPE_OK)
return ret;
+ svga->rebind.flags.vs = FALSE;
}
svga->dirty |= SVGA_NEW_VS_VARIANT;
- svga->state.hw_draw.vs = variant;
+ svga->state.hw_draw.vs = variant;
}
return PIPE_OK;
"vertex shader (hwtnl)",
(SVGA_NEW_VS |
SVGA_NEW_FS |
+ SVGA_NEW_TEXTURE_BINDING |
+ SVGA_NEW_SAMPLER |
+ SVGA_NEW_RAST |
SVGA_NEW_PRESCALE |
SVGA_NEW_VELEMENT |
SVGA_NEW_NEED_SWTNL),
--- /dev/null
+/**********************************************************
+ * Copyright 2014 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_STREAMOUT_H
+#define SVGA_STREAMOUT_H
+
+struct svga_shader;
+
+/**
+ * Stream output state object handled by svga_create_stream_output(),
+ * svga_set_stream_output() and svga_delete_stream_output().
+ */
+struct svga_stream_output {
+ struct pipe_stream_output_info info; /* gallium stream output description */
+ unsigned pos_out_index; /* position output index */
+ unsigned id; /* NOTE(review): presumably the device-side stream output object id -- confirm */
+};
+
+/*
+ * Create a stream output object for the given shader from a gallium
+ * stream output description.
+ * NOTE(review): failure behaviour (NULL return?) is defined by the
+ * implementation, which is not part of this header -- confirm.
+ */
+struct svga_stream_output *
+svga_create_stream_output(struct svga_context *svga,
+ struct svga_shader *shader,
+ const struct pipe_stream_output_info *info);
+
+/*
+ * Make the given stream output object current.  Callers pass NULL to
+ * turn stream output off.  Returns a pipe_error code.
+ */
+enum pipe_error
+svga_set_stream_output(struct svga_context *svga,
+ struct svga_stream_output *streamout);
+
+/*
+ * Delete a stream output object.
+ * NOTE(review): presumably the counterpart of
+ * svga_create_stream_output() -- confirm against the implementation.
+ */
+void
+svga_delete_stream_output(struct svga_context *svga,
+ struct svga_stream_output *streamout);
+
+#endif /* SVGA_STREAMOUT_H */
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
#include "os/os_thread.h"
+#include "util/u_bitmask.h"
#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "svga_format.h"
#include "svga_screen.h"
#include "svga_context.h"
+#include "svga_sampler_view.h"
#include "svga_resource_texture.h"
#include "svga_surface.h"
#include "svga_debug.h"
+static void svga_mark_surface_dirty(struct pipe_surface *surf);
void
svga_texture_copy_handle(struct svga_context *svga,
struct svga_winsys_surface *src_handle,
unsigned src_x, unsigned src_y, unsigned src_z,
- unsigned src_level, unsigned src_face,
+ unsigned src_level, unsigned src_layer,
struct svga_winsys_surface *dst_handle,
unsigned dst_x, unsigned dst_y, unsigned dst_z,
- unsigned dst_level, unsigned dst_face,
+ unsigned dst_level, unsigned dst_layer,
unsigned width, unsigned height, unsigned depth)
{
struct svga_surface dst, src;
src.handle = src_handle;
src.real_level = src_level;
- src.real_face = src_face;
+ src.real_layer = src_layer;
src.real_zslice = 0;
dst.handle = dst_handle;
dst.real_level = dst_level;
- dst.real_face = dst_face;
+ dst.real_layer = dst_layer;
dst.real_zslice = 0;
box.x = dst_x;
struct svga_winsys_surface *
svga_texture_view_surface(struct svga_context *svga,
struct svga_texture *tex,
+ unsigned bind_flags,
SVGA3dSurfaceFlags flags,
SVGA3dSurfaceFormat format,
unsigned start_mip,
unsigned num_mip,
- int face_pick,
+ int layer_pick,
+ unsigned num_layers,
int zslice_pick,
struct svga_host_surface_cache_key *key) /* OUT */
{
unsigned z_offset = 0;
SVGA_DBG(DEBUG_PERF,
- "svga: Create surface view: face %d zslice %d mips %d..%d\n",
- face_pick, zslice_pick, start_mip, start_mip+num_mip-1);
+ "svga: Create surface view: layer %d zslice %d mips %d..%d\n",
+ layer_pick, zslice_pick, start_mip, start_mip+num_mip-1);
key->flags = flags;
key->format = format;
key->size.height = u_minify(tex->b.b.height0, start_mip);
key->size.depth = zslice_pick < 0 ? u_minify(tex->b.b.depth0, start_mip) : 1;
key->cachable = 1;
+ key->arraySize = 1;
+ key->numFaces = 1;
+ key->sampleCount = tex->b.b.nr_samples;
+
+ if (key->sampleCount > 1) {
+ key->flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS;
+ }
- if (tex->b.b.target == PIPE_TEXTURE_CUBE && face_pick < 0) {
+ if (tex->b.b.target == PIPE_TEXTURE_CUBE && layer_pick < 0) {
key->flags |= SVGA3D_SURFACE_CUBEMAP;
key->numFaces = 6;
- } else {
- key->numFaces = 1;
+ } else if (tex->b.b.target == PIPE_TEXTURE_1D_ARRAY ||
+ tex->b.b.target == PIPE_TEXTURE_2D_ARRAY) {
+ key->arraySize = num_layers;
}
if (key->format == SVGA3D_FORMAT_INVALID) {
}
SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n");
- handle = svga_screen_surface_create(ss, key);
+ handle = svga_screen_surface_create(ss, bind_flags, PIPE_USAGE_DEFAULT, key);
if (!handle) {
key->cachable = 0;
return NULL;
SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture view)\n", handle);
- if (face_pick < 0)
- face_pick = 0;
+ if (layer_pick < 0)
+ layer_pick = 0;
if (zslice_pick >= 0)
z_offset = zslice_pick;
for (i = 0; i < key->numMipLevels; i++) {
- for (j = 0; j < key->numFaces; j++) {
- if (svga_is_texture_level_defined(tex, j + face_pick, i + start_mip)) {
+ for (j = 0; j < key->numFaces * key->arraySize; j++) {
+ if (svga_is_texture_level_defined(tex, j + layer_pick, i + start_mip)) {
unsigned depth = (zslice_pick < 0 ?
u_minify(tex->b.b.depth0, i + start_mip) :
1);
tex->handle,
0, 0, z_offset,
i + start_mip,
- j + face_pick,
+ j + layer_pick,
handle, 0, 0, 0, i, j,
u_minify(tex->b.b.width0, i + start_mip),
u_minify(tex->b.b.height0, i + start_mip),
}
+/**
+ * A helper function to create a surface view.
+ * The view boolean flag specifies whether svga_texture_view_surface()
+ * will be called to create a cloned surface and resource for the view.
+ */
static struct pipe_surface *
-svga_create_surface(struct pipe_context *pipe,
- struct pipe_resource *pt,
- const struct pipe_surface *surf_tmpl)
+svga_create_surface_view(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl,
+ boolean view)
{
struct svga_context *svga = svga_context(pipe);
struct svga_texture *tex = svga_texture(pt);
struct pipe_screen *screen = pipe->screen;
struct svga_screen *ss = svga_screen(screen);
struct svga_surface *s;
- unsigned face, zslice;
- boolean view = FALSE;
- SVGA3dSurfaceFlags flags;
+ unsigned layer, zslice, bind;
+ unsigned nlayers = 1;
+ SVGA3dSurfaceFlags flags = 0;
SVGA3dSurfaceFormat format;
- assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
-
s = CALLOC_STRUCT(svga_surface);
if (!s)
return NULL;
if (pt->target == PIPE_TEXTURE_CUBE) {
- face = surf_tmpl->u.tex.first_layer;
+ layer = surf_tmpl->u.tex.first_layer;
zslice = 0;
}
+ else if (pt->target == PIPE_TEXTURE_1D_ARRAY ||
+ pt->target == PIPE_TEXTURE_2D_ARRAY) {
+ layer = surf_tmpl->u.tex.first_layer;
+ zslice = 0;
+ nlayers = surf_tmpl->u.tex.last_layer - surf_tmpl->u.tex.first_layer + 1;
+ }
else {
- face = 0;
+ layer = 0;
zslice = surf_tmpl->u.tex.first_layer;
}
s->base.u.tex.level = surf_tmpl->u.tex.level;
s->base.u.tex.first_layer = surf_tmpl->u.tex.first_layer;
s->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+ s->view_id = SVGA3D_INVALID_ID;
+
+ s->backed = NULL;
if (util_format_is_depth_or_stencil(surf_tmpl->format)) {
- flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
+ flags = SVGA3D_SURFACE_HINT_DEPTHSTENCIL |
+ SVGA3D_SURFACE_BIND_DEPTH_STENCIL;
+ bind = PIPE_BIND_DEPTH_STENCIL;
}
else {
- flags = SVGA3D_SURFACE_HINT_RENDERTARGET;
+ flags = SVGA3D_SURFACE_HINT_RENDERTARGET |
+ SVGA3D_SURFACE_BIND_RENDER_TARGET;
+ bind = PIPE_BIND_RENDER_TARGET;
}
- format = svga_translate_format(ss, surf_tmpl->format, 0);
+ if (tex->imported)
+ format = tex->key.format;
+ else
+ format = svga_translate_format(ss, surf_tmpl->format, bind);
+
assert(format != SVGA3D_FORMAT_INVALID);
- if (svga_screen(screen)->debug.force_surface_view)
- view = TRUE;
+ if (view) {
+ SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u layer %u z %u, %p\n",
+ pt, surf_tmpl->u.tex.level, layer, zslice, s);
+
+ if (svga_have_vgpu10(svga)) {
+ switch (pt->target) {
+ case PIPE_TEXTURE_1D:
+ flags |= SVGA3D_SURFACE_1D;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ flags |= SVGA3D_SURFACE_1D | SVGA3D_SURFACE_ARRAY;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ flags |= SVGA3D_SURFACE_ARRAY;
+ break;
+ case PIPE_TEXTURE_3D:
+ flags |= SVGA3D_SURFACE_VOLUME;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ if (nlayers == 6)
+ flags |= SVGA3D_SURFACE_CUBEMAP;
+ break;
+ default:
+ break;
+ }
+ }
- /* Currently only used for compressed textures */
- if (format != svga_translate_format(ss, surf_tmpl->format, 0)) {
- view = TRUE;
+ /* When we clone the surface view resource, use the format used in
+ * the creation of the original resource.
+ */
+ s->handle = svga_texture_view_surface(svga, tex, bind, flags, tex->key.format,
+ surf_tmpl->u.tex.level, 1,
+ layer, nlayers, zslice, &s->key);
+ if (!s->handle) {
+ FREE(s);
+ return NULL;
+ }
+
+ s->key.format = format;
+ s->real_layer = 0;
+ s->real_level = 0;
+ s->real_zslice = 0;
+ } else {
+ SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, layer %u, z %u, %p\n",
+ pt, surf_tmpl->u.tex.level, layer, zslice, s);
+
+ memset(&s->key, 0, sizeof s->key);
+ s->key.format = format;
+ s->handle = tex->handle;
+ s->real_layer = layer;
+ s->real_zslice = zslice;
+ s->real_level = surf_tmpl->u.tex.level;
}
+ return &s->base;
+}
+
+
+static struct pipe_surface *
+svga_create_surface(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct pipe_screen *screen = pipe->screen;
+ boolean view = FALSE;
+
+ if (svga_screen(screen)->debug.force_surface_view)
+ view = TRUE;
+
if (surf_tmpl->u.tex.level != 0 &&
svga_screen(screen)->debug.force_level_surface_view)
view = TRUE;
if (pt->target == PIPE_TEXTURE_3D)
view = TRUE;
- if (svga_screen(screen)->debug.no_surface_view)
+ if (svga_have_vgpu10(svga) || svga_screen(screen)->debug.no_surface_view)
view = FALSE;
- if (view) {
- SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n",
- pt, surf_tmpl->u.tex.level, face, zslice, s);
+ return svga_create_surface_view(pipe, pt, surf_tmpl, view);
+}
- s->handle = svga_texture_view_surface(svga, tex, flags, format,
- surf_tmpl->u.tex.level,
- 1, face, zslice, &s->key);
- s->real_face = 0;
- s->real_level = 0;
- s->real_zslice = 0;
- } else {
- SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n",
- pt, surf_tmpl->u.tex.level, face, zslice, s);
- memset(&s->key, 0, sizeof s->key);
- s->handle = tex->handle;
- s->real_face = face;
- s->real_zslice = zslice;
- s->real_level = surf_tmpl->u.tex.level;
+/**
+ * Clone the surface view and its associated resource.
+ *
+ * Returns the "backed" surface of \p s.  If \p s has no backed surface
+ * yet, one is created with svga_create_surface_view(..., TRUE) from the
+ * same texture and surface template and cached in s->backed, so later
+ * calls reuse it.  The backed surface is marked dirty on every call.
+ *
+ * \return the backed surface, or NULL if it could not be created
+ */
+static struct svga_surface *
+create_backed_surface_view(struct svga_context *svga, struct svga_surface *s)
+{
+ struct svga_surface *bs = s->backed;
+
+ if (bs == NULL) {
+ struct svga_texture *tex = svga_texture(s->base.texture);
+ struct pipe_surface *backed_view;
+
+ /* view == TRUE: request a cloned surface for the view */
+ backed_view = svga_create_surface_view(&svga->pipe,
+ &tex->b.b,
+ &s->base,
+ TRUE);
+ if (!backed_view)
+ return NULL;
+
+ bs = svga_surface(backed_view);
+ s->backed = bs;
}
+ svga_mark_surface_dirty(&bs->base);
+
+ return bs;
+}
+
+/**
+ * Create a DX RenderTarget/DepthStencil View for the given surface,
+ * if needed.
+ *
+ * If the surface's resource is also referenced by a sampler view of any
+ * shader stage, the backed (cloned) surface is used instead of \p s.
+ * A view id is allocated and the view defined on the device only when the
+ * chosen surface does not have a valid view id yet.
+ *
+ * \param svga  the context; must be a VGPU10 context (asserted)
+ * \param s     the surface to validate
+ * \return the pipe_surface to bind (either \p s or its backed surface),
+ *         or NULL if the backed surface, the view id or the device view
+ *         could not be created
+ */
+struct pipe_surface *
+svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
+{
+   enum pipe_error ret = PIPE_OK;
+   unsigned shader;
+
+   assert(svga_have_vgpu10(svga));
+
+   /**
+    * DX spec explicitly specifies that no resource can be bound to a render
+    * target view and a shader resource view simultaneously.
+    * So first check if the resource bound to this surface view collides with
+    * a sampler view.  If so, then we will clone this surface view and its
+    * associated resource.  We will then use the cloned surface view for
+    * render target.
+    */
+   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+      if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) {
+         /* s->handle is a pointer, so print it with %p like the other
+          * debug messages in this file do.
+          */
+         SVGA_DBG(DEBUG_VIEWS,
+                  "same resource used in shaderResource and renderTarget %p\n",
+                  s->handle);
+         s = create_backed_surface_view(svga, s);
+         if (!s)
+            return NULL;
+
+         break;
+      }
+   }
+
+   if (s->view_id == SVGA3D_INVALID_ID) {
+      SVGA3dResourceType resType;
+      SVGA3dRenderTargetViewDesc desc;
+
+      desc.tex.mipSlice = s->real_level;
+      desc.tex.firstArraySlice = s->real_layer + s->real_zslice;
+      desc.tex.arraySize =
+         s->base.u.tex.last_layer - s->base.u.tex.first_layer + 1;
+
+      s->view_id = util_bitmask_add(svga->surface_view_id_bm);
+      if (s->view_id == UTIL_BITMASK_INVALID_INDEX) {
+         /* Could not allocate a view id; leave the surface without a view */
+         s->view_id = SVGA3D_INVALID_ID;
+         return NULL;
+      }
+
+      switch (s->base.texture->target) {
+      case PIPE_TEXTURE_1D:
+      case PIPE_TEXTURE_1D_ARRAY:
+         resType = SVGA3D_RESOURCE_TEXTURE1D;
+         break;
+      case PIPE_TEXTURE_RECT:
+      case PIPE_TEXTURE_2D:
+      case PIPE_TEXTURE_2D_ARRAY:
+      case PIPE_TEXTURE_CUBE:
+         /* drawing to cube map is treated as drawing to 2D array */
+         resType = SVGA3D_RESOURCE_TEXTURE2D;
+         break;
+      case PIPE_TEXTURE_3D:
+         resType = SVGA3D_RESOURCE_TEXTURE3D;
+         break;
+      default:
+         assert(!"Unexpected texture target");
+         resType = SVGA3D_RESOURCE_TEXTURE2D;
+      }
+
+      if (util_format_is_depth_or_stencil(s->base.format)) {
+         ret = SVGA3D_vgpu10_DefineDepthStencilView(svga->swc,
+                                                    s->view_id,
+                                                    s->handle,
+                                                    s->key.format,
+                                                    resType,
+                                                    &desc);
+      }
+      else {
+         ret = SVGA3D_vgpu10_DefineRenderTargetView(svga->swc,
+                                                    s->view_id,
+                                                    s->handle,
+                                                    s->key.format,
+                                                    resType,
+                                                    &desc);
+      }
+
+      if (ret != PIPE_OK) {
+         /* Give the id back so it can be reused by a later attempt */
+         util_bitmask_clear(svga->surface_view_id_bm, s->view_id);
+         s->view_id = SVGA3D_INVALID_ID;
+         return NULL;
+      }
+   }
    return &s->base;
 }
+
+/**
+ * pipe_context::surface_destroy hook (installed by
+ * svga_init_surface_functions()).
+ *
+ * Destroys the backed surface first if there is one, releases the
+ * surface's own host surface when it is not the texture's handle,
+ * destroys the device view if one was defined, then drops the texture
+ * reference and frees the surface.
+ */
static void
svga_surface_destroy(struct pipe_context *pipe,
struct pipe_surface *surf)
{
+ struct svga_context *svga = svga_context(pipe);
struct svga_surface *s = svga_surface(surf);
struct svga_texture *t = svga_texture(surf->texture);
struct svga_screen *ss = svga_screen(surf->texture->screen);
+ enum pipe_error ret = PIPE_OK;
+
+ /* Destroy the backed view surface if it exists */
+ if (s->backed) {
+ svga_surface_destroy(pipe, &s->backed->base);
+ s->backed = NULL;
+ }
if (s->handle != t->handle) {
SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle);
svga_screen_surface_destroy(ss, &s->key, &s->handle);
}
+ if (s->view_id != SVGA3D_INVALID_ID) {
+ unsigned try;
+
+ assert(svga_have_vgpu10(svga));
+ /* At most two attempts: if the destroy command fails, flush the
+ * context and try once more.
+ */
+ for (try = 0; try < 2; try++) {
+ if (util_format_is_depth_or_stencil(s->base.format)) {
+ ret = SVGA3D_vgpu10_DestroyDepthStencilView(svga->swc, s->view_id);
+ }
+ else {
+ ret = SVGA3D_vgpu10_DestroyRenderTargetView(svga->swc, s->view_id);
+ }
+ if (ret == PIPE_OK)
+ break;
+ svga_context_flush(svga, NULL);
+ }
+ assert(ret == PIPE_OK);
+ /* The view id is returned to the bitmask regardless of ret */
+ util_bitmask_clear(svga->surface_view_id_bm, s->view_id);
+ }
+
pipe_resource_reference(&surf->texture, NULL);
FREE(surf);
}
svga_mark_surface_dirty(struct pipe_surface *surf)
{
struct svga_surface *s = svga_surface(surf);
+ struct svga_texture *tex = svga_texture(surf->texture);
if (!s->dirty) {
- struct svga_texture *tex = svga_texture(surf->texture);
-
s->dirty = TRUE;
if (s->handle == tex->handle) {
/* hmm so 3d textures always have all their slices marked ? */
- if (surf->texture->target == PIPE_TEXTURE_CUBE)
- svga_define_texture_level(tex, surf->u.tex.first_layer,
- surf->u.tex.level);
- else
- svga_define_texture_level(tex, 0, surf->u.tex.level);
+ svga_define_texture_level(tex, surf->u.tex.first_layer,
+ surf->u.tex.level);
}
else {
/* this will happen later in svga_propagate_surface */
}
-
- /* Increment the view_age and texture age for this surface's mipmap
- * level so that any sampler views into the texture are re-validated too.
- */
- svga_age_texture_view(tex, surf->u.tex.level);
}
+
+ /* Increment the view_age and texture age for this surface's mipmap
+ * level so that any sampler views into the texture are re-validated too.
+ */
+ svga_age_texture_view(tex, surf->u.tex.level);
}
struct svga_surface *s = svga_surface(surf);
struct svga_texture *tex = svga_texture(surf->texture);
struct svga_screen *ss = svga_screen(surf->texture->screen);
- unsigned zslice, face;
+ unsigned zslice, layer;
+ unsigned nlayers = 1;
+ unsigned i;
if (!s->dirty)
return;
if (surf->texture->target == PIPE_TEXTURE_CUBE) {
zslice = 0;
- face = surf->u.tex.first_layer;
+ layer = surf->u.tex.first_layer;
+ }
+ else if (surf->texture->target == PIPE_TEXTURE_1D_ARRAY ||
+ surf->texture->target == PIPE_TEXTURE_2D_ARRAY) {
+ zslice = 0;
+ layer = surf->u.tex.first_layer;
+ nlayers = surf->u.tex.last_layer - surf->u.tex.first_layer + 1;
}
else {
zslice = surf->u.tex.first_layer;
- face = 0;
+ layer = 0;
}
s->dirty = FALSE;
SVGA_DBG(DEBUG_VIEWS,
"svga: Surface propagate: tex %p, level %u, from %p\n",
tex, surf->u.tex.level, surf);
- svga_texture_copy_handle(svga,
- s->handle, 0, 0, 0, s->real_level, s->real_face,
- tex->handle, 0, 0, zslice, surf->u.tex.level, face,
- u_minify(tex->b.b.width0, surf->u.tex.level),
- u_minify(tex->b.b.height0, surf->u.tex.level), 1);
- svga_define_texture_level(tex, face, surf->u.tex.level);
+ for (i = 0; i < nlayers; i++) {
+ svga_texture_copy_handle(svga,
+ s->handle, 0, 0, 0, s->real_level, s->real_layer + i,
+ tex->handle, 0, 0, zslice, surf->u.tex.level, layer + i,
+ u_minify(tex->b.b.width0, surf->u.tex.level),
+ u_minify(tex->b.b.height0, surf->u.tex.level), 1);
+ svga_define_texture_level(tex, layer + i, surf->u.tex.level);
+ }
}
}
}
+/**
+ * pipe_context::get_sample_position hook (installed by
+ * svga_init_surface_functions()).
+ *
+ * Writes the (x, y) position of sample \p sample_index for a surface with
+ * \p sample_count samples into pos_out[0] and pos_out[1].  Tables exist for
+ * 4, 8 and 16 samples; any other sample count uses the single pixel-centre
+ * entry.  A \p sample_index that is outside the selected table yields the
+ * pixel centre (0.5, 0.5) instead of reading past the end of the table.
+ *
+ * \param context       unused
+ * \param sample_count  number of samples of the surface
+ * \param sample_index  index of the sample to look up
+ * \param pos_out       receives two floats
+ */
+static void
+svga_get_sample_position(struct pipe_context *context,
+                         unsigned sample_count, unsigned sample_index,
+                         float *pos_out)
+{
+   /* We can't actually query the device to learn the sample positions.
+    * These were grabbed from nvidia's driver.
+    */
+   static const float pos1[1][2] = {
+      { 0.5, 0.5 }
+   };
+   static const float pos4[4][2] = {
+      { 0.375000, 0.125000 },
+      { 0.875000, 0.375000 },
+      { 0.125000, 0.625000 },
+      { 0.625000, 0.875000 }
+   };
+   static const float pos8[8][2] = {
+      { 0.562500, 0.312500 },
+      { 0.437500, 0.687500 },
+      { 0.812500, 0.562500 },
+      { 0.312500, 0.187500 },
+      { 0.187500, 0.812500 },
+      { 0.062500, 0.437500 },
+      { 0.687500, 0.937500 },
+      { 0.937500, 0.062500 }
+   };
+   static const float pos16[16][2] = {
+      { 0.187500, 0.062500 },
+      { 0.437500, 0.187500 },
+      { 0.062500, 0.312500 },
+      { 0.312500, 0.437500 },
+      { 0.687500, 0.062500 },
+      { 0.937500, 0.187500 },
+      { 0.562500, 0.312500 },
+      { 0.812500, 0.437500 },
+      { 0.187500, 0.562500 },
+      { 0.437500, 0.687500 },
+      { 0.062500, 0.812500 },
+      { 0.312500, 0.937500 },
+      { 0.687500, 0.562500 },
+      { 0.937500, 0.687500 },
+      { 0.562500, 0.812500 },
+      { 0.812500, 0.937500 }
+   };
+   const float (*positions)[2];
+   unsigned num_positions;
+
+   switch (sample_count) {
+   case 4:
+      positions = pos4;
+      num_positions = sizeof pos4 / sizeof pos4[0];
+      break;
+   case 8:
+      positions = pos8;
+      num_positions = sizeof pos8 / sizeof pos8[0];
+      break;
+   case 16:
+      positions = pos16;
+      num_positions = sizeof pos16 / sizeof pos16[0];
+      break;
+   default:
+      positions = pos1;
+      num_positions = sizeof pos1 / sizeof pos1[0];
+      break;
+   }
+
+   /* Never index past the selected table: an out-of-range sample index
+    * (for example index 1 with an unsupported sample count) falls back
+    * to the pixel centre.
+    */
+   if (sample_index >= num_positions) {
+      positions = pos1;
+      sample_index = 0;
+   }
+
+   pos_out[0] = positions[sample_index][0];
+   pos_out[1] = positions[sample_index][1];
+}
+
+/**
+ * Install the surface-related pipe_context callbacks implemented in this
+ * file (create_surface, surface_destroy, get_sample_position) on the
+ * context's pipe.
+ */
void
svga_init_surface_functions(struct svga_context *svga)
{
svga->pipe.create_surface = svga_create_surface;
svga->pipe.surface_destroy = svga_surface_destroy;
+ svga->pipe.get_sample_position = svga_get_sample_position;
}
struct svga_host_surface_cache_key key;
struct svga_winsys_surface *handle;
- unsigned real_face;
+ unsigned real_layer;
unsigned real_level;
unsigned real_zslice;
boolean dirty;
+
+ /* VGPU10 */
+ SVGA3dRenderTargetViewId view_id;
+ struct svga_surface *backed;
};
struct svga_winsys_surface *
svga_texture_view_surface(struct svga_context *svga,
struct svga_texture *tex,
+ unsigned bind_flags,
SVGA3dSurfaceFlags flags,
SVGA3dSurfaceFormat format,
unsigned start_mip,
unsigned num_mip,
- int face_pick,
+ int layer_pick,
+ unsigned num_layers,
int zslice_pick,
struct svga_host_surface_cache_key *key); /* OUT */
return (const struct svga_surface *)surface;
}
+struct pipe_surface *
+svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s);
+
+
#endif
#include "svga_reg.h"
#include "svga3d_reg.h"
#include "svga_draw.h"
+#include "svga_shader.h"
#include "svga_swtnl_private.h"
PIPE_TRANSFER_DISCARD_RANGE |
PIPE_TRANSFER_UNSYNCHRONIZED,
&svga_render->vbuf_transfer);
- if (ptr)
+ if (ptr) {
+ svga_render->vbuf_ptr = ptr;
return ptr + svga_render->vbuf_offset;
+ }
else {
+ svga_render->vbuf_ptr = NULL;
svga_render->vbuf_transfer = NULL;
return NULL;
}
offset = svga_render->vbuf_offset + svga_render->vertex_size * min_index;
length = svga_render->vertex_size * (max_index + 1 - min_index);
+
+ if (0) {
+ /* dump vertex data */
+ const float *f = (const float *) ((char *) svga_render->vbuf_ptr +
+ svga_render->vbuf_offset);
+ unsigned i;
+ debug_printf("swtnl vertex data:\n");
+ for (i = 0; i < length / 4; i += 4) {
+ debug_printf("%u: %f %f %f %f\n", i, f[i], f[i+1], f[i+2], f[i+3]);
+ }
+ }
+
pipe_buffer_flush_mapped_range(&svga->pipe,
svga_render->vbuf_transfer,
offset, length);
SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
enum pipe_error ret;
unsigned i;
+ static const unsigned zero[PIPE_MAX_ATTRIBS] = {0};
/* if the vdecl or vbuf hasn't changed do nothing */
if (!svga->swtnl.new_vdecl)
ret = svga_hwtnl_flush(svga->hwtnl);
/* if we hit this path we might become synced with hw */
svga->swtnl.new_vbuf = TRUE;
- assert(ret == 0);
+ assert(ret == PIPE_OK);
}
- svga_hwtnl_reset_vdecl(svga->hwtnl, svga_render->vdecl_count);
-
for (i = 0; i < svga_render->vdecl_count; i++) {
vdecl[i].array.offset += svga_render->vdecl_offset;
+ }
- svga_hwtnl_vdecl( svga->hwtnl,
- i,
- &vdecl[i],
- svga_render->vbuf );
+ svga_hwtnl_vertex_decls(svga->hwtnl,
+ svga_render->vdecl_count,
+ vdecl,
+ zero,
+ svga_render->layout_id);
+
+ /* Specify the vertex buffer (there's only ever one) */
+ {
+ struct pipe_vertex_buffer vb;
+ vb.buffer = svga_render->vbuf;
+ vb.buffer_offset = svga_render->vdecl_offset;
+ vb.stride = vdecl[0].array.stride;
+ vb.user_buffer = NULL;
+ svga_hwtnl_vertex_buffers(svga->hwtnl, 1, &vb);
}
/* We have already taken care of flatshading, so let the hwtnl
*/
if (svga->state.sw.need_pipeline) {
svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE);
- svga_hwtnl_set_unfilled(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
+ svga_hwtnl_set_fillmode(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
}
else {
svga_hwtnl_set_flatshade( svga->hwtnl,
- svga->curr.rast->templ.flatshade,
+ svga->curr.rast->templ.flatshade ||
+ svga->state.hw_draw.fs->uses_flat_interp,
svga->curr.rast->templ.flatshade_first );
- svga_hwtnl_set_unfilled( svga->hwtnl,
- svga->curr.rast->hw_unfilled );
+ svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
}
svga->swtnl.new_vdecl = FALSE;
static void
svga_vbuf_render_draw_arrays( struct vbuf_render *render,
- unsigned start,
- uint nr )
+ unsigned start, uint nr )
{
struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
struct svga_context *svga = svga_render->svga;
unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
enum pipe_error ret = PIPE_OK;
+ /* instancing will already have been resolved at this point by 'draw' */
+ const unsigned start_instance = 0;
+ const unsigned instance_count = 1;
/* off to hardware */
svga_vbuf_submit_state(svga_render);
*/
svga_update_state_retry( svga, SVGA_STATE_HW_DRAW );
- ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+ ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr,
+ start_instance, instance_count);
if (ret != PIPE_OK) {
svga_context_flush(svga, NULL);
- ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+ ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim,
+ start + bias, nr,
+ start_instance, instance_count);
svga->swtnl.new_vbuf = TRUE;
assert(ret == PIPE_OK);
}
int bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
boolean ret;
size_t size = 2 * nr_indices;
+ /* instancing will already have been resolved at this point by 'draw' */
+ const unsigned start_instance = 0;
+ const unsigned instance_count = 1;
assert(( svga_render->vbuf_offset - svga_render->vdecl_offset) % svga_render->vertex_size == 0);
svga_render->min_index,
svga_render->max_index,
svga_render->prim,
- svga_render->ibuf_offset / 2, nr_indices);
+ svga_render->ibuf_offset / 2, nr_indices,
+ start_instance, instance_count);
if(ret != PIPE_OK) {
svga_context_flush(svga, NULL);
ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
svga_render->min_index,
svga_render->max_index,
svga_render->prim,
- svga_render->ibuf_offset / 2, nr_indices);
+ svga_render->ibuf_offset / 2,
+ nr_indices,
+ start_instance, instance_count);
svga->swtnl.new_vbuf = TRUE;
assert(ret == PIPE_OK);
}
svga_render->vbuf_size = 0;
svga_render->ibuf_alloc_size = 4*1024;
svga_render->vbuf_alloc_size = 64*1024;
+ svga_render->layout_id = SVGA3D_INVALID_ID;
svga_render->base.max_vertex_buffer_bytes = 64*1024/10;
svga_render->base.max_indices = 65536;
svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info;
{
struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = { 0 };
struct pipe_transfer *ib_transfer = NULL;
- struct pipe_transfer *cb_transfer = NULL;
+ struct pipe_transfer *cb_transfer[SVGA_MAX_CONST_BUFS] = { 0 };
struct draw_context *draw = svga->swtnl.draw;
- unsigned i;
+ unsigned i, old_num_vertex_buffers;
const void *map;
enum pipe_error ret;
draw_set_mapped_vertex_buffer(draw, i, map, ~0);
}
}
+ old_num_vertex_buffers = svga->curr.num_vertex_buffers;
/* Map index buffer, if present */
map = NULL;
svga->curr.ib.index_size, ~0);
}
- if (svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer) {
+ /* Map constant buffers */
+ for (i = 0; i < Elements(svga->curr.constbufs[PIPE_SHADER_VERTEX]); ++i) {
+ if (svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer == NULL) {
+ continue;
+ }
+
map = pipe_buffer_map(&svga->pipe,
- svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer,
+ svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer,
PIPE_TRANSFER_READ,
- &cb_transfer);
+ &cb_transfer[i]);
assert(map);
draw_set_mapped_constant_buffer(
- draw, PIPE_SHADER_VERTEX, 0,
+ draw, PIPE_SHADER_VERTEX, i,
map,
- svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer->width0);
+ svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer->width0);
}
draw_vbo(draw, info);
draw_flush(svga->swtnl.draw);
/* Ensure the draw module didn't touch this */
- assert(i == svga->curr.num_vertex_buffers);
-
+ assert(old_num_vertex_buffers == svga->curr.num_vertex_buffers);
+
/*
* unmap vertex/index buffers
*/
draw_set_indexes(draw, NULL, 0, 0);
}
- if (svga->curr.cbufs[PIPE_SHADER_VERTEX].buffer) {
- pipe_buffer_unmap(&svga->pipe, cb_transfer);
+ for (i = 0; i < Elements(svga->curr.constbufs[PIPE_SHADER_VERTEX]); ++i) {
+ if (svga->curr.constbufs[PIPE_SHADER_VERTEX][i].buffer) {
+ pipe_buffer_unmap(&svga->pipe, cb_transfer[i]);
+ }
}
/* Now safe to remove the need_swtnl flag in any update_state call */
if (!screen->haveLineSmooth)
draw_install_aaline_stage(svga->swtnl.draw, &svga->pipe);
- /* always install polygon stipple stage */
- draw_install_pstipple_stage(svga->swtnl.draw, &svga->pipe);
-
/* enable/disable line stipple stage depending on device caps */
draw_enable_line_stipple(svga->swtnl.draw, !screen->haveLineStipple);
unsigned vertex_size;
+ SVGA3dElementLayoutId layout_id; /**< current element layout id */
+
unsigned prim;
struct pipe_resource *vbuf;
struct pipe_transfer *vbuf_transfer;
struct pipe_transfer *ibuf_transfer;
+ void *vbuf_ptr;
+
/* current size of buffer */
size_t vbuf_size;
size_t ibuf_size;
#include "draw/draw_context.h"
#include "draw/draw_vbuf.h"
+#include "util/u_bitmask.h"
#include "util/u_inlines.h"
#include "pipe/p_state.h"
+#include "svga_cmd.h"
#include "svga_context.h"
+#include "svga_shader.h"
#include "svga_swtnl.h"
#include "svga_state.h"
#include "svga_tgsi.h"
float adjx = 0.0f;
float adjy = 0.0f;
- switch (svga->curr.reduced_prim) {
- case PIPE_PRIM_POINTS:
- adjx = SVGA_POINT_ADJ_X;
- adjy = SVGA_POINT_ADJ_Y;
- break;
- case PIPE_PRIM_LINES:
- /* XXX: This is to compensate for the fact that wide lines are
- * going to be drawn with triangles, but we're not catching all
- * cases where that will happen.
- */
- if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)
- {
- adjx = SVGA_LINE_ADJ_X + 0.175f;
- adjy = SVGA_LINE_ADJ_Y - 0.175f;
+ if (svga_have_vgpu10(svga)) {
+ if (svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES) {
+ adjy = 0.25;
}
- else {
- adjx = SVGA_LINE_ADJ_X;
- adjy = SVGA_LINE_ADJ_Y;
+ }
+ else {
+ switch (svga->curr.reduced_prim) {
+ case PIPE_PRIM_POINTS:
+ adjx = SVGA_POINT_ADJ_X;
+ adjy = SVGA_POINT_ADJ_Y;
+ break;
+ case PIPE_PRIM_LINES:
+ /* XXX: This is to compensate for the fact that wide lines are
+ * going to be drawn with triangles, but we're not catching all
+ * cases where that will happen.
+ */
+ if (svga->curr.rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)
+ {
+ adjx = SVGA_LINE_ADJ_X + 0.175f;
+ adjy = SVGA_LINE_ADJ_Y - 0.175f;
+ }
+ else {
+ adjx = SVGA_LINE_ADJ_X;
+ adjy = SVGA_LINE_ADJ_Y;
+ }
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ adjx += SVGA_TRIANGLE_ADJ_X;
+ adjy += SVGA_TRIANGLE_ADJ_Y;
+ break;
}
- break;
- case PIPE_PRIM_TRIANGLES:
- adjx += SVGA_TRIANGLE_ADJ_X;
- adjy += SVGA_TRIANGLE_ADJ_Y;
- break;
}
vp.translate[0] += adjx;
};
+/**
+ * Convert a vertex declaration type into the matching surface format.
+ *
+ * Only the FLOAT1..FLOAT4 declaration types are expected here.  Any other
+ * value trips an assertion and is then treated as a four-component float.
+ *
+ * \param format  the SVGA3D_DECLTYPE_x value to convert
+ * \return the corresponding SVGA3D_R32..._FLOAT surface format
+ */
+static SVGA3dSurfaceFormat
+translate_vertex_format(SVGA3dDeclType format)
+{
+   if (format == SVGA3D_DECLTYPE_FLOAT1) {
+      return SVGA3D_R32_FLOAT;
+   }
+   if (format == SVGA3D_DECLTYPE_FLOAT2) {
+      return SVGA3D_R32G32_FLOAT;
+   }
+   if (format == SVGA3D_DECLTYPE_FLOAT3) {
+      return SVGA3D_R32G32B32_FLOAT;
+   }
+   if (format == SVGA3D_DECLTYPE_FLOAT4) {
+      return SVGA3D_R32G32B32A32_FLOAT;
+   }
+
+   /* Not one of the float types: complain, then fall back to float4. */
+   assert(!"Unexpected format in translate_vertex_format()");
+   return SVGA3D_R32G32B32A32_FLOAT;
+}
+
+
+/**
+ * Translate an array of vertex declarations into input element
+ * descriptors and define them on the device as a new element layout.
+ *
+ * Every element reads per-vertex data from vertex buffer slot 0, and
+ * element i is assigned to input register i.  If the define command
+ * cannot be emitted the context is flushed and the command is retried once.
+ *
+ * \param svga  the context; asserted to be a VGPU10 context
+ * \param vdecl  the vertex declarations to translate
+ * \param num_decls  number of entries in \p vdecl; asserted to be at most
+ *                   PIPE_MAX_ATTRIBS
+ * \return the ID allocated for the new element layout
+ */
+static SVGA3dElementLayoutId
+svga_vdecl_to_input_element(struct svga_context *svga,
+                            const SVGA3dVertexDecl *vdecl, unsigned num_decls)
+{
+   SVGA3dInputElementDesc descs[PIPE_MAX_ATTRIBS];
+   SVGA3dElementLayoutId layout_id;
+   unsigned n;
+
+   assert(num_decls <= PIPE_MAX_ATTRIBS);
+   assert(svga_have_vgpu10(svga));
+
+   for (n = 0; n < num_decls; n++) {
+      const SVGA3dVertexDecl *decl = &vdecl[n];
+      SVGA3dInputElementDesc *desc = &descs[n];
+
+      desc->inputSlot = 0;  /* vertex buffer index */
+      desc->alignedByteOffset = decl->array.offset;
+      desc->format = translate_vertex_format(decl->identity.type);
+      desc->inputSlotClass = SVGA3D_INPUT_PER_VERTEX_DATA;
+      desc->instanceDataStepRate = 0;
+      desc->inputRegister = n;
+   }
+
+   /* Allocate the layout ID first, then define the layout under that ID. */
+   layout_id = util_bitmask_add(svga->input_element_object_id_bm);
+
+   if (SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls,
+                                         layout_id, descs) != PIPE_OK) {
+      enum pipe_error ret;
+
+      /* Flush to make room in the command buffer and try once more. */
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls,
+                                              layout_id, descs);
+      assert(ret == PIPE_OK);
+      (void) ret;
+   }
+
+   return layout_id;
+}
+
+
enum pipe_error
svga_swtnl_update_vdecl( struct svga_context *svga )
{
int nr_decls = 0;
int src;
unsigned i;
+ int any_change;
memset(vinfo, 0, sizeof(*vinfo));
memset(vdecl, 0, sizeof(vdecl));
draw_prepare_shader_outputs(draw);
+
/* always add position */
src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo->attrib[0].emit = EMIT_4F;
vdecl[0].array.offset = offset;
+ vdecl[0].identity.method = SVGA3D_DECLMETHOD_DEFAULT;
vdecl[0].identity.type = SVGA3D_DECLTYPE_FLOAT4;
vdecl[0].identity.usage = SVGA3D_DECLUSAGE_POSITIONT;
vdecl[0].identity.usageIndex = 0;
draw_compute_vertex_size(vinfo);
svga_render->vdecl_count = nr_decls;
- for (i = 0; i < svga_render->vdecl_count; i++)
+ for (i = 0; i < svga_render->vdecl_count; i++) {
vdecl[i].array.stride = offset;
+ }
- if (memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)) == 0)
- return PIPE_OK;
+ any_change = memcmp(svga_render->vdecl, vdecl, sizeof(vdecl));
+
+ if (svga_have_vgpu10(svga)) {
+ enum pipe_error ret;
+
+ if (!any_change && svga_render->layout_id != SVGA3D_INVALID_ID) {
+ return PIPE_OK;
+ }
+
+ if (svga_render->layout_id != SVGA3D_INVALID_ID) {
+ /* destroy old */
+ ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc,
+ svga_render->layout_id);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc,
+ svga_render->layout_id);
+ assert(ret == PIPE_OK);
+ }
+
+ /**
+ * reset current layout id state after the element layout is
+ * destroyed, so that if a new layout has the same layout id, we
+ * will know to re-issue the SetInputLayout command.
+ */
+ if (svga->state.hw_draw.layout_id == svga_render->layout_id)
+ svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID;
+
+ util_bitmask_clear(svga->input_element_object_id_bm,
+ svga_render->layout_id);
+ }
+
+ svga_render->layout_id =
+ svga_vdecl_to_input_element(svga, vdecl, nr_decls);
+
+ /* bind new */
+ if (svga->state.hw_draw.layout_id != svga_render->layout_id) {
+ ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, svga_render->layout_id);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
+ svga_render->layout_id);
+ assert(ret == PIPE_OK);
+ }
+
+ svga->state.hw_draw.layout_id = svga_render->layout_id;
+ }
+ }
+ else {
+ if (!any_change)
+ return PIPE_OK;
+ }
memcpy(svga_render->vdecl, vdecl, sizeof(vdecl));
svga->swtnl.new_vdecl = TRUE;
- return PIPE_OK;
+ return 0;
}
#include "svgadump/svga_shader_dump.h"
#include "svga_context.h"
+#include "svga_shader.h"
#include "svga_tgsi.h"
#include "svga_tgsi_emit.h"
#include "svga_debug.h"
/**
- * Use the shader info to generate a bitmask indicating which generic
- * inputs are used by the shader. A set bit indicates that GENERIC[i]
- * is used.
- */
-unsigned
-svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
-{
- unsigned i, mask = 0x0;
-
- for (i = 0; i < info->num_inputs; i++) {
- if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
- unsigned j = info->input_semantic_index[i];
- assert(j < sizeof(mask) * 8);
- mask |= 1 << j;
- }
- }
-
- return mask;
-}
-
-
-/**
- * Given a mask of used generic variables (as returned by the above functions)
- * fill in a table which maps those indexes to small integers.
- * This table is used by the remap_generic_index() function in
- * svga_tgsi_decl_sm30.c
- * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
- * GENERIC[3] are used. The remap_table will contain:
- * table[1] = 0;
- * table[3] = 1;
- * The remaining table entries will be filled in with the next unused
- * generic index (in this example, 2).
- */
-void
-svga_remap_generics(unsigned generics_mask,
- int8_t remap_table[MAX_GENERIC_VARYING])
-{
- /* Note texcoord[0] is reserved so start at 1 */
- unsigned count = 1, i;
-
- for (i = 0; i < MAX_GENERIC_VARYING; i++) {
- remap_table[i] = -1;
- }
-
- /* for each bit set in generic_mask */
- while (generics_mask) {
- unsigned index = ffs(generics_mask) - 1;
- remap_table[index] = count++;
- generics_mask &= ~(1 << index);
- }
-}
-
-
-/**
- * Use the generic remap table to map a TGSI generic varying variable
- * index to a small integer. If the remapping table doesn't have a
- * valid value for the given index (the table entry is -1) it means
- * the fragment shader doesn't use that VS output. Just allocate
- * the next free value in that case. Alternately, we could cull
- * VS instructions that write to register, or replace the register
- * with a dummy temp register.
- * XXX TODO: we should do one of the later as it would save precious
- * texcoord registers.
- */
-int
-svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
- int generic_index)
-{
- assert(generic_index < MAX_GENERIC_VARYING);
-
- if (generic_index >= MAX_GENERIC_VARYING) {
- /* just don't return a random/garbage value */
- generic_index = MAX_GENERIC_VARYING - 1;
- }
-
- if (remap_table[generic_index] == -1) {
- /* This is a VS output that has no matching PS input. Find a
- * free index.
- */
- int i, max = 0;
- for (i = 0; i < MAX_GENERIC_VARYING; i++) {
- max = MAX2(max, remap_table[i]);
- }
- remap_table[generic_index] = max + 1;
- }
-
- return remap_table[generic_index];
-}
-
-
-/**
* Parse TGSI shader and translate to SVGA/DX9 serialized
* representation.
*
* can be dynamically grown. Once we've finished and know how large
* it is, it will be copied to a hardware buffer for upload.
*/
-static struct svga_shader_variant *
-svga_tgsi_translate(const struct svga_shader *shader,
- const struct svga_compile_key *key, unsigned unit)
+struct svga_shader_variant *
+svga_tgsi_vgpu9_translate(const struct svga_shader *shader,
+ const struct svga_compile_key *key, unsigned unit)
{
struct svga_shader_variant *variant = NULL;
struct svga_shader_emitter emit;
emit.imm_start = emit.info.file_max[TGSI_FILE_CONSTANT] + 1;
if (unit == PIPE_SHADER_FRAGMENT)
- emit.imm_start += key->fkey.num_unnormalized_coords;
+ emit.imm_start += key->num_unnormalized_coords;
if (unit == PIPE_SHADER_VERTEX) {
- emit.imm_start += key->vkey.need_prescale ? 2 : 0;
+ emit.imm_start += key->vs.need_prescale ? 2 : 0;
}
emit.nr_hw_float_const =
memcpy(&variant->key, key, sizeof(*key));
variant->id = UTIL_BITMASK_INVALID_INDEX;
- if (SVGA_DEBUG & DEBUG_TGSI) {
+ variant->pstipple_sampler_unit = emit.pstipple_sampler_unit;
+
+#if 0
+ if (!svga_shader_verify(variant->tokens, variant->nr_tokens) ||
+ SVGA_DEBUG & DEBUG_TGSI) {
debug_printf("#####################################\n");
debug_printf("Shader %u below\n", shader->id);
tgsi_dump(shader->tokens, 0);
}
debug_printf("#####################################\n");
}
+#endif
return variant;
FREE(emit.buf);
return NULL;
}
-
-
-struct svga_shader_variant *
-svga_translate_fragment_program(const struct svga_fragment_shader *fs,
- const struct svga_fs_compile_key *fkey)
-{
- struct svga_compile_key key;
-
- memset(&key, 0, sizeof(key));
-
- memcpy(&key.fkey, fkey, sizeof *fkey);
-
- memcpy(key.generic_remap_table, fs->generic_remap_table,
- sizeof(fs->generic_remap_table));
-
- return svga_tgsi_translate(&fs->base, &key, PIPE_SHADER_FRAGMENT);
-}
-
-
-struct svga_shader_variant *
-svga_translate_vertex_program(const struct svga_vertex_shader *vs,
- const struct svga_vs_compile_key *vkey)
-{
- struct svga_compile_key key;
-
- memset(&key, 0, sizeof(key));
-
- memcpy(&key.vkey, vkey, sizeof *vkey);
-
- /* Note: we could alternately store the remap table in the vkey but
- * that would make it larger. We just regenerate it here instead.
- */
- svga_remap_generics(vkey->fs_generic_inputs, key.generic_remap_table);
-
- return svga_tgsi_translate(&vs->base, &key, PIPE_SHADER_VERTEX);
-}
#ifndef SVGA_TGSI_H
#define SVGA_TGSI_H
-#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+#include "svga3d_reg.h"
-#include "svga_hw_reg.h"
+#define MAX_VGPU10_ADDR_REGS 2
-/**
- * We use a 32-bit mask to keep track of the generic indexes.
- */
-#define MAX_GENERIC_VARYING 32
-
-
-struct svga_fragment_shader;
-struct svga_vertex_shader;
+struct svga_compile_key;
+struct svga_context;
struct svga_shader;
-struct tgsi_shader_info;
-struct tgsi_token;
-
-
-struct svga_vs_compile_key
-{
- unsigned fs_generic_inputs;
- unsigned need_prescale:1;
- unsigned allow_psiz:1;
- unsigned adjust_attrib_range:16;
- unsigned adjust_attrib_w_1:16;
-};
-
-struct svga_fs_compile_key
-{
- unsigned light_twoside:1;
- unsigned front_ccw:1;
- unsigned white_fragments:1;
- unsigned write_color0_to_n_cbufs:3;
- unsigned num_textures:8;
- unsigned num_unnormalized_coords:8;
- unsigned sprite_origin_lower_left:1;
- struct {
- unsigned compare_mode:1;
- unsigned compare_func:3;
- unsigned unnormalized:1;
- unsigned width_height_idx:7;
- unsigned texture_target:8;
- unsigned sprite_texgen:1;
- unsigned swizzle_r:3;
- unsigned swizzle_g:3;
- unsigned swizzle_b:3;
- unsigned swizzle_a:3;
- } tex[PIPE_MAX_SAMPLERS];
-};
-
-/**
- * Key/index for identifying shader variants.
- */
-struct svga_compile_key {
- struct svga_vs_compile_key vkey;
- struct svga_fs_compile_key fkey;
- int8_t generic_remap_table[MAX_GENERIC_VARYING];
-};
-
-
-/**
- * A single TGSI shader may be compiled into different variants of
- * SVGA3D shaders depending on the compile key. Each user shader
- * will have a linked list of these variants.
- */
-struct svga_shader_variant
-{
- const struct svga_shader *shader;
-
- /** Parameters used to generate this variant */
- struct svga_compile_key key;
-
- /* Compiled shader tokens:
- */
- const unsigned *tokens;
- unsigned nr_tokens;
-
- /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE,
- * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY.
- */
- unsigned id;
-
- /* GB object buffer containing the bytecode */
- struct svga_winsys_gb_shader *gb_shader;
-
- /** Next variant */
- struct svga_shader_variant *next;
-};
+struct svga_shader_variant;
/* TGSI doesn't provide use with VS input semantics (they're actually
-static inline unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
-{
- return sizeof *key;
-}
-
-static inline unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
-{
- return (const char *)&key->tex[key->num_textures] - (const char *)key;
-}
-
struct svga_shader_variant *
-svga_translate_fragment_program( const struct svga_fragment_shader *fs,
- const struct svga_fs_compile_key *fkey );
+svga_tgsi_vgpu9_translate(const struct svga_shader *shader,
+ const struct svga_compile_key *key, unsigned unit);
struct svga_shader_variant *
-svga_translate_vertex_program( const struct svga_vertex_shader *fs,
- const struct svga_vs_compile_key *vkey );
-
-
-unsigned
-svga_get_generic_inputs_mask(const struct tgsi_shader_info *info);
-
-unsigned
-svga_get_generic_outputs_mask(const struct tgsi_shader_info *info);
-
-void
-svga_remap_generics(unsigned generics_mask,
- int8_t remap_table[MAX_GENERIC_VARYING]);
+svga_tgsi_vgpu10_translate(struct svga_context *svga,
+ const struct svga_shader *shader,
+ const struct svga_compile_key *key,
+ unsigned unit);
-int
-svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
- int generic_index);
+boolean svga_shader_verify(const uint32_t *tokens, unsigned nr_tokens);
#endif
return emit_decl( emit, reg, 0, 0 );
}
- else if (emit->key.fkey.light_twoside &&
+ else if (emit->key.fs.light_twoside &&
(semantic.Name == TGSI_SEMANTIC_COLOR)) {
if (!translate_vs_ps_semantic( emit, semantic, &usage, &index ))
return FALSE;
if (semantic.Name == TGSI_SEMANTIC_GENERIC &&
- emit->key.fkey.sprite_origin_lower_left &&
+ emit->key.sprite_origin_lower_left &&
index >= 1 &&
- emit->key.fkey.tex[index - 1].sprite_texgen) {
+ emit->key.tex[index - 1].sprite_texgen) {
/* This is a sprite texture coord with lower-left origin.
* We need to invert the texture T coordinate since the SVGA3D
* device only supports an upper-left origin.
switch (semantic.Name) {
case TGSI_SEMANTIC_COLOR:
if (emit->unit == PIPE_SHADER_FRAGMENT) {
- if (emit->key.fkey.white_fragments) {
+ if (emit->key.fs.white_fragments) {
/* Used for XOR logicop mode */
emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
emit->nr_hw_temp++ );
emit->true_color_output[idx] = dst_register(SVGA3DREG_COLOROUT,
semantic.Index);
}
- else if (emit->key.fkey.write_color0_to_n_cbufs) {
+ else if (emit->key.fs.write_color0_to_n_cbufs) {
/* We'll write color output [0] to all render targets.
* Prepare all the output registers here, but only when the
* semantic.Index == 0 so we don't do this more than once.
*/
if (semantic.Index == 0) {
unsigned i;
- for (i = 0; i < emit->key.fkey.write_color0_to_n_cbufs; i++) {
+ for (i = 0; i < emit->key.fs.write_color0_to_n_cbufs; i++) {
emit->output_map[idx+i] = dst_register(SVGA3DREG_TEMP,
emit->nr_hw_temp++);
emit->temp_color_output[i] = emit->output_map[idx+i];
/* This has the effect of not declaring psiz (below) and not
* emitting the final MOV to true_psiz in the postamble.
*/
- if (!emit->key.vkey.allow_psiz)
+ if (!emit->key.vs.allow_psiz)
return TRUE;
emit->true_psiz = dcl.dst;
static ubyte
svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx)
{
- switch (emit->key.fkey.tex[idx].texture_target) {
+ switch (emit->key.tex[idx].texture_target) {
case PIPE_TEXTURE_1D:
return SVGA3DSAMP_2D;
case PIPE_TEXTURE_2D:
#include "tgsi/tgsi_scan.h"
#include "svga_hw_reg.h"
+#include "svga_shader.h"
#include "svga_tgsi.h"
#include "svga3d_shaderdefs.h"
struct svga_arl_consts arl_consts[12];
int num_arl_consts;
int current_arl;
+
+ unsigned pstipple_sampler_unit;
};
#include "tgsi/tgsi_parse.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_pstipple.h"
#include "svga_tgsi_emit.h"
#include "svga_context.h"
idx++;
/* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
- if (emit->key.vkey.adjust_attrib_range) {
+ if (emit->key.vs.adjust_attrib_range) {
if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
idx, 2.0f, 0.0f, 0.0f, 0.0f ))
return FALSE;
struct src_register reg;
/* the width/height indexes start right after constants */
- idx = emit->key.fkey.tex[sampler_num].width_height_idx +
+ idx = emit->key.tex[sampler_num].width_height_idx +
emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
reg = src_register( SVGA3DREG_CONST, idx );
texcoord = translate_src_register( emit, &insn->Src[0] );
sampler = translate_src_register( emit, &insn->Src[1] );
- if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
+ if (emit->key.tex[sampler.base.num].unnormalized ||
emit->dynamic_branching_level > 0)
tmp = get_temp( emit );
/* Explicit normalization of texcoords:
*/
- if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
+ if (emit->key.tex[sampler.base.num].unnormalized) {
struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
/* MUL tmp, SRC0, WH */
const unsigned unit = src1.base.num;
/* check for shadow samplers */
- boolean compare = (emit->key.fkey.tex[unit].compare_mode ==
+ boolean compare = (emit->key.tex[unit].compare_mode ==
PIPE_TEX_COMPARE_R_TO_TEXTURE);
/* texture swizzle */
- boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
- emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
- emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
- emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
+ boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
+ emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
+ emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
+ emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
boolean saturate = insn->Instruction.Saturate;
/* Compare texture sample value against R component of texcoord */
if (!emit_select(emit,
- emit->key.fkey.tex[unit].compare_func,
+ emit->key.tex[unit].compare_func,
writemask( dst2, TGSI_WRITEMASK_XYZ ),
r_coord,
tex_src_x))
/* swizzle from tex_result to dst (handles saturation too, if any) */
emit_tex_swizzle(emit,
dst, src(tex_result),
- emit->key.fkey.tex[unit].swizzle_r,
- emit->key.fkey.tex[unit].swizzle_g,
- emit->key.fkey.tex[unit].swizzle_b,
- emit->key.fkey.tex[unit].swizzle_a);
+ emit->key.tex[unit].swizzle_r,
+ emit->key.tex[unit].swizzle_g,
+ emit->key.tex[unit].swizzle_b,
+ emit->key.tex[unit].swizzle_a);
}
return TRUE;
static boolean
emit_vs_preamble(struct svga_shader_emitter *emit)
{
- if (!emit->key.vkey.need_prescale) {
+ if (!emit->key.vs.need_prescale) {
if (!make_immediate( emit, 0, 0, .5, .5,
&emit->imm_0055))
return FALSE;
* logicop workaround.
*/
if (emit->unit == PIPE_SHADER_FRAGMENT &&
- emit->key.fkey.white_fragments) {
+ emit->key.fs.white_fragments) {
struct src_register one = get_one_immediate(emit);
if (!submit_op1( emit,
return FALSE;
}
else if (emit->unit == PIPE_SHADER_FRAGMENT &&
- i < emit->key.fkey.write_color0_to_n_cbufs) {
+ i < emit->key.fs.write_color0_to_n_cbufs) {
/* Write temp color output [0] to true output [i] */
if (!submit_op1(emit, inst_token(SVGA3DOP_MOV),
emit->true_color_output[i],
/* Need to perform various manipulations on vertex position to cope
* with the different GL and D3D clip spaces.
*/
- if (emit->key.vkey.need_prescale) {
+ if (emit->key.vs.need_prescale) {
SVGA3dShaderDestToken temp_pos = emit->temp_pos;
SVGA3dShaderDestToken depth = emit->depth_pos;
SVGA3dShaderDestToken pos = emit->true_pos;
if_token = inst_token( SVGA3DOP_IFC );
- if (emit->key.fkey.front_ccw)
+ if (emit->key.fs.front_ccw)
if_token.control = SVGA3DOPCOMP_LT;
else
if_token.control = SVGA3DOPCOMP_GT;
temp = dst_register( SVGA3DREG_TEMP,
emit->nr_hw_temp++ );
- if (emit->key.fkey.front_ccw) {
+ if (emit->key.fs.front_ccw) {
pass = get_zero_immediate(emit);
fail = get_one_immediate(emit);
} else {
static boolean
emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
{
- unsigned adjust_mask = (emit->key.vkey.adjust_attrib_range |
- emit->key.vkey.adjust_attrib_w_1);
+ unsigned adjust_mask = (emit->key.vs.adjust_attrib_range |
+ emit->key.vs.adjust_attrib_w_1);
while (adjust_mask) {
/* Adjust vertex attrib range and/or set W component = 1 */
tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
emit->nr_hw_temp++;
- if (emit->key.vkey.adjust_attrib_range & (1 << index)) {
+ if (emit->key.vs.adjust_attrib_range & (1 << index)) {
/* The vertex input/attribute is supposed to be a signed value in
* the range [-1,1] but we actually fetched/converted it to the
* range [0,1]. This most likely happens when the app specifies a
return FALSE;
}
- if (emit->key.vkey.adjust_attrib_w_1 & (1 << index)) {
+ if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) {
/* move 1 into W position of tmp */
if (!submit_op1(emit,
inst_token(SVGA3DOP_MOV),
unsigned i;
if (emit->unit == PIPE_SHADER_FRAGMENT) {
- if (emit->key.fkey.light_twoside)
+ if (emit->key.fs.light_twoside)
return TRUE;
- if (emit->key.fkey.white_fragments)
+ if (emit->key.fs.white_fragments)
return TRUE;
if (emit->emit_frontface)
return TRUE;
/* look for any PIPE_SWIZZLE_ZERO/ONE terms */
- for (i = 0; i < emit->key.fkey.num_textures; i++) {
- if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
- emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
- emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
- emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
+ for (i = 0; i < emit->key.num_textures; i++) {
+ if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
+ emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
+ emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
+ emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
return TRUE;
}
- for (i = 0; i < emit->key.fkey.num_textures; i++) {
- if (emit->key.fkey.tex[i].compare_mode
+ for (i = 0; i < emit->key.num_textures; i++) {
+ if (emit->key.tex[i].compare_mode
== PIPE_TEX_COMPARE_R_TO_TEXTURE)
return TRUE;
}
else if (emit->unit == PIPE_SHADER_VERTEX) {
if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
return TRUE;
- if (emit->key.vkey.adjust_attrib_range ||
- emit->key.vkey.adjust_attrib_w_1)
+ if (emit->key.vs.adjust_attrib_range ||
+ emit->key.vs.adjust_attrib_w_1)
return TRUE;
}
if (!emit_ps_preamble( emit ))
return FALSE;
- if (emit->key.fkey.light_twoside) {
+ if (emit->key.fs.light_twoside) {
if (!emit_light_twoside( emit ))
return FALSE;
}
}
else {
assert(emit->unit == PIPE_SHADER_VERTEX);
- if (emit->key.vkey.adjust_attrib_range ||
- emit->key.vkey.adjust_attrib_w_1) {
- if (!emit_adjusted_vertex_attribs(emit))
+ if (emit->key.vs.adjust_attrib_range) {
+ if (!emit_adjusted_vertex_attribs(emit) ||
+ emit->key.vs.adjust_attrib_w_1) {
return FALSE;
+ }
}
}
-
return TRUE;
}
const struct tgsi_token *tokens)
{
struct tgsi_parse_context parse;
+ const struct tgsi_token *new_tokens = NULL;
boolean ret = TRUE;
boolean helpers_emitted = FALSE;
unsigned line_nr = 0;
+ if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
+ unsigned unit;
+
+ new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0);
+
+ if (new_tokens) {
+ /* Setup texture state for stipple */
+ emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
+ emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
+ emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
+ emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
+ emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
+
+ emit->pstipple_sampler_unit = unit;
+
+ tokens = new_tokens;
+ }
+ }
+
tgsi_parse_init( &parse, tokens );
emit->internal_imm_count = 0;
done:
tgsi_parse_free( &parse );
+ if (new_tokens) {
+ tgsi_free_tokens(new_tokens);
+ }
+
return ret;
}
--- /dev/null
+/**********************************************************
+ * Copyright 1998-2013 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file svga_tgsi_vgpu10.c
+ *
+ * TGSI -> VGPU10 shader translation.
+ *
+ * \author Mingcheng Chen
+ * \author Brian Paul
+ */
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_two_side.h"
+#include "tgsi/tgsi_aa_point.h"
+#include "tgsi/tgsi_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "util/u_debug.h"
+#include "util/u_pstipple.h"
+
+#include "svga_context.h"
+#include "svga_debug.h"
+#include "svga_link.h"
+#include "svga_shader.h"
+#include "svga_tgsi.h"
+
+#include "VGPU10ShaderTokens.h"
+
+
+#define INVALID_INDEX 99999 /* sentinel the *_tmp_index fields are compared against */
+#define MAX_INTERNAL_TEMPS 3
+#define MAX_SYSTEM_VALUES 4 /* size of system_value_indexes[] */
+#define MAX_IMMEDIATE_COUNT \
+ (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4) /* size of immediates[] */
+#define MAX_TEMP_ARRAYS 64 /* size of temp_arrays[]. Enough? */
+
+
+/**
+ * Clipping is complicated. There are four different cases which we
+ * handle during VS/GS shader translation; determine_clipping_mode()
+ * picks one of them for each shader.
+ */
+enum clipping_mode
+{
+ CLIP_NONE, /**< No clipping enabled */
+ CLIP_LEGACY, /**< The shader has no clipping declarations or code but
+ * one or more user-defined clip planes are enabled. We
+ * generate extra code to emit clip distances.
+ */
+ CLIP_DISTANCE, /**< The shader already declares clip distance output
+ * registers and has code to write to them.
+ */
+ CLIP_VERTEX /**< The shader declares a clip vertex output register and
+ * has code that writes to the register. We convert the
+ * clipvertex position into one or more clip distances.
+ */
+};
+
+
+struct svga_shader_emitter_v10
+{
+ /* The token output buffer */
+ unsigned size; /* capacity of buf, in bytes */
+ char *buf; /* start of the buffer */
+ char *ptr; /* next write position inside buf */
+
+ /* Information about the shader and state (does not change) */
+ struct svga_compile_key key;
+ struct tgsi_shader_info info;
+ unsigned unit; /* compared against PIPE_SHADER_VERTEX/GEOMETRY/FRAGMENT */
+
+ unsigned inst_start_token;
+ boolean discard_instruction; /**< throw away current instruction? */
+
+ union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
+ unsigned num_immediates; /**< Number of immediates emitted */
+ unsigned common_immediate_pos[8]; /**< literals for common immediates */
+ unsigned num_common_immediates;
+ boolean immediates_emitted;
+
+ unsigned num_outputs; /**< includes any extra outputs */
+ /** The first extra output is reserved for
+ * non-adjusted vertex position for
+ * stream output purpose
+ */
+
+ /* Temporary Registers */
+ unsigned num_shader_temps; /**< num of temps used by original shader */
+ unsigned internal_temp_count; /**< currently allocated internal temps */
+ struct {
+ unsigned start, size;
+ } temp_arrays[MAX_TEMP_ARRAYS];
+ unsigned num_temp_arrays;
+
+ /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
+ struct {
+ unsigned arrayId, index;
+ } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
+
+ /** Number of constants used by original shader for each constant buffer.
+ * The size should probably always match with that of svga_state.constbufs.
+ */
+ unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
+
+ /* Samplers */
+ unsigned num_samplers;
+
+ /* Address regs (really implemented with temps) */
+ unsigned num_address_regs;
+ unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
+
+ /* Output register usage masks */
+ ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
+
+ /* To map TGSI system value index to VGPU shader input indexes */
+ ubyte system_value_indexes[MAX_SYSTEM_VALUES];
+
+ struct {
+ /* vertex position scale/translation */
+ unsigned out_index; /**< the real position output reg */
+ unsigned tmp_index; /**< the fake/temp position output reg */
+ unsigned so_index; /**< the non-adjusted position output reg */
+ unsigned prescale_scale_index, prescale_trans_index;
+ boolean need_prescale;
+ } vposition;
+
+ /* For vertex shaders only */
+ struct {
+ /* viewport constant */
+ unsigned viewport_index;
+
+ /* temp index of adjusted vertex attributes */
+ unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
+ } vs;
+
+ /* For fragment shaders only */
+ struct {
+ /* alpha test */
+ unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */
+ unsigned color_tmp_index; /**< fake/temp color output reg */
+ unsigned alpha_ref_index; /**< immediate constant for alpha ref */
+
+ /* front-face */
+ unsigned face_input_index; /**< real fragment shader face reg (bool) */
+ unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */
+
+ unsigned pstipple_sampler_unit;
+
+ unsigned fragcoord_input_index; /**< real fragment position input reg */
+ unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */
+ } fs;
+
+ /* For geometry shaders only */
+ struct {
+ VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
+ VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
+ unsigned input_size; /**< size of input arrays */
+ unsigned prim_id_index; /**< primitive id register index */
+ unsigned max_out_vertices; /**< maximum number of output vertices */
+ } gs;
+
+ /* For vertex or geometry shaders */
+ enum clipping_mode clip_mode;
+ unsigned clip_dist_out_index; /**< clip distance output register index */
+ unsigned clip_dist_tmp_index; /**< clip distance temporary register */
+ unsigned clip_dist_so_index; /**< clip distance shadow copy */
+
+ /** Index of temporary holding the clipvertex coordinate */
+ unsigned clip_vertex_out_index; /**< clip vertex output register index */
+ unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
+
+ /* user clip plane constant slot indexes */
+ unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
+
+ boolean uses_flat_interp;
+
+ /* For all shaders: const reg index for RECT coord scaling */
+ unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
+
+ /* For all shaders: const reg index for texture buffer size */
+ unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
+
+ /* VS/GS/FS Linkage info */
+ struct shader_linkage linkage;
+
+ bool register_overflow; /**< Set if we exceed a VGPU10 register limit */
+};
+
+
+static boolean
+emit_post_helpers(struct svga_shader_emitter_v10 *emit);
+
+static boolean
+emit_vertex(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst);
+
+static char err_buf[128]; /* scratch area that expand() points the emitter
+ * at when the token buffer cannot be grown
+ */
+
+/**
+ * Double the size of the emitter's token buffer.
+ *
+ * If the reallocation fails, or if a previous expansion already failed
+ * (buf == err_buf), the emitter is pointed at the static err_buf scratch
+ * area and FALSE is returned.
+ *
+ * \param emit  the emitter whose buffer should grow
+ * \return TRUE if the buffer was grown, FALSE otherwise
+ */
+static boolean
+expand(struct svga_shader_emitter_v10 *emit)
+{
+   char *new_buf = NULL;
+   unsigned newsize = emit->size * 2;
+
+   if (emit->buf != err_buf) {
+      new_buf = REALLOC(emit->buf, emit->size, newsize);
+      if (new_buf == NULL) {
+         /* Assuming REALLOC follows realloc() semantics, a failed call
+          * leaves the old block allocated.  Release it here, before the
+          * only pointer to it is replaced by err_buf below.
+          */
+         FREE(emit->buf);
+      }
+   }
+
+   if (new_buf == NULL) {
+      emit->ptr = err_buf;
+      emit->buf = err_buf;
+      emit->size = sizeof(err_buf);
+      return FALSE;
+   }
+
+   emit->size = newsize;
+   /* keep the write position at the same offset inside the new block */
+   emit->ptr = new_buf + (emit->ptr - emit->buf);
+   emit->buf = new_buf;
+   return TRUE;
+}
+
+/**
+ * Allocate a zero-initialized svga_shader_emitter_v10 together with its
+ * initial token buffer.
+ * \return the new emitter, or NULL if either allocation fails
+ */
+static struct svga_shader_emitter_v10 *
+alloc_emitter(void)
+{
+   struct svga_shader_emitter_v10 *emitter = CALLOC(1, sizeof(*emitter));
+
+   if (emitter != NULL) {
+      /* expand() doubles the size, so this seed value determines the
+       * size of the first real buffer.
+       */
+      emitter->size = 512;
+      if (!expand(emitter)) {
+         FREE(emitter);
+         emitter = NULL;
+      }
+   }
+
+   return emitter;
+}
+
+/**
+ * Free an svga_shader_emitter_v10 object and its token buffer.
+ * \param emit  the emitter to destroy; must not be NULL
+ */
+static void
+free_emitter(struct svga_shader_emitter_v10 *emit)
+{
+   assert(emit);
+   /* After a failed expand() buf points at the static err_buf, which must
+    * never be passed to FREE().  Otherwise buf is heap memory (or NULL if
+    * translation succeeded).
+    */
+   if (emit->buf != err_buf) {
+      FREE(emit->buf);
+   }
+   FREE(emit);
+}
+
+/**
+ * Grow the token buffer until nr_dwords more dwords fit after the
+ * current write position.
+ * \return TRUE once there is room, FALSE if expand() fails
+ */
+static inline boolean
+reserve(struct svga_shader_emitter_v10 *emit,
+        unsigned nr_dwords)
+{
+   for (;;) {
+      if (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) < emit->size)
+         return TRUE;
+
+      if (!expand(emit))
+         return FALSE;
+   }
+}
+
+/**
+ * Append one 32-bit token to the output buffer.
+ * \return TRUE on success, FALSE if space could not be reserved
+ */
+static boolean
+emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
+{
+   if (reserve(emit, 1)) {
+      uint32 *slot = (uint32 *)emit->ptr;
+
+      *slot = dword;
+      emit->ptr += sizeof dword;
+      return TRUE;
+   }
+
+   return FALSE;
+}
+
+/**
+ * Append an array of 32-bit tokens to the output buffer.
+ * \param dwords  the tokens to copy
+ * \param nr  number of tokens in dwords
+ * \return TRUE on success, FALSE if space could not be reserved
+ */
+static boolean
+emit_dwords(struct svga_shader_emitter_v10 *emit,
+            const uint32 *dwords,
+            unsigned nr)
+{
+   if (reserve(emit, nr)) {
+      memcpy(emit->ptr, dwords, nr * sizeof *dwords);
+      emit->ptr += nr * sizeof *dwords;
+      return TRUE;
+   }
+
+   return FALSE;
+}
+
+/**
+ * Return how many unsigned-sized tokens have been written to the
+ * emitter's buffer so far.
+ */
+static unsigned
+emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
+{
+   const char *first = emit->buf;
+   const char *next = emit->ptr;
+
+   return (next - first) / sizeof(unsigned);
+}
+
+
+/**
+ * Check a register index against the VGPU10 limit for its register class.
+ * If the limit is exceeded the emitter's register_overflow flag is set,
+ * and a debug message is printed the first time that happens.
+ * This function can be called for register declarations or for src/dst
+ * instruction operands.
+ * \param operandType  register type.  One of VGPU10_OPERAND_TYPE_x
+ *                     or VGPU10_OPCODE_DCL_x
+ * \param index  the register index
+ */
+static void
+check_register_index(struct svga_shader_emitter_v10 *emit,
+                     unsigned operandType, unsigned index)
+{
+   const bool already_overflowed = emit->register_overflow;
+   const unsigned unit = emit->unit;
+   bool exceeded = FALSE;
+
+   switch (operandType) {
+   case VGPU10_OPERAND_TYPE_TEMP:
+   case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
+   case VGPU10_OPCODE_DCL_TEMPS:
+      exceeded = index >= VGPU10_MAX_TEMPS;
+      break;
+   case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
+   case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
+      exceeded = index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT;
+      break;
+   case VGPU10_OPERAND_TYPE_INPUT:
+   case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
+   case VGPU10_OPCODE_DCL_INPUT:
+   case VGPU10_OPCODE_DCL_INPUT_SGV:
+   case VGPU10_OPCODE_DCL_INPUT_SIV:
+   case VGPU10_OPCODE_DCL_INPUT_PS:
+   case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
+   case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
+      /* the input limit depends on the shader stage */
+      exceeded =
+         (unit == PIPE_SHADER_VERTEX && index >= VGPU10_MAX_VS_INPUTS) ||
+         (unit == PIPE_SHADER_GEOMETRY && index >= VGPU10_MAX_GS_INPUTS) ||
+         (unit == PIPE_SHADER_FRAGMENT && index >= VGPU10_MAX_FS_INPUTS);
+      break;
+   case VGPU10_OPERAND_TYPE_OUTPUT:
+   case VGPU10_OPCODE_DCL_OUTPUT:
+   case VGPU10_OPCODE_DCL_OUTPUT_SGV:
+   case VGPU10_OPCODE_DCL_OUTPUT_SIV:
+      /* the output limit depends on the shader stage */
+      exceeded =
+         (unit == PIPE_SHADER_VERTEX && index >= VGPU10_MAX_VS_OUTPUTS) ||
+         (unit == PIPE_SHADER_GEOMETRY && index >= VGPU10_MAX_GS_OUTPUTS) ||
+         (unit == PIPE_SHADER_FRAGMENT && index >= VGPU10_MAX_FS_OUTPUTS);
+      break;
+   case VGPU10_OPERAND_TYPE_SAMPLER:
+   case VGPU10_OPCODE_DCL_SAMPLER:
+      exceeded = index >= VGPU10_MAX_SAMPLERS;
+      break;
+   case VGPU10_OPERAND_TYPE_RESOURCE:
+   case VGPU10_OPCODE_DCL_RESOURCE:
+      exceeded = index >= VGPU10_MAX_RESOURCES;
+      break;
+   case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
+      exceeded = index >= MAX_IMMEDIATE_COUNT;
+      break;
+   default:
+      assert(0);
+      ; /* nothing */
+   }
+
+   if (exceeded) {
+      emit->register_overflow = TRUE;
+
+      /* only report the first overflow */
+      if (!already_overflowed) {
+         debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
+                      operandType, index);
+      }
+   }
+}
+
+
+/**
+ * Choose emit->clip_mode from the shader info and the compile key.
+ * Written clip distances take priority, then a written clip vertex,
+ * then enabled user clip planes; otherwise clipping is off.
+ */
+static void
+determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
+{
+   enum clipping_mode mode = CLIP_NONE;
+
+   if (emit->info.num_written_clipdistance > 0)
+      mode = CLIP_DISTANCE;
+   else if (emit->info.writes_clipvertex)
+      mode = CLIP_VERTEX;
+   else if (emit->key.clip_plane_enable)
+      mode = CLIP_LEGACY;
+
+   emit->clip_mode = mode;
+}
+
+
+/**
+ * For clip distance register declarations and clip distance register
+ * writes we need to mask the declaration usage or instruction writemask
+ * (respectively) against the set of the really-enabled clipping planes.
+ *
+ * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
+ * has a VS that writes to all 8 clip distance registers, but the plane enable
+ * flags are a subset of that.
+ *
+ * \param writemask  the declaration usage mask or instruction writemask
+ * \param clip_reg_index  which clip plane register is being declared/written.
+ *                        The legal values are 0 and 1 (four clip planes per
+ *                        register, for a total of 8 clip planes)
+ * \return writemask restricted to the enabled planes of that register
+ */
+static unsigned
+apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
+                      unsigned writemask, unsigned clip_reg_index)
+{
+   unsigned enabled_planes;
+
+   assert(clip_reg_index < 2);
+
+   /* each clip register covers a 4-bit slice of clip_plane_enable */
+   enabled_planes =
+      (emit->key.clip_plane_enable >> (clip_reg_index * 4)) & 0xf;
+
+   return writemask & enabled_planes;
+}
+
+
+/**
+ * Map a gallium PIPE_SHADER_x value to the VGPU10 program type.
+ * An unexpected type asserts and falls back to VGPU10_VERTEX_SHADER.
+ */
+static VGPU10_PROGRAM_TYPE
+translate_shader_type(unsigned type)
+{
+   if (type == PIPE_SHADER_VERTEX)
+      return VGPU10_VERTEX_SHADER;
+
+   if (type == PIPE_SHADER_GEOMETRY)
+      return VGPU10_GEOMETRY_SHADER;
+
+   if (type == PIPE_SHADER_FRAGMENT)
+      return VGPU10_PIXEL_SHADER;
+
+   assert(!"Unexpected shader type");
+   return VGPU10_VERTEX_SHADER;
+}
+
+
+/**
+ * Translate a TGSI_OPCODE_x into the corresponding VGPU10_OPCODE_x.
+ * Note: we only need to translate the opcodes for "simple" instructions,
+ * as seen below. All other opcodes are handled/translated specially.
+ *
+ * Several TGSI opcodes share one VGPU10 opcode (e.g. RET, ENDSUB and END
+ * all map to VGPU10_OPCODE_RET), so the mapping is not invertible.
+ *
+ * \param opcode one of the TGSI_OPCODE_x values listed below
+ * \return the VGPU10 opcode; an unlisted opcode asserts and returns
+ * VGPU10_OPCODE_NOP
+ */
+static VGPU10_OPCODE_TYPE
+translate_opcode(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_MOV:
+ return VGPU10_OPCODE_MOV;
+ case TGSI_OPCODE_MUL:
+ return VGPU10_OPCODE_MUL;
+ case TGSI_OPCODE_ADD:
+ return VGPU10_OPCODE_ADD;
+ case TGSI_OPCODE_DP3:
+ return VGPU10_OPCODE_DP3;
+ case TGSI_OPCODE_DP4:
+ return VGPU10_OPCODE_DP4;
+ case TGSI_OPCODE_MIN:
+ return VGPU10_OPCODE_MIN;
+ case TGSI_OPCODE_MAX:
+ return VGPU10_OPCODE_MAX;
+ case TGSI_OPCODE_MAD:
+ return VGPU10_OPCODE_MAD;
+ case TGSI_OPCODE_SQRT:
+ return VGPU10_OPCODE_SQRT;
+ case TGSI_OPCODE_FRC:
+ return VGPU10_OPCODE_FRC;
+ case TGSI_OPCODE_FLR:
+ return VGPU10_OPCODE_ROUND_NI;
+ case TGSI_OPCODE_FSEQ:
+ return VGPU10_OPCODE_EQ;
+ case TGSI_OPCODE_FSGE:
+ return VGPU10_OPCODE_GE;
+ case TGSI_OPCODE_FSNE:
+ return VGPU10_OPCODE_NE;
+ case TGSI_OPCODE_DDX:
+ return VGPU10_OPCODE_DERIV_RTX;
+ case TGSI_OPCODE_DDY:
+ return VGPU10_OPCODE_DERIV_RTY;
+ case TGSI_OPCODE_RET:
+ return VGPU10_OPCODE_RET;
+ case TGSI_OPCODE_DIV:
+ return VGPU10_OPCODE_DIV;
+ case TGSI_OPCODE_IDIV:
+ return VGPU10_OPCODE_IDIV;
+ case TGSI_OPCODE_DP2:
+ return VGPU10_OPCODE_DP2;
+ case TGSI_OPCODE_BRK:
+ return VGPU10_OPCODE_BREAK;
+ case TGSI_OPCODE_IF:
+ return VGPU10_OPCODE_IF;
+ case TGSI_OPCODE_ELSE:
+ return VGPU10_OPCODE_ELSE;
+ case TGSI_OPCODE_ENDIF:
+ return VGPU10_OPCODE_ENDIF;
+ case TGSI_OPCODE_CEIL:
+ return VGPU10_OPCODE_ROUND_PI;
+ case TGSI_OPCODE_I2F:
+ return VGPU10_OPCODE_ITOF;
+ case TGSI_OPCODE_NOT:
+ return VGPU10_OPCODE_NOT;
+ case TGSI_OPCODE_TRUNC:
+ return VGPU10_OPCODE_ROUND_Z;
+ case TGSI_OPCODE_SHL:
+ return VGPU10_OPCODE_ISHL;
+ case TGSI_OPCODE_AND:
+ return VGPU10_OPCODE_AND;
+ case TGSI_OPCODE_OR:
+ return VGPU10_OPCODE_OR;
+ case TGSI_OPCODE_XOR:
+ return VGPU10_OPCODE_XOR;
+ case TGSI_OPCODE_CONT:
+ return VGPU10_OPCODE_CONTINUE;
+ case TGSI_OPCODE_EMIT:
+ return VGPU10_OPCODE_EMIT;
+ case TGSI_OPCODE_ENDPRIM:
+ return VGPU10_OPCODE_CUT;
+ case TGSI_OPCODE_BGNLOOP:
+ return VGPU10_OPCODE_LOOP;
+ case TGSI_OPCODE_ENDLOOP:
+ return VGPU10_OPCODE_ENDLOOP;
+ case TGSI_OPCODE_ENDSUB:
+ return VGPU10_OPCODE_RET;
+ case TGSI_OPCODE_NOP:
+ return VGPU10_OPCODE_NOP;
+ case TGSI_OPCODE_BREAKC:
+ return VGPU10_OPCODE_BREAKC;
+ case TGSI_OPCODE_END:
+ return VGPU10_OPCODE_RET;
+ case TGSI_OPCODE_F2I:
+ return VGPU10_OPCODE_FTOI;
+ case TGSI_OPCODE_IMAX:
+ return VGPU10_OPCODE_IMAX;
+ case TGSI_OPCODE_IMIN:
+ return VGPU10_OPCODE_IMIN;
+ case TGSI_OPCODE_UDIV:
+ case TGSI_OPCODE_UMOD:
+ case TGSI_OPCODE_MOD:
+ return VGPU10_OPCODE_UDIV; /* NOTE(review): presumably the caller selects the quotient or remainder result - confirm */
+ case TGSI_OPCODE_IMUL_HI:
+ return VGPU10_OPCODE_IMUL;
+ case TGSI_OPCODE_INEG:
+ return VGPU10_OPCODE_INEG;
+ case TGSI_OPCODE_ISHR:
+ return VGPU10_OPCODE_ISHR;
+ case TGSI_OPCODE_ISGE:
+ return VGPU10_OPCODE_IGE;
+ case TGSI_OPCODE_ISLT:
+ return VGPU10_OPCODE_ILT;
+ case TGSI_OPCODE_F2U:
+ return VGPU10_OPCODE_FTOU;
+ case TGSI_OPCODE_UADD:
+ return VGPU10_OPCODE_IADD;
+ case TGSI_OPCODE_U2F:
+ return VGPU10_OPCODE_UTOF;
+ case TGSI_OPCODE_UCMP:
+ return VGPU10_OPCODE_MOVC;
+ case TGSI_OPCODE_UMAD:
+ return VGPU10_OPCODE_UMAD;
+ case TGSI_OPCODE_UMAX:
+ return VGPU10_OPCODE_UMAX;
+ case TGSI_OPCODE_UMIN:
+ return VGPU10_OPCODE_UMIN;
+ case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_UMUL_HI:
+ return VGPU10_OPCODE_UMUL;
+ case TGSI_OPCODE_USEQ:
+ return VGPU10_OPCODE_IEQ;
+ case TGSI_OPCODE_USGE:
+ return VGPU10_OPCODE_UGE;
+ case TGSI_OPCODE_USHR:
+ return VGPU10_OPCODE_USHR;
+ case TGSI_OPCODE_USLT:
+ return VGPU10_OPCODE_ULT;
+ case TGSI_OPCODE_USNE:
+ return VGPU10_OPCODE_INE;
+ case TGSI_OPCODE_SWITCH:
+ return VGPU10_OPCODE_SWITCH;
+ case TGSI_OPCODE_CASE:
+ return VGPU10_OPCODE_CASE;
+ case TGSI_OPCODE_DEFAULT:
+ return VGPU10_OPCODE_DEFAULT;
+ case TGSI_OPCODE_ENDSWITCH:
+ return VGPU10_OPCODE_ENDSWITCH;
+ case TGSI_OPCODE_FSLT:
+ return VGPU10_OPCODE_LT;
+ case TGSI_OPCODE_ROUND:
+ return VGPU10_OPCODE_ROUND_NE;
+ default:
+ assert(!"Unexpected TGSI opcode in translate_opcode()");
+ return VGPU10_OPCODE_NOP;
+ }
+}
+
+
+/**
+ * Map a TGSI register file to the VGPU10 operand type used to address it.
+ * \param file  the TGSI register file
+ * \param array  is the TGSI_FILE_TEMPORARY register an array?
+ * \return the operand type; an unhandled file asserts and yields
+ *         VGPU10_OPERAND_TYPE_NULL
+ */
+static VGPU10_OPERAND_TYPE
+translate_register_file(enum tgsi_file_type file, boolean array)
+{
+   switch (file) {
+   case TGSI_FILE_TEMPORARY:
+      if (array)
+         return VGPU10_OPERAND_TYPE_INDEXABLE_TEMP;
+      return VGPU10_OPERAND_TYPE_TEMP;
+
+   case TGSI_FILE_INPUT:
+   case TGSI_FILE_SYSTEM_VALUE:
+      /* system values are addressed as inputs */
+      return VGPU10_OPERAND_TYPE_INPUT;
+
+   case TGSI_FILE_OUTPUT:
+      return VGPU10_OPERAND_TYPE_OUTPUT;
+
+   case TGSI_FILE_CONSTANT:
+      return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
+
+   case TGSI_FILE_IMMEDIATE:
+      /* all immediates are 32-bit values at this time so
+       * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
+       */
+      return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
+
+   case TGSI_FILE_SAMPLER:
+      return VGPU10_OPERAND_TYPE_SAMPLER;
+
+   /* XXX TODO more cases to finish */
+
+   default:
+      assert(!"Bad tgsi register file!");
+      return VGPU10_OPERAND_TYPE_NULL;
+   }
+}
+
+
+/**
+ * Emit a single operand token describing a null destination register
+ * (operand type NULL, zero components).
+ */
+static void
+emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OperandToken0 null_dst;
+
+   /* start from an all-zero token, then fill in the two relevant fields */
+   null_dst.value = 0;
+   null_dst.numComponents = VGPU10_OPERAND_0_COMPONENT;
+   null_dst.operandType = VGPU10_OPERAND_TYPE_NULL;
+
+   emit_dword(emit, null_dst.value);
+}
+
+
+/**
+ * Look up the temp array ID recorded in temp_map for a TGSI temporary.
+ * \return the array ID for TGSI_FILE_TEMPORARY registers, zero for any
+ *         other register file
+ */
+static unsigned
+get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
+                  unsigned file, unsigned index)
+{
+   return (file == TGSI_FILE_TEMPORARY) ? emit->temp_map[index].arrayId : 0;
+}
+
+
+/**
+ * Convert a TGSI TEMPORARY index to the VGPU10 temp index recorded in
+ * temp_map.  Indexes of all other register files are returned unchanged.
+ */
+static unsigned
+remap_temp_index(const struct svga_shader_emitter_v10 *emit,
+                 unsigned file, unsigned index)
+{
+   return (file == TGSI_FILE_TEMPORARY) ? emit->temp_map[index].index : index;
+}
+
+
+/**
+ * Fill in the indexing fields of an operand token: the index dimension
+ * (0D/1D/2D) and the representation of each index (immediate or
+ * immediate-plus-relative).
+ * Note: the operandType field must already be initialized.
+ * \param operand0  the token to update (passed and returned by value)
+ * \param file  the TGSI register file of the operand
+ * \param indirect  is the register indirectly addressed?
+ * \param index2D  does the register have a second dimension?
+ * \param tempArrayID  temp array ID, or zero if not a temp array element
+ * \return the updated operand token
+ */
+static VGPU10OperandToken0
+setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
+                        VGPU10OperandToken0 operand0,
+                        unsigned file,
+                        boolean indirect, boolean index2D,
+                        unsigned tempArrayID)
+{
+   const unsigned type = operand0.operandType;
+   unsigned dim;
+   unsigned rep0;
+   unsigned rep1 = VGPU10_OPERAND_INDEX_0D;
+
+   /*
+    * Index dimension
+    */
+   if (type == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
+       type == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
+      /* there's no swizzle for in-line immediates */
+      dim = VGPU10_OPERAND_INDEX_0D;
+      assert(operand0.selectionMode == 0);
+   }
+   else if (index2D ||
+            tempArrayID > 0 ||
+            type == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
+      dim = VGPU10_OPERAND_INDEX_2D;
+   }
+   else {
+      dim = VGPU10_OPERAND_INDEX_1D;
+   }
+
+   /*
+    * Index representations (immediate, relative, etc).
+    */
+   if (tempArrayID > 0) {
+      assert(file == TGSI_FILE_TEMPORARY);
+      /* First index is the array ID, second index is the array element */
+      rep0 = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+      rep1 = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
+                      : VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   }
+   else if (!indirect) {
+      rep0 = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+      rep1 = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   }
+   else if (file == TGSI_FILE_CONSTANT) {
+      /* index[0] indicates which constant buffer while index[1] indicates
+       * the position in the constant buffer.
+       */
+      rep0 = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+      rep1 = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
+   }
+   else {
+      /* All other register files are 1-dimensional */
+      rep0 = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
+   }
+
+   operand0.indexDimension = dim;
+   operand0.index0Representation = rep0;
+   operand0.index1Representation = rep1;
+
+   return operand0;
+}
+
+
+/**
+ * Emit the operand that names an address register for indirect indexing.
+ * Address registers are really temp registers, so this emits a temp
+ * operand token followed by the remapped temp index.
+ * \param addr_reg_index  which address register to use
+ */
+static void
+emit_indirect_register(struct svga_shader_emitter_v10 *emit,
+                       unsigned addr_reg_index)
+{
+   VGPU10OperandToken0 addr_operand;
+   unsigned temp_index;
+
+   assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
+
+   /* the temp register that backs this address register */
+   temp_index = emit->address_reg_index[addr_reg_index];
+
+   /* a simple temporary register operand, selecting one component */
+   addr_operand.value = 0;
+   addr_operand.operandType = VGPU10_OPERAND_TYPE_TEMP;
+   addr_operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   addr_operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   addr_operand.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   addr_operand.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
+   addr_operand.swizzleX = 0;
+   addr_operand.swizzleY = 1;
+   addr_operand.swizzleZ = 2;
+   addr_operand.swizzleW = 3;
+
+   emit_dword(emit, addr_operand.value);
+   emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, temp_index));
+}
+
+
+/**
+ * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
+ *
+ * Some OUTPUT registers are redirected to temporaries (vertex position,
+ * clip distance, clip vertex, fragment color) when the matching *_tmp_index
+ * has been set up, and the fragment depth output is emitted as a dedicated
+ * operand.
+ *
+ * \param emit  the emitter context
+ * \param reg  the TGSI dst register to translate
+ */
+static void
+emit_dst_register(struct svga_shader_emitter_v10 *emit,
+                  const struct tgsi_full_dst_register *reg)
+{
+   unsigned file = reg->Register.File;
+   unsigned index = reg->Register.Index;
+   unsigned writemask = reg->Register.WriteMask;
+   const unsigned indirect = reg->Register.Indirect;
+   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
+   const unsigned index2d = reg->Register.Dimension;
+   VGPU10OperandToken0 operand0;
+
+   if (file == TGSI_FILE_OUTPUT) {
+      /* Only consult the output semantic tables for real OUTPUT registers.
+       * For other files (e.g. temporaries) the index is not an output
+       * index and could lie outside those tables.
+       */
+      const unsigned sem_name = emit->info.output_semantic_name[index];
+      const unsigned sem_index = emit->info.output_semantic_index[index];
+
+      if (emit->unit == PIPE_SHADER_VERTEX ||
+          emit->unit == PIPE_SHADER_GEOMETRY) {
+         if (index == emit->vposition.out_index &&
+             emit->vposition.tmp_index != INVALID_INDEX) {
+            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
+             * vertex position result in a temporary so that we can modify
+             * it in the post_helper() code.
+             */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->vposition.tmp_index;
+         }
+         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
+                  emit->clip_dist_tmp_index != INVALID_INDEX) {
+            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
+             * We store the clip distance in a temporary first, then
+             * we'll copy it to the shadow copy and to CLIPDIST with the
+             * enabled planes mask in emit_clip_distance_instructions().
+             */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->clip_dist_tmp_index + sem_index;
+         }
+         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
+                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
+            /* replace the CLIPVERTEX output register with a temporary */
+            assert(emit->clip_mode == CLIP_VERTEX);
+            assert(sem_index == 0);
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->clip_vertex_tmp_index;
+         }
+      }
+      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
+         if (sem_name == TGSI_SEMANTIC_POSITION) {
+            /* Fragment depth output register */
+            operand0.value = 0;
+            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
+            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
+            emit_dword(emit, operand0.value);
+            return;
+         }
+         else if (index == emit->fs.color_out_index[0] &&
+                  emit->fs.color_tmp_index != INVALID_INDEX) {
+            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
+             * fragment color result in a temporary so that we can read it
+             * in the post_helper() code.
+             */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->fs.color_tmp_index;
+         }
+         else {
+            /* Typically, for fragment shaders, the output register index
+             * matches the color semantic index.  But not when we write to
+             * the fragment depth register.  In that case, OUT[0] will be
+             * fragdepth and OUT[1] will be the 0th color output.  We need
+             * to use the semantic index for color outputs.
+             */
+            assert(sem_name == TGSI_SEMANTIC_COLOR);
+            index = sem_index;
+         }
+      }
+   }
+
+   /* init operand tokens to all zero */
+   operand0.value = 0;
+
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+
+   /* the operand has a writemask */
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+
+   /* Which of the four dest components to write to. Note that we can use a
+    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
+    */
+   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
+   operand0.mask = writemask;
+
+   /* translate TGSI register file type to VGPU10 operand type */
+   operand0.operandType = translate_register_file(file, tempArrayId > 0);
+
+   check_register_index(emit, operand0.operandType, index);
+
+   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
+                                      index2d, tempArrayId);
+
+   /* Emit tokens */
+   emit_dword(emit, operand0.value);
+   if (tempArrayId > 0) {
+      emit_dword(emit, tempArrayId);
+   }
+
+   emit_dword(emit, remap_temp_index(emit, file, index));
+
+   if (indirect) {
+      emit_indirect_register(emit, reg->Indirect.Index);
+   }
+}
+
+
+/**
+ * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
+ *
+ * Depending on the shader stage, some INPUT / SYSTEM_VALUE registers are
+ * first redirected to temporaries or remapped to other indexes. Then the
+ * operand token is emitted, followed by an extended token when an
+ * absolute/negate modifier is present, and finally the register
+ * index(es) or in-line immediate values.
+ *
+ * \param emit the emitter context
+ * \param reg the TGSI src register to translate
+ */
+static void
+emit_src_register(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_src_register *reg)
+{
+ unsigned file = reg->Register.File;
+ unsigned index = reg->Register.Index;
+ const unsigned indirect = reg->Register.Indirect;
+ const unsigned tempArrayId = get_temp_array_id(emit, file, index); /* looked up before any file/index substitution below */
+ const unsigned index2d = reg->Register.Dimension;
+ const unsigned swizzleX = reg->Register.SwizzleX;
+ const unsigned swizzleY = reg->Register.SwizzleY;
+ const unsigned swizzleZ = reg->Register.SwizzleZ;
+ const unsigned swizzleW = reg->Register.SwizzleW;
+ const unsigned absolute = reg->Register.Absolute;
+ const unsigned negate = reg->Register.Negate;
+ bool is_prim_id = FALSE;
+
+ VGPU10OperandToken0 operand0;
+ VGPU10OperandToken1 operand1;
+
+ if (emit->unit == PIPE_SHADER_FRAGMENT &&
+ file == TGSI_FILE_INPUT) {
+ if (index == emit->fs.face_input_index) {
+ /* Replace INPUT[FACE] with TEMP[FACE] */
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->fs.face_tmp_index;
+ }
+ else if (index == emit->fs.fragcoord_input_index) {
+ /* Replace INPUT[POSITION] with TEMP[POSITION] */
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->fs.fragcoord_tmp_index;
+ }
+ else {
+ /* We remap fragment shader inputs so that FS input indexes
+ * match up with VS/GS output indexes.
+ */
+ index = emit->linkage.input_map[index];
+ }
+ }
+ else if (emit->unit == PIPE_SHADER_GEOMETRY &&
+ file == TGSI_FILE_INPUT) {
+ is_prim_id = (index == emit->gs.prim_id_index);
+ index = emit->linkage.input_map[index];
+ }
+ else if (emit->unit == PIPE_SHADER_VERTEX) {
+ if (file == TGSI_FILE_INPUT) {
+ /* if input is adjusted (its bit is set in any of these masks),
+ * read the adjusted copy from a temporary instead
+ */
+ if ((emit->key.vs.adjust_attrib_w_1 |
+ emit->key.vs.adjust_attrib_itof |
+ emit->key.vs.adjust_attrib_utof |
+ emit->key.vs.attrib_is_bgra |
+ emit->key.vs.attrib_puint_to_snorm |
+ emit->key.vs.attrib_puint_to_uscaled |
+ emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->vs.adjusted_input[index];
+ }
+ }
+ else if (file == TGSI_FILE_SYSTEM_VALUE) {
+ assert(index < Elements(emit->system_value_indexes));
+ index = emit->system_value_indexes[index];
+ }
+ }
+
+ operand0.value = operand1.value = 0;
+
+ if (is_prim_id) {
+ operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+ operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
+ }
+ else {
+ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+ operand0.operandType = translate_register_file(file, tempArrayId > 0);
+ }
+
+ operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
+ index2d, tempArrayId);
+
+ if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
+ operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
+ /* there's no swizzle for in-line immediates */
+ if (swizzleX == swizzleY &&
+ swizzleX == swizzleZ &&
+ swizzleX == swizzleW) {
+ operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
+ }
+ else {
+ operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+ }
+
+ operand0.swizzleX = swizzleX;
+ operand0.swizzleY = swizzleY;
+ operand0.swizzleZ = swizzleZ;
+ operand0.swizzleW = swizzleW;
+
+ if (absolute || negate) {
+ operand0.extended = 1;
+ operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
+ if (absolute && !negate)
+ operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
+ if (!absolute && negate)
+ operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
+ if (absolute && negate)
+ operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
+ }
+ }
+
+ /* Emit the operand tokens */
+ emit_dword(emit, operand0.value);
+ if (operand0.extended)
+ emit_dword(emit, operand1.value);
+
+ if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
+ /* Emit the four float/int in-line immediate values */
+ unsigned *c;
+ assert(index < Elements(emit->immediates));
+ assert(file == TGSI_FILE_IMMEDIATE);
+ assert(swizzleX < 4);
+ assert(swizzleY < 4);
+ assert(swizzleZ < 4);
+ assert(swizzleW < 4);
+ c = (unsigned *) emit->immediates[index];
+ emit_dword(emit, c[swizzleX]);
+ emit_dword(emit, c[swizzleY]);
+ emit_dword(emit, c[swizzleZ]);
+ emit_dword(emit, c[swizzleW]);
+ }
+ else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
+ /* Emit the register index(es) */
+ if (index2d ||
+ operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
+ emit_dword(emit, reg->Dimension.Index);
+ }
+
+ if (tempArrayId > 0) {
+ emit_dword(emit, tempArrayId);
+ }
+
+ emit_dword(emit, remap_temp_index(emit, file, index));
+
+ if (indirect) {
+ emit_indirect_register(emit, reg->Indirect.Index);
+ }
+ }
+}
+
+
+/**
+ * Emit the operand for a shader resource (for use with a SAMPLE
+ * instruction).
+ * \param resource_number  index of the resource register to reference
+ */
+static void
+emit_resource_register(struct svga_shader_emitter_v10 *emit,
+                       unsigned resource_number)
+{
+   VGPU10OperandToken0 res_token;
+
+   check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
+
+   /* Clear all fields, then describe a 1D-indexed, four-component
+    * resource operand with the identity (XYZW) swizzle.
+    */
+   res_token.value = 0;
+
+   res_token.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
+   res_token.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   res_token.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   res_token.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+   res_token.swizzleX = VGPU10_COMPONENT_X;
+   res_token.swizzleY = VGPU10_COMPONENT_Y;
+   res_token.swizzleZ = VGPU10_COMPONENT_Z;
+   res_token.swizzleW = VGPU10_COMPONENT_W;
+
+   /* operand token first, then the register index */
+   emit_dword(emit, res_token.value);
+   emit_dword(emit, resource_number);
+}
+
+
+/**
+ * Emit the operand for a sampler (for use with a SAMPLE instruction).
+ * \param sampler_number  index of the sampler register to reference
+ */
+static void
+emit_sampler_register(struct svga_shader_emitter_v10 *emit,
+                      unsigned sampler_number)
+{
+   VGPU10OperandToken0 sampler_token;
+
+   check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
+
+   /* Only the operand type and the index dimension are set; every other
+    * field of the token stays zero.
+    */
+   sampler_token.value = 0;
+   sampler_token.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
+   sampler_token.indexDimension = VGPU10_OPERAND_INDEX_1D;
+
+   /* operand token first, then the register index */
+   emit_dword(emit, sampler_token.value);
+   emit_dword(emit, sampler_number);
+}
+
+
+/**
+ * Emit an operand which reads the IS_FRONT_FACING register.
+ * The register index is looked up in the linkage input map using the
+ * fragment shader's face input index.
+ */
+static void
+emit_face_register(struct svga_shader_emitter_v10 *emit)
+{
+   const unsigned face_reg =
+      emit->linkage.input_map[emit->fs.face_input_index];
+   VGPU10OperandToken0 face_token;
+
+   /* start with all fields cleared */
+   face_token.value = 0;
+
+   face_token.operandType = VGPU10_OPERAND_TYPE_INPUT;
+   face_token.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   face_token.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
+   face_token.numComponents = VGPU10_OPERAND_4_COMPONENT;
+
+   /* every swizzle term selects the X component */
+   face_token.swizzleX = VGPU10_COMPONENT_X;
+   face_token.swizzleY = VGPU10_COMPONENT_X;
+   face_token.swizzleZ = VGPU10_COMPONENT_X;
+   face_token.swizzleW = VGPU10_COMPONENT_X;
+
+   emit_dword(emit, face_token.value);
+   emit_dword(emit, face_reg);
+}
+
+
+/**
+ * Emit the leading token of a VGPU10 instruction.
+ * \param vgpu10_opcode  the opcode to store in the token
+ * \param saturate  clamp result to [0,1]?
+ */
+static void
+emit_opcode(struct svga_shader_emitter_v10 *emit,
+            unsigned vgpu10_opcode, boolean saturate)
+{
+   VGPU10OpcodeToken0 opcode_token;
+
+   /* zero the whole token before setting individual fields */
+   opcode_token.value = 0;
+   opcode_token.opcodeType = vgpu10_opcode;
+   /* the real length is patched in by end_emit_instruction() */
+   opcode_token.instructionLength = 0;
+   opcode_token.saturate = saturate;
+
+   emit_dword(emit, opcode_token.value);
+}
+
+
+/**
+ * Emit the leading token of a VGPU10 RESINFO instruction.
+ * \param modifier  return type modifier, _uint or _rcpFloat.
+ *                  TODO: We may want to remove this parameter if it will
+ *                  only ever be used as _uint.
+ */
+static void
+emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
+                    VGPU10_RESINFO_RETURN_TYPE modifier)
+{
+   VGPU10OpcodeToken0 resinfo_token;
+
+   /* zero the whole token before setting individual fields */
+   resinfo_token.value = 0;
+   resinfo_token.opcodeType = VGPU10_OPCODE_RESINFO;
+   /* the real length is patched in by end_emit_instruction() */
+   resinfo_token.instructionLength = 0;
+   resinfo_token.resinfoReturnType = modifier;
+
+   emit_dword(emit, resinfo_token.value);
+}
+
+
+/**
+ * Emit the opcode token(s) for a texture sample instruction.
+ * When any texel offset is non-zero, a second (extended) token carrying
+ * the three offsets is emitted right after the opcode token.
+ * \param vgpu10_opcode  the sample opcode
+ * \param saturate  clamp result to [0,1]?
+ * \param offsets  texel offsets for U, V and W
+ */
+static void
+emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
+                   unsigned vgpu10_opcode, boolean saturate,
+                   const int offsets[3])
+{
+   const boolean have_offsets = offsets[0] || offsets[1] || offsets[2];
+   VGPU10OpcodeToken0 opcode_token;
+   VGPU10OpcodeToken1 controls_token;
+
+   opcode_token.value = 0;  /* init all fields to zero */
+   opcode_token.opcodeType = vgpu10_opcode;
+   /* the real length is patched in by end_emit_instruction() */
+   opcode_token.instructionLength = 0;
+   opcode_token.saturate = saturate;
+
+   if (have_offsets) {
+      unsigned i;
+
+      /* each offset must lie within the supported texel fetch range */
+      for (i = 0; i < 3; i++) {
+         assert(offsets[i] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
+      }
+      for (i = 0; i < 3; i++) {
+         assert(offsets[i] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
+      }
+
+      opcode_token.extended = 1;
+
+      controls_token.value = 0;
+      controls_token.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
+      controls_token.offsetU = offsets[0];
+      controls_token.offsetV = offsets[1];
+      controls_token.offsetW = offsets[2];
+   }
+
+   emit_dword(emit, opcode_token.value);
+   if (have_offsets) {
+      /* the extended token is only initialized when offsets are in use */
+      emit_dword(emit, controls_token.value);
+   }
+}
+
+
+/**
+ * Emit the opcode token for a DISCARD instruction.
+ * \param nonzero  if set, the fragment is discarded when the X component
+ *                 is not 0; otherwise it is discarded when the X
+ *                 component is 0.
+ */
+static void
+emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
+{
+   VGPU10OpcodeToken0 discard_token;
+
+   discard_token.value = 0;
+   discard_token.opcodeType = VGPU10_OPCODE_DISCARD;
+   if (nonzero) {
+      /* otherwise the test field keeps its zero-initialized value */
+      discard_token.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
+   }
+
+   emit_dword(emit, discard_token.value);
+}
+
+
+/**
+ * Must be called before the first token of a VGPU10 instruction is
+ * emitted.  Records where the instruction starts so that
+ * end_emit_instruction() can patch it later.
+ */
+static void
+begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
+{
+   /* a non-zero value means the previous instruction was never ended */
+   assert(emit->inst_start_token == 0);
+
+   /* Remember the position (token count) of this instruction's
+    * VGPU10OpcodeToken0 rather than its address: a pointer would become
+    * invalid if the output buffer has to be realloc'd.
+    */
+   emit->inst_start_token = emit_get_num_tokens(emit);
+}
+
+
+/**
+ * Must be called after the last token of a VGPU10 instruction is emitted.
+ * Either patches the instruction's length into its opcode token or, if
+ * the instruction was flagged for discarding, rewinds the output pointer
+ * to where the instruction began.
+ */
+static void
+end_emit_instruction(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 *token_buf = (VGPU10OpcodeToken0 *) emit->buf;
+   const unsigned start = emit->inst_start_token;
+
+   assert(start > 0);
+
+   if (!emit->discard_instruction) {
+      /* Number of tokens emitted since begin_emit_instruction(); store
+       * it in the instruction's first token.
+       */
+      const unsigned num_tokens = emit_get_num_tokens(emit) - start;
+
+      assert(num_tokens > 0);
+
+      token_buf[start].instructionLength = num_tokens;
+   }
+   else {
+      /* Drop the instruction by moving emit->ptr back to its start. */
+      emit->ptr = (char *) (token_buf + start);
+   }
+
+   /* zero marks "no instruction in progress" for error checking */
+   emit->inst_start_token = 0;
+   emit->discard_instruction = FALSE;
+}
+
+
+/**
+ * Hand out the index of the next free internal temporary register.
+ * Internal temporaries are numbered after the shader's own temporaries.
+ */
+static unsigned
+get_temp_index(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned slot;
+
+   assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
+
+   slot = emit->internal_temp_count++;
+   return emit->num_shader_temps + slot;
+}
+
+
+/**
+ * Release every temporary handed out by get_temp_index() by resetting
+ * the internal temporary counter to zero.
+ */
+static void
+free_temp_indexes(struct svga_shader_emitter_v10 *emit)
+{
+   emit->internal_temp_count = 0u;
+}
+
+
+/**
+ * Create a tgsi_full_src_register.
+ * All fields are zeroed except the register file, the index and the
+ * swizzle, which is set to the identity (XYZW).
+ * \param file  the TGSI register file
+ * \param index  the register index within that file
+ */
+static struct tgsi_full_src_register
+make_src_reg(unsigned file, unsigned index)
+{
+   struct tgsi_full_src_register reg;
+
+   /* "&reg" had been corrupted into the (R) sign by HTML-entity decoding */
+   memset(&reg, 0, sizeof(reg));
+   reg.Register.File = file;
+   reg.Register.Index = index;
+   reg.Register.SwizzleX = TGSI_SWIZZLE_X;
+   reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
+   reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
+   reg.Register.SwizzleW = TGSI_SWIZZLE_W;
+   return reg;
+}
+
+
+/**
+ * Build a source register that refers to temporary number \p index.
+ * Thin wrapper around make_src_reg().
+ */
+static struct tgsi_full_src_register
+make_src_temp_reg(unsigned index)
+{
+   const struct tgsi_full_src_register temp =
+      make_src_reg(TGSI_FILE_TEMPORARY, index);
+
+   return temp;
+}
+
+
+/**
+ * Build a source register that refers to constant number \p index.
+ * Thin wrapper around make_src_reg().
+ */
+static struct tgsi_full_src_register
+make_src_const_reg(unsigned index)
+{
+   const struct tgsi_full_src_register constant =
+      make_src_reg(TGSI_FILE_CONSTANT, index);
+
+   return constant;
+}
+
+
+/**
+ * Build a source register that refers to immediate number \p index.
+ * Thin wrapper around make_src_reg().
+ */
+static struct tgsi_full_src_register
+make_src_immediate_reg(unsigned index)
+{
+   const struct tgsi_full_src_register immediate =
+      make_src_reg(TGSI_FILE_IMMEDIATE, index);
+
+   return immediate;
+}
+
+
+/**
+ * Create a tgsi_full_dst_register.
+ * All fields are zeroed except the register file, the index and the
+ * writemask, which enables all four components.
+ * \param file  the TGSI register file
+ * \param index  the register index within that file
+ */
+static struct tgsi_full_dst_register
+make_dst_reg(unsigned file, unsigned index)
+{
+   struct tgsi_full_dst_register reg;
+
+   /* "&reg" had been corrupted into the (R) sign by HTML-entity decoding */
+   memset(&reg, 0, sizeof(reg));
+   reg.Register.File = file;
+   reg.Register.Index = index;
+   reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
+   return reg;
+}
+
+
+/**
+ * Build a destination register that refers to temporary number \p index.
+ * Thin wrapper around make_dst_reg().
+ */
+static struct tgsi_full_dst_register
+make_dst_temp_reg(unsigned index)
+{
+   const struct tgsi_full_dst_register temp =
+      make_dst_reg(TGSI_FILE_TEMPORARY, index);
+
+   return temp;
+}
+
+
+/**
+ * Build a destination register that refers to output number \p index.
+ * Thin wrapper around make_dst_reg().
+ */
+static struct tgsi_full_dst_register
+make_dst_output_reg(unsigned index)
+{
+   const struct tgsi_full_dst_register output =
+      make_dst_reg(TGSI_FILE_OUTPUT, index);
+
+   return output;
+}
+
+
+/**
+ * Return a copy of \p reg with its Negate flag toggled; negating an
+ * already-negated register therefore clears the flag.
+ */
+static struct tgsi_full_src_register
+negate_src(const struct tgsi_full_src_register *reg)
+{
+   struct tgsi_full_src_register result = *reg;
+
+   if (reg->Register.Negate) {
+      result.Register.Negate = 0;
+   }
+   else {
+      result.Register.Negate = 1;
+   }
+   return result;
+}
+
+/**
+ * Return a copy of \p reg with its Absolute flag set.
+ * No other field of the copy is modified.
+ */
+static struct tgsi_full_src_register
+absolute_src(const struct tgsi_full_src_register *reg)
+{
+   struct tgsi_full_src_register result;
+
+   result = *reg;
+   result.Register.Absolute = 1;
+   return result;
+}
+
+
+/**
+ * Return the swizzle term of \p reg selected by \p term.
+ * \param term  one of TGSI_SWIZZLE_X/Y/Z/W.  Any other value trips an
+ *              assertion and falls back to TGSI_SWIZZLE_X.
+ */
+static inline unsigned
+get_swizzle(const struct tgsi_full_src_register *reg, unsigned term)
+{
+   if (term == TGSI_SWIZZLE_X) {
+      return reg->Register.SwizzleX;
+   }
+   if (term == TGSI_SWIZZLE_Y) {
+      return reg->Register.SwizzleY;
+   }
+   if (term == TGSI_SWIZZLE_Z) {
+      return reg->Register.SwizzleZ;
+   }
+   if (term == TGSI_SWIZZLE_W) {
+      return reg->Register.SwizzleW;
+   }
+
+   assert(!"Bad swizzle");
+   return TGSI_SWIZZLE_X;
+}
+
+
+/**
+ * Return a copy of \p reg with a further swizzle applied.
+ * The new swizzle is composed with the register's existing one: each
+ * requested term selects one of the terms \p reg already has.
+ */
+static struct tgsi_full_src_register
+swizzle_src(const struct tgsi_full_src_register *reg,
+            unsigned swizzleX, unsigned swizzleY,
+            unsigned swizzleZ, unsigned swizzleW)
+{
+   /* Look all four terms up in the ORIGINAL register before writing any
+    * of them into the copy.
+    */
+   const unsigned termX = get_swizzle(reg, swizzleX);
+   const unsigned termY = get_swizzle(reg, swizzleY);
+   const unsigned termZ = get_swizzle(reg, swizzleZ);
+   const unsigned termW = get_swizzle(reg, swizzleW);
+   struct tgsi_full_src_register result = *reg;
+
+   result.Register.SwizzleX = termX;
+   result.Register.SwizzleY = termY;
+   result.Register.SwizzleZ = termZ;
+   result.Register.SwizzleW = termW;
+   return result;
+}
+
+
+/**
+ * Return a copy of \p reg whose four swizzle terms are all the same.
+ * The requested term is resolved through the register's existing
+ * swizzle, so this composes with whatever swizzle \p reg already has.
+ */
+static struct tgsi_full_src_register
+scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle)
+{
+   const unsigned term = get_swizzle(reg, swizzle);
+   struct tgsi_full_src_register result = *reg;
+
+   /* assigned W, Z, Y, X like the original chained assignment */
+   result.Register.SwizzleW = term;
+   result.Register.SwizzleZ = term;
+   result.Register.SwizzleY = term;
+   result.Register.SwizzleX = term;
+   return result;
+}
+
+
+/**
+ * Return a copy of \p reg with its writemask replaced.
+ * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
+ */
+static struct tgsi_full_dst_register
+writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
+{
+   struct tgsi_full_dst_register result;
+
+   result = *reg;
+   result.Register.WriteMask = mask;
+   return result;
+}
+
+
+/**
+ * Tell whether all four swizzle terms of \p reg are equal, i.e. the
+ * swizzle is XXXX, YYYY, ZZZZ, or WWWW.
+ */
+static boolean
+same_swizzle_terms(const struct tgsi_full_src_register *reg)
+{
+   const unsigned first = reg->Register.SwizzleX;
+
+   /* all terms are equal exactly when Y, Z and W each equal X */
+   return (reg->Register.SwizzleY == first &&
+           reg->Register.SwizzleZ == first &&
+           reg->Register.SwizzleW == first);
+}
+
+
+/**
+ * Look for the value \p x among the four components of \p vec.
+ * Components are compared through their integer representation.
+ * \return position (0..3) of the first match, or -1 if there is none
+ */
+static int
+find_imm_in_vec4(const union tgsi_immediate_data vec[4],
+                 union tgsi_immediate_data x)
+{
+   int pos = 0;
+
+   while (pos < 4) {
+      if (x.Int == vec[pos].Int) {
+         return pos;
+      }
+      pos++;
+   }
+   return -1;
+}
+
+
+/**
+ * Helper used by make_immediate_reg(), make_immediate_reg_4().
+ * Scan the immediates, starting at \p startIndex, for the first vec4
+ * that holds \p x in any of its components.
+ * \return index of that immediate, or -1 (after asserting) if none
+ */
+static int
+find_immediate(struct svga_shader_emitter_v10 *emit,
+               union tgsi_immediate_data x, unsigned startIndex)
+{
+   const unsigned count = emit->num_immediates;
+   unsigned slot = startIndex;
+
+   assert(emit->immediates_emitted);
+
+   /* a vec4 matches if x equals any of its x, y, z, w components */
+   while (slot < count) {
+      if (find_imm_in_vec4(emit->immediates[slot], x) >= 0) {
+         return slot;
+      }
+      slot++;
+   }
+
+   /* Should never try to use an immediate value that wasn't pre-declared */
+   assert(!"find_immediate() failed!");
+   return -1;
+}
+
+
+/**
+ * Return a tgsi_full_src_register for an immediate/literal
+ * union tgsi_immediate_data[4] value.
+ * Note: the values must have been previously declared/allocated in
+ * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
+ * vec4 immediate.
+ * \return a register in TGSI_FILE_IMMEDIATE whose swizzle picks the four
+ *         values out of the vec4 found; if no such vec4 exists an
+ *         assertion fires and IMM[0].xxxx is returned.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
+                     const union tgsi_immediate_data imm[4])
+{
+   struct tgsi_full_src_register reg;
+   unsigned i;
+
+   for (i = 0; i < emit->num_common_immediates; i++) {
+      /* search for first component value */
+      int immpos = find_immediate(emit, imm[0], i);
+      int x, y, z, w;
+
+      assert(immpos >= 0);
+      if (immpos < 0) {
+         /* With asserts compiled out, never index immediates[] with -1.
+          * A search from a later start index cannot succeed either, so
+          * stop and use the fallback below.
+          */
+         break;
+      }
+
+      /* find remaining components within the immediate vector */
+      x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
+      y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
+      z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
+      w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
+
+      if (x >= 0 && y >= 0 && z >= 0 && w >= 0) {
+         /* found them all */
+         memset(&reg, 0, sizeof(reg));
+         reg.Register.File = TGSI_FILE_IMMEDIATE;
+         reg.Register.Index = immpos;
+         reg.Register.SwizzleX = x;
+         reg.Register.SwizzleY = y;
+         reg.Register.SwizzleZ = z;
+         reg.Register.SwizzleW = w;
+         return reg;
+      }
+      /* else, keep searching */
+   }
+
+   assert(!"Failed to find immediate register!");
+
+   /* Just return IMM[0].xxxx */
+   memset(&reg, 0, sizeof(reg));
+   reg.Register.File = TGSI_FILE_IMMEDIATE;
+   return reg;
+}
+
+
+/**
+ * Return a tgsi_full_src_register for an immediate/literal
+ * union tgsi_immediate_data value of the form {value, value, value, value}.
+ * \sa make_immediate_reg_4() regarding allowed values.
+ * \return a register in TGSI_FILE_IMMEDIATE with all four swizzle terms
+ *         selecting the component that holds \p value; if the value was
+ *         never declared an assertion fires and IMM[0].xxxx is returned.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg(struct svga_shader_emitter_v10 *emit,
+                   union tgsi_immediate_data value)
+{
+   struct tgsi_full_src_register reg;
+   int immpos = find_immediate(emit, value, 0);
+
+   assert(immpos >= 0);
+
+   /* "&reg" had been corrupted into the (R) sign by HTML-entity decoding */
+   memset(&reg, 0, sizeof(reg));
+   reg.Register.File = TGSI_FILE_IMMEDIATE;
+
+   if (immpos < 0) {
+      /* With asserts compiled out, never index immediates[] with -1;
+       * the zeroed register already describes IMM[0].xxxx.
+       */
+      return reg;
+   }
+
+   reg.Register.Index = immpos;
+   reg.Register.SwizzleX =
+   reg.Register.SwizzleY =
+   reg.Register.SwizzleZ =
+   reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
+
+   return reg;
+}
+
+
+/**
+ * Return a tgsi_full_src_register for the float immediate {x, y, z, w}.
+ * The four floats are packed into immediate data and looked up with
+ * make_immediate_reg_4().
+ * \sa make_immediate_reg_4() regarding allowed values.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
+                          float x, float y, float z, float w)
+{
+   union tgsi_immediate_data vec[4];
+
+   vec[0].Float = x;
+   vec[1].Float = y;
+   vec[2].Float = z;
+   vec[3].Float = w;
+
+   return make_immediate_reg_4(emit, vec);
+}
+
+
+/**
+ * Return a tgsi_full_src_register for a float immediate of the form
+ * {value, value, value, value}.  The float is wrapped in immediate data
+ * and looked up with make_immediate_reg().
+ * \sa make_immediate_reg_4() regarding allowed values.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
+{
+   union tgsi_immediate_data scalar;
+
+   scalar.Float = value;
+
+   return make_immediate_reg(emit, scalar);
+}
+
+
+/**
+ * Return a tgsi_full_src_register for the int immediate {x, y, z, w}.
+ * The four ints are packed into immediate data and looked up with
+ * make_immediate_reg_4().
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
+                        int x, int y, int z, int w)
+{
+   union tgsi_immediate_data vec[4];
+
+   vec[0].Int = x;
+   vec[1].Int = y;
+   vec[2].Int = z;
+   vec[3].Int = w;
+
+   return make_immediate_reg_4(emit, vec);
+}
+
+
+/**
+ * Return a tgsi_full_src_register for an int immediate of the form
+ * {value, value, value, value}.  The int is wrapped in immediate data
+ * and looked up with make_immediate_reg().
+ * \sa make_immediate_reg_4() regarding allowed values.
+ */
+static struct tgsi_full_src_register
+make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
+{
+   union tgsi_immediate_data scalar;
+
+   scalar.Int = value;
+
+   return make_immediate_reg(emit, scalar);
+}
+
+
+/**
+ * Append a union tgsi_immediate_data[4] immediate to the emitter's list.
+ * Must be called before the immediates block has been emitted.
+ * \return the index/position of the new immediate.
+ */
+static unsigned
+alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
+                  const union tgsi_immediate_data imm[4])
+{
+   /* claim the next slot; the counter is bumped before the checks */
+   const unsigned slot = emit->num_immediates++;
+   unsigned comp;
+
+   assert(!emit->immediates_emitted);
+   assert(slot < Elements(emit->immediates));
+
+   for (comp = 0; comp < 4; comp++) {
+      emit->immediates[slot][comp] = imm[comp];
+   }
+   return slot;
+}
+
+
+/**
+ * Allocate an immediate holding the floats {x, y, z, w}.
+ * The values are packed and passed to alloc_immediate_4().
+ * \return the index/position of the immediate.
+ */
+static unsigned
+alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
+                       float x, float y, float z, float w)
+{
+   union tgsi_immediate_data vec[4];
+
+   vec[0].Float = x;
+   vec[1].Float = y;
+   vec[2].Float = z;
+   vec[3].Float = w;
+
+   return alloc_immediate_4(emit, vec);
+}
+
+
+/**
+ * Allocate an immediate holding the ints {x, y, z, w}.
+ * The values are packed and passed to alloc_immediate_4().
+ * \return the index/position of the immediate.
+ */
+static unsigned
+alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
+                     int x, int y, int z, int w)
+{
+   union tgsi_immediate_data vec[4];
+
+   vec[0].Int = x;
+   vec[1].Int = y;
+   vec[2].Int = z;
+   vec[3].Int = w;
+
+   return alloc_immediate_4(emit, vec);
+}
+
+
+/**
+ * Allocate a shader input to store a system value.
+ * The input register chosen is the shader's input count plus \p index,
+ * and it is recorded in emit->system_value_indexes[index].
+ * \return the input register index that was assigned
+ */
+static unsigned
+alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
+{
+   unsigned input_reg;
+
+   assert(index < Elements(emit->system_value_indexes));
+
+   input_reg = emit->info.num_inputs + index;
+   emit->system_value_indexes[index] = input_reg;
+   return input_reg;
+}
+
+
+/**
+ * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
+ * Nothing is emitted at this point: the values are only recorded with
+ * alloc_immediate_4() so they can be emitted later.
+ * \return always TRUE
+ */
+static boolean
+emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_full_immediate *imm)
+{
+   /* the returned position is not needed here */
+   (void) alloc_immediate_4(emit, imm->u);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
+ * containing all the immediate values previously allocated
+ * with alloc_immediate_4().  May only be done once; afterwards the
+ * emitter is flagged as having emitted its immediates.
+ * \return always TRUE
+ */
+static boolean
+emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
+{
+   /* four dwords per vec4 immediate */
+   const unsigned payload_dwords = 4 * emit->num_immediates;
+   VGPU10OpcodeToken0 header;
+
+   assert(!emit->immediates_emitted);
+
+   header.value = 0;
+   header.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
+   header.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
+
+   /* Note: no begin/end_emit_instruction() calls.  The second dword is
+    * the payload size plus 2.
+    */
+   emit_dword(emit, header.value);
+   emit_dword(emit, 2 + payload_dwords);
+   emit_dwords(emit, (unsigned *) emit->immediates, payload_dwords);
+
+   emit->immediates_emitted = TRUE;
+
+   return TRUE;
+}
+
+
+/**
+ * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
+ * interpolation mode.
+ * TGSI_INTERPOLATE_COLOR is first resolved to constant or perspective
+ * interpolation depending on the flatshade bit of the shader key.
+ * \param interp  a TGSI_INTERPOLATE_x value
+ * \param interpolate_loc  a TGSI_INTERPOLATE_LOC_x value; only centroid
+ *                         is distinguished from the rest
+ * \return a VGPU10_INTERPOLATION_x value
+ */
+static unsigned
+translate_interpolation(const struct svga_shader_emitter_v10 *emit,
+                        unsigned interp, unsigned interpolate_loc)
+{
+   const boolean centroid =
+      (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID);
+
+   if (interp == TGSI_INTERPOLATE_COLOR) {
+      if (emit->key.fs.flatshade) {
+         interp = TGSI_INTERPOLATE_CONSTANT;
+      }
+      else {
+         interp = TGSI_INTERPOLATE_PERSPECTIVE;
+      }
+   }
+
+   if (interp == TGSI_INTERPOLATE_CONSTANT) {
+      return VGPU10_INTERPOLATION_CONSTANT;
+   }
+   if (interp == TGSI_INTERPOLATE_LINEAR) {
+      if (centroid) {
+         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
+      }
+      return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
+   }
+   if (interp == TGSI_INTERPOLATE_PERSPECTIVE) {
+      if (centroid) {
+         return VGPU10_INTERPOLATION_LINEAR_CENTROID;
+      }
+      return VGPU10_INTERPOLATION_LINEAR;
+   }
+
+   assert(!"Unexpected interpolation mode");
+   return VGPU10_INTERPOLATION_CONSTANT;
+}
+
+
+/**
+ * Translate a TGSI property to VGPU10.
+ * Don't emit any instructions yet; only gather the geometry shader
+ * primitive property information into emit->gs.  The output primitive
+ * topology might be changed later.  The final property instructions
+ * will be emitted as part of the pre-helper code.
+ * \param prop  the TGSI property; only the GS input primitive, GS output
+ *              primitive and GS max output vertices properties are
+ *              recorded, all others are ignored
+ * \return always TRUE
+ */
+static boolean
+emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_property *prop)
+{
+ /* Positional table: entry order follows the PIPE_PRIM_x values named
+  * in the per-entry comments.
+  */
+ static const VGPU10_PRIMITIVE primType[] = {
+ VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */
+ VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */
+ VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */
+ VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */
+ VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */
+ VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */
+ VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */
+ VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */
+ VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */
+ VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */
+ VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */
+ VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
+ VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
+ VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
+ };
+
+ /* Positional table with the same PIPE_PRIM_x ordering as primType[],
+  * giving the output topology instead of the input primitive.
+  */
+ static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
+ VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */
+ VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */
+ VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */
+ VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */
+ VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */
+ VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
+ VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
+ VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */
+ VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */
+ VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */
+ VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */
+ VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
+ VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
+ VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
+ };
+
+ /* Input array size for each input primitive; this table is indexed
+  * by the VGPU10_PRIMITIVE_x value, not by PIPE_PRIM_x.
+  */
+ static const unsigned inputArraySize[] = {
+ 0, /* VGPU10_PRIMITIVE_UNDEFINED */
+ 1, /* VGPU10_PRIMITIVE_POINT */
+ 2, /* VGPU10_PRIMITIVE_LINE */
+ 3, /* VGPU10_PRIMITIVE_TRIANGLE */
+ 0,
+ 0,
+ 4, /* VGPU10_PRIMITIVE_LINE_ADJ */
+ 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
+ };
+
+ switch (prop->Property.PropertyName) {
+ case TGSI_PROPERTY_GS_INPUT_PRIM:
+ /* record the input primitive and the matching input array size */
+ assert(prop->u[0].Data < Elements(primType));
+ emit->gs.prim_type = primType[prop->u[0].Data];
+ assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
+ emit->gs.input_size = inputArraySize[emit->gs.prim_type];
+ break;
+
+ case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+ /* record the output primitive topology */
+ assert(prop->u[0].Data < Elements(primTopology));
+ emit->gs.prim_topology = primTopology[prop->u[0].Data];
+ assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
+ break;
+
+ case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+ emit->gs.max_out_vertices = prop->u[0].Data;
+ break;
+
+ default:
+ /* any other property is ignored here */
+ break;
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * Emit one complete property instruction: the opcode token, optionally
+ * followed by a single data dword.
+ * \param opcode0  the already filled-in opcode token
+ * \param nData  if non-zero, \p data is emitted after the opcode token
+ * \param data  the data dword (ignored when \p nData is zero)
+ */
+static void
+emit_property_instruction(struct svga_shader_emitter_v10 *emit,
+                          VGPU10OpcodeToken0 opcode0, unsigned nData,
+                          unsigned data)
+{
+   begin_emit_instruction(emit);
+
+   emit_dword(emit, opcode0.value);
+   if (nData != 0) {
+      emit_dword(emit, data);
+   }
+
+   end_emit_instruction(emit);
+}
+
+
+/**
+ * Emit the geometry shader property instructions from the values stored
+ * in emit->gs: input primitive type, output primitive topology and
+ * maximum output vertex count, in that order.
+ */
+static void
+emit_property_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 decl;
+
+   assert(emit->unit == PIPE_SHADER_GEOMETRY);
+
+   /* input primitive type: no data dword follows */
+   decl.value = 0;
+   decl.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
+   decl.primitive = emit->gs.prim_type;
+   emit_property_instruction(emit, decl, 0, 0);
+
+   /* output primitive topology: no data dword follows */
+   decl.value = 0;
+   decl.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
+   decl.primitiveTopology = emit->gs.prim_topology;
+   emit_property_instruction(emit, decl, 0, 0);
+
+   /* max output vertices: the count is passed as the data dword */
+   decl.value = 0;
+   decl.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
+   emit_property_instruction(emit, decl, 1, emit->gs.max_out_vertices);
+}
+
+
+/**
+ * Emit a vgpu10 declaration "instruction".
+ * The tokens are written in this order: opcode, operand, then the
+ * register index (preceded by the size for 2D or higher index
+ * dimensions), then the name token if it is non-zero.
+ * \param opcode0  opcode token; its opcodeType must be non-zero
+ * \param operand0  operand token; its mask must be non-zero
+ * \param name_token  name token, emitted only when its value is non-zero
+ * \param index  the register index
+ * \param size  array size of the operand.  In most cases, it is 1,
+ *              but for inputs to geometry shader, the array size varies
+ *              depending on the primitive type.
+ */
+static void
+emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
+                      VGPU10OpcodeToken0 opcode0,
+                      VGPU10OperandToken0 operand0,
+                      VGPU10NameToken name_token,
+                      unsigned index, unsigned size)
+{
+   const unsigned dim = operand0.indexDimension;
+
+   assert(opcode0.opcodeType);
+   assert(operand0.mask);
+
+   begin_emit_instruction(emit);
+
+   emit_dword(emit, opcode0.value);
+   emit_dword(emit, operand0.value);
+
+   if (dim == VGPU10_OPERAND_INDEX_1D) {
+      /* only the index of the register being declared */
+      emit_dword(emit, index);
+   }
+   else if (dim >= VGPU10_OPERAND_INDEX_2D) {
+      /* the size of the register comes first, then its index */
+      emit_dword(emit, size);
+      emit_dword(emit, index);
+   }
+
+   if (name_token.value != 0) {
+      emit_dword(emit, name_token.value);
+   }
+
+   end_emit_instruction(emit);
+}
+
+
+/**
+ * Emit the declaration for a shader input.
+ * The arguments are validated with assertions, packed into opcode,
+ * operand and name tokens, and handed to emit_decl_instruction().
+ * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
+ * \param operandType  operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
+ * \param dim  index dimension
+ * \param index  the input register index
+ * \param size  array size of the operand.  In most cases, it is 1,
+ *              but for inputs to geometry shader, the array size varies
+ *              depending on the primitive type.
+ * \param name  one of VGPU10_NAME_x
+ * \param numComp  number of components
+ * \param selMode  component selection mode
+ * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
+ * \param interpMode  interpolation mode
+ */
+static void
+emit_input_declaration(struct svga_shader_emitter_v10 *emit,
+                       unsigned opcodeType, unsigned operandType,
+                       unsigned dim, unsigned index, unsigned size,
+                       unsigned name, unsigned numComp,
+                       unsigned selMode, unsigned usageMask,
+                       unsigned interpMode)
+{
+   VGPU10OpcodeToken0 decl_opcode;
+   VGPU10OperandToken0 decl_operand;
+   VGPU10NameToken decl_name;
+
+   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
+   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
+          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
+   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
+   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
+   assert(dim <= VGPU10_OPERAND_INDEX_3D);
+   assert(name == VGPU10_NAME_UNDEFINED ||
+          name == VGPU10_NAME_POSITION ||
+          name == VGPU10_NAME_INSTANCE_ID ||
+          name == VGPU10_NAME_VERTEX_ID ||
+          name == VGPU10_NAME_PRIMITIVE_ID ||
+          name == VGPU10_NAME_IS_FRONT_FACE);
+   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
+          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
+
+   check_register_index(emit, opcodeType, index);
+
+   /* clear all three tokens before filling in individual fields */
+   decl_opcode.value = decl_operand.value = decl_name.value = 0;
+
+   decl_opcode.opcodeType = opcodeType;
+   decl_opcode.interpolationMode = interpMode;
+
+   decl_operand.operandType = operandType;
+   decl_operand.numComponents = numComp;
+   decl_operand.selectionMode = selMode;
+   decl_operand.mask = usageMask;
+   decl_operand.indexDimension = dim;
+   decl_operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   if (dim == VGPU10_OPERAND_INDEX_2D) {
+      /* a 2D operand also has a second index representation to set */
+      decl_operand.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   }
+
+   decl_name.name = name;
+
+   emit_decl_instruction(emit, decl_opcode, decl_operand, decl_name,
+                         index, size);
+}
+
+
+/**
+ * Emit the declaration for a shader output.
+ * The output is declared as a 1D-indexed, four-component operand in
+ * mask mode with an array size of 1.
+ * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
+ * \param index  the output register index
+ * \param name  one of VGPU10_NAME_x
+ * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
+ */
+static void
+emit_output_declaration(struct svga_shader_emitter_v10 *emit,
+                        unsigned type, unsigned index,
+                        unsigned name, unsigned usageMask)
+{
+   VGPU10OpcodeToken0 decl_opcode;
+   VGPU10OperandToken0 decl_operand;
+   VGPU10NameToken decl_name;
+
+   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+   assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
+          type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
+          type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
+   assert(name == VGPU10_NAME_UNDEFINED ||
+          name == VGPU10_NAME_POSITION ||
+          name == VGPU10_NAME_PRIMITIVE_ID ||
+          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
+          name == VGPU10_NAME_CLIP_DISTANCE);
+
+   check_register_index(emit, type, index);
+
+   /* clear all three tokens before filling in individual fields */
+   decl_opcode.value = decl_operand.value = decl_name.value = 0;
+
+   decl_opcode.opcodeType = type;
+
+   decl_operand.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
+   decl_operand.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   decl_operand.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+   decl_operand.mask = usageMask;
+   decl_operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   decl_operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+   decl_name.name = name;
+
+   emit_decl_instruction(emit, decl_opcode, decl_operand, decl_name,
+                         index, 1);
+}
+
+
+/**
+ * Emit the declaration for the fragment depth output.
+ * Only valid for fragment shaders.  The operand is a single-component,
+ * 0D-indexed OUTPUT_DEPTH register, and the name token is left zero.
+ */
+static void
+emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 decl_opcode;
+   VGPU10OperandToken0 decl_operand;
+   VGPU10NameToken decl_name;
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   /* clear all three tokens before filling in individual fields */
+   decl_opcode.value = decl_operand.value = decl_name.value = 0;
+
+   decl_opcode.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
+
+   decl_operand.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
+   decl_operand.numComponents = VGPU10_OPERAND_1_COMPONENT;
+   decl_operand.indexDimension = VGPU10_OPERAND_INDEX_0D;
+   decl_operand.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+
+   emit_decl_instruction(emit, decl_opcode, decl_operand, decl_name, 0, 1);
+}
+
+
+/**
+ * Emit the declaration for a system value input.
+ * Only the instance ID and vertex ID semantics are handled: an input
+ * register is allocated for the value and declared with the matching
+ * VGPU10 name.  Any other semantic is currently ignored.
+ * \param semantic_name  a TGSI_SEMANTIC_x value
+ * \param index  the system value index passed to
+ *               alloc_system_value_index()
+ */
+static void
+emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
+                              unsigned semantic_name, unsigned index)
+{
+   unsigned vgpu10_name;
+
+   switch (semantic_name) {
+   case TGSI_SEMANTIC_INSTANCEID:
+      vgpu10_name = VGPU10_NAME_INSTANCE_ID;
+      break;
+   case TGSI_SEMANTIC_VERTEXID:
+      vgpu10_name = VGPU10_NAME_VERTEX_ID;
+      break;
+   default:
+      return; /* XXX */
+   }
+
+   /* Both handled semantics are declared identically apart from the
+    * name: one input register, X component only.
+    */
+   index = alloc_system_value_index(emit, index);
+   emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
+                          VGPU10_OPERAND_TYPE_INPUT,
+                          VGPU10_OPERAND_INDEX_1D,
+                          index, 1,
+                          vgpu10_name,
+                          VGPU10_OPERAND_4_COMPONENT,
+                          VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+                          VGPU10_OPERAND_4_COMPONENT_MASK_X,
+                          VGPU10_INTERPOLATION_UNDEFINED);
+}
+
+/**
+ * Translate a TGSI declaration to VGPU10.  Returns FALSE for register files that are not handled (immediates, resources, unknown files), TRUE otherwise.
+ */
+static boolean
+emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
+                        const struct tgsi_full_declaration *decl)
+{
+   switch (decl->Declaration.File) {
+   case TGSI_FILE_INPUT:
+      /* do nothing - see emit_input_declarations() */
+      return TRUE;
+
+   case TGSI_FILE_OUTPUT:
+      assert(decl->Range.First == decl->Range.Last);
+      emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
+      return TRUE;
+
+   case TGSI_FILE_TEMPORARY:
+      /* Don't declare the temps here.  Just keep track of how many
+       * and emit the declaration later.
+       */
+      if (decl->Declaration.Array) {
+         /* Indexed temporary array.  Save its start index and size.  An
+          * out-of-range ArrayID asserts in debug builds and is otherwise
+          * clamped to the last temp_arrays[] slot so we never write past it.
+          */
+         const unsigned arrayID =
+            MIN2(decl->Array.ArrayID, ARRAY_SIZE(emit->temp_arrays) - 1);
+         unsigned i;
+         assert(decl->Array.ArrayID < ARRAY_SIZE(emit->temp_arrays));
+
+         /* Save this array so we can emit the declaration for it later */
+         emit->temp_arrays[arrayID].start = decl->Range.First;
+         emit->temp_arrays[arrayID].size =
+            decl->Range.Last - decl->Range.First + 1;
+
+         emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
+         assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
+         emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
+
+         /* Fill in the temp_map entries for this array */
+         for (i = decl->Range.First; i <= decl->Range.Last; i++) {
+            emit->temp_map[i].arrayId = arrayID;
+            emit->temp_map[i].index = i - decl->Range.First;
+         }
+      }
+
+      /* for all temps, indexed or not, keep track of highest index */
+      emit->num_shader_temps = MAX2(emit->num_shader_temps,
+                                    decl->Range.Last + 1);
+      return TRUE;
+
+   case TGSI_FILE_CONSTANT:
+      /* Don't declare constants here.  Just keep track and emit later. */
+      {
+         unsigned constbuf = 0, num_consts;
+         if (decl->Declaration.Dimension) {
+            constbuf = decl->Dim.Index2D;
+         }
+         /* We throw an assertion here when, in fact, the shader should never
+          * have linked due to constbuf index out of bounds, so we shouldn't
+          * have reached here.
+          */
+         assert(constbuf < Elements(emit->num_shader_consts));
+
+         num_consts = MAX2(emit->num_shader_consts[constbuf],
+                           decl->Range.Last + 1);
+
+         if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
+            debug_printf("Warning: constant buffer is declared to size [%u]"
+                         " but [%u] is the limit.\n",
+                         num_consts,
+                         VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
+         }
+         /* The linker doesn't enforce the max UBO size so we clamp here */
+         emit->num_shader_consts[constbuf] =
+            MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
+      }
+      return TRUE;
+
+   case TGSI_FILE_IMMEDIATE:
+      assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
+      return FALSE;
+
+   case TGSI_FILE_SYSTEM_VALUE:
+      emit_system_value_declaration(emit, decl->Semantic.Name,
+                                    decl->Range.First);
+      return TRUE;
+
+   case TGSI_FILE_SAMPLER:
+      /* Don't declare samplers here.  Just keep track and emit later. */
+      emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
+      return TRUE;
+
+   case TGSI_FILE_RESOURCE:
+      /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
+      /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
+      assert(!"TGSI_FILE_RESOURCE not handled yet");
+      return FALSE;
+
+   case TGSI_FILE_ADDRESS:
+      emit->num_address_regs = MAX2(emit->num_address_regs,
+                                    decl->Range.Last + 1);
+      return TRUE;
+
+   case TGSI_FILE_SAMPLER_VIEW:
+      /* Not used at this time, but maybe in the future.
+       * See emit_resource_declarations().
+       */
+      return TRUE;
+
+   default:
+      assert(!"Unexpected type of declaration");
+      return FALSE;
+   }
+}
+
+
+
+/**
+ * Emit all input declarations for the current shader stage (fragment, geometry or vertex).  Inputs whose usage mask is zero are skipped.  Always returns TRUE.
+ */
+static boolean
+emit_input_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned i;
+
+ if (emit->unit == PIPE_SHADER_FRAGMENT) {
+ /* FS: each input is declared at the register given by linkage.input_map[] */
+ for (i = 0; i < emit->linkage.num_inputs; i++) {
+ unsigned semantic_name = emit->info.input_semantic_name[i];
+ unsigned usage_mask = emit->info.input_usage_mask[i];
+ unsigned index = emit->linkage.input_map[i];
+ unsigned type, interpolationMode, name;
+
+ if (usage_mask == 0)
+ continue; /* register is not actually used */
+
+ if (semantic_name == TGSI_SEMANTIC_POSITION) {
+ /* fragment position input */
+ type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+ interpolationMode = VGPU10_INTERPOLATION_LINEAR;
+ name = VGPU10_NAME_POSITION;
+ if (usage_mask & TGSI_WRITEMASK_W) {
+ /* we need to replace use of 'w' with '1/w' */
+ emit->fs.fragcoord_input_index = i;
+ }
+ }
+ else if (semantic_name == TGSI_SEMANTIC_FACE) {
+ /* fragment front-facing input */
+ type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+ interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+ name = VGPU10_NAME_IS_FRONT_FACE;
+ emit->fs.face_input_index = i;
+ }
+ else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
+ /* primitive ID */
+ type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+ interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+ name = VGPU10_NAME_PRIMITIVE_ID;
+ }
+ else {
+ /* general fragment input */
+ type = VGPU10_OPCODE_DCL_INPUT_PS;
+ interpolationMode =
+ translate_interpolation(emit,
+ emit->info.input_interpolate[i],
+ emit->info.input_interpolate_loc[i]);
+
+ /* keeps track if flat interpolation mode is being used */
+ emit->uses_flat_interp = emit->uses_flat_interp ||
+ (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
+
+ name = VGPU10_NAME_UNDEFINED;
+ }
+
+ emit_input_declaration(emit, type,
+ VGPU10_OPERAND_TYPE_INPUT,
+ VGPU10_OPERAND_INDEX_1D, index, 1,
+ name,
+ VGPU10_OPERAND_4_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ interpolationMode);
+ }
+ }
+ else if (emit->unit == PIPE_SHADER_GEOMETRY) {
+ /* GS: declarations pass emit->gs.input_size where the other stages pass 1 */
+ for (i = 0; i < emit->info.num_inputs; i++) {
+ unsigned semantic_name = emit->info.input_semantic_name[i];
+ unsigned usage_mask = emit->info.input_usage_mask[i];
+ unsigned index = emit->linkage.input_map[i];
+ unsigned opcodeType, operandType;
+ unsigned numComp, selMode;
+ unsigned name;
+ unsigned dim;
+
+ if (usage_mask == 0)
+ continue; /* register is not actually used */
+
+ opcodeType = VGPU10_OPCODE_DCL_INPUT;
+ operandType = VGPU10_OPERAND_TYPE_INPUT;
+ numComp = VGPU10_OPERAND_4_COMPONENT;
+ selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+ name = VGPU10_NAME_UNDEFINED;
+
+ /* all geometry shader inputs are two dimensional except gl_PrimitiveID */
+ dim = VGPU10_OPERAND_INDEX_2D;
+
+ if (semantic_name == TGSI_SEMANTIC_PRIMID) {
+ /* Primitive ID */
+ operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
+ dim = VGPU10_OPERAND_INDEX_0D;
+ numComp = VGPU10_OPERAND_0_COMPONENT;
+ selMode = 0;
+
+ /* Also save the register index so we can recognize the
+ * primitive id when emitting a src register: its operand type
+ * and index dimension must be changed there.
+ */
+ emit->gs.prim_id_index = i;
+ }
+ else if (semantic_name == TGSI_SEMANTIC_POSITION) {
+ /* vertex position input */
+ opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
+ name = VGPU10_NAME_POSITION;
+ }
+
+ emit_input_declaration(emit, opcodeType, operandType,
+ dim, index,
+ emit->gs.input_size,
+ name,
+ numComp, selMode,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ VGPU10_INTERPOLATION_UNDEFINED);
+ }
+ }
+ else {
+ assert(emit->unit == PIPE_SHADER_VERTEX);
+ /* VS: inputs keep their TGSI index and are declared with VGPU10_NAME_UNDEFINED */
+ for (i = 0; i < emit->info.num_inputs; i++) {
+ unsigned usage_mask = emit->info.input_usage_mask[i];
+ unsigned index = i;
+
+ if (usage_mask == 0)
+ continue; /* register is not actually used */
+
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT,
+ VGPU10_OPERAND_INDEX_1D, index, 1,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ VGPU10_INTERPOLATION_UNDEFINED);
+ }
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * Emit all output declarations, recording the output indexes later code needs (color, position, clip distance, clip vertex).  Also declares the stream-output shadow copies of position and clip distance when they are in use.  Always returns TRUE.
+ */
+static boolean
+emit_output_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned i;
+
+ for (i = 0; i < emit->info.num_outputs; i++) {
+ /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
+ const unsigned semantic_name = emit->info.output_semantic_name[i];
+ const unsigned semantic_index = emit->info.output_semantic_index[i];
+ unsigned index = i;
+
+ if (emit->unit == PIPE_SHADER_FRAGMENT) {
+ if (semantic_name == TGSI_SEMANTIC_COLOR) {
+ assert(semantic_index < Elements(emit->fs.color_out_index));
+
+ emit->fs.color_out_index[semantic_index] = index;
+
+ /* The semantic index is the shader's color output/buffer index */
+ emit_output_declaration(emit,
+ VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+
+ if (semantic_index == 0) {
+ if (emit->key.fs.write_color0_to_n_cbufs > 1) {
+ /* Emit declarations for the additional color outputs
+ * for broadcasting.
+ */
+ unsigned j;
+ for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
+ /* Allocate a new output index */
+ unsigned idx = emit->info.num_outputs + j - 1;
+ emit->fs.color_out_index[j] = idx;
+ emit_output_declaration(emit,
+ VGPU10_OPCODE_DCL_OUTPUT, idx,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+ emit->info.output_semantic_index[idx] = j;
+ }
+ }
+ }
+ else {
+ assert(!emit->key.fs.write_color0_to_n_cbufs);
+ }
+ }
+ else if (semantic_name == TGSI_SEMANTIC_POSITION) {
+ /* Fragment depth output */
+ emit_fragdepth_output_declaration(emit);
+ }
+ else {
+ assert(!"Bad output semantic name");
+ }
+ }
+ else {
+ /* VS or GS */
+ unsigned name, type;
+ unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_POSITION:
+ assert(emit->unit != PIPE_SHADER_FRAGMENT);
+ type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
+ name = VGPU10_NAME_POSITION;
+ /* Save the index of the vertex position output register */
+ emit->vposition.out_index = index;
+ break;
+ case TGSI_SEMANTIC_CLIPDIST:
+ type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
+ name = VGPU10_NAME_CLIP_DISTANCE;
+ /* save the starting index of the clip distance output register */
+ if (semantic_index == 0)
+ emit->clip_dist_out_index = index;
+ writemask = emit->output_usage_mask[index];
+ writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
+ if (writemask == 0x0) {
+ continue; /* discard this do-nothing declaration */
+ }
+ break;
+ case TGSI_SEMANTIC_PRIMID:
+ assert(emit->unit == PIPE_SHADER_GEOMETRY);
+ type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
+ name = VGPU10_NAME_PRIMITIVE_ID;
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ assert(emit->unit == PIPE_SHADER_GEOMETRY);
+ type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
+ name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
+ break;
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ type = VGPU10_OPCODE_DCL_OUTPUT;
+ name = VGPU10_NAME_UNDEFINED;
+ emit->clip_vertex_out_index = index;
+ break;
+ default:
+ /* generic output */
+ type = VGPU10_OPCODE_DCL_OUTPUT;
+ name = VGPU10_NAME_UNDEFINED;
+ }
+
+ emit_output_declaration(emit, type, index, name, writemask);
+ }
+ }
+
+ if (emit->vposition.so_index != INVALID_INDEX &&
+ emit->vposition.out_index != INVALID_INDEX) {
+
+ assert(emit->unit != PIPE_SHADER_FRAGMENT);
+
+ /* Emit the declaration for the non-adjusted vertex position
+ * for stream output purposes
+ */
+ emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
+ emit->vposition.so_index,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+ }
+
+ if (emit->clip_dist_so_index != INVALID_INDEX &&
+ emit->clip_dist_out_index != INVALID_INDEX) {
+
+ assert(emit->unit != PIPE_SHADER_FRAGMENT);
+
+ /* Emit the declaration for the clip distance shadow copy which
+ * will be used for stream output purposes and for the clip distance
+ * varying variable
+ */
+ emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
+ emit->clip_dist_so_index,
+ VGPU10_NAME_UNDEFINED,
+ emit->output_usage_mask[emit->clip_dist_out_index]);
+
+ if (emit->info.num_written_clipdistance > 4) {
+ /* for the second clip distance register, each handles 4 planes */
+ emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
+ emit->clip_dist_so_index + 1,
+ VGPU10_NAME_UNDEFINED,
+ emit->output_usage_mask[emit->clip_dist_out_index+1]);
+ }
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * Emit the declarations for the temporary registers: the shader's own temps, the extra temps this translator allocates for itself (stored in various emit->*_tmp_index fields), and the indexable temp arrays.  Always returns TRUE.
+ */
+static boolean
+emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned total_temps, reg, i;
+
+ total_temps = emit->num_shader_temps;
+
+ /* Allocate extra temps for specially-implemented instructions,
+ * such as LIT.
+ */
+ total_temps += MAX_INTERNAL_TEMPS;
+
+ if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
+ if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
+ emit->key.clip_plane_enable ||
+ emit->vposition.so_index != INVALID_INDEX) {
+ emit->vposition.tmp_index = total_temps;
+ total_temps += 1;
+ }
+
+ if (emit->unit == PIPE_SHADER_VERTEX) {
+ unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
+ emit->key.vs.adjust_attrib_itof |
+ emit->key.vs.adjust_attrib_utof |
+ emit->key.vs.attrib_is_bgra |
+ emit->key.vs.attrib_puint_to_snorm |
+ emit->key.vs.attrib_puint_to_uscaled |
+ emit->key.vs.attrib_puint_to_sscaled);
+ while (attrib_mask) {
+ unsigned index = u_bit_scan(&attrib_mask);
+ emit->vs.adjusted_input[index] = total_temps++;
+ }
+ }
+
+ if (emit->clip_mode == CLIP_DISTANCE) {
+ /* We need to write the clip distance to a temporary register
+ * first. Then it will be copied to the shadow copy for
+ * the clip distance varying variable and stream output purpose.
+ * It will also be copied to the actual CLIPDIST register
+ * according to the enabled clip planes
+ */
+ emit->clip_dist_tmp_index = total_temps++;
+ if (emit->info.num_written_clipdistance > 4)
+ total_temps++; /* second clip register */
+ }
+ else if (emit->clip_mode == CLIP_VERTEX) {
+ /* We need to convert the TGSI CLIPVERTEX output to one or more
+ * clip distances. Allocate a temp reg for the clipvertex here.
+ */
+ assert(emit->info.writes_clipvertex > 0);
+ emit->clip_vertex_tmp_index = total_temps;
+ total_temps++;
+ }
+ }
+ else if (emit->unit == PIPE_SHADER_FRAGMENT) {
+ if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
+ emit->key.fs.write_color0_to_n_cbufs > 1) {
+ /* Allocate a temp to hold the output color */
+ emit->fs.color_tmp_index = total_temps;
+ total_temps += 1;
+ }
+
+ if (emit->fs.face_input_index != INVALID_INDEX) {
+ /* Allocate a temp for the +/-1 face register */
+ emit->fs.face_tmp_index = total_temps;
+ total_temps += 1;
+ }
+
+ if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
+ /* Allocate a temp for modified fragment position register */
+ emit->fs.fragcoord_tmp_index = total_temps;
+ total_temps += 1;
+ }
+ }
+
+ for (i = 0; i < emit->num_address_regs; i++) {
+ emit->address_reg_index[i] = total_temps++;
+ }
+
+ /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
+ * temp indexes. Basically, we compact all the non-array temp register
+ * indexes into a consecutive series.
+ *
+ * Before, we may have some TGSI declarations like:
+ * DCL TEMP[0..1], LOCAL
+ * DCL TEMP[2..4], ARRAY(1), LOCAL
+ * DCL TEMP[5..7], ARRAY(2), LOCAL
+ * plus, some extra temps, like TEMP[8], TEMP[9] for misc things
+ *
+ * After, we'll have a map like this:
+ * temp_map[0] = { array 0, index 0 }
+ * temp_map[1] = { array 0, index 1 }
+ * temp_map[2] = { array 1, index 0 }
+ * temp_map[3] = { array 1, index 1 }
+ * temp_map[4] = { array 1, index 2 }
+ * temp_map[5] = { array 2, index 0 }
+ * temp_map[6] = { array 2, index 1 }
+ * temp_map[7] = { array 2, index 2 }
+ * temp_map[8] = { array 0, index 2 }
+ * temp_map[9] = { array 0, index 3 }
+ *
+ * We'll declare two arrays of 3 elements, plus a set of four non-indexed
+ * temps numbered 0..3
+ *
+ * Any time we emit a temporary register index, we'll have to use the
+ * temp_map[] table to convert the TGSI index to the VGPU10 index.
+ *
+ * Finally, we recompute the total_temps value here.
+ */
+ reg = 0;
+ for (i = 0; i < total_temps; i++) {
+ if (emit->temp_map[i].arrayId == 0) {
+ emit->temp_map[i].index = reg++;
+ }
+ }
+ total_temps = reg;
+
+ if (0) {
+ debug_printf("total_temps %u\n", total_temps);
+ for (i = 0; i < 30; i++) {
+ debug_printf("temp %u -> array %u index %u\n",
+ i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
+ }
+ }
+
+ /* Emit declaration of ordinary temp registers */
+ if (total_temps > 0) {
+ VGPU10OpcodeToken0 opcode0;
+
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
+
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ emit_dword(emit, total_temps);
+ end_emit_instruction(emit);
+ }
+
+ /* Emit declarations for indexable temp arrays. Skip 0th entry since
+ * it's unused.
+ */
+ for (i = 1; i < emit->num_temp_arrays; i++) {
+ unsigned num_temps = emit->temp_arrays[i].size;
+
+ if (num_temps > 0) {
+ VGPU10OpcodeToken0 opcode0;
+
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
+
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ emit_dword(emit, i); /* which array */
+ emit_dword(emit, num_temps);
+ emit_dword(emit, 4); /* num components */
+ end_emit_instruction(emit);
+
+ total_temps += num_temps;
+ }
+ }
+
+ /* Check that the grand total of all regular and indexed temps is
+ * under the limit.
+ */
+ check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
+
+ return TRUE;
+}
+
+/** Emit the constant buffer declarations: buffer 0 (the shader's constants followed by the translator's "extra" constants, whose slots are recorded in emit), then every other non-empty buffer (UBOs).  Always returns TRUE. */
+static boolean
+emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
+{
+ VGPU10OpcodeToken0 opcode0;
+ VGPU10OperandToken0 operand0;
+ unsigned total_consts, i;
+
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
+ opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
+ /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
+
+ operand0.value = 0;
+ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+ operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
+ operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+ operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+ operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
+ operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+ operand0.swizzleX = 0;
+ operand0.swizzleY = 1;
+ operand0.swizzleZ = 2;
+ operand0.swizzleW = 3;
+
+ /**
+ * Emit declaration for constant buffer [0]. We also allocate
+ * room for the extra constants here.
+ */
+ total_consts = emit->num_shader_consts[0];
+
+ /* Now, allocate constant slots for the "extra" constants */
+
+ /* Vertex position scale/translation */
+ if (emit->vposition.need_prescale) {
+ emit->vposition.prescale_scale_index = total_consts++;
+ emit->vposition.prescale_trans_index = total_consts++;
+ }
+
+ if (emit->unit == PIPE_SHADER_VERTEX) {
+ if (emit->key.vs.undo_viewport) {
+ emit->vs.viewport_index = total_consts++;
+ }
+ }
+
+ /* user-defined clip planes: one constant per enabled plane, in order */
+ if (emit->key.clip_plane_enable) {
+ unsigned n = util_bitcount(emit->key.clip_plane_enable);
+ assert(emit->unit == PIPE_SHADER_VERTEX ||
+ emit->unit == PIPE_SHADER_GEOMETRY);
+ for (i = 0; i < n; i++) {
+ emit->clip_plane_const[i] = total_consts++;
+ }
+ }
+
+ /* Texcoord scale factors for RECT textures */
+ {
+ for (i = 0; i < emit->num_samplers; i++) {
+ if (emit->key.tex[i].unnormalized) {
+ emit->texcoord_scale_index[i] = total_consts++;
+ }
+ }
+ }
+
+ /* Texture buffer sizes */
+ for (i = 0; i < emit->num_samplers; i++) {
+ if (emit->key.tex[i].texture_target == PIPE_BUFFER) {
+ emit->texture_buffer_size_index[i] = total_consts++;
+ }
+ }
+
+ if (total_consts > 0) {
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ emit_dword(emit, operand0.value);
+ emit_dword(emit, 0); /* which const buffer slot */
+ emit_dword(emit, total_consts);
+ end_emit_instruction(emit);
+ }
+
+ /* Declare remaining constant buffers (UBOs) */
+ for (i = 1; i < Elements(emit->num_shader_consts); i++) {
+ if (emit->num_shader_consts[i] > 0) {
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ emit_dword(emit, operand0.value);
+ emit_dword(emit, i); /* which const buffer slot */
+ emit_dword(emit, emit->num_shader_consts[i]);
+ end_emit_instruction(emit);
+ }
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * Declare one sampler (default mode) for each slot below emit->num_samplers.
+ */
+static boolean
+emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 dcl;
+   VGPU10OperandToken0 sampler;
+   unsigned unit;
+
+   /* The opcode and operand tokens do not depend on the sampler index */
+   dcl.value = 0;
+   dcl.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
+   dcl.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
+
+   sampler.value = 0;
+   sampler.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
+   sampler.numComponents = VGPU10_OPERAND_0_COMPONENT;
+   sampler.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   sampler.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+   for (unit = 0; unit < emit->num_samplers; unit++) {
+      begin_emit_instruction(emit);
+      emit_dword(emit, dcl.value);
+      emit_dword(emit, sampler.value);
+      emit_dword(emit, unit);
+      end_emit_instruction(emit);
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
+ * \param msaa  selects the multisample variant for 2D/RECT and 2D array targets
+ */
+static unsigned
+pipe_texture_to_resource_dimension(unsigned target, bool msaa)
+{
+   /* only the 2D (incl. RECT) and 2D array targets consult the msaa flag */
+   if (target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_RECT) {
+      if (msaa)
+         return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
+      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+   }
+   if (target == PIPE_TEXTURE_2D_ARRAY) {
+      if (msaa)
+         return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY;
+      return VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
+   }
+
+   switch (target) {
+   case PIPE_BUFFER:             return VGPU10_RESOURCE_DIMENSION_BUFFER;
+   case PIPE_TEXTURE_1D:         return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
+   case PIPE_TEXTURE_3D:         return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
+   case PIPE_TEXTURE_CUBE:       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
+   case PIPE_TEXTURE_1D_ARRAY:   return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY;
+   case PIPE_TEXTURE_CUBE_ARRAY: return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
+   default:
+      assert(!"Unexpected resource type");
+      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+   }
+}
+
+
+/**
+ * Given a tgsi_return_type, return true iff it is an integer type.
+ * Values outside the five known return types assert and yield FALSE.
+ */
+static boolean
+is_integer_type(enum tgsi_return_type type)
+{
+   const boolean is_int = (type == TGSI_RETURN_TYPE_SINT ||
+                           type == TGSI_RETURN_TYPE_UINT);
+   const boolean is_non_int = (type == TGSI_RETURN_TYPE_FLOAT ||
+                               type == TGSI_RETURN_TYPE_UNORM ||
+                               type == TGSI_RETURN_TYPE_SNORM);
+
+   if (!is_int && !is_non_int) {
+      /* TGSI_RETURN_TYPE_COUNT or some other unexpected value */
+      assert(!"is_integer_type: Unknown tgsi_return_type");
+   }
+
+   return is_int;
+}
+
+
+/**
+ * Emit declarations for resources.
+ *
+ * One DCL_RESOURCE instruction is emitted per sampler slot
+ * (0 .. emit->num_samplers - 1).  The resource dimension and return
+ * type are read from the shader key (emit->key.tex[]).
+ *
+ * XXX When we're sure that all TGSI shaders will be generated with
+ * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
+ * rework this code.
+ *
+ * \return TRUE always
+ *
+ */
+static boolean
+emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OperandToken0 res_operand;
+   unsigned unit;
+
+   /* The operand token is the same for every resource; build it once */
+   res_operand.value = 0;
+   res_operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
+   res_operand.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
+   res_operand.indexDimension = VGPU10_OPERAND_INDEX_1D;
+   res_operand.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+   /* Emit resource decl for each sampler */
+   for (unit = 0; unit < emit->num_samplers; unit++) {
+      VGPU10OpcodeToken0 dcl;
+      VGPU10ResourceReturnTypeToken ret_token;
+      VGPU10_RESOURCE_RETURN_TYPE rt;
+
+      dcl.value = 0;
+      dcl.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
+      dcl.resourceDimension =
+         pipe_texture_to_resource_dimension(emit->key.tex[unit].texture_target,
+                                            emit->key.tex[unit].texture_msaa);
+
+      /* Convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x.  Each VGPU10
+       * value is the matching TGSI value plus one, which the static
+       * assertions below verify at compile time.
+       */
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
+      STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
+      assert(emit->key.tex[unit].return_type <= TGSI_RETURN_TYPE_FLOAT);
+      rt = emit->key.tex[unit].return_type + 1;
+
+      /* all four components share the same return type */
+      ret_token.value = 0;
+      ret_token.component0 = rt;
+      ret_token.component1 = rt;
+      ret_token.component2 = rt;
+      ret_token.component3 = rt;
+
+      begin_emit_instruction(emit);
+      emit_dword(emit, dcl.value);
+      emit_dword(emit, res_operand.value);
+      emit_dword(emit, unit);
+      emit_dword(emit, ret_token.value);
+      end_emit_instruction(emit);
+   }
+
+   return TRUE;
+}
+
+/** Emit one instruction: opcode token (with saturate flag), \a dst, then \a src. */
+static void
+emit_instruction_op1(struct svga_shader_emitter_v10 *emit, unsigned opcode,
+                     const struct tgsi_full_dst_register *dst,
+                     const struct tgsi_full_src_register *src,
+                     boolean saturate)
+{
+   begin_emit_instruction(emit);
+   emit_opcode(emit, opcode, saturate);
+   emit_dst_register(emit, dst);
+   emit_src_register(emit, src);
+   end_emit_instruction(emit);
+}
+
+/** Emit one instruction: opcode token (with saturate flag), \a dst, \a src1, \a src2. */
+static void
+emit_instruction_op2(struct svga_shader_emitter_v10 *emit, unsigned opcode,
+                     const struct tgsi_full_dst_register *dst,
+                     const struct tgsi_full_src_register *src1,
+                     const struct tgsi_full_src_register *src2,
+                     boolean saturate)
+{
+   begin_emit_instruction(emit);
+   emit_opcode(emit, opcode, saturate);
+   emit_dst_register(emit, dst);
+   emit_src_register(emit, src1);
+   emit_src_register(emit, src2);
+   end_emit_instruction(emit);
+}
+
+/** Emit one instruction: opcode token (with saturate flag), \a dst, \a src1, \a src2, \a src3. */
+static void
+emit_instruction_op3(struct svga_shader_emitter_v10 *emit, unsigned opcode,
+                     const struct tgsi_full_dst_register *dst,
+                     const struct tgsi_full_src_register *src1,
+                     const struct tgsi_full_src_register *src2,
+                     const struct tgsi_full_src_register *src3,
+                     boolean saturate)
+{
+   begin_emit_instruction(emit);
+   emit_opcode(emit, opcode, saturate);
+   emit_dst_register(emit, dst);
+   emit_src_register(emit, src1);
+   emit_src_register(emit, src2);
+   emit_src_register(emit, src3);
+   end_emit_instruction(emit);
+}
+
+/**
+ * Emit the actual clip distance instructions to be used for clipping
+ * by copying the clip distance from the temporary registers to the
+ * CLIPDIST registers written with the enabled planes mask.
+ * Also copy the clip distance from the temporary to the clip distance
+ * shadow copy register.  One register (4 planes) is handled per loop
+ * iteration, for at most two registers.
+ */
+static void
+emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   struct tgsi_full_src_register tmp_clip_dist_src;
+   struct tgsi_full_dst_register clip_dist_dst;
+   unsigned i;
+   unsigned clip_plane_enable = emit->key.clip_plane_enable;
+   unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
+   /* Signed: 4 is subtracted per register, so the count may drop below zero */
+   int num_written_clipdist = emit->info.num_written_clipdistance;
+
+   assert(emit->clip_dist_out_index != INVALID_INDEX);
+   assert(emit->clip_dist_tmp_index != INVALID_INDEX);
+
+   /**
+    * Temporarily reset the temporary clip dist register index so
+    * that the copy to the real clip dist register will not
+    * attempt to copy to the temporary register again
+    */
+   emit->clip_dist_tmp_index = INVALID_INDEX;
+
+   for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist -= 4) {
+
+      tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
+
+      /**
+       * copy to the shadow copy for use by varying variable and
+       * stream output. All clip distances
+       * will be written regardless of the enabled clipping planes.
+       */
+      clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
+                                   emit->clip_dist_so_index + i);
+
+      /* MOV clip_dist_so, tmp_clip_dist */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
+                           &tmp_clip_dist_src, FALSE);
+
+      /**
+       * copy the clip distances of the enabled clipping planes
+       * to the CLIPDIST registers for clipping
+       */
+      if (clip_plane_enable & 0xf) {
+         clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
+                                      emit->clip_dist_out_index + i);
+         clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
+
+         /* MOV CLIPDIST, tmp_clip_dist */
+         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
+                              &tmp_clip_dist_src, FALSE);
+      }
+      /* four clip planes per clip register */
+      clip_plane_enable >>= 4;
+   }
+   /**
+    * set the temporary clip dist register index back to the
+    * temporary index for the next vertex
+    */
+   emit->clip_dist_tmp_index = clip_dist_tmp_index;
+}
+
+/* Declare clip distance output registers for user-defined clip planes
+ * or the TGSI_CLIPVERTEX output.
+ */
+static void
+emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   const unsigned plane_count = util_bitcount(emit->key.clip_plane_enable);
+   const unsigned first_reg = emit->num_outputs;
+   unsigned plane_mask, reg;
+
+   assert(emit->unit == PIPE_SHADER_VERTEX ||
+          emit->unit == PIPE_SHADER_GEOMETRY);
+   assert(plane_count <= 8);
+
+   /* Only the CLIP_LEGACY and CLIP_VERTEX modes get registers declared
+    * here, and only when at least one clip plane is enabled.
+    */
+   if ((emit->clip_mode != CLIP_LEGACY && emit->clip_mode != CLIP_VERTEX) ||
+       plane_count == 0) {
+      return;
+   }
+
+   /* Declare one or two clip output registers.  The number of components
+    * in the mask reflects the number of clip planes.  For example, if 5
+    * clip planes are needed, we'll declare outputs similar to:
+    * dcl_output_siv o2.xyzw, clip_distance
+    * dcl_output_siv o3.x, clip_distance
+    */
+   emit->clip_dist_out_index = first_reg; /* save the starting clip dist reg index */
+
+   plane_mask = (1 << plane_count) - 1;
+
+   /* four clip planes per register: bits 0..3 -> reg 0, bits 4..7 -> reg 1 */
+   for (reg = 0; reg < 2; reg++) {
+      const unsigned reg_planes = plane_mask >> (4 * reg);
+
+      if (reg_planes & 0xf) {
+         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
+                                 first_reg + reg, VGPU10_NAME_CLIP_DISTANCE,
+                                 reg_planes & VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+         emit->num_outputs++;
+      }
+   }
+}
+
+
+/**
+ * Compute the clip distances for legacy/automatic clip planes.
+ * One DP4 is emitted per enabled clip plane: the dot product of the vertex
+ * position held in TEMP[vpos_tmp_index] with that plane's coefficients
+ * (constant emit->clip_plane_const[n]).  Plane n is written to component
+ * n % 4 of output register emit->clip_dist_out_index + n / 4.
+ * Not used when the shader declares CLIPVERTEX or CLIPDISTANCE outputs itself.
+ */
+static void
+emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
+                             unsigned vpos_tmp_index)
+{
+   const unsigned plane_count = util_bitcount(emit->key.clip_plane_enable);
+   struct tgsi_full_src_register position;
+   unsigned plane;
+
+   assert(emit->clip_mode == CLIP_LEGACY);
+   assert(plane_count <= 8);
+   assert(emit->unit == PIPE_SHADER_VERTEX ||
+          emit->unit == PIPE_SHADER_GEOMETRY);
+
+   /* the position operand is the same for every plane */
+   position = make_src_temp_reg(vpos_tmp_index);
+
+   for (plane = 0; plane < plane_count; plane++) {
+      const unsigned out_reg = emit->clip_dist_out_index + plane / 4;
+      const unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_X << (plane % 4);
+      struct tgsi_full_src_register coeffs =
+         make_src_const_reg(emit->clip_plane_const[plane]);
+      struct tgsi_full_dst_register dist =
+         make_dst_reg(TGSI_FILE_OUTPUT, out_reg);
+
+      dist = writemask_dst(&dist, mask);
+
+      /* DP4 clip_dist, plane, vpos */
+      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dist,
+                           &coeffs, &position, FALSE);
+   }
+}
+
+
+/**
+ * Emit the instructions for computing the clip distance results from
+ * the clip vertex temporary, then copy that temporary to the clip vertex
+ * output register.  For each clip plane, the distance is the dot product of
+ * the clip vertex position (found in a temp reg) and the plane coefficients.
+ */
+static void
+emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
+{
+ const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
+ unsigned i;
+ struct tgsi_full_dst_register dst;
+ struct tgsi_full_src_register clipvert_src;
+ const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
+
+ assert(emit->unit == PIPE_SHADER_VERTEX ||
+ emit->unit == PIPE_SHADER_GEOMETRY);
+
+ assert(emit->clip_mode == CLIP_VERTEX);
+
+ clipvert_src = make_src_temp_reg(clip_vertex_tmp);
+
+ for (i = 0; i < num_clip; i++) {
+ struct tgsi_full_src_register plane_src;
+ unsigned reg_index = emit->clip_dist_out_index + i / 4;
+ unsigned comp = i % 4;
+ unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
+
+ /* create dst, src regs */
+ dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
+ dst = writemask_dst(&dst, writemask);
+
+ plane_src = make_src_const_reg(emit->clip_plane_const[i]);
+
+ /* DP4 clip_dist, plane, clip_vertex */
+ emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
+ &plane_src, &clipvert_src, FALSE);
+ }
+
+ /* copy temporary clip vertex register to the clip vertex register */
+
+ assert(emit->clip_vertex_out_index != INVALID_INDEX);
+
+ /**
+ * Temporarily reset the temporary clip vertex register index so
+ * that the copy to the clip vertex register will not attempt
+ * to copy to the temporary register again
+ */
+ emit->clip_vertex_tmp_index = INVALID_INDEX;
+
+ /* MOV clip_vertex, clip_vertex_tmp */
+ dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+ &dst, &clipvert_src, FALSE);
+
+ /**
+ * set the temporary clip vertex register index back to the
+ * temporary index for the next vertex
+ */
+ emit->clip_vertex_tmp_index = clip_vertex_tmp;
+}
+
+/**
+ * Emit a MOV that exchanges the red and blue channels (RGBA <-> BGRA):
+ *   MOV dst, src.zyxw
+ */
+static void
+emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
+              const struct tgsi_full_dst_register *dst,
+              const struct tgsi_full_src_register *src)
+{
+   /* Same register as src, with the X and Z swizzle terms exchanged. */
+   struct tgsi_full_src_register swapped_src =
+      swizzle_src(src,
+                  TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
+
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+   emit_dst_register(emit, dst);
+   emit_src_register(emit, &swapped_src);
+   end_emit_instruction(emit);
+}
+
+
+/**
+ * Convert from 10_10_10_2 normalized to 10_10_10_2_snorm.
+ *
+ * Emits:
+ *   MUL scaled, src, {2, 2, 2, 3}
+ *   GE  bias, src, 0.5
+ *   AND bias, bias, {-2, -2, -2, -1.66666}
+ *   ADD dst, scaled, bias
+ */
+static void
+emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
+                    const struct tgsi_full_dst_register *dst,
+                    const struct tgsi_full_src_register *src)
+{
+   struct tgsi_full_src_register threshold =
+      make_immediate_reg_float(emit, 0.5f);
+   struct tgsi_full_src_register scale =
+      make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
+   struct tgsi_full_src_register wrap_offset =
+      make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
+
+   const unsigned scaled_tmp = get_temp_index(emit);
+   struct tgsi_full_dst_register scaled_dst = make_dst_temp_reg(scaled_tmp);
+   struct tgsi_full_src_register scaled_src = make_src_temp_reg(scaled_tmp);
+
+   const unsigned bias_tmp = get_temp_index(emit);
+   struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
+   struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
+
+   /* scaled = src * scale */
+   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &scaled_dst,
+                        src, &scale, FALSE);
+
+   /* bias = (src >= 0.5), a per-component comparison result */
+   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst,
+                        src, &threshold, FALSE);
+
+   /* bias = bias & wrap_offset */
+   emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
+                        &bias_src, &wrap_offset, FALSE);
+
+   /* dst = scaled + bias */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
+                        &scaled_src, &bias_src, FALSE);
+
+   free_temp_indexes(emit);
+}
+
+
+/**
+ * Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled by multiplying
+ * the x/y/z channels by 1023 and the w channel by 3.
+ */
+static void
+emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_full_dst_register *dst,
+                      const struct tgsi_full_src_register *src)
+{
+   struct tgsi_full_src_register channel_max =
+      make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
+
+   /* MUL dst, src, {1023, 1023, 1023, 3} */
+   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst,
+                        src, &channel_max, FALSE);
+}
+
+
+/**
+ * Convert from R32_UINT to 10_10_10_2_sscaled.
+ * The packed value is taken from the X channel of src; each field is
+ * moved to the top of the word, arithmetically shifted back down and
+ * finally converted to float.
+ */
+static void
+emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_full_dst_register *dst,
+                      const struct tgsi_full_src_register *src)
+{
+   struct tgsi_full_src_register shift_up =
+      make_immediate_reg_int4(emit, 22, 12, 2, 0);
+   struct tgsi_full_src_register shift_down =
+      make_immediate_reg_int4(emit, 22, 22, 22, 30);
+
+   struct tgsi_full_src_register packed_xxxx =
+      scalar_src(src, TGSI_SWIZZLE_X);
+
+   const unsigned field_tmp = get_temp_index(emit);
+   struct tgsi_full_dst_register field_dst = make_dst_temp_reg(field_tmp);
+   struct tgsi_full_src_register field_src = make_src_temp_reg(field_tmp);
+
+   /*
+    * r = (pixel << 22) >> 22;   # signed int in [-512, 511]
+    * g = (pixel << 12) >> 22;   # signed int in [-512, 511]
+    * b = (pixel <<  2) >> 22;   # signed int in [-512, 511]
+    * a = (pixel <<  0) >> 30;   # signed int in [-2, 1]
+    * dst = i_to_f(r,g,b,a);     # convert to float
+    */
+
+   /* ISHL field, pixel.xxxx, {22, 12, 2, 0} */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &field_dst,
+                        &packed_xxxx, &shift_up, FALSE);
+
+   /* ISHR field, field, {22, 22, 22, 30} */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &field_dst,
+                        &field_src, &shift_down, FALSE);
+
+   /* ITOF dst, field */
+   emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &field_src, FALSE);
+
+   free_temp_indexes(emit);
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_ABS instruction.
+ *
+ * The absolute value is applied as a modifier on the source operand, so
+ * the whole operation is a single move:
+ *   MOV dst, abs(s0)
+ */
+static boolean
+emit_abs(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const boolean sat = inst->Instruction.Saturate;
+   struct tgsi_full_src_register magnitude = absolute_src(&inst->Src[0]);
+
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &magnitude, sat);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
+ *
+ * The address register is backed by the temporary recorded in
+ * emit->address_reg_index[], so loading it is one instruction:
+ *   ARL:   FTOI address_tmp, s0
+ *   UARL:  MOV  address_tmp, s0
+ */
+static boolean
+emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
+              const struct tgsi_full_instruction *inst)
+{
+   const unsigned addr_index = inst->Dst[0].Register.Index;
+   /* Any opcode other than ARL is handled as UARL. */
+   const unsigned opcode =
+      (inst->Instruction.Opcode == TGSI_OPCODE_ARL) ? VGPU10_OPCODE_FTOI
+                                                    : VGPU10_OPCODE_MOV;
+   struct tgsi_full_dst_register addr_dst;
+
+   assert(addr_index < MAX_VGPU10_ADDR_REGS);
+   addr_dst = make_dst_temp_reg(emit->address_reg_index[addr_index]);
+
+   emit_instruction_op1(emit, opcode, &addr_dst, &inst->Src[0], FALSE);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_CAL instruction.
+ *
+ * Emits two dwords inside a begin/end instruction pair: an operand token
+ * whose type is VGPU10_OPERAND_TYPE_LABEL, followed by the label number
+ * taken from inst->Label.Label.  Always returns TRUE.
+ *
+ * NOTE(review): unlike the other emitters in this file, no opcode token
+ * (emit_opcode() or a VGPU10OpcodeToken0 dword) is written before the
+ * operand.  Confirm whether a call opcode is expected to precede the
+ * label operand here.
+ */
+static boolean
+emit_cal(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned label = inst->Label.Label;
+ VGPU10OperandToken0 operand;
+ operand.value = 0;
+ operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
+
+ begin_emit_instruction(emit);
+ emit_dword(emit, operand.value); /* label operand token */
+ emit_dword(emit, label); /* label number */
+ end_emit_instruction(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_IABS instruction.
+ *
+ * Per component, dst = (s0 < 0) ? -s0 : s0, which is the larger of the
+ * value and its negation:
+ *   IMAX dst, s0, neg(s0)
+ */
+static boolean
+emit_iabs(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *value = &inst->Src[0];
+   struct tgsi_full_src_register negated = negate_src(value);
+
+   emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
+                        value, &negated, FALSE);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_CMP instruction.
+ *
+ * Per component, dst = (s0 < 0) ? s1 : s2.  Translates into:
+ *   LT   cond, s0, 0.0
+ *   MOVC dst, cond, s1, s2
+ */
+static boolean
+emit_cmp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+   const unsigned cond_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_tmp);
+   struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_tmp);
+   const boolean sat = inst->Instruction.Saturate;
+
+   /* LT cond, s0, 0.0 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &cond_dst,
+                        &inst->Src[0], &zero, FALSE);
+
+   /* MOVC dst, cond, s1, s2 */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
+                        &cond_src, &inst->Src[1], &inst->Src[2], sat);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_DP2A instruction.
+ *
+ * Every enabled channel of dst receives
+ *   s0.x * s1.x + s0.y * s1.y + s2.x
+ * which is accumulated in the X channel of a temporary:
+ *   MAD acc.x, s0.y, s1.y, s2.x
+ *   MAD acc.x, s0.x, s1.x, acc.x
+ *   MOV dst, acc.xxxx
+ */
+static boolean
+emit_dp2a(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *s0 = &inst->Src[0];
+   const struct tgsi_full_src_register *s1 = &inst->Src[1];
+   const struct tgsi_full_src_register *s2 = &inst->Src[2];
+
+   const unsigned acc_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register acc_src = make_src_temp_reg(acc_tmp);
+   struct tgsi_full_dst_register acc_dst = make_dst_temp_reg(acc_tmp);
+
+   struct tgsi_full_src_register acc_xxxx =
+      scalar_src(&acc_src, TGSI_SWIZZLE_X);
+   struct tgsi_full_dst_register acc_x =
+      writemask_dst(&acc_dst, TGSI_WRITEMASK_X);
+
+   struct tgsi_full_src_register s0_xxxx = scalar_src(s0, TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register s0_yyyy = scalar_src(s0, TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s1_xxxx = scalar_src(s1, TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register s1_yyyy = scalar_src(s1, TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s2_xxxx = scalar_src(s2, TGSI_SWIZZLE_X);
+
+   /* MAD acc.x, s0.y, s1.y, s2.x */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &acc_x,
+                        &s0_yyyy, &s1_yyyy, &s2_xxxx, FALSE);
+
+   /* MAD acc.x, s0.x, s1.x, acc.x */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &acc_x,
+                        &s0_xxxx, &s1_xxxx, &acc_xxxx, FALSE);
+
+   /* MOV dst, acc.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &acc_xxxx, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_DPH instruction.
+ *
+ * Implemented as a three-component dot product plus the W channel of
+ * the second operand:
+ *   DP3 dot, s0, s1
+ *   ADD dst, dot, s1.wwww
+ */
+static boolean
+emit_dph(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *s0 = &inst->Src[0];
+   const struct tgsi_full_src_register *s1 = &inst->Src[1];
+
+   struct tgsi_full_src_register s1_wwww =
+      swizzle_src(s1, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
+                  TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
+
+   const unsigned dot_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register dot_src = make_src_temp_reg(dot_tmp);
+   struct tgsi_full_dst_register dot_dst = make_dst_temp_reg(dot_tmp);
+
+   /* DP3 dot, s0, s1 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &dot_dst, s0, s1, FALSE);
+
+   /* ADD dst, dot, s1.wwww */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0],
+                        &dot_src, &s1_wwww, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_DST instruction.
+ *
+ *   dst.x = 1
+ *   dst.y = src0.y * src1.y
+ *   dst.z = src0.z
+ *   dst.w = src1.w
+ *
+ * Only the channels enabled in the instruction's writemask are computed.
+ * Because dst may be the same register as src0 or src1, the channels are
+ * first written to a temporary and copied to dst with one final MOV.
+ */
+static boolean
+emit_dst(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const unsigned writemask = inst->Dst[0].Register.WriteMask;
+   const boolean sat = inst->Instruction.Saturate;
+
+   struct tgsi_full_src_register s0_yyyy =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s0_zzzz =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
+   struct tgsi_full_src_register s1_yyyy =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
+   struct tgsi_full_src_register s1_wwww =
+      scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
+
+   /* staging register for the result */
+   const unsigned result_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register result_src = make_src_temp_reg(result_tmp);
+   struct tgsi_full_dst_register result_dst = make_dst_temp_reg(result_tmp);
+
+   if (writemask & TGSI_WRITEMASK_X) {
+      /* MOV result.x, 1.0 */
+      struct tgsi_full_dst_register out_x =
+         writemask_dst(&result_dst, TGSI_WRITEMASK_X);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &out_x, &one, FALSE);
+   }
+
+   if (writemask & TGSI_WRITEMASK_Y) {
+      /* MUL result.y, s0.y, s1.y */
+      struct tgsi_full_dst_register out_y =
+         writemask_dst(&result_dst, TGSI_WRITEMASK_Y);
+
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &out_y,
+                           &s0_yyyy, &s1_yyyy, sat);
+   }
+
+   if (writemask & TGSI_WRITEMASK_Z) {
+      /* MOV result.z, s0.z */
+      struct tgsi_full_dst_register out_z =
+         writemask_dst(&result_dst, TGSI_WRITEMASK_Z);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &out_z, &s0_zzzz, sat);
+   }
+
+   if (writemask & TGSI_WRITEMASK_W) {
+      /* MOV result.w, s1.w */
+      struct tgsi_full_dst_register out_w =
+         writemask_dst(&result_dst, TGSI_WRITEMASK_W);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &out_w, &s1_wwww, sat);
+   }
+
+   /* MOV dst, result */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &result_src, FALSE);
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+
+/**
+ * Emit code for TGSI_OPCODE_ENDPRIM (GS only).
+ * Translates into a bare CUT instruction with no operands.
+ */
+static boolean
+emit_endprim(struct svga_shader_emitter_v10 *emit,
+             const struct tgsi_full_instruction *inst)
+{
+   assert(emit->unit == PIPE_SHADER_GEOMETRY);
+
+   /* emit_simple() is not usable here: the TGSI instruction carries one
+    * operand (the vertex stream number) which must be dropped for VGPU10,
+    * so the opcode is emitted on its own.
+    */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
+ *
+ * TGSI_OPCODE_EX2 computes a single value from src.x while the VGPU10
+ * EXP computes four values, so the source is replicated:
+ *   EXP dst, s0.xxxx        ; dst.xyzw = 2.0 ^ src.x
+ */
+static boolean
+emit_ex2(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const boolean sat = inst->Instruction.Saturate;
+   struct tgsi_full_src_register exponent_xxxx =
+      swizzle_src(&inst->Src[0],
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+   /* EXP dst, s0.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
+                        &exponent_xxxx, sat);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_EXP instruction.
+ *
+ *   dst.x = 2 ^ floor(s0.x)
+ *   dst.y = s0.x - floor(s0.x)
+ *   dst.z = 2 ^ s0.x
+ *   dst.w = 1.0
+ *
+ * Only the channels enabled in the instruction's writemask are computed.
+ * Because dst may be the same register as src, the channels are first
+ * written to a temporary and copied to dst with one final MOV.
+ */
+static boolean
+emit_exp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const unsigned writemask = inst->Dst[0].Register.WriteMask;
+   const boolean sat = inst->Instruction.Saturate;
+
+   struct tgsi_full_src_register s0_xxxx =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+
+   /* holds floor(s0.x) */
+   const unsigned floor_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register floor_src = make_src_temp_reg(floor_tmp);
+   struct tgsi_full_dst_register floor_dst = make_dst_temp_reg(floor_tmp);
+
+   /* staging register for the result */
+   const unsigned result_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register result_src = make_src_temp_reg(result_tmp);
+   struct tgsi_full_dst_register result_dst = make_dst_temp_reg(result_tmp);
+
+   /* only the X channel of the floor temp is used */
+   floor_dst = writemask_dst(&floor_dst, TGSI_WRITEMASK_X);
+   floor_src = scalar_src(&floor_src, TGSI_SWIZZLE_X);
+
+   /* ROUND_NI floor.x, s0.x   (round towards -infinity) */
+   emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &floor_dst,
+                        &s0_xxxx, FALSE);
+
+   if (writemask & TGSI_WRITEMASK_X) {
+      /* EXP result.x, floor.x */
+      struct tgsi_full_dst_register out_x =
+         writemask_dst(&result_dst, TGSI_WRITEMASK_X);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &out_x,
+                           &floor_src, sat);
+   }
+
+   if (writemask & TGSI_WRITEMASK_Y) {
+      /* ADD result.y, s0.x, -floor.x */
+      struct tgsi_full_dst_register out_y =
+         writemask_dst(&result_dst, TGSI_WRITEMASK_Y);
+      struct tgsi_full_src_register neg_floor_src = negate_src(&floor_src);
+
+      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &out_y,
+                           &s0_xxxx, &neg_floor_src, sat);
+   }
+
+   if (writemask & TGSI_WRITEMASK_Z) {
+      /* EXP result.z, s0.x */
+      struct tgsi_full_dst_register out_z =
+         writemask_dst(&result_dst, TGSI_WRITEMASK_Z);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &out_z,
+                           &s0_xxxx, sat);
+   }
+
+   if (writemask & TGSI_WRITEMASK_W) {
+      /* MOV result.w, 1.0 */
+      struct tgsi_full_dst_register out_w =
+         writemask_dst(&result_dst, TGSI_WRITEMASK_W);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &out_w, &one, FALSE);
+   }
+
+   /* MOV dst, result */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &result_src, FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_IF instruction.
+ *
+ * The opcode token is built by hand because the
+ * VGPU10_INSTRUCTION_TEST_NONZERO flag has to be set on it: the branch
+ * is taken when src.x is non-zero.
+ */
+static boolean
+emit_if(struct svga_shader_emitter_v10 *emit,
+        const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *cond = &inst->Src[0];
+   VGPU10OpcodeToken0 token;
+
+   /* The condition is expected to be a scalar (one replicated channel). */
+   assert(cond->Register.SwizzleX == cond->Register.SwizzleY &&
+          cond->Register.SwizzleX == cond->Register.SwizzleZ &&
+          cond->Register.SwizzleX == cond->Register.SwizzleW);
+
+   token.value = 0;
+   token.opcodeType = VGPU10_OPCODE_IF;
+   token.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
+
+   begin_emit_instruction(emit);
+   emit_dword(emit, token.value);
+   emit_src_register(emit, cond);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
+ * the register components are negative).
+ *
+ *   LT flag, s0, 0.0
+ *   OR flag.x, flag.x, flag.{y,z,w}    ; only for mixed swizzles
+ *   DISCARD_NZ flag.x
+ */
+static boolean
+emit_kill_if(struct svga_shader_emitter_v10 *emit,
+             const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *cond = &inst->Src[0];
+
+   const unsigned flag_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register flag_src = make_src_temp_reg(flag_tmp);
+   struct tgsi_full_dst_register flag_dst = make_dst_temp_reg(flag_tmp);
+
+   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+
+   struct tgsi_full_dst_register flag_dst_x =
+      writemask_dst(&flag_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_src_register flag_src_xxxx =
+      scalar_src(&flag_src, TGSI_SWIZZLE_X);
+
+   /* flag = s0 < 0.0 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &flag_dst, cond, &zero,
+                        FALSE);
+
+   if (!same_swizzle_terms(cond)) {
+      /* When the swizzle is not XXXX, YYYY, ZZZZ or WWWW the per-channel
+       * results must be ORed together into flag.x.  Most uses of KILL_IF
+       * test a single channel, so skipping this is the common case.
+       */
+      static const unsigned other_channels[3] = {
+         TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W
+      };
+      unsigned c;
+
+      for (c = 0; c < 3; c++) {
+         struct tgsi_full_src_register flag_chan =
+            scalar_src(&flag_src, other_channels[c]);
+
+         /* OR flag.x, flag.x, flag.<channel> */
+         emit_instruction_op2(emit, VGPU10_OPCODE_OR, &flag_dst_x,
+                              &flag_src_xxxx, &flag_chan, FALSE);
+      }
+   }
+
+   /* discard if flag.x is non-zero */
+   begin_emit_instruction(emit);
+   emit_discard_opcode(emit, TRUE);
+   emit_src_register(emit, &flag_src_xxxx);
+   end_emit_instruction(emit);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
+ *
+ * VGPU10 only has a conditional discard, so the "discard if zero" form
+ * is applied to the constant 0.0, which always fires.
+ */
+static boolean
+emit_kill(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register always_zero =
+      make_immediate_reg_float(emit, 0.0f);
+
+   /* DISCARD if 0.0 is zero */
+   begin_emit_instruction(emit);
+   emit_discard_opcode(emit, FALSE);
+   emit_src_register(emit, &always_zero);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_LG2 instruction.
+ *
+ * TGSI_OPCODE_LG2 computes a single value from src.x while the VGPU10
+ * LOG computes four values, so the source is replicated:
+ *   LOG dst, s0.xxxx        ; dst.xyzw = log2(src.x)
+ */
+static boolean
+emit_lg2(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const boolean sat = inst->Instruction.Saturate;
+   struct tgsi_full_src_register value_xxxx =
+      swizzle_src(&inst->Src[0],
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+   /* LOG dst, s0.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0],
+                        &value_xxxx, sat);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_LIT instruction.
+ *
+ * Only the channels enabled in the instruction's writemask are computed.
+ * Each one is written to a temporary (tmp_move) and the temporary is
+ * copied to the real destination with a single MOV at the end.
+ * The Z channel is the expensive one: it needs two more temporaries and
+ * expands the power function into LOG/MUL/EXP.
+ * Always returns TRUE.
+ */
+static boolean
+emit_lit(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+ /*
+ * If dst and src are the same register, writing dst channel by channel
+ * would clobber src before it has been fully read, so the result is
+ * built in a temporary and copied out with an extra move.
+ */
+ unsigned tmp_move = get_temp_index(emit);
+ struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
+ struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
+
+ /*
+ * dst.x = 1
+ * dst.y = max(src.x, 0)
+ * dst.z = (src.x > 0) ? max(src.y, 0)^(clamp(src.w, -128, 128)) : 0
+ * dst.w = 1
+ */
+
+ /* MOV dst.x, 1.0 */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ struct tgsi_full_dst_register dst_x =
+ writemask_dst(&move_dst, TGSI_WRITEMASK_X);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
+ }
+
+ /* MOV dst.w, 1.0 */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ struct tgsi_full_dst_register dst_w =
+ writemask_dst(&move_dst, TGSI_WRITEMASK_W);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
+ }
+
+ /* MAX dst.y, src.x, 0.0 */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ struct tgsi_full_dst_register dst_y =
+ writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
+ struct tgsi_full_src_register zero =
+ make_immediate_reg_float(emit, 0.0f);
+ struct tgsi_full_src_register src_xxxx =
+ swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+ TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+ emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
+ &zero, inst->Instruction.Saturate);
+ }
+
+ /*
+ * tmp1 = clamp(src.w, -128, 128);
+ * MAX tmp1, src.w, -128
+ * MIN tmp1, tmp1, 128
+ *
+ * tmp2 = max(src.y, 0);
+ * MAX tmp2, src.y, 0
+ *
+ * tmp1 = pow(tmp2, tmp1);
+ * LOG tmp2, tmp2
+ * MUL tmp1, tmp2, tmp1
+ * EXP tmp1, tmp1
+ *
+ * tmp1 = (src.w == 0) ? 1 : tmp1;
+ * EQ tmp2, 0, src.w
+ * MOVC tmp1, tmp2, 1.0, tmp1
+ *
+ * dst.z = (0 < src.x) ? tmp1 : 0;
+ * LT tmp2, 0, src.x
+ * MOVC dst.z, tmp2, tmp1, 0.0
+ */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ struct tgsi_full_dst_register dst_z =
+ writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
+
+ unsigned tmp1 = get_temp_index(emit);
+ struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
+ struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
+ unsigned tmp2 = get_temp_index(emit);
+ struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
+ struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
+
+ struct tgsi_full_src_register src_xxxx =
+ scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+ struct tgsi_full_src_register src_yyyy =
+ scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+ struct tgsi_full_src_register src_wwww =
+ scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
+
+ struct tgsi_full_src_register zero =
+ make_immediate_reg_float(emit, 0.0f);
+ struct tgsi_full_src_register lowerbound =
+ make_immediate_reg_float(emit, -128.0f);
+ struct tgsi_full_src_register upperbound =
+ make_immediate_reg_float(emit, 128.0f);
+
+ emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
+ &lowerbound, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
+ &upperbound, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
+ &zero, FALSE);
+
+ /* POW tmp1, tmp2, tmp1 -- expanded into the LOG/MUL/EXP below */
+ /* LOG tmp2, tmp2 */
+ emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src,
+ FALSE);
+
+ /* MUL tmp1, tmp2, tmp1 */
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
+ &tmp1_src, FALSE);
+
+ /* EXP tmp1, tmp1 */
+ emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src,
+ FALSE);
+
+ /* EQ tmp2, 0, src.w */
+ emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero,
+ &src_wwww, FALSE);
+ /* MOVC tmp1, tmp2, 1.0, tmp1 -- a zero exponent forces the result to 1 */
+ emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
+ &tmp2_src, &one, &tmp1_src, FALSE);
+
+ /* LT tmp2, 0, src.x */
+ emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero,
+ &src_xxxx, FALSE);
+ /* MOVC dst.z, tmp2, tmp1, 0.0 */
+ emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
+ &tmp2_src, &tmp1_src, &zero, FALSE);
+ }
+
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
+ FALSE);
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_LOG instruction.
+ *
+ *   dst.x = floor(lg2(abs(s0.x)))
+ *   dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
+ *   dst.z = lg2(abs(s0.x))
+ *   dst.w = 1.0
+ *
+ * Only the channels enabled in the instruction's writemask are computed.
+ * Always returns TRUE.
+ */
+static boolean
+emit_log(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register src_xxxx =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
+
+   /*
+    * If dst and src are the same register, writing dst.x or dst.z before
+    * the DIV for dst.y has read s0.x again would feed the DIV a clobbered
+    * value.  As in emit_exp(), build the result in a temporary and copy
+    * it to dst with an extra move at the end.
+    */
+   unsigned tmp_move = get_temp_index(emit);
+   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
+   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
+
+   /* only use X component of temp reg */
+   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
+   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+
+   /* LOG tmp.x, abs(s0.x) */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
+      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst,
+                           &abs_src_xxxx, FALSE);
+   }
+
+   /* MOV dst.z, tmp.x */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+      struct tgsi_full_dst_register dst_z =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z,
+                           &tmp_src, inst->Instruction.Saturate);
+   }
+
+   /* FLR tmp.x, tmp.x */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
+      emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
+                           &tmp_src, FALSE);
+   }
+
+   /* MOV dst.x, tmp.x */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+      struct tgsi_full_dst_register dst_x =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src,
+                           inst->Instruction.Saturate);
+   }
+
+   /* EXP tmp.x, tmp.x */
+   /* DIV dst.y, abs(s0.x), tmp.x */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+      struct tgsi_full_dst_register dst_y =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src,
+                           FALSE);
+      emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
+                           &tmp_src, inst->Instruction.Saturate);
+   }
+
+   /* MOV dst.w, 1.0 */
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+      struct tgsi_full_dst_register dst_w =
+         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
+   }
+
+   /* MOV dst, move_tmp -- dst carries the instruction's own writemask */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
+                        FALSE);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_LRP instruction.
+ *
+ * dst = LRP(s0, s1, s2) = s0 * (s1 - s2) + s2.  Translates into:
+ *   ADD diff, s1, -s2          ; diff = s1 - s2
+ *   MAD dst, s0, diff, s2      ; dst = s0 * diff + s2
+ */
+static boolean
+emit_lrp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *weight = &inst->Src[0];
+   const struct tgsi_full_src_register *s1 = &inst->Src[1];
+   const struct tgsi_full_src_register *s2 = &inst->Src[2];
+
+   const unsigned diff_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register diff_src = make_src_temp_reg(diff_tmp);
+   struct tgsi_full_dst_register diff_dst = make_dst_temp_reg(diff_tmp);
+   struct tgsi_full_src_register neg_s2 = negate_src(s2);
+
+   /* ADD diff, s1, -s2 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &diff_dst,
+                        s1, &neg_s2, FALSE);
+
+   /* MAD dst, s0, diff, s2 */
+   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
+                        weight, &diff_src, s2,
+                        inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_POW instruction.
+ *
+ * TGSI_OPCODE_POW computes one value from src0.x and src1.x, while the
+ * VGPU10 instructions used here compute four values, so both sources
+ * are replicated.  dst.xyzw = src0.x ^ src1.x is expanded into:
+ *   LOG pow, s0.xxxx
+ *   MUL pow, pow, s1.xxxx
+ *   EXP dst, pow
+ */
+static boolean
+emit_pow(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const unsigned pow_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register pow_src = make_src_temp_reg(pow_tmp);
+   struct tgsi_full_dst_register pow_dst = make_dst_temp_reg(pow_tmp);
+   struct tgsi_full_src_register base_xxxx =
+      swizzle_src(&inst->Src[0],
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+   struct tgsi_full_src_register exponent_xxxx =
+      swizzle_src(&inst->Src[1],
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+
+   /* LOG pow, s0.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &pow_dst,
+                        &base_xxxx, FALSE);
+
+   /* MUL pow, pow, s1.xxxx */
+   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pow_dst,
+                        &pow_src, &exponent_xxxx, FALSE);
+
+   /* EXP dst, pow */
+   emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
+                        &pow_src, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
+ *
+ * The quotient is computed in the X channel of a temporary and then
+ * replicated to the destination:
+ *   DIV quot.x, 1.0, s0
+ *   MOV dst, quot.xxxx
+ */
+static boolean
+emit_rcp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+   const unsigned quot_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register quot_src = make_src_temp_reg(quot_tmp);
+   struct tgsi_full_dst_register quot_dst = make_dst_temp_reg(quot_tmp);
+
+   struct tgsi_full_dst_register quot_dst_x =
+      writemask_dst(&quot_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_src_register quot_src_xxxx =
+      scalar_src(&quot_src, TGSI_SWIZZLE_X);
+
+   /* DIV quot.x, 1.0, s0 */
+   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &quot_dst_x,
+                        &one, &inst->Src[0], FALSE);
+
+   /* MOV dst, quot.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &quot_src_xxxx, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_RSQ instruction.
+ *
+ * dst.xyzw = 1 / sqrt(src.x).  The value is computed in the X channel
+ * of a temporary and then replicated to the destination:
+ *   RSQ root.x, s0
+ *   MOV dst, root.xxxx
+ */
+static boolean
+emit_rsq(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const unsigned root_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register root_src = make_src_temp_reg(root_tmp);
+   struct tgsi_full_dst_register root_dst = make_dst_temp_reg(root_tmp);
+
+   struct tgsi_full_dst_register root_dst_x =
+      writemask_dst(&root_dst, TGSI_WRITEMASK_X);
+   struct tgsi_full_src_register root_src_xxxx =
+      scalar_src(&root_src, TGSI_SWIZZLE_X);
+
+   /* RSQ root.x, s0 */
+   emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &root_dst_x,
+                        &inst->Src[0], FALSE);
+
+   /* MOV dst, root.xxxx */
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+                        &root_src_xxxx, inst->Instruction.Saturate);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SCS instruction.
+ *
+ * Emits one SINCOS instruction with two destination operands (dst.y
+ * first, then dst.x) followed by a MOV of the constant
+ * (0, 0, 0, 1) into dst.zw.  Always returns TRUE.
+ *
+ * NOTE(review): unlike emit_dst()/emit_exp()/emit_lit()/emit_log(), this
+ * function never tests inst->Dst[0].Register.WriteMask; the three
+ * destinations are built with fixed X, Y and ZW masks.  Whether
+ * writemask_dst() intersects with the instruction's mask is not visible
+ * here -- confirm that channels the instruction did not enable are not
+ * written.
+ *
+ * NOTE(review): Src[0] is passed to SINCOS as-is rather than replicated
+ * to .xxxx -- confirm that the Y channel of the result really is
+ * sin(src.x) when the source swizzle is not a scalar.
+ */
+static boolean
+emit_scs(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ /* dst.x = cos(src.x)
+ * dst.y = sin(src.x)
+ * dst.z = 0.0
+ * dst.w = 1.0
+ */
+ struct tgsi_full_dst_register dst_x =
+ writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
+ struct tgsi_full_dst_register dst_y =
+ writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
+ struct tgsi_full_dst_register dst_zw =
+ writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);
+
+ struct tgsi_full_src_register zero_one =
+ make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);
+
+ begin_emit_instruction(emit);
+ emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
+ emit_dst_register(emit, &dst_y); /* first destination: sine */
+ emit_dst_register(emit, &dst_x); /* second destination: cosine */
+ emit_src_register(emit, &inst->Src[0]);
+ end_emit_instruction(emit);
+
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+ &dst_zw, &zero_one, inst->Instruction.Saturate);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
+ */
+static boolean
+emit_seq(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ /* dst = SEQ(s0, s1):
+ * dst = s0 == s1 ? 1.0 : 0.0 (per component)
+ * Translates into:
+ * EQ tmp, s0, s1; tmp = s0 == s1 ? 0xffffffff : 0 (per comp)
+ * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
+ */
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+ struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+ struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+ /* EQ tmp, s0, s1 */
+ emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
+ &inst->Src[1], FALSE);
+
+ /* MOVC dst, tmp, one, zero */
+ emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
+ &one, &zero, FALSE);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
+ */
+static boolean
+emit_sge(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ /* dst = SGE(s0, s1):
+ * dst = s0 >= s1 ? 1.0 : 0.0 (per component)
+ * Translates into:
+ * GE tmp, s0, s1; tmp = s0 >= s1 ? 0xffffffff : 0 (per comp)
+ * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
+ */
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+ struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+ struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+ /* GE tmp, s0, s1 */
+ emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
+ &inst->Src[1], FALSE);
+
+ /* MOVC dst, tmp, one, zero */
+ emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
+ &one, &zero, FALSE);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SGT (Set Greater than): LT with swapped sources, then MOVC.
+ */
+static boolean
+emit_sgt(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ /* dst = SGT(s0, s1):
+ * dst = s0 > s1 ? 1.0 : 0.0 (per component)
+ * Translates into:
+ * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
+ * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
+ */
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+ struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+ struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+ /* LT tmp, s1, s0 (note the swapped operand order) */
+ emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
+ &inst->Src[0], FALSE);
+
+ /* MOVC dst, tmp, one, zero */
+ emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
+ &one, &zero, FALSE);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS, both via VGPU10_OPCODE_SINCOS.
+ */
+static boolean
+emit_sincos(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+
+ struct tgsi_full_src_register tmp_src_xxxx =
+ scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+ struct tgsi_full_dst_register tmp_dst_x =
+ writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
+
+ begin_emit_instruction(emit);
+ emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
+
+ if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
+ {
+ emit_dst_register(emit, &tmp_dst_x); /* first destination register */
+ emit_null_dst_register(emit); /* second destination register */
+ }
+ else {
+ emit_null_dst_register(emit); /* first destination register (unused here) */
+ emit_dst_register(emit, &tmp_dst_x); /* second destination register */
+ }
+
+ emit_src_register(emit, &inst->Src[0]);
+ end_emit_instruction(emit);
+
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+ &tmp_src_xxxx, inst->Instruction.Saturate); /* MOV dst, tmp.xxxx */
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
+ */
+static boolean
+emit_sle(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ /* dst = SLE(s0, s1):
+ * dst = s0 <= s1 ? 1.0 : 0.0 (per component)
+ * Translates into:
+ * GE tmp, s1, s0; tmp = s1 >= s0 ? 0xffffffff : 0 (per comp)
+ * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
+ */
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+ struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+ struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+ /* GE tmp, s1, s0 (note the swapped operand order) */
+ emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
+ &inst->Src[0], FALSE);
+
+ /* MOVC dst, tmp, one, zero */
+ emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
+ &one, &zero, FALSE);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SLT (Set Less than): LT on (s0, s1), then MOVC.
+ */
+static boolean
+emit_slt(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ /* dst = SLT(s0, s1):
+ * dst = s0 < s1 ? 1.0 : 0.0 (per component)
+ * Translates into:
+ * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
+ * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
+ */
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+ struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+ struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+ /* LT tmp, s0, s1 */
+ emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
+ &inst->Src[1], FALSE);
+
+ /* MOVC dst, tmp, one, zero */
+ emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
+ &one, &zero, FALSE);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
+ */
+static boolean
+emit_sne(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ /* dst = SNE(s0, s1):
+ * dst = s0 != s1 ? 1.0 : 0.0 (per component)
+ * Translates into:
+ * NE tmp, s0, s1; tmp = s0 != s1 ? 0xffffffff : 0 (per comp)
+ * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
+ */
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+ struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
+ struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+
+ /* NE tmp, s0, s1 */
+ emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
+ &inst->Src[1], FALSE);
+
+ /* MOVC dst, tmp, one, zero */
+ emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
+ &one, &zero, FALSE);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SSG (Set Sign): dst is -1.0, 0.0 or 1.0 per component.
+ */
+static boolean
+emit_ssg(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
+ * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
+ * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
+ * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
+ * Translates into:
+ * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp)
+ * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component)
+ * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp)
+ * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component)
+ */
+ struct tgsi_full_src_register zero =
+ make_immediate_reg_float(emit, 0.0f);
+ struct tgsi_full_src_register one =
+ make_immediate_reg_float(emit, 1.0f);
+ struct tgsi_full_src_register neg_one =
+ make_immediate_reg_float(emit, -1.0f);
+
+ unsigned tmp1 = get_temp_index(emit);
+ struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
+ struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
+
+ unsigned tmp2 = get_temp_index(emit);
+ struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
+ struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
+
+ emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
+ &zero, FALSE); /* LT tmp1, src, zero */
+ emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
+ &neg_one, &zero, FALSE); /* MOVC tmp2, tmp1, -1.0, 0.0 */
+ emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
+ &inst->Src[0], FALSE); /* LT tmp1, zero, src (tmp1 is reused) */
+ emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
+ &one, &tmp2_src, FALSE); /* MOVC dst, tmp1, 1.0, tmp2 */
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign): dst is -1, 0 or 1 per component.
+ */
+static boolean
+emit_issg(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
+ * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
+ * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
+ * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
+ * Translates into:
+ * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component)
+ * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component)
+ * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component)
+ */
+ struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); /* NOTE(review): float 0.0f immediate used as the integer zero operand -- presumably relies on 0.0f being all-zero bits; confirm */
+
+ unsigned tmp1 = get_temp_index(emit);
+ struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
+ struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
+
+ unsigned tmp2 = get_temp_index(emit);
+ struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
+ struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
+
+ struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
+
+ emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
+ &inst->Src[0], &zero, FALSE); /* ILT tmp1, src, 0 */
+ emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
+ &zero, &inst->Src[0], FALSE); /* ILT tmp2, 0, src */
+ emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
+ &tmp1_src, &neg_tmp2, FALSE); /* IADD dst, tmp1, neg(tmp2) */
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SUB instruction (as ADD with a negated second source).
+ */
+static boolean
+emit_sub(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ /* dst = SUB(s0, s1):
+ * dst = s0 - s1
+ * Translates into:
+ * ADD dst, s0, neg(s1)
+ */
+ struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]);
+
+ /* ADD dst, s0, neg(s1); the instruction's Saturate flag is passed through */
+ emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0],
+ &inst->Src[0], &neg_src1,
+ inst->Instruction.Saturate);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit a comparison instruction. The dest register will get
+ * 0 or ~0 values depending on the outcome of comparing src0 to src1 with func.
+ */
+static void
+emit_comparison(struct svga_shader_emitter_v10 *emit,
+ SVGA3dCmpFunc func,
+ const struct tgsi_full_dst_register *dst,
+ const struct tgsi_full_src_register *src0,
+ const struct tgsi_full_src_register *src1)
+{
+ struct tgsi_full_src_register immediate;
+ VGPU10OpcodeToken0 opcode0;
+ boolean swapSrc = FALSE;
+
+ /* Sanity checks for svga vs. gallium enums (SVGA3D_CMP_x == PIPE_FUNC_x + 1 for the two values checked) */
+ STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
+ STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
+
+ opcode0.value = 0;
+
+ switch (func) {
+ case SVGA3D_CMP_NEVER:
+ immediate = make_immediate_reg_int(emit, 0);
+ /* MOV dst, {0} */
+ begin_emit_instruction(emit);
+ emit_dword(emit, VGPU10_OPCODE_MOV);
+ emit_dst_register(emit, dst);
+ emit_src_register(emit, &immediate);
+ end_emit_instruction(emit);
+ return;
+ case SVGA3D_CMP_ALWAYS:
+ immediate = make_immediate_reg_int(emit, -1);
+ /* MOV dst, {-1} */
+ begin_emit_instruction(emit);
+ emit_dword(emit, VGPU10_OPCODE_MOV);
+ emit_dst_register(emit, dst);
+ emit_src_register(emit, &immediate);
+ end_emit_instruction(emit);
+ return;
+ case SVGA3D_CMP_LESS:
+ opcode0.opcodeType = VGPU10_OPCODE_LT;
+ break;
+ case SVGA3D_CMP_EQUAL:
+ opcode0.opcodeType = VGPU10_OPCODE_EQ;
+ break;
+ case SVGA3D_CMP_LESSEQUAL:
+ opcode0.opcodeType = VGPU10_OPCODE_GE;
+ swapSrc = TRUE; /* src0 <= src1 is emitted as GE src1, src0 */
+ break;
+ case SVGA3D_CMP_GREATER:
+ opcode0.opcodeType = VGPU10_OPCODE_LT;
+ swapSrc = TRUE; /* src0 > src1 is emitted as LT src1, src0 */
+ break;
+ case SVGA3D_CMP_NOTEQUAL:
+ opcode0.opcodeType = VGPU10_OPCODE_NE;
+ break;
+ case SVGA3D_CMP_GREATEREQUAL:
+ opcode0.opcodeType = VGPU10_OPCODE_GE;
+ break;
+ default:
+ assert(!"Unexpected comparison mode");
+ opcode0.opcodeType = VGPU10_OPCODE_EQ; /* opcode used if execution continues past the assert */
+ }
+
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ emit_dst_register(emit, dst);
+ if (swapSrc) {
+ emit_src_register(emit, src1);
+ emit_src_register(emit, src0);
+ }
+ else {
+ emit_src_register(emit, src0);
+ emit_src_register(emit, src1);
+ }
+ end_emit_instruction(emit);
+}
+
+
+/**
+ * Get texel/address offsets for a texture instruction; offsets[] is zeroed unless NumOffsets == 1.
+ */
+static void
+get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst, int offsets[3])
+{
+ if (inst->Texture.NumOffsets == 1) {
+ /* According to the OpenGL Shading Language spec, the offsets are only
+ * fetched from a previously-declared immediate/literal.
+ */
+ const struct tgsi_texture_offset *off = inst->TexOffsets;
+ const unsigned index = off[0].Index;
+ const unsigned swizzleX = off[0].SwizzleX;
+ const unsigned swizzleY = off[0].SwizzleY;
+ const unsigned swizzleZ = off[0].SwizzleZ;
+ const union tgsi_immediate_data *imm = emit->immediates[index];
+
+ assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
+
+ offsets[0] = imm[swizzleX].Int; /* each offset is the immediate component picked by the swizzle */
+ offsets[1] = imm[swizzleY].Int;
+ offsets[2] = imm[swizzleZ].Int;
+ }
+ else {
+ offsets[0] = offsets[1] = offsets[2] = 0;
+ }
+}
+
+
+/**
+ * Set up the coordinate register for texture sampling.
+ * When we're sampling from a RECT texture we have to scale the
+ * unnormalized coordinate to a normalized coordinate.
+ * We do that by multiplying the coordinate by an "extra" constant.
+ * An alternative would be to use the RESINFO instruction to query the
+ * texture's size.  Returns the register to sample with (a temp, or *coord).
+ */
+static struct tgsi_full_src_register
+setup_texcoord(struct svga_shader_emitter_v10 *emit,
+ unsigned unit,
+ const struct tgsi_full_src_register *coord)
+{
+ if (emit->key.tex[unit].unnormalized) {
+ unsigned scale_index = emit->texcoord_scale_index[unit];
+ unsigned tmp = get_temp_index(emit); /* not released in this function */
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+ struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
+
+ /* MUL tmp, coord, const[scale_index] */
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
+ coord, &scale_src, FALSE);
+ return tmp_src;
+ }
+ else {
+ /* use texcoord as-is */
+ return *coord;
+ }
+}
+
+
+/**
+ * For SAMPLE_C instructions, emit the extra src register which indicates
+ * the reference/comparison value.
+ */
+static void
+emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
+ unsigned target,
+ const struct tgsi_full_src_register *coord)
+{
+ struct tgsi_full_src_register coord_src_ref;
+ unsigned component;
+
+ assert(tgsi_is_shadow_target(target));
+
+ assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */
+ if (target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
+ target == TGSI_TEXTURE_SHADOWCUBE)
+ component = TGSI_SWIZZLE_W; /* reference value is taken from coord.w */
+ else
+ component = TGSI_SWIZZLE_Z; /* all other shadow targets: coord.z */
+
+ coord_src_ref = scalar_src(coord, component);
+
+ emit_src_register(emit, &coord_src_ref);
+}
+
+
+/**
+ * Info for implementing texture swizzles.
+ * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
+ * functions use this to encapsulate the extra steps needed to perform
+ * a texture swizzle, or shadow/depth comparisons.
+ * The shadow/depth comparison is only done here for the cases where
+ * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
+ */
+struct tex_swizzle_info
+{
+ boolean swizzled; /**< TRUE if the unit's swizzle is not the identity XYZW */
+ boolean shadow_compare; /**< do the shadow/depth comparison in end_tex_swizzle()? */
+ unsigned unit; /**< texture unit; only set when a temp is in use */
+ unsigned texture_target; /**< TGSI_TEXTURE_x */
+ struct tgsi_full_src_register tmp_src; /**< temp as a source; only set when a temp is in use */
+ struct tgsi_full_dst_register tmp_dst; /**< temp as a destination; only set when a temp is in use */
+ const struct tgsi_full_dst_register *inst_dst; /**< the instruction's Dst[0] */
+ const struct tgsi_full_src_register *coord_src; /**< the instruction's Src[0] */
+};
+
+
+/**
+ * Do setup for handling texture swizzles or shadow compares.
+ * \param unit the texture unit
+ * \param inst the TGSI texture instruction
+ * \param shadow_compare do shadow/depth comparison?
+ * \param swz returns the swizzle info
+ */
+static void
+begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
+ unsigned unit,
+ const struct tgsi_full_instruction *inst,
+ boolean shadow_compare,
+ struct tex_swizzle_info *swz)
+{
+ swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
+ emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
+ emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
+ emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
+
+ swz->shadow_compare = shadow_compare;
+ swz->texture_target = inst->Texture.Texture;
+
+ if (swz->swizzled || shadow_compare) {
+ /* Allocate temp register for the result of the SAMPLE instruction
+ * and the source of the MOV/compare/swizzle instructions.
+ */
+ unsigned tmp = get_temp_index(emit);
+ swz->tmp_src = make_src_temp_reg(tmp);
+ swz->tmp_dst = make_dst_temp_reg(tmp);
+
+ swz->unit = unit; /* unit, tmp_src and tmp_dst are left unset on the other path */
+ }
+ swz->inst_dst = &inst->Dst[0];
+ swz->coord_src = &inst->Src[0];
+}
+
+
+/**
+ * Returns the register to put the SAMPLE instruction results into.
+ * This will either be the original instruction dst reg (if no swizzle
+ * and no shadow comparison) or the temporary reg otherwise.
+ */
+static const struct tgsi_full_dst_register *
+get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
+{
+ return (swz->swizzled || swz->shadow_compare)
+ ? &swz->tmp_dst : swz->inst_dst;
+}
+
+
+/**
+ * This emits the MOV instruction that actually implements a texture swizzle
+ * and/or the instructions for a shadow comparison.
+ */
+static void
+end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
+ const struct tex_swizzle_info *swz)
+{
+ if (swz->shadow_compare) {
+ /* Emit extra instructions to compare the fetched texel value against
+ * a texture coordinate component. The result of the comparison
+ * is 0.0 or 1.0.
+ */
+ struct tgsi_full_src_register coord_src;
+ struct tgsi_full_src_register texel_src =
+ scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
+ struct tgsi_full_src_register one =
+ make_immediate_reg_float(emit, 1.0f);
+ /* convert gallium comparison func to SVGA comparison func */
+ SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
+
+ assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+ switch (swz->texture_target) {
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y);
+ break;
+ case TGSI_TEXTURE_SHADOWCUBE:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W);
+ break;
+ default:
+ assert(!"Unexpected texture target in end_tex_swizzle()");
+ coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
+ }
+
+ /* COMPARE tmp, texel, coord */
+ /* XXX it would seem that the texel and coord arguments should
+ * be transposed here, but piglit tests indicate otherwise.
+ */
+ emit_comparison(emit, compare_func,
+ &swz->tmp_dst, &texel_src, &coord_src);
+
+ /* AND dest, tmp, {1.0}: turns the comparison's 0 / ~0 into 0.0 / 1.0 */
+ begin_emit_instruction(emit);
+ emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
+ if (swz->swizzled) {
+ emit_dst_register(emit, &swz->tmp_dst); /* keep in tmp for the swizzle MOV below */
+ }
+ else {
+ emit_dst_register(emit, swz->inst_dst);
+ }
+ emit_src_register(emit, &swz->tmp_src);
+ emit_src_register(emit, &one);
+ end_emit_instruction(emit);
+ }
+
+ if (swz->swizzled) {
+ unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
+ unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
+ unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
+ unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
+ unsigned writemask_0 = 0, writemask_1 = 0;
+ boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type);
+
+ /* Swizzle w/out zero/one terms (those channels are overwritten below) */
+ struct tgsi_full_src_register src_swizzled =
+ swizzle_src(&swz->tmp_src,
+ swz_r < PIPE_SWIZZLE_ZERO ? swz_r : PIPE_SWIZZLE_RED,
+ swz_g < PIPE_SWIZZLE_ZERO ? swz_g : PIPE_SWIZZLE_GREEN,
+ swz_b < PIPE_SWIZZLE_ZERO ? swz_b : PIPE_SWIZZLE_BLUE,
+ swz_a < PIPE_SWIZZLE_ZERO ? swz_a : PIPE_SWIZZLE_ALPHA);
+
+ /* MOV dst, color(tmp).<swizzle> */
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+ swz->inst_dst, &src_swizzled, FALSE);
+
+ /* handle swizzle zero terms: one writemask bit per channel (r=0 .. a=3) */
+ writemask_0 = (((swz_r == PIPE_SWIZZLE_ZERO) << 0) |
+ ((swz_g == PIPE_SWIZZLE_ZERO) << 1) |
+ ((swz_b == PIPE_SWIZZLE_ZERO) << 2) |
+ ((swz_a == PIPE_SWIZZLE_ZERO) << 3));
+
+ if (writemask_0) {
+ struct tgsi_full_src_register zero = int_tex ?
+ make_immediate_reg_int(emit, 0) :
+ make_immediate_reg_float(emit, 0.0f);
+ struct tgsi_full_dst_register dst =
+ writemask_dst(swz->inst_dst, writemask_0);
+
+ /* MOV dst.writemask_0, {0,0,0,0} */
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+ &dst, &zero, FALSE);
+ }
+
+ /* handle swizzle one terms: one writemask bit per channel (r=0 .. a=3) */
+ writemask_1 = (((swz_r == PIPE_SWIZZLE_ONE) << 0) |
+ ((swz_g == PIPE_SWIZZLE_ONE) << 1) |
+ ((swz_b == PIPE_SWIZZLE_ONE) << 2) |
+ ((swz_a == PIPE_SWIZZLE_ONE) << 3));
+
+ if (writemask_1) {
+ struct tgsi_full_src_register one = int_tex ?
+ make_immediate_reg_int(emit, 1) :
+ make_immediate_reg_float(emit, 1.0f);
+ struct tgsi_full_dst_register dst =
+ writemask_dst(swz->inst_dst, writemask_1);
+
+ /* MOV dst.writemask_1, {1,1,1,1} */
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
+ }
+ }
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_SAMPLE instruction (separate resource and sampler operands).
+ */
+static boolean
+emit_sample(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ const unsigned resource_unit = inst->Src[1].Register.Index;
+ const unsigned sampler_unit = inst->Src[2].Register.Index;
+ struct tgsi_full_src_register coord;
+ int offsets[3];
+ struct tex_swizzle_info swz_info;
+
+ begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); /* NOTE(review): swizzle is keyed by the sampler unit while setup_texcoord() below uses the resource unit -- confirm this is intended */
+
+ get_texel_offsets(emit, inst, offsets);
+
+ coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
+
+ /* SAMPLE dst, coord(s0), resource, sampler */
+ begin_emit_instruction(emit);
+
+ emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
+ inst->Instruction.Saturate, offsets);
+ emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+ emit_src_register(emit, &coord);
+ emit_resource_register(emit, resource_unit);
+ emit_sampler_register(emit, sampler_unit);
+ end_emit_instruction(emit);
+
+ end_tex_swizzle(emit, &swz_info);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Check if a texture instruction is valid.
+ * An example of an invalid texture instruction is doing shadow comparison
+ * with an integer-valued texture.
+ * If we detect an invalid texture instruction, we replace it with:
+ * MOV dst, {1,1,1,1};
+ * \return TRUE if valid, FALSE if invalid (the replacement MOV has been emitted).
+ */
+static boolean
+is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ const unsigned unit = inst->Src[1].Register.Index;
+ const unsigned target = inst->Texture.Texture;
+ boolean valid = TRUE;
+
+ if (tgsi_is_shadow_target(target) &&
+ is_integer_type(emit->key.tex[unit].return_type)) {
+ debug_printf("Invalid SAMPLE_C with an integer texture!\n");
+ valid = FALSE;
+ }
+ /* XXX other conditions might be checked here in the future */
+
+ if (!valid) {
+ /* emit a MOV dst, {1,1,1,1} instruction. */
+ struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+ begin_emit_instruction(emit);
+ emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+ emit_dst_register(emit, &inst->Dst[0]);
+ emit_src_register(emit, &one);
+ end_emit_instruction(emit);
+ }
+
+ return valid;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
+ */
+static boolean
+emit_tex(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ const uint unit = inst->Src[1].Register.Index;
+ unsigned target = inst->Texture.Texture;
+ unsigned opcode;
+ struct tgsi_full_src_register coord;
+ int offsets[3];
+ struct tex_swizzle_info swz_info;
+
+ /* invalid instructions have already been replaced by a MOV; nothing more to emit */
+ if (!is_valid_tex_instruction(emit, inst))
+ return TRUE;
+
+ begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
+
+ get_texel_offsets(emit, inst, offsets);
+
+ coord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+ /* SAMPLE dst, coord(s0), resource, sampler (SAMPLE_C for shadow targets) */
+ begin_emit_instruction(emit);
+
+ if (tgsi_is_shadow_target(target))
+ opcode = VGPU10_OPCODE_SAMPLE_C;
+ else
+ opcode = VGPU10_OPCODE_SAMPLE;
+
+ emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
+ emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+ emit_src_register(emit, &coord);
+ emit_resource_register(emit, unit);
+ emit_sampler_register(emit, unit);
+ if (opcode == VGPU10_OPCODE_SAMPLE_C) {
+ emit_tex_compare_refcoord(emit, target, &coord);
+ }
+ end_emit_instruction(emit);
+
+ end_tex_swizzle(emit, &swz_info);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXP (projective texture): coord is divided by src0.w, then sampled.
+ */
+static boolean
+emit_txp(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[1].Register.Index;
+   unsigned target = inst->Texture.Texture;
+   unsigned opcode;
+   int offsets[3];
+   unsigned tmp = get_temp_index(emit);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+   struct tgsi_full_src_register src0_wwww =
+      scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
+   struct tgsi_full_src_register coord;
+   struct tex_swizzle_info swz_info;
+
+   /* check that the sampler returns a float */
+   if (!is_valid_tex_instruction(emit, inst)) {
+      free_temp_indexes(emit); /* release the temp taken above before bailing out */
+      return TRUE;
+   }
+
+   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
+   get_texel_offsets(emit, inst, offsets);
+   coord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+   /* DIV tmp, coord, src0.wwww */
+   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
+                        &coord, &src0_wwww, FALSE);
+
+   /* SAMPLE dst, coord(tmp), resource, sampler (SAMPLE_C for shadow targets) */
+   begin_emit_instruction(emit);
+
+   if (tgsi_is_shadow_target(target))
+      opcode = VGPU10_OPCODE_SAMPLE_C;
+   else
+      opcode = VGPU10_OPCODE_SAMPLE;
+
+   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+   emit_src_register(emit, &tmp_src); /* projected coord */
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
+      emit_tex_compare_refcoord(emit, target, &tmp_src);
+   }
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swz_info);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_XPD (cross product) instruction.
+ */
+static boolean
+emit_xpd(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ /* dst.x = src0.y * src1.z - src1.y * src0.z
+ * dst.y = src0.z * src1.x - src1.z * src0.x
+ * dst.z = src0.x * src1.y - src1.x * src0.y
+ * dst.w = 1
+ */
+ struct tgsi_full_src_register s0_xxxx =
+ scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
+ struct tgsi_full_src_register s0_yyyy =
+ scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
+ struct tgsi_full_src_register s0_zzzz =
+ scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
+
+ struct tgsi_full_src_register s1_xxxx =
+ scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
+ struct tgsi_full_src_register s1_yyyy =
+ scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
+ struct tgsi_full_src_register s1_zzzz =
+ scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
+
+ unsigned tmp1 = get_temp_index(emit);
+ struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
+ struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
+
+ unsigned tmp2 = get_temp_index(emit);
+ struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
+ struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
+ struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
+
+ unsigned tmp3 = get_temp_index(emit);
+ struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
+ struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
+ struct tgsi_full_dst_register tmp3_dst_x =
+ writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
+ struct tgsi_full_dst_register tmp3_dst_y =
+ writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
+ struct tgsi_full_dst_register tmp3_dst_z =
+ writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
+ struct tgsi_full_dst_register tmp3_dst_w =
+ writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
+
+ /* Note: we put all the intermediate computations into tmp3 in case
+ * the XPD dest register is the same as one of the src regs (in which
+ * case we could clobber a src reg before we're done with it).
+ *
+ * Note: we could get by with just one temp register instead of three
+ * since we're doing scalar operations and there's enough room in one
+ * temp for everything.
+ */
+
+ /* MUL tmp1, src0.y, src1.z */
+ /* MUL tmp2, src1.y, src0.z */
+ /* ADD tmp3.x, tmp1, -tmp2 (skipped if dst.x is not written) */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
+ &s0_yyyy, &s1_zzzz, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
+ &s1_yyyy, &s0_zzzz, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
+ &tmp1_src, &neg_tmp2_src, FALSE);
+ }
+
+ /* MUL tmp1, src0.z, src1.x */
+ /* MUL tmp2, src1.z, src0.x */
+ /* ADD tmp3.y, tmp1, -tmp2 (skipped if dst.y is not written) */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
+ &s1_xxxx, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
+ &s0_xxxx, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
+ &tmp1_src, &neg_tmp2_src, FALSE);
+ }
+
+ /* MUL tmp1, src0.x, src1.y */
+ /* MUL tmp2, src1.x, src0.y */
+ /* ADD tmp3.z, tmp1, -tmp2 (skipped if dst.z is not written) */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
+ &s1_yyyy, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
+ &s0_yyyy, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
+ &tmp1_src, &neg_tmp2_src, FALSE);
+ }
+
+ /* MOV tmp3.w, 1.0 */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ struct tgsi_full_src_register one =
+ make_immediate_reg_float(emit, 1.0f);
+
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
+ }
+
+ /* MOV dst, tmp3 (carries the instruction's Saturate flag) */
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
+ inst->Instruction.Saturate);
+
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
+ */
+static boolean
+emit_txd(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ const uint unit = inst->Src[3].Register.Index; /* Src[1] and Src[2] carry the derivatives */
+ unsigned target = inst->Texture.Texture;
+ int offsets[3];
+ struct tgsi_full_src_register coord;
+ struct tex_swizzle_info swz_info;
+
+ begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
+ &swz_info); /* shadow targets get their comparison done in end_tex_swizzle() */
+
+ get_texel_offsets(emit, inst, offsets);
+
+ coord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+ /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
+ begin_emit_instruction(emit);
+ emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
+ inst->Instruction.Saturate, offsets);
+ emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+ emit_src_register(emit, &coord);
+ emit_resource_register(emit, unit);
+ emit_sampler_register(emit, unit);
+ emit_src_register(emit, &inst->Src[1]); /* Xderiv */
+ emit_src_register(emit, &inst->Src[2]); /* Yderiv */
+ end_emit_instruction(emit);
+
+ end_tex_swizzle(emit, &swz_info);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit LD (or LD_MS for MSAA textures) for TGSI_OPCODE_TXF (texel fetch).
+ */
+static boolean
+emit_txf(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ const uint unit = inst->Src[1].Register.Index;
+ const unsigned msaa = emit->key.tex[unit].texture_msaa;
+ int offsets[3];
+ struct tex_swizzle_info swz_info;
+
+ begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
+
+ get_texel_offsets(emit, inst, offsets);
+
+ if (msaa) {
+ /* Fetch one sample from an MSAA texture; sample index is in coord.w */
+ struct tgsi_full_src_register sampleIndex =
+ scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
+ /* LD_MS dst, coord(s0), resource, sampleIndex(s0.w) */
+ begin_emit_instruction(emit);
+ emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
+ inst->Instruction.Saturate, offsets);
+ emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+ emit_src_register(emit, &inst->Src[0]);
+ emit_resource_register(emit, unit);
+ emit_src_register(emit, &sampleIndex);
+ end_emit_instruction(emit);
+ }
+ else {
+ /* Fetch one texel specified by integer coordinate */
+ /* LD dst, coord(s0), resource */
+ begin_emit_instruction(emit);
+ emit_sample_opcode(emit, VGPU10_OPCODE_LD,
+ inst->Instruction.Saturate, offsets);
+ emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+ emit_src_register(emit, &inst->Src[0]);
+ emit_resource_register(emit, unit);
+ end_emit_instruction(emit);
+ }
+
+ end_tex_swizzle(emit, &swz_info);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
+ * or TGSI_OPCODE_TXB2 (for cube shadow maps; bias is Src[1].x, not Src[0].w).
+ */
+static boolean
+emit_txl_txb(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned target = inst->Texture.Texture;
+ unsigned opcode, unit;
+ int offsets[3];
+ struct tgsi_full_src_register coord, lod_bias;
+ struct tex_swizzle_info swz_info;
+
+ assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
+ lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
+ unit = inst->Src[2].Register.Index;
+ }
+ else {
+ lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
+ unit = inst->Src[1].Register.Index;
+ }
+
+ begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
+ &swz_info);
+
+ get_texel_offsets(emit, inst, offsets);
+
+ coord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+ /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod/bias(s1.x or s0.w) */
+ begin_emit_instruction(emit);
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
+ opcode = VGPU10_OPCODE_SAMPLE_L;
+ }
+ else {
+ opcode = VGPU10_OPCODE_SAMPLE_B;
+ }
+ emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
+ emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+ emit_src_register(emit, &coord);
+ emit_resource_register(emit, unit);
+ emit_sampler_register(emit, unit);
+ emit_src_register(emit, &lod_bias);
+ end_emit_instruction(emit);
+
+ end_tex_swizzle(emit, &swz_info);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit code for TGSI_OPCODE_TXQ (texture query); the texture unit is Src[1].
+ */
+static boolean
+emit_txq(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ const uint unit = inst->Src[1].Register.Index;
+
+ if (emit->key.tex[unit].texture_target == PIPE_BUFFER) {
+ /* RESINFO does not support querying texture buffers, so we instead
+ * store texture buffer sizes in shader constants, then copy them to
+ * implement TXQ instead of emitting RESINFO.
+ * MOV dst, const[texture_buffer_size_index[unit]]
+ */
+ struct tgsi_full_src_register size_src =
+ make_src_const_reg(emit->texture_buffer_size_index[unit]);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src,
+ FALSE);
+ } else {
+ /* RESINFO dst, srcMipLevel(s0), resource -- result requested as UINT */
+ begin_emit_instruction(emit);
+ emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
+ emit_dst_register(emit, &inst->Dst[0]);
+ emit_src_register(emit, &inst->Src[0]);
+ emit_resource_register(emit, unit);
+ end_emit_instruction(emit);
+ }
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit a simple instruction (ADD, MUL, MIN, ...): all dst/src copied in order.
+ */
+static boolean
+emit_simple(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ const unsigned opcode = inst->Instruction.Opcode;
+ const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
+ unsigned i;
+
+ begin_emit_instruction(emit);
+ emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
+ inst->Instruction.Saturate);
+ for (i = 0; i < op->num_dst; i++) {
+ emit_dst_register(emit, &inst->Dst[i]);
+ }
+ for (i = 0; i < op->num_src; i++) {
+ emit_src_register(emit, &inst->Src[i]);
+ }
+ end_emit_instruction(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit a VGPU10 instruction that has dst_count dest registers where TGSI has
+ * one: Dst[0] goes in slot dst_index, every other slot gets a null register.
+ */
+static boolean
+emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst,
+ unsigned dst_count,
+ unsigned dst_index)
+{
+ const unsigned opcode = inst->Instruction.Opcode;
+ const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
+ unsigned i;
+
+ begin_emit_instruction(emit);
+ emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
+ inst->Instruction.Saturate);
+
+ for (i = 0; i < dst_count; i++) {
+ if (i == dst_index) {
+ emit_dst_register(emit, &inst->Dst[0]);
+ } else {
+ emit_null_dst_register(emit);
+ }
+ }
+
+ for (i = 0; i < op->num_src; i++) {
+ emit_src_register(emit, &inst->Src[i]);
+ }
+ end_emit_instruction(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Translate a single TGSI instruction to VGPU10 by dispatching on its opcode.
+ */
+static boolean
+emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
+ unsigned inst_number,
+ const struct tgsi_full_instruction *inst)
+{
+ const unsigned opcode = inst->Instruction.Opcode;
+
+ switch (opcode) {
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_BGNLOOP:
+ case TGSI_OPCODE_BRK:
+ case TGSI_OPCODE_CEIL:
+ case TGSI_OPCODE_CONT:
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
+ case TGSI_OPCODE_DIV:
+ case TGSI_OPCODE_DP2:
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_ELSE:
+ case TGSI_OPCODE_ENDIF:
+ case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_ENDSUB:
+ case TGSI_OPCODE_F2I:
+ case TGSI_OPCODE_F2U:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_FRC:
+ case TGSI_OPCODE_FSEQ:
+ case TGSI_OPCODE_FSGE:
+ case TGSI_OPCODE_FSLT:
+ case TGSI_OPCODE_FSNE:
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_INEG:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_MAD:
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_NOP:
+ case TGSI_OPCODE_NOT:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_RET:
+ case TGSI_OPCODE_UADD:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_UMAD:
+ case TGSI_OPCODE_UMAX:
+ case TGSI_OPCODE_ROUND:
+ case TGSI_OPCODE_SQRT:
+ case TGSI_OPCODE_SHL:
+ case TGSI_OPCODE_TRUNC:
+ case TGSI_OPCODE_U2F:
+ case TGSI_OPCODE_UCMP:
+ case TGSI_OPCODE_USHR:
+ case TGSI_OPCODE_USNE:
+ case TGSI_OPCODE_XOR:
+ /* simple instructions: opcode translated, operands emitted unchanged */
+ return emit_simple(emit, inst);
+
+
+ case TGSI_OPCODE_EMIT:
+ return emit_vertex(emit, inst);
+ case TGSI_OPCODE_ENDPRIM:
+ return emit_endprim(emit, inst);
+ case TGSI_OPCODE_ABS:
+ return emit_abs(emit, inst);
+ case TGSI_OPCODE_IABS:
+ return emit_iabs(emit, inst);
+ case TGSI_OPCODE_ARL:
+ /* fall-through */
+ case TGSI_OPCODE_UARL:
+ return emit_arl_uarl(emit, inst);
+ case TGSI_OPCODE_BGNSUB:
+ /* no-op: nothing is emitted for BGNSUB */
+ return TRUE;
+ case TGSI_OPCODE_CAL:
+ return emit_cal(emit, inst);
+ case TGSI_OPCODE_CMP:
+ return emit_cmp(emit, inst);
+ case TGSI_OPCODE_COS:
+ return emit_sincos(emit, inst);
+ case TGSI_OPCODE_DP2A:
+ return emit_dp2a(emit, inst);
+ case TGSI_OPCODE_DPH:
+ return emit_dph(emit, inst);
+ case TGSI_OPCODE_DST:
+ return emit_dst(emit, inst);
+ case TGSI_OPCODE_EX2:
+ return emit_ex2(emit, inst);
+ case TGSI_OPCODE_EXP:
+ return emit_exp(emit, inst);
+ case TGSI_OPCODE_IF:
+ return emit_if(emit, inst);
+ case TGSI_OPCODE_KILL:
+ return emit_kill(emit, inst);
+ case TGSI_OPCODE_KILL_IF:
+ return emit_kill_if(emit, inst);
+ case TGSI_OPCODE_LG2:
+ return emit_lg2(emit, inst);
+ case TGSI_OPCODE_LIT:
+ return emit_lit(emit, inst);
+ case TGSI_OPCODE_LOG:
+ return emit_log(emit, inst);
+ case TGSI_OPCODE_LRP:
+ return emit_lrp(emit, inst);
+ case TGSI_OPCODE_POW:
+ return emit_pow(emit, inst);
+ case TGSI_OPCODE_RCP:
+ return emit_rcp(emit, inst);
+ case TGSI_OPCODE_RSQ:
+ return emit_rsq(emit, inst);
+ case TGSI_OPCODE_SAMPLE:
+ return emit_sample(emit, inst);
+ case TGSI_OPCODE_SCS:
+ return emit_scs(emit, inst);
+ case TGSI_OPCODE_SEQ:
+ return emit_seq(emit, inst);
+ case TGSI_OPCODE_SGE:
+ return emit_sge(emit, inst);
+ case TGSI_OPCODE_SGT:
+ return emit_sgt(emit, inst);
+ case TGSI_OPCODE_SIN:
+ return emit_sincos(emit, inst);
+ case TGSI_OPCODE_SLE:
+ return emit_sle(emit, inst);
+ case TGSI_OPCODE_SLT:
+ return emit_slt(emit, inst);
+ case TGSI_OPCODE_SNE:
+ return emit_sne(emit, inst);
+ case TGSI_OPCODE_SSG:
+ return emit_ssg(emit, inst);
+ case TGSI_OPCODE_ISSG:
+ return emit_issg(emit, inst);
+ case TGSI_OPCODE_SUB:
+ return emit_sub(emit, inst);
+ case TGSI_OPCODE_TEX:
+ return emit_tex(emit, inst);
+ case TGSI_OPCODE_TXP:
+ return emit_txp(emit, inst);
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXB2:
+ case TGSI_OPCODE_TXL:
+ return emit_txl_txb(emit, inst);
+ case TGSI_OPCODE_TXD:
+ return emit_txd(emit, inst);
+ case TGSI_OPCODE_TXF:
+ return emit_txf(emit, inst);
+ case TGSI_OPCODE_TXQ:
+ return emit_txq(emit, inst);
+ case TGSI_OPCODE_UIF:
+ return emit_if(emit, inst);
+ case TGSI_OPCODE_XPD:
+ return emit_xpd(emit, inst);
+ case TGSI_OPCODE_UMUL_HI:
+ case TGSI_OPCODE_IMUL_HI:
+ case TGSI_OPCODE_UDIV:
+ case TGSI_OPCODE_IDIV:
+ /* These cases use only the FIRST of two destination registers */
+ return emit_simple_1dst(emit, inst, 2, 0);
+ case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_UMOD:
+ case TGSI_OPCODE_MOD:
+ /* These cases use only the SECOND of two destination registers */
+ return emit_simple_1dst(emit, inst, 2, 1);
+ case TGSI_OPCODE_END:
+ if (!emit_post_helpers(emit))
+ return FALSE;
+ return emit_simple(emit, inst);
+
+ default:
+ debug_printf("Unimplemented tgsi instruction %s\n",
+ tgsi_get_opcode_name(opcode));
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * Emit the extra instructions that write the final vertex position output.
+ * One of: 1. apply the "prescale"/"pretranslate" values (Gallium to VGPU10
+ * coordinate space); 2. undo the viewport transformation (swtnl/draw path);
+ * 3. otherwise, if a position temporary exists, copy it to the output as is.
+ * Nothing is emitted when vposition.out_index is INVALID_INDEX.
+ * \param vs_pos_tmp_index which temporary register contains the vertex pos.
+ */
+static void
+emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
+ unsigned vs_pos_tmp_index)
+{
+ struct tgsi_full_src_register tmp_pos_src;
+ struct tgsi_full_dst_register pos_dst;
+
+ /* Don't bother to emit any extra vertex instructions if vertex position is
+ * not written out
+ */
+ if (emit->vposition.out_index == INVALID_INDEX)
+ return;
+
+ tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
+ pos_dst = make_dst_output_reg(emit->vposition.out_index);
+
+ /* If non-adjusted vertex position register index
+ * is valid, copy the vertex position from the temporary
+ * vertex position register before it is modified by the
+ * prescale computation.
+ */
+ if (emit->vposition.so_index != INVALID_INDEX) {
+ struct tgsi_full_dst_register pos_so_dst =
+ make_dst_output_reg(emit->vposition.so_index);
+
+ /* MOV pos_so, tmp_pos */
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst,
+ &tmp_pos_src, FALSE);
+ }
+
+ if (emit->vposition.need_prescale) {
+ /* This code adjusts the vertex position to match the VGPU10 convention.
+ * If p is the position computed by the shader (usually by applying the
+ * modelview and projection matrices), the new position q is computed by:
+ *
+ * q.x = p.w * trans.x + p.x * scale.x
+ * q.y = p.w * trans.y + p.y * scale.y
+ * q.z = p.w * trans.z + p.z * scale.z;
+ * q.w = p.w * trans.w + p.w;
+ */
+ struct tgsi_full_src_register tmp_pos_src_w =
+ scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
+ struct tgsi_full_dst_register tmp_pos_dst =
+ make_dst_temp_reg(vs_pos_tmp_index);
+ struct tgsi_full_dst_register tmp_pos_dst_xyz =
+ writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
+
+ struct tgsi_full_src_register prescale_scale =
+ make_src_const_reg(emit->vposition.prescale_scale_index);
+ struct tgsi_full_src_register prescale_trans =
+ make_src_const_reg(emit->vposition.prescale_trans_index);
+
+ /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
+ &tmp_pos_src, &prescale_scale, FALSE);
+
+ /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
+ emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
+ &prescale_trans, &tmp_pos_src, FALSE);
+ }
+ else if (emit->key.vs.undo_viewport) {
+ /* This code computes the final vertex position from the temporary
+ * vertex position by undoing the viewport transformation and the
+ * divide-by-W operation (we convert window coords back to clip coords).
+ * This is needed when we use the 'draw' module for fallbacks.
+ * If p is the temp pos in window coords, then the NDC coord q is:
+ * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
+ * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
+ * q.z = p.z * p.w
+ * q.w = p.w
+ * CONST[vs_viewport_index] contains:
+ * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
+ */
+ struct tgsi_full_dst_register tmp_pos_dst =
+ make_dst_temp_reg(vs_pos_tmp_index);
+ struct tgsi_full_dst_register tmp_pos_dst_xy =
+ writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
+ struct tgsi_full_src_register tmp_pos_src_wwww =
+ scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
+
+ struct tgsi_full_dst_register pos_dst_xyz =
+ writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
+ struct tgsi_full_dst_register pos_dst_w =
+ writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
+
+ struct tgsi_full_src_register vp_xyzw =
+ make_src_const_reg(emit->vs.viewport_index);
+ struct tgsi_full_src_register vp_zwww =
+ swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+ TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
+
+ /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
+ emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
+ &tmp_pos_src, &vp_zwww, FALSE);
+
+ /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzw */
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
+ &tmp_pos_src, &vp_xyzw, FALSE);
+
+ /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
+ &tmp_pos_src, &tmp_pos_src_wwww, FALSE);
+
+ /* MOV pos.w, tmp_pos.w */
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w,
+ &tmp_pos_src, FALSE);
+ }
+ else if (vs_pos_tmp_index != INVALID_INDEX) {
+ /* This code is to handle the case where the temporary vertex
+ * position register is created when the vertex shader has stream
+ * output and prescale is disabled because rasterization is to be
+ * discarded. (This pos_dst shadows the identical outer pos_dst.)
+ */
+ struct tgsi_full_dst_register pos_dst =
+ make_dst_output_reg(emit->vposition.out_index);
+
+ /* MOV pos, tmp_pos */
+ begin_emit_instruction(emit);
+ emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+ emit_dst_register(emit, &pos_dst);
+ emit_src_register(emit, &tmp_pos_src);
+ end_emit_instruction(emit);
+ }
+}
+
+static void
+emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
+{
+ if (emit->clip_mode == CLIP_DISTANCE) {
+ /* Copy from clip distance temporary to CLIPDIST & the shadow copy */
+ emit_clip_distance_instructions(emit);
+
+ } else if (emit->clip_mode == CLIP_VERTEX) {
+ /* Convert TGSI CLIPVERTEX to CLIPDIST */
+ emit_clip_vertex_instructions(emit);
+ }
+
+ /**
+ * The legacy user clip planes below are computed from the vertex
+ * position temporary, so they are only handled when there is a valid
+ * vertex position register index. If the shader doesn't output a
+ * vertex position, there is nothing more to emit here.
+ * (CLIP_DISTANCE and CLIP_VERTEX were already handled above.)
+ */
+ if (emit->vposition.out_index == INVALID_INDEX)
+ return;
+
+ /**
+ * Emit per-vertex clipping instructions for legacy user defined clip planes.
+ * NOTE: we must emit the clip distance instructions before the
+ * emit_vpos_instructions() call since the latter function will change
+ * the TEMP[vs_pos_tmp_index] value.
+ */
+ if (emit->clip_mode == CLIP_LEGACY) {
+ /* Emit CLIPDIST for legacy user defined clip planes */
+ emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
+ }
+}
+
+
+/**
+ * Emit extra per-vertex instructions: clipping code first (clip distances),
+ * then the vertex position adjustment. This is called for each GS
+ * emit-vertex instruction and at the end of VS translation.
+ */
+static void
+emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
+{
+ const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
+
+ /* Emit clipping instructions based on clipping mode */
+ emit_clipping_instructions(emit);
+
+ /**
+ * Reset the temporary vertex position register index
+ * so that emit_dst_register() will use the real vertex position output
+ */
+ emit->vposition.tmp_index = INVALID_INDEX;
+
+ /* Emit vertex position instructions, reading the saved temp index */
+ emit_vpos_instructions(emit, vs_pos_tmp_index);
+
+ /* Restore original vposition.tmp_index value for the next GS vertex.
+ * It doesn't matter for VS.
+ */
+ emit->vposition.tmp_index = vs_pos_tmp_index;
+}
+
+/**
+ * Translate the TGSI_OPCODE_EMIT GS instruction: emit the per-vertex helper
+ * code, then the VGPU10 EMIT opcode.
+ * \return TRUE always
+ */
+static boolean
+emit_vertex(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ assert(emit->unit == PIPE_SHADER_GEOMETRY);
+
+ emit_vertex_instructions(emit);
+
+ /* We can't use emit_simple() because the TGSI instruction has one
+ * operand (vertex stream number) which we must ignore for VGPU10.
+ */
+ begin_emit_instruction(emit);
+ emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
+ end_emit_instruction(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * Emit the extra code to convert from VGPU10's boolean front-face register
+ * to TGSI's signed front-face value (1.0 or -1.0, selected with MOVC) in a
+ * temporary. Does nothing if fs.face_input_index is INVALID_INDEX.
+ * TODO: Make temporary front-face register a scalar.
+ */
+static void
+emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
+{
+ assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+ if (emit->fs.face_input_index != INVALID_INDEX) {
+ /* convert vgpu10 boolean face register to gallium +/-1 value */
+ struct tgsi_full_dst_register tmp_dst =
+ make_dst_temp_reg(emit->fs.face_tmp_index);
+ struct tgsi_full_src_register one =
+ make_immediate_reg_float(emit, 1.0f);
+ struct tgsi_full_src_register neg_one =
+ make_immediate_reg_float(emit, -1.0f);
+
+ /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
+ begin_emit_instruction(emit);
+ emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
+ emit_dst_register(emit, &tmp_dst);
+ emit_face_register(emit);
+ emit_src_register(emit, &one);
+ emit_src_register(emit, &neg_one);
+ end_emit_instruction(emit);
+ }
+}
+
+
+/**
+ * Copy fragcoord.xyz into a temporary and store 1/fragcoord.w in its w.
+ */
+static void
+emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
+{
+ assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+ if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
+ struct tgsi_full_dst_register tmp_dst =
+ make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
+ struct tgsi_full_dst_register tmp_dst_xyz =
+ writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
+ struct tgsi_full_dst_register tmp_dst_w =
+ writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
+ struct tgsi_full_src_register one =
+ make_immediate_reg_float(emit, 1.0f);
+ struct tgsi_full_src_register fragcoord =
+ make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
+
+ /* save the input index so it can be restored at the end */
+ unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
+ /* set to invalid to prevent substitution in emit_src_register() */
+ emit->fs.fragcoord_input_index = INVALID_INDEX;
+
+ /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
+ begin_emit_instruction(emit);
+ emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+ emit_dst_register(emit, &tmp_dst_xyz);
+ emit_src_register(emit, &fragcoord);
+ end_emit_instruction(emit);
+
+ /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
+ begin_emit_instruction(emit);
+ emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
+ emit_dst_register(emit, &tmp_dst_w);
+ emit_src_register(emit, &one);
+ emit_src_register(emit, &fragcoord);
+ end_emit_instruction(emit);
+
+ /* restore saved value */
+ emit->fs.fragcoord_input_index = fragcoord_input_index;
+ }
+}
+
+
+/**
+ * Emit extra instructions to adjust VS inputs/attributes into temporaries:
+ * int/uint to float, packed-uint conversions, BGRA swap and/or setting W to
+ * 1, as selected by the per-attribute bitmasks in key.vs.
+ */
+static void
+emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
+{
+ const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
+ const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
+ const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
+ const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
+ const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
+ const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
+ const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
+
+ unsigned adjust_mask = (save_w_1_mask |
+ save_itof_mask |
+ save_utof_mask |
+ save_is_bgra_mask |
+ save_puint_to_snorm_mask |
+ save_puint_to_uscaled_mask |
+ save_puint_to_sscaled_mask);
+
+ assert(emit->unit == PIPE_SHADER_VERTEX);
+
+ if (adjust_mask) {
+ struct tgsi_full_src_register one =
+ make_immediate_reg_float(emit, 1.0f);
+
+ struct tgsi_full_src_register one_int =
+ make_immediate_reg_int(emit, 1);
+
+ /* We need to turn off these bitmasks while emitting the
+ * instructions below, then restore them afterward.
+ */
+ emit->key.vs.adjust_attrib_w_1 = 0;
+ emit->key.vs.adjust_attrib_itof = 0;
+ emit->key.vs.adjust_attrib_utof = 0;
+ emit->key.vs.attrib_is_bgra = 0;
+ emit->key.vs.attrib_puint_to_snorm = 0;
+ emit->key.vs.attrib_puint_to_uscaled = 0;
+ emit->key.vs.attrib_puint_to_sscaled = 0;
+
+ while (adjust_mask) {
+ unsigned index = u_bit_scan(&adjust_mask);
+ unsigned tmp = emit->vs.adjusted_input[index];
+ struct tgsi_full_src_register input_src =
+ make_src_reg(TGSI_FILE_INPUT, index);
+
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst_w =
+ writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
+
+ /* Convert or copy input[index] into tmp; 1u keeps bit 31 well-defined */
+ if (save_itof_mask & (1u << index)) {
+ emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
+ &tmp_dst, &input_src, FALSE);
+ }
+ else if (save_utof_mask & (1u << index)) {
+ emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
+ &tmp_dst, &input_src, FALSE);
+ }
+ else if (save_puint_to_snorm_mask & (1u << index)) {
+ emit_puint_to_snorm(emit, &tmp_dst, &input_src);
+ }
+ else if (save_puint_to_uscaled_mask & (1u << index)) {
+ emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
+ }
+ else if (save_puint_to_sscaled_mask & (1u << index)) {
+ emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
+ }
+ else {
+ assert((save_w_1_mask | save_is_bgra_mask) & (1u << index));
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+ &tmp_dst, &input_src, FALSE);
+ }
+
+ if (save_is_bgra_mask & (1u << index)) {
+ emit_swap_r_b(emit, &tmp_dst, &tmp_src);
+ }
+
+ if (save_w_1_mask & (1u << index)) {
+ /* MOV tmp.w, 1.0 (integer 1 for pure-integer attributes) */
+ if (emit->key.vs.attrib_is_pure_int & (1u << index)) {
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+ &tmp_dst_w, &one_int, FALSE);
+ }
+ else {
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+ &tmp_dst_w, &one, FALSE);
+ }
+ }
+ }
+
+ emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
+ emit->key.vs.adjust_attrib_itof = save_itof_mask;
+ emit->key.vs.adjust_attrib_utof = save_utof_mask;
+ emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
+ emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
+ emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
+ emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
+ }
+}
+
+
+/**
+ * Pre-allocate commonly needed immediates (0.0, 1.0, 0.5, etc.) in the
+ * immediate constant buffer and record their positions. NOTE(review): the
+ * array bound is only asserted after all slots have been written.
+ */
+static void
+alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned n = 0;
+
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
+
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f);
+
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_int4(emit, 0, 1, 0, -1);
+
+ if (emit->key.vs.attrib_puint_to_snorm) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
+ }
+
+ if (emit->key.vs.attrib_puint_to_uscaled) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
+ }
+
+ if (emit->key.vs.attrib_puint_to_sscaled) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_int4(emit, 22, 12, 2, 0);
+
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_int4(emit, 22, 30, 0, 0);
+ }
+
+ assert(n <= Elements(emit->common_immediate_pos));
+ emit->num_common_immediates = n;
+}
+
+
+/**
+ * Emit the declarations, the immediates block and any per-shader-type helper
+ * code needed between the declaration section and the code section.
+ */
+static boolean
+emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
+{
+ /* Properties (geometry shaders only) */
+ if (emit->unit == PIPE_SHADER_GEOMETRY)
+ emit_property_instructions(emit);
+
+ /* Declare inputs */
+ if (!emit_input_declarations(emit))
+ return FALSE;
+
+ /* Declare outputs */
+ if (!emit_output_declarations(emit))
+ return FALSE;
+
+ /* Declare temporary registers */
+ emit_temporaries_declaration(emit);
+
+ /* Declare constant registers */
+ emit_constant_declaration(emit);
+
+ /* Declare samplers and resources */
+ emit_sampler_declarations(emit);
+ emit_resource_declarations(emit);
+
+ /* Declare clip distance output registers */
+ if (emit->unit == PIPE_SHADER_VERTEX ||
+ emit->unit == PIPE_SHADER_GEOMETRY) {
+ emit_clip_distance_declarations(emit);
+ }
+
+ alloc_common_immediates(emit);
+
+ if (emit->unit == PIPE_SHADER_FRAGMENT &&
+ emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
+ float alpha = emit->key.fs.alpha_ref;
+ emit->fs.alpha_ref_index =
+ alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
+ }
+
+ /* Now, emit the constant block containing all the immediates
+ * declared by shader, as well as the extra ones seen above.
+ */
+ emit_vgpu10_immediates_block(emit);
+
+ if (emit->unit == PIPE_SHADER_FRAGMENT) {
+ emit_frontface_instructions(emit);
+ emit_fragcoord_instructions(emit);
+ }
+ else if (emit->unit == PIPE_SHADER_VERTEX) {
+ emit_vertex_attrib_instructions(emit);
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * Emit alpha test code: compare TEMP[fs_color_tmp_index].w against the alpha
+ * reference immediate and discard the fragment if the comparison fails. Also
+ * writes color output 0 from the temp unless the color is broadcast later.
+ */
+static void
+emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
+ unsigned fs_color_tmp_index)
+{
+ /* compare output color's alpha to alpha ref and kill */
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_src_register tmp_src_x =
+ scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+ struct tgsi_full_src_register color_src =
+ make_src_temp_reg(fs_color_tmp_index);
+ struct tgsi_full_src_register color_src_w =
+ scalar_src(&color_src, TGSI_SWIZZLE_W);
+ struct tgsi_full_src_register ref_src =
+ make_src_immediate_reg(emit->fs.alpha_ref_index);
+ struct tgsi_full_dst_register color_dst =
+ make_dst_output_reg(emit->fs.color_out_index[0]);
+
+ assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+ /* tmp = color.w 'alpha_func' alpha_ref */
+ emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
+ &color_src_w, &ref_src);
+
+ /* DISCARD if tmp.x == 0 */
+ begin_emit_instruction(emit);
+ emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */
+ emit_src_register(emit, &tmp_src_x);
+ end_emit_instruction(emit);
+
+ /* If we don't need to broadcast the color below, emit final color here */
+ if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
+ /* MOV output.color, tempcolor */
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
+ &color_src, FALSE); /* XXX saturate? */
+ }
+
+ free_temp_indexes(emit);
+}
+
+
+/**
+ * Emit instructions for writing a single color output to multiple
+ * color buffers (one MOV per buffer, key.fs.write_color0_to_n_cbufs in all).
+ * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
+ * property is set and the number of render targets is greater than one.
+ * \param fs_color_tmp_index index of the temp register that holds the
+ * color to broadcast.
+ */
+static void
+emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
+ unsigned fs_color_tmp_index)
+{
+ const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
+ unsigned i;
+ struct tgsi_full_src_register color_src =
+ make_src_temp_reg(fs_color_tmp_index);
+
+ assert(emit->unit == PIPE_SHADER_FRAGMENT);
+ assert(n > 1);
+
+ for (i = 0; i < n; i++) {
+ unsigned output_reg = emit->fs.color_out_index[i];
+ struct tgsi_full_dst_register color_dst =
+ make_dst_output_reg(output_reg);
+
+ /* Fill in this semantic here since we'll use it later in
+ * emit_dst_register().
+ */
+ emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
+
+ /* MOV output.color[i], tempcolor */
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
+ &color_src, FALSE); /* XXX saturate? */
+ }
+}
+
+
+/**
+ * Emit extra helper code after the original shader code, but before the
+ * last END/RET instruction.
+ * For vertex shaders this is the per-vertex clipping/position code; for
+ * fragment shaders, the alpha test and/or color broadcast. Returns TRUE.
+ */
+static boolean
+emit_post_helpers(struct svga_shader_emitter_v10 *emit)
+{
+ if (emit->unit == PIPE_SHADER_VERTEX) {
+ emit_vertex_instructions(emit);
+ }
+ else if (emit->unit == PIPE_SHADER_FRAGMENT) {
+ const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
+
+ /* We no longer want emit_dst_register() to substitute the
+ * temporary fragment color register for the real color output.
+ */
+ emit->fs.color_tmp_index = INVALID_INDEX;
+
+ if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
+ emit_alpha_test_instructions(emit, fs_color_tmp_index);
+ }
+ if (emit->key.fs.write_color0_to_n_cbufs > 1) {
+ emit_broadcast_color_instructions(emit, fs_color_tmp_index);
+ }
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * Translate all TGSI tokens to VGPU10; returns FALSE at the first failure.
+ */
+static boolean
+emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_token *tokens)
+{
+ struct tgsi_parse_context parse;
+ boolean ret = TRUE;
+ boolean pre_helpers_emitted = FALSE;
+ unsigned inst_number = 0;
+
+ tgsi_parse_init(&parse, tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
+ if (!ret)
+ goto done;
+ break;
+
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
+ if (!ret)
+ goto done;
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (!pre_helpers_emitted) {
+ ret = emit_pre_helpers(emit);
+ if (!ret)
+ goto done;
+ pre_helpers_emitted = TRUE;
+ }
+ ret = emit_vgpu10_instruction(emit, inst_number++,
+ &parse.FullToken.FullInstruction);
+ if (!ret)
+ goto done;
+ break;
+
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
+ if (!ret)
+ goto done;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+done:
+ tgsi_parse_free(&parse);
+ return ret;
+}
+
+
+/**
+ * Emit the first VGPU10 shader tokens.
+ *
+ * Two dwords are written: the program token (version and program type)
+ * and a placeholder for the total shader length, which emit_vgpu10_tail()
+ * patches once translation is complete.
+ *
+ * \return TRUE if both dwords were emitted, FALSE otherwise
+ */
+static boolean
+emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10ProgramToken ptoken;
+
+   /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
+
+   /* Zero the whole token first: only individual fields are assigned
+    * below, so any bits of 'value' not covered by them would otherwise be
+    * emitted with indeterminate contents.
+    */
+   ptoken.value = 0;
+   ptoken.majorVersion = 4;
+   ptoken.minorVersion = 0;
+   ptoken.programType = translate_shader_type(emit->unit);
+   if (!emit_dword(emit, ptoken.value))
+      return FALSE;
+
+   /* Second token: total length of shader, in tokens.  We can't fill this
+    * in until we're all done.  Emit zero for now.
+    */
+   return emit_dword(emit, 0);
+}
+
+
+/**
+ * Finish the VGPU10 token stream: store the final token count in the
+ * second dword of the buffer.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10ProgramToken *header = (VGPU10ProgramToken *) emit->buf;
+
+   /* header[1] is the length token; header[0] is the program token */
+   header[1].value = emit_get_num_tokens(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Rewrite the fragment shader so it reads the BCOLORs and selects between
+ * front and back colors with the FACE register.
+ *
+ * \param tokens  the incoming fragment shader tokens
+ * \return the token array produced by tgsi_add_two_side()
+ */
+static const struct tgsi_token *
+transform_fs_twoside(const struct tgsi_token *tokens)
+{
+   const struct tgsi_token *twoside_tokens;
+
+   /* Flip the constant to dump the shader before the transformation. */
+   if (0) {
+      debug_printf("Before tgsi_add_two_side ------------------\n");
+      tgsi_dump(tokens,0);
+   }
+
+   twoside_tokens = tgsi_add_two_side(tokens);
+
+   /* Flip the constant to dump the shader after the transformation. */
+   if (0) {
+      debug_printf("After tgsi_add_two_side ------------------\n");
+      tgsi_dump(twoside_tokens, 0);
+   }
+
+   return twoside_tokens;
+}
+
+
+/**
+ * Rewrite the fragment shader so it performs polygon stipple.
+ *
+ * The sampler unit reported by the stipple helper is recorded in the
+ * emitter, and the compile key's texture state for that unit is set to a
+ * 2D texture with an identity swizzle.
+ *
+ * \param emit    emitter whose key and FS state are updated
+ * \param tokens  the incoming fragment shader tokens
+ * \return the token array produced by the stipple helper
+ */
+static const struct tgsi_token *
+transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_token *tokens)
+{
+   const struct tgsi_token *stippled;
+   unsigned sampler;
+
+   if (0) {
+      debug_printf("Before pstipple ------------------\n");
+      tgsi_dump(tokens,0);
+   }
+
+   stippled = util_pstipple_create_fragment_shader(tokens, &sampler, 0);
+
+   /* Texture state for the stipple sampler: 2D, identity swizzle */
+   emit->key.tex[sampler].swizzle_r = TGSI_SWIZZLE_X;
+   emit->key.tex[sampler].swizzle_g = TGSI_SWIZZLE_Y;
+   emit->key.tex[sampler].swizzle_b = TGSI_SWIZZLE_Z;
+   emit->key.tex[sampler].swizzle_a = TGSI_SWIZZLE_W;
+   emit->key.tex[sampler].texture_target = PIPE_TEXTURE_2D;
+
+   /* Remember which sampler unit the stipple texture uses */
+   emit->fs.pstipple_sampler_unit = sampler;
+
+   if (0) {
+      debug_printf("After pstipple ------------------\n");
+      tgsi_dump(stippled, 0);
+   }
+
+   return stippled;
+}
+
+/**
+ * Rewrite the fragment shader to support anti-aliased points.
+ *
+ * \param tokens          the incoming fragment shader tokens
+ * \param aa_coord_index  index passed through to tgsi_add_aa_point()
+ * \return the token array produced by tgsi_add_aa_point()
+ */
+static const struct tgsi_token *
+transform_fs_aapoint(const struct tgsi_token *tokens,
+                     int aa_coord_index)
+{
+   const struct tgsi_token *aapoint_tokens;
+
+   /* Flip the constant to dump the shader before the transformation. */
+   if (0) {
+      debug_printf("Before tgsi_add_aa_point ------------------\n");
+      tgsi_dump(tokens,0);
+   }
+
+   aapoint_tokens = tgsi_add_aa_point(tokens, aa_coord_index);
+
+   /* Flip the constant to dump the shader after the transformation. */
+   if (0) {
+      debug_printf("After tgsi_add_aa_point ------------------\n");
+      tgsi_dump(aapoint_tokens, 0);
+   }
+
+   return aapoint_tokens;
+}
+
+/**
+ * This is the main entrypoint for the TGSI -> VGPU10 translator.
+ *
+ * For fragment shaders the TGSI tokens may first be rewritten for two-sided
+ * lighting, polygon stipple and anti-aliased points, as selected by the
+ * compile key.  Any token array created by those rewrites is private to
+ * this function and is released before returning, on success and on
+ * failure alike.
+ *
+ * \param svga    the context; its current VS/GS are used for linking
+ * \param shader  the shader whose TGSI tokens are translated
+ * \param key     the compile key selecting the variant to build
+ * \param unit    PIPE_SHADER_VERTEX, PIPE_SHADER_GEOMETRY or
+ *                PIPE_SHADER_FRAGMENT
+ * \return a newly allocated variant that takes over the emitter's token
+ *         buffer, or NULL if allocation or translation failed
+ */
+struct svga_shader_variant *
+svga_tgsi_vgpu10_translate(struct svga_context *svga,
+                           const struct svga_shader *shader,
+                           const struct svga_compile_key *key,
+                           unsigned unit)
+{
+   struct svga_shader_variant *variant = NULL;
+   struct svga_shader_emitter_v10 *emit;
+   const struct tgsi_token *tokens = shader->tokens;
+   struct svga_vertex_shader *vs = svga->curr.vs;
+   struct svga_geometry_shader *gs = svga->curr.gs;
+
+   assert(unit == PIPE_SHADER_VERTEX ||
+          unit == PIPE_SHADER_GEOMETRY ||
+          unit == PIPE_SHADER_FRAGMENT);
+
+   /* These two flags cannot be used together */
+   assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
+
+   /*
+    * Setup the code emitter
+    */
+   emit = alloc_emitter();
+   if (!emit)
+      return NULL;
+
+   emit->unit = unit;
+   emit->key = *key;
+
+   emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
+                                    emit->key.gs.need_prescale);
+   emit->vposition.tmp_index = INVALID_INDEX;
+   emit->vposition.so_index = INVALID_INDEX;
+   emit->vposition.out_index = INVALID_INDEX;
+
+   emit->fs.color_tmp_index = INVALID_INDEX;
+   emit->fs.face_input_index = INVALID_INDEX;
+   emit->fs.fragcoord_input_index = INVALID_INDEX;
+
+   emit->gs.prim_id_index = INVALID_INDEX;
+
+   emit->clip_dist_out_index = INVALID_INDEX;
+   emit->clip_dist_tmp_index = INVALID_INDEX;
+   emit->clip_dist_so_index = INVALID_INDEX;
+   emit->clip_vertex_out_index = INVALID_INDEX;
+
+   if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
+      emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
+   }
+
+   if (unit == PIPE_SHADER_FRAGMENT) {
+      if (key->fs.light_twoside) {
+         tokens = transform_fs_twoside(tokens);
+         /* Bail out in case the transformation could not produce tokens */
+         if (!tokens)
+            goto cleanup;
+      }
+      if (key->fs.pstipple) {
+         const struct tgsi_token *new_tokens =
+            transform_fs_pstipple(emit, tokens);
+         if (tokens != shader->tokens) {
+            /* free the two-sided shader tokens */
+            tgsi_free_tokens(tokens);
+         }
+         tokens = new_tokens;
+         if (!tokens)
+            goto cleanup;
+      }
+      if (key->fs.aa_point) {
+         const struct tgsi_token *new_tokens =
+            transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
+         if (new_tokens != tokens && tokens != shader->tokens) {
+            /* free the intermediate two-sided/stipple shader tokens,
+             * which would otherwise be leaked once 'tokens' is replaced
+             */
+            tgsi_free_tokens(tokens);
+         }
+         tokens = new_tokens;
+         if (!tokens)
+            goto cleanup;
+      }
+   }
+
+   if (SVGA_DEBUG & DEBUG_TGSI) {
+      debug_printf("#####################################\n");
+      debug_printf("### TGSI Shader %u\n", shader->id);
+      tgsi_dump(tokens, 0);
+   }
+
+   /**
+    * Rescan the header if the token string is different from the one
+    * included in the shader; otherwise, the header info is already up-to-date
+    */
+   if (tokens != shader->tokens) {
+      tgsi_scan_shader(tokens, &emit->info);
+   } else {
+      emit->info = shader->info;
+   }
+
+   emit->num_outputs = emit->info.num_outputs;
+
+   if (unit == PIPE_SHADER_FRAGMENT) {
+      /* Compute FS input remapping to match the output from VS/GS */
+      if (gs) {
+         svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage);
+      } else {
+         assert(vs);
+         svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
+      }
+   } else if (unit == PIPE_SHADER_GEOMETRY) {
+      assert(vs);
+      svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
+   }
+
+   determine_clipping_mode(emit);
+
+   if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
+      if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
+         /* if there is stream output declarations associated
+          * with this shader or the shader writes to ClipDistance
+          * then reserve extra registers for the non-adjusted vertex position
+          * and the ClipDistance shadow copy
+          */
+         emit->vposition.so_index = emit->num_outputs++;
+
+         if (emit->clip_mode == CLIP_DISTANCE) {
+            emit->clip_dist_so_index = emit->num_outputs++;
+            /* more than four clip distances need a second register */
+            if (emit->info.num_written_clipdistance > 4)
+               emit->num_outputs++;
+         }
+      }
+   }
+
+   /*
+    * Do actual shader translation.
+    */
+   if (!emit_vgpu10_header(emit)) {
+      debug_printf("svga: emit VGPU10 header failed\n");
+      goto cleanup;
+   }
+
+   if (!emit_vgpu10_instructions(emit, tokens)) {
+      debug_printf("svga: emit VGPU10 instructions failed\n");
+      goto cleanup;
+   }
+
+   if (!emit_vgpu10_tail(emit)) {
+      debug_printf("svga: emit VGPU10 tail failed\n");
+      goto cleanup;
+   }
+
+   if (emit->register_overflow) {
+      goto cleanup;
+   }
+
+   /*
+    * Create, initialize the 'variant' object.
+    */
+   variant = CALLOC_STRUCT(svga_shader_variant);
+   if (!variant)
+      goto cleanup;
+
+   variant->shader = shader;
+   variant->nr_tokens = emit_get_num_tokens(emit);
+   variant->tokens = (const unsigned *)emit->buf;
+   emit->buf = NULL;  /* buffer is no longer owned by emitter context */
+   memcpy(&variant->key, key, sizeof(*key));
+   variant->id = UTIL_BITMASK_INVALID_INDEX;
+
+   /* The extra constant starting offset starts with the number of
+    * shader constants declared in the shader.
+    */
+   variant->extra_const_start = emit->num_shader_consts[0];
+   if (key->gs.wide_point) {
+      /**
+       * The extra constant added in the transformed shader
+       * for inverse viewport scale is to be supplied by the driver.
+       * So the extra constant starting offset needs to be reduced by 1.
+       */
+      assert(variant->extra_const_start > 0);
+      variant->extra_const_start--;
+   }
+
+   variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
+
+   /** keep track in the variant if flat interpolation is used
+    *  for any of the varyings.
+    */
+   variant->uses_flat_interp = emit->uses_flat_interp;
+
+cleanup:
+   /* Release any transformed token array on every exit path, not only on
+    * success; the shader's own tokens are never freed here.
+    */
+   if (tokens && tokens != shader->tokens) {
+      tgsi_free_tokens(tokens);
+   }
+
+   free_emitter(emit);
+
+   return variant;
+}
#define SVGA_FENCE_FLAG_EXEC (1 << 0)
#define SVGA_FENCE_FLAG_QUERY (1 << 1)
-#define SVGA_SURFACE_USAGE_SHARED (1 << 0)
+#define SVGA_SURFACE_USAGE_SHARED (1 << 0)
+#define SVGA_SURFACE_USAGE_SCANOUT (1 << 1)
+
+#define SVGA_QUERY_FLAG_SET (1 << 0)
+#define SVGA_QUERY_FLAG_REF (1 << 1)
/** Opaque surface handle */
struct svga_winsys_surface;
-
/** Opaque guest-backed objects */
struct svga_winsys_gb_shader;
-
+struct svga_winsys_gb_query;
/**
uint32 *shid,
uint32 *mobid,
uint32 *offset,
- struct svga_winsys_gb_shader *shader);
+ struct svga_winsys_gb_shader *shader,
+ unsigned flags);
/**
* Emit a relocation for a guest-backed context.
uint32 offset,
unsigned flags);
+ /**
+ * Emit a relocation for a guest-backed query object.
+ *
+ * NOTE: Order of this call does matter. It should be the same order
+ * as relocations appear in the command buffer.
+ */
+ void
+ (*query_relocation)(struct svga_winsys_context *swc,
+ SVGAMobId *id,
+ struct svga_winsys_gb_query *query);
+
+ /**
+ * Bind queries to context.
+ * \param flags exactly one of SVGA_QUERY_FLAG_SET/REF
+ */
+ enum pipe_error
+ (*query_bind)(struct svga_winsys_context *sws,
+ struct svga_winsys_gb_query *query,
+ unsigned flags);
+
void
(*commit)(struct svga_winsys_context *swc);
struct svga_winsys_surface *surface,
boolean *rebind);
+ /**
+ * Create and define a DX GB shader that resides in the device COTable.
+ * Caller of this function will issue the DXDefineShader command.
+ */
+ struct svga_winsys_gb_shader *
+ (*shader_create)(struct svga_winsys_context *swc,
+ uint32 shaderId,
+ SVGA3dShaderType shaderType,
+ const uint32 *bytecode,
+ uint32 bytecodeLen);
+
+ /**
+ * Destroy a DX GB shader.
+ * This function will issue the DXDestroyShader command.
+ */
+ void
+ (*shader_destroy)(struct svga_winsys_context *swc,
+ struct svga_winsys_gb_shader *shader);
+
+ /**
+ * Rebind a DX GB resource to a context.
+ * This is called to reference a DX GB resource in the command stream in
+ * order to page in the associated resource in case the memory has been
+ * paged out, and to fence it if necessary after command submission.
+ */
+ enum pipe_error
+ (*resource_rebind)(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *surface,
+ struct svga_winsys_gb_shader *shader,
+ unsigned flags);
};
* \param format Format Device surface format
* \param usage Winsys usage: bitmask of SVGA_SURFACE_USAGE_x flags
* \param size Surface size given in device format
- * \param numFaces Number of faces of the surface (1 or 6)
+ * \param numLayers Number of layers of the surface (or cube faces)
* \param numMipLevels Number of mipmap levels for each face
*
* Returns the surface ID (sid). Surfaces are generic
* - Each face has a list of mipmap levels
*
* - Each mipmap image may have multiple volume
- * slices, if the image is three dimensional.
+ * slices for 3D image, or multiple 2D slices for texture array.
*
* - Each slice is a 2D array of 'blocks'
*
SVGA3dSurfaceFormat format,
unsigned usage,
SVGA3dSize size,
- uint32 numFaces,
- uint32 numMipLevels);
+ uint32 numLayers,
+ uint32 numMipLevels,
+ unsigned sampleCount);
/**
* Creates a surface from a winsys handle.
(*surface_can_create)(struct svga_winsys_screen *sws,
SVGA3dSurfaceFormat format,
SVGA3dSize size,
- uint32 numFaces,
+ uint32 numLayers,
uint32 numMipLevels);
/**
*/
struct svga_winsys_gb_shader *
(*shader_create)(struct svga_winsys_screen *sws,
- SVGA3dShaderType type,
+ SVGA3dShaderType shaderType,
const uint32 *bytecode,
uint32 bytecodeLen);
(*shader_destroy)(struct svga_winsys_screen *sws,
struct svga_winsys_gb_shader *shader);
+ /**
+ * Create and define a GB query.
+ */
+ struct svga_winsys_gb_query *
+ (*query_create)(struct svga_winsys_screen *sws, uint32 len);
+
+ /**
+ * Destroy a GB query.
+ */
+ void
+ (*query_destroy)(struct svga_winsys_screen *sws,
+ struct svga_winsys_gb_query *query);
+
+ /**
+ * Initialize the query state of the query that resides in the slot
+ * specified in offset
+ * \return zero on success.
+ */
+ int
+ (*query_init)(struct svga_winsys_screen *sws,
+ struct svga_winsys_gb_query *query,
+ unsigned offset,
+ SVGA3dQueryState queryState);
+
+ /**
+ * Inquire for the query state and result of the query that resides
+ * in the slot specified in offset
+ */
+ void
+ (*query_get_result)(struct svga_winsys_screen *sws,
+ struct svga_winsys_gb_query *query,
+ unsigned offset,
+ SVGA3dQueryState *queryState,
+ void *result, uint32 resultLen);
+
+ /** Have VGPU v10 hardware? */
+ boolean have_vgpu10;
+
+ /** To rebind resources at the beginning of a new command buffer */
+ boolean need_to_rebind_resources;
};
case SVGA3D_BUMPL6V5U5:
_debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n");
break;
- case SVGA3D_BUMPX8L8V8U8:
- _debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n");
- break;
- case SVGA3D_BUMPL8V8U8:
- _debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n");
- break;
case SVGA3D_ARGB_S10E5:
_debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n");
break;
}
static void
-dump_SVGA3dCmdBindGBShaderConsts(const SVGA3dCmdBindGBShaderConsts *cmd)
-{
- _debug_printf("\t\t.cid = %u\n", cmd->cid);
- _debug_printf("\t\t.shaderType = %u\n", cmd->shaderType);
- _debug_printf("\t\t.shaderConstType = %u\n", cmd->shaderConstType);
- _debug_printf("\t\t.sid = %u\n", cmd->sid);
-}
-
-static void
dump_SVGA3dCmdBindGBSurface(const SVGA3dCmdBindGBSurface *cmd)
{
_debug_printf("\t\t.sid = %u\n", cmd->sid);
body = (const uint8_t *)&cmd[1];
}
break;
- case SVGA_3D_CMD_BIND_SHADERCONSTS:
- _debug_printf("\tSVGA_3D_CMD_BIND_SHADERCONSTS\n");
- {
- const SVGA3dCmdBindGBShaderConsts *cmd = (const SVGA3dCmdBindGBShaderConsts *) body;
- dump_SVGA3dCmdBindGBShaderConsts(cmd);
- body = (const uint8_t *)&cmd[1];
- }
- break;
case SVGA_3D_CMD_BIND_GB_SURFACE:
_debug_printf("\tSVGA_3D_CMD_BIND_GB_SURFACE\n");
{
* It doesn't include those that aren't part of the VPM, like
* point/line coordinates.
*/
- struct vc4_varying_semantic *input_semantics;
+ struct vc4_varying_slot *input_slots;
};
struct vc4_program_stateobj {
struct vc4_uncompiled_shader *bind_vs, *bind_fs;
struct vc4_compiled_shader *cs, *vs, *fs;
uint8_t num_exports;
- /* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index
- * for TGSI_SEMANTIC_GENERIC. Special vs exports (position and point-
- * size) are not included in this
+ /* Indexed by slot. Special vs exports (position and pointsize) are
+ * not included in this
*/
- uint8_t export_linkage[63];
+ uint8_t export_linkage[VARYING_SLOT_VAR0 + 8];
};
struct vc4_constbuf_stateobj {
}
}
assert(output_var);
- unsigned semantic_name = output_var->data.location;
- if (semantic_name != TGSI_SEMANTIC_COLOR)
+ if (output_var->data.location != FRAG_RESULT_COLOR &&
+ output_var->data.location != FRAG_RESULT_DATA0) {
continue;
+ }
nir_function_impl *impl =
nir_cf_node_get_function(&block->cf_node);
*/
#include "vc4_qir.h"
-#include "tgsi/tgsi_info.h"
#include "glsl/nir/nir_builder.h"
/**
/* Replace the old intrinsic with a reference to our reconstructed
* vec4.
*/
- nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec),
- ralloc_parent(b->impl));
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec));
nir_instr_remove(&intr->instr);
}
}
}
assert(input_var);
- int semantic_name = input_var->data.location;
- int semantic_index = input_var->data.index;
/* All TGSI-to-NIR inputs are vec4. */
assert(intr->num_components == 4);
switch (c->stage) {
case QSTAGE_FRAG:
- switch (semantic_name) {
- case TGSI_SEMANTIC_FACE:
+ if (input_var->data.location == VARYING_SLOT_FACE) {
dests[0] = nir_fsub(b,
nir_imm_float(b, 1.0),
nir_fmul(b,
dests[1] = nir_imm_float(b, 0.0);
dests[2] = nir_imm_float(b, 0.0);
dests[3] = nir_imm_float(b, 1.0);
- break;
- case TGSI_SEMANTIC_GENERIC:
+ } else if (input_var->data.location >= VARYING_SLOT_VAR0) {
if (c->fs_key->point_sprite_mask &
- (1 << semantic_index)) {
+ (1 << (input_var->data.location -
+ VARYING_SLOT_VAR0))) {
if (!c->fs_key->is_points) {
dests[0] = nir_imm_float(b, 0.0);
dests[1] = nir_imm_float(b, 0.0);
dests[2] = nir_imm_float(b, 0.0);
dests[3] = nir_imm_float(b, 1.0);
}
- break;
}
break;
case QSTAGE_COORD:
}
}
assert(output_var);
- unsigned semantic_name = output_var->data.location;
if (c->stage == QSTAGE_COORD &&
- (semantic_name != TGSI_SEMANTIC_POSITION &&
- semantic_name != TGSI_SEMANTIC_PSIZE)) {
+ output_var->data.location != VARYING_SLOT_POS &&
+ output_var->data.location != VARYING_SLOT_PSIZ) {
nir_instr_remove(&intr->instr);
return;
}
/* Color output is lowered by vc4_nir_lower_blend(). */
- if (c->stage == QSTAGE_FRAG && semantic_name == TGSI_SEMANTIC_COLOR) {
+ if (c->stage == QSTAGE_FRAG &&
+ (output_var->data.location == FRAG_RESULT_COLOR ||
+ output_var->data.location == FRAG_RESULT_DATA0)) {
intr->const_index[0] *= 4;
return;
}
}
if (inst->src[i].file == QFILE_VARY &&
- c->input_semantics[inst->src[i].index].semantic == 0xff) {
+ c->input_slots[inst->src[i].index].slot == 0xff) {
return true;
}
}
#include "util/ralloc.h"
#include "util/hash_table.h"
#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_parse.h"
#include "glsl/nir/nir.h"
}
static struct qreg
-emit_fragment_varying(struct vc4_compile *c, uint8_t semantic,
- uint8_t index, uint8_t swizzle)
+emit_fragment_varying(struct vc4_compile *c, gl_varying_slot slot,
+ uint8_t swizzle)
{
- uint32_t i = c->num_input_semantics++;
+ uint32_t i = c->num_input_slots++;
struct qreg vary = {
QFILE_VARY,
i
};
- if (c->num_input_semantics >= c->input_semantics_array_size) {
- c->input_semantics_array_size =
- MAX2(4, c->input_semantics_array_size * 2);
+ if (c->num_input_slots >= c->input_slots_array_size) {
+ c->input_slots_array_size =
+ MAX2(4, c->input_slots_array_size * 2);
- c->input_semantics = reralloc(c, c->input_semantics,
- struct vc4_varying_semantic,
- c->input_semantics_array_size);
+ c->input_slots = reralloc(c, c->input_slots,
+ struct vc4_varying_slot,
+ c->input_slots_array_size);
}
- c->input_semantics[i].semantic = semantic;
- c->input_semantics[i].index = index;
- c->input_semantics[i].swizzle = swizzle;
+ c->input_slots[i].slot = slot;
+ c->input_slots[i].swizzle = swizzle;
return qir_VARY_ADD_C(c, qir_FMUL(c, vary, qir_FRAG_W(c)));
}
static void
-emit_fragment_input(struct vc4_compile *c, int attr,
- unsigned semantic_name, unsigned semantic_index)
+emit_fragment_input(struct vc4_compile *c, int attr, gl_varying_slot slot)
{
for (int i = 0; i < 4; i++) {
c->inputs[attr * 4 + i] =
- emit_fragment_varying(c,
- semantic_name,
- semantic_index,
- i);
+ emit_fragment_varying(c, slot, i);
c->num_inputs++;
}
}
static void
add_output(struct vc4_compile *c,
uint32_t decl_offset,
- uint8_t semantic_name,
- uint8_t semantic_index,
- uint8_t semantic_swizzle)
+ uint8_t slot,
+ uint8_t swizzle)
{
uint32_t old_array_size = c->outputs_array_size;
resize_qreg_array(c, &c->outputs, &c->outputs_array_size,
decl_offset + 1);
if (old_array_size != c->outputs_array_size) {
- c->output_semantics = reralloc(c,
- c->output_semantics,
- struct vc4_varying_semantic,
- c->outputs_array_size);
+ c->output_slots = reralloc(c,
+ c->output_slots,
+ struct vc4_varying_slot,
+ c->outputs_array_size);
}
- c->output_semantics[decl_offset].semantic = semantic_name;
- c->output_semantics[decl_offset].index = semantic_index;
- c->output_semantics[decl_offset].swizzle = semantic_swizzle;
+ c->output_slots[decl_offset].slot = slot;
+ c->output_slots[decl_offset].swizzle = swizzle;
}
static void
if (!(c->key->ucp_enables & (1 << i)))
continue;
- struct qreg dist = emit_fragment_varying(c,
- TGSI_SEMANTIC_CLIPDIST,
- i,
- TGSI_SWIZZLE_X);
+ struct qreg dist =
+ emit_fragment_varying(c,
+ VARYING_SLOT_CLIP_DIST0 + (i / 4),
+ i % 4);
qir_SF(c, dist);
*/
uint32_t output_index = c->num_outputs++;
add_output(c, output_index,
- TGSI_SEMANTIC_CLIPDIST,
- plane,
- TGSI_SWIZZLE_X);
+ VARYING_SLOT_CLIP_DIST0 + plane / 4,
+ plane % 4);
struct qreg dist = qir_uniform_f(c, 0.0);
static void
emit_vert_end(struct vc4_compile *c,
- struct vc4_varying_semantic *fs_inputs,
+ struct vc4_varying_slot *fs_inputs,
uint32_t num_fs_inputs)
{
struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]);
emit_point_size_write(c);
for (int i = 0; i < num_fs_inputs; i++) {
- struct vc4_varying_semantic *input = &fs_inputs[i];
+ struct vc4_varying_slot *input = &fs_inputs[i];
int j;
for (j = 0; j < c->num_outputs; j++) {
- struct vc4_varying_semantic *output =
- &c->output_semantics[j];
+ struct vc4_varying_slot *output =
+ &c->output_slots[j];
- if (input->semantic == output->semantic &&
- input->index == output->index &&
+ if (input->slot == output->slot &&
input->swizzle == output->swizzle) {
qir_VPM_WRITE(c, c->outputs[j]);
break;
for (unsigned i = 0; i < num_entries; i++) {
nir_variable *var = vars[i];
unsigned array_len = MAX2(glsl_get_length(var->type), 1);
- /* XXX: map loc slots to semantics */
- unsigned semantic_name = var->data.location;
- unsigned semantic_index = var->data.index;
unsigned loc = var->data.driver_location;
assert(array_len == 1);
(loc + 1) * 4);
if (c->stage == QSTAGE_FRAG) {
- if (semantic_name == TGSI_SEMANTIC_POSITION) {
+ if (var->data.location == VARYING_SLOT_POS) {
emit_fragcoord_input(c, loc);
- } else if (semantic_name == TGSI_SEMANTIC_FACE) {
+ } else if (var->data.location == VARYING_SLOT_FACE) {
c->inputs[loc * 4 + 0] = qir_FRAG_REV_FLAG(c);
- } else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
+ } else if (var->data.location >= VARYING_SLOT_VAR0 &&
(c->fs_key->point_sprite_mask &
- (1 << semantic_index))) {
+ (1 << (var->data.location -
+ VARYING_SLOT_VAR0)))) {
c->inputs[loc * 4 + 0] = c->point_x;
c->inputs[loc * 4 + 1] = c->point_y;
} else {
- emit_fragment_input(c, loc,
- semantic_name,
- semantic_index);
+ emit_fragment_input(c, loc, var->data.location);
}
} else {
emit_vertex_input(c, loc);
{
foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
unsigned array_len = MAX2(glsl_get_length(var->type), 1);
- /* XXX: map loc slots to semantics */
- unsigned semantic_name = var->data.location;
- unsigned semantic_index = var->data.index;
unsigned loc = var->data.driver_location * 4;
assert(array_len == 1);
(void)array_len;
- /* NIR hack to pass through
- * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS */
- if (semantic_name == TGSI_SEMANTIC_COLOR &&
- semantic_index == -1)
- semantic_index = 0;
-
- for (int i = 0; i < 4; i++) {
- add_output(c,
- loc + i,
- semantic_name,
- semantic_index,
- i);
- }
+ for (int i = 0; i < 4; i++)
+ add_output(c, loc + i, var->data.location, i);
- switch (semantic_name) {
- case TGSI_SEMANTIC_POSITION:
- c->output_position_index = loc;
- break;
- case TGSI_SEMANTIC_CLIPVERTEX:
- c->output_clipvertex_index = loc;
- break;
- case TGSI_SEMANTIC_COLOR:
- c->output_color_index = loc;
- break;
- case TGSI_SEMANTIC_PSIZE:
- c->output_point_size_index = loc;
- break;
+ if (c->stage == QSTAGE_FRAG) {
+ switch (var->data.location) {
+ case FRAG_RESULT_COLOR:
+ case FRAG_RESULT_DATA0:
+ c->output_color_index = loc;
+ break;
+ case FRAG_RESULT_DEPTH:
+ c->output_position_index = loc;
+ break;
+ }
+ } else {
+ switch (var->data.location) {
+ case VARYING_SLOT_POS:
+ c->output_position_index = loc;
+ break;
+ case VARYING_SLOT_CLIP_VERTEX:
+ c->output_clipvertex_index = loc;
+ break;
+ case VARYING_SLOT_PSIZ:
+ c->output_point_size_index = loc;
+ break;
+ }
}
-
}
}
case QSTAGE_FRAG:
c->fs_key = (struct vc4_fs_key *)key;
if (c->fs_key->is_points) {
- c->point_x = emit_fragment_varying(c, ~0, ~0, 0);
- c->point_y = emit_fragment_varying(c, ~0, ~0, 0);
+ c->point_x = emit_fragment_varying(c, ~0, 0);
+ c->point_y = emit_fragment_varying(c, ~0, 0);
} else if (c->fs_key->is_lines) {
- c->line_x = emit_fragment_varying(c, ~0, ~0, 0);
+ c->line_x = emit_fragment_varying(c, ~0, 0);
}
break;
case QSTAGE_VERT:
break;
case QSTAGE_VERT:
emit_vert_end(c,
- vc4->prog.fs->input_semantics,
+ vc4->prog.fs->input_slots,
vc4->prog.fs->num_inputs);
break;
case QSTAGE_COORD:
shader->program_id = vc4->next_compiled_program_id++;
if (stage == QSTAGE_FRAG) {
- bool input_live[c->num_input_semantics];
+ bool input_live[c->num_input_slots];
memset(input_live, 0, sizeof(input_live));
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
}
}
- shader->input_semantics = ralloc_array(shader,
- struct vc4_varying_semantic,
- c->num_input_semantics);
+ shader->input_slots = ralloc_array(shader,
+ struct vc4_varying_slot,
+ c->num_input_slots);
- for (int i = 0; i < c->num_input_semantics; i++) {
- struct vc4_varying_semantic *sem = &c->input_semantics[i];
+ for (int i = 0; i < c->num_input_slots; i++) {
+ struct vc4_varying_slot *slot = &c->input_slots[i];
if (!input_live[i])
continue;
/* Skip non-VS-output inputs. */
- if (sem->semantic == (uint8_t)~0)
+ if (slot->slot == (uint8_t)~0)
continue;
- if (sem->semantic == TGSI_SEMANTIC_COLOR ||
- sem->semantic == TGSI_SEMANTIC_BCOLOR) {
+ if (slot->slot == VARYING_SLOT_COL0 ||
+ slot->slot == VARYING_SLOT_COL1 ||
+ slot->slot == VARYING_SLOT_BFC0 ||
+ slot->slot == VARYING_SLOT_BFC1) {
shader->color_inputs |= (1 << shader->num_inputs);
}
- shader->input_semantics[shader->num_inputs] = *sem;
+ shader->input_slots[shader->num_inputs] = *slot;
shader->num_inputs++;
}
} else {
*/
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
if (inst->src[i].file == QFILE_VARY &&
- c->input_semantics[inst->src[i].index].semantic == 0xff) {
+ c->input_slots[inst->src[i].index].slot == 0xff) {
return true;
}
reg.file = QFILE_TEMP;
reg.index = c->num_temps++;
+ reg.pack = 0;
if (c->num_temps > c->defs_array_size) {
uint32_t old_size = c->defs_array_size;
QUNIFORM_ALPHA_REF,
};
-struct vc4_varying_semantic {
- uint8_t semantic;
- uint8_t index;
+struct vc4_varying_slot {
+ uint8_t slot;
uint8_t swizzle;
};
uint8_t vattr_sizes[8];
/**
- * Array of the TGSI semantics of all FS QFILE_VARY reads.
+ * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
*
* This includes those that aren't part of the VPM varyings, like
* point/line coordinates.
*/
- struct vc4_varying_semantic *input_semantics;
- uint32_t num_input_semantics;
- uint32_t input_semantics_array_size;
+ struct vc4_varying_slot *input_slots;
+ uint32_t num_input_slots;
+ uint32_t input_slots_array_size;
/**
- * An entry per outputs[] in the VS indicating what the semantic of
- * the output is. Used to emit from the VS in the order that the FS
- * needs.
+ * An entry per outputs[] in the VS indicating what the VARYING_SLOT_*
+ * of the output is. Used to emit from the VS in the order that the
+ * FS needs.
*/
- struct vc4_varying_semantic *output_semantics;
+ struct vc4_varying_slot *output_slots;
struct pipe_shader_state *shader_state;
struct vc4_key *key;
QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
/* Misc fields that have to match exactly. */
- ok = ok && merge_fields(&merge, a, b, QPU_SF | QPU_PM,
- ~0);
+ ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);
if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
return 0;
}
- /* packing: Make sure that non-NOP packs agree, then deal with
- * special-case failing of adding a non-NOP pack to something with a
- * NOP pack.
- */
- if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
- return 0;
- bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
- QPU_GET_FIELD(merge, QPU_PACK));
- bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
- QPU_GET_FIELD(merge, QPU_PACK));
- if (!(merge & QPU_PM)) {
- /* Make sure we're not going to be putting a new
- * a-file packing on either half.
+ if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {
+ /* If one instruction has PM bit set and the other not, the
+ * one without PM shouldn't do packing/unpacking, and we
+ * have to make sure non-NOP packing/unpacking from PM
+ * instruction aren't added to it.
*/
- if (new_a_pack && writes_a_file(a))
- return 0;
+ uint64_t temp;
- if (new_b_pack && writes_a_file(b))
- return 0;
- } else {
- /* Make sure we're not going to be putting new MUL packing on
- * either half.
- */
- if (new_a_pack && QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
- return 0;
+ /* Let a be the one with PM bit */
+ if (!(a & QPU_PM)) {
+ temp = a;
+ a = b;
+ b = temp;
+ }
- if (new_b_pack && QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
+ if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
return 0;
- }
- /* unpacking: Make sure that non-NOP unpacks agree, then deal with
- * special-case failing of adding a non-NOP unpack to something with a
- * NOP unpack.
- */
- if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
- return 0;
- bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
- QPU_GET_FIELD(merge, QPU_UNPACK));
- bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
- QPU_GET_FIELD(merge, QPU_UNPACK));
- if (!(merge & QPU_PM)) {
- /* Make sure we're not going to be putting a new
- * a-file packing on either half.
- */
- if (new_a_unpack && QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
+ if ((a & QPU_PACK_MASK) != 0 &&
+ QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
return 0;
- if (new_b_unpack && QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
+ if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))
return 0;
} else {
- /* Make sure we're not going to be putting new r4 unpack on
- * either half.
+ /* packing: Make sure that non-NOP packs agree, then deal with
+ * special-case failing of adding a non-NOP pack to something
+ * with a NOP pack.
*/
- if (new_a_unpack && reads_r4(a))
+ if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
return 0;
+ bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
+ QPU_GET_FIELD(merge, QPU_PACK));
+ bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
+ QPU_GET_FIELD(merge, QPU_PACK));
+ if (!(merge & QPU_PM)) {
+ /* Make sure we're not going to be putting a new
+ * a-file packing on either half.
+ */
+ if (new_a_pack && writes_a_file(a))
+ return 0;
+
+ if (new_b_pack && writes_a_file(b))
+ return 0;
+ } else {
+ /* Make sure we're not going to be putting new MUL
+ * packing on either half.
+ */
+ if (new_a_pack &&
+ QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
+ return 0;
+
+ if (new_b_pack &&
+ QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
+ return 0;
+ }
- if (new_b_unpack && reads_r4(b))
+ /* unpacking: Make sure that non-NOP unpacks agree, then deal
+ * with special-case failing of adding a non-NOP unpack to
+ * something with a NOP unpack.
+ */
+ if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
return 0;
+ bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
+ QPU_GET_FIELD(merge, QPU_UNPACK));
+ bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
+ QPU_GET_FIELD(merge, QPU_UNPACK));
+ if (!(merge & QPU_PM)) {
+ /* Make sure we're not going to be putting a new
+ * a-file packing on either half.
+ */
+ if (new_a_unpack &&
+ QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
+ return 0;
+
+ if (new_b_unpack &&
+ QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
+ return 0;
+ } else {
+ /* Make sure we're not going to be putting new r4
+ * unpack on either half.
+ */
+ if (new_a_unpack && reads_r4(a))
+ return 0;
+
+ if (new_b_unpack && reads_r4(b))
+ return 0;
+ }
}
if (ok)
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
return 0;
/* Stream output. */
float *out_value);
/**
+ * Query a timestamp in nanoseconds. This is completely equivalent to
+ * pipe_screen::get_timestamp() but takes a context handle for drivers
+ * that require a context.
+ */
+ uint64_t (*get_timestamp)(struct pipe_context *);
+
+ /**
* Flush the resource cache, so that the resource can be used
* by an external client. Possible usage:
* - flushing a resource before presenting it on the screen
PIPE_CAP_TEXTURE_FLOAT_LINEAR,
PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR,
PIPE_CAP_DEPTH_BOUNDS_TEST,
+ PIPE_CAP_TGSI_TXQS,
};
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
* Create a texture from a winsys_handle. The handle is often created in
* another process by first creating a pipe texture and then calling
* resource_get_handle.
+ *
+ * NOTE: in the case of DRM_API_HANDLE_TYPE_FD handles, the caller
+ * retains ownership of the FD. (This is consistent with
+ * EGL_EXT_image_dma_buf_import)
*/
struct pipe_resource * (*resource_from_handle)(struct pipe_screen *,
const struct pipe_resource *templat,
* Get a winsys_handle from a texture. Some platforms/winsys requires
* that the texture is created with a special usage flag like
* DISPLAYTARGET or PRIMARY.
+ *
+ * NOTE: in the case of DRM_API_HANDLE_TYPE_FD handles, the caller
+ * takes ownership of the FD. (This is consistent with
+ * EGL_MESA_image_dma_buf_export)
*/
boolean (*resource_get_handle)(struct pipe_screen *,
struct pipe_resource *tex,
#define TGSI_OPCODE_ENDLOOP 101
#define TGSI_OPCODE_ENDSUB 102
#define TGSI_OPCODE_TXQ_LZ 103 /* TXQ for mipmap level 0 */
+#define TGSI_OPCODE_TXQS 104
/* gap */
#define TGSI_OPCODE_NOP 107
using namespace clover;
namespace {
- const cl_mem_flags dev_access_flags =
- CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
- const cl_mem_flags host_ptr_flags =
- CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;
- const cl_mem_flags host_access_flags =
- CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
- const cl_mem_flags all_mem_flags =
- dev_access_flags | host_ptr_flags | host_access_flags;
-
- void
- validate_flags(cl_mem_flags flags, cl_mem_flags valid) {
- if ((flags & ~valid) ||
- util_bitcount(flags & dev_access_flags) > 1 ||
- util_bitcount(flags & host_access_flags) > 1)
+ cl_mem_flags
+ validate_flags(cl_mem d_parent, cl_mem_flags d_flags) {
+ const cl_mem_flags dev_access_flags =
+ CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
+ const cl_mem_flags host_ptr_flags =
+ CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;
+ const cl_mem_flags host_access_flags =
+ CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
+ const cl_mem_flags valid_flags =
+ dev_access_flags | host_access_flags | (d_parent ? 0 : host_ptr_flags);
+
+ if ((d_flags & ~valid_flags) ||
+ util_bitcount(d_flags & dev_access_flags) > 1 ||
+ util_bitcount(d_flags & host_access_flags) > 1)
throw error(CL_INVALID_VALUE);
- if ((flags & CL_MEM_USE_HOST_PTR) &&
- (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)))
+ if ((d_flags & CL_MEM_USE_HOST_PTR) &&
+ (d_flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)))
throw error(CL_INVALID_VALUE);
+
+ if (d_parent) {
+ const auto &parent = obj(d_parent);
+ const cl_mem_flags flags = (d_flags |
+ (d_flags & dev_access_flags ? 0 :
+ parent.flags() & dev_access_flags) |
+ (d_flags & host_access_flags ? 0 :
+ parent.flags() & host_access_flags) |
+ (parent.flags() & host_ptr_flags));
+
+ if (~flags & parent.flags() &
+ ((dev_access_flags & ~CL_MEM_READ_WRITE) | host_access_flags))
+ throw error(CL_INVALID_VALUE);
+
+ return flags;
+
+ } else {
+ return d_flags | (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
+ }
}
}
CLOVER_API cl_mem
clCreateBuffer(cl_context d_ctx, cl_mem_flags d_flags, size_t size,
void *host_ptr, cl_int *r_errcode) try {
- const cl_mem_flags flags = d_flags |
- (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
+ const cl_mem_flags flags = validate_flags(NULL, d_flags);
auto &ctx = obj(d_ctx);
- validate_flags(d_flags, all_mem_flags);
-
if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
CL_MEM_COPY_HOST_PTR)))
throw error(CL_INVALID_HOST_PTR);
cl_buffer_create_type op,
const void *op_info, cl_int *r_errcode) try {
auto &parent = obj<root_buffer>(d_mem);
- const cl_mem_flags flags = d_flags |
- (d_flags & dev_access_flags ? 0 : parent.flags() & dev_access_flags) |
- (d_flags & host_access_flags ? 0 : parent.flags() & host_access_flags) |
- (parent.flags() & host_ptr_flags);
-
- validate_flags(d_flags, dev_access_flags | host_access_flags);
-
- if (~flags & parent.flags() &
- ((dev_access_flags & ~CL_MEM_READ_WRITE) | host_access_flags))
- throw error(CL_INVALID_VALUE);
+ const cl_mem_flags flags = validate_flags(d_mem, d_flags);
if (op == CL_BUFFER_CREATE_TYPE_REGION) {
auto reg = reinterpret_cast<const cl_buffer_region *>(op_info);
}
CLOVER_API cl_mem
-clCreateImage2D(cl_context d_ctx, cl_mem_flags d_flags,
- const cl_image_format *format,
- size_t width, size_t height, size_t row_pitch,
- void *host_ptr, cl_int *r_errcode) try {
- const cl_mem_flags flags = d_flags |
- (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
+clCreateImage(cl_context d_ctx, cl_mem_flags d_flags,
+ const cl_image_format *format,
+ const cl_image_desc *desc,
+ void *host_ptr, cl_int *r_errcode) try {
auto &ctx = obj(d_ctx);
- validate_flags(d_flags, all_mem_flags);
-
if (!any_of(std::mem_fn(&device::image_support), ctx.devices()))
throw error(CL_INVALID_OPERATION);
if (!format)
throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
- if (width < 1 || height < 1)
- throw error(CL_INVALID_IMAGE_SIZE);
+ if (!desc)
+ throw error(CL_INVALID_IMAGE_DESCRIPTOR);
- if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
- CL_MEM_COPY_HOST_PTR)))
+ if (desc->image_array_size == 0 &&
+ (desc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
+ desc->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY))
+ throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+ if (!host_ptr &&
+ (desc->image_row_pitch || desc->image_slice_pitch))
+ throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+ if (desc->num_mip_levels || desc->num_samples)
+ throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+ if (bool(desc->buffer) != (desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER))
+ throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+
+ if (bool(host_ptr) != bool(d_flags & (CL_MEM_USE_HOST_PTR |
+ CL_MEM_COPY_HOST_PTR)))
throw error(CL_INVALID_HOST_PTR);
- if (!supported_formats(ctx, CL_MEM_OBJECT_IMAGE2D).count(*format))
+ const cl_mem_flags flags = validate_flags(desc->buffer, d_flags);
+
+ if (!supported_formats(ctx, desc->image_type).count(*format))
throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
ret_error(r_errcode, CL_SUCCESS);
- return new image2d(ctx, flags, format, width, height,
- row_pitch, host_ptr);
+
+ switch (desc->image_type) {
+ case CL_MEM_OBJECT_IMAGE2D:
+ if (!desc->image_width || !desc->image_height)
+ throw error(CL_INVALID_IMAGE_SIZE);
+
+ if (all_of([=](const device &dev) {
+ const size_t max = 1 << dev.max_image_levels_2d();
+ return (desc->image_width > max ||
+ desc->image_height > max);
+ }, ctx.devices()))
+ throw error(CL_INVALID_IMAGE_SIZE);
+
+ return new image2d(ctx, flags, format,
+ desc->image_width, desc->image_height,
+ desc->image_row_pitch, host_ptr);
+
+ case CL_MEM_OBJECT_IMAGE3D:
+ if (!desc->image_width || !desc->image_height || !desc->image_depth)
+ throw error(CL_INVALID_IMAGE_SIZE);
+
+ if (all_of([=](const device &dev) {
+ const size_t max = 1 << dev.max_image_levels_3d();
+ return (desc->image_width > max ||
+ desc->image_height > max ||
+ desc->image_depth > max);
+ }, ctx.devices()))
+ throw error(CL_INVALID_IMAGE_SIZE);
+
+ return new image3d(ctx, flags, format,
+ desc->image_width, desc->image_height,
+ desc->image_depth, desc->image_row_pitch,
+ desc->image_slice_pitch, host_ptr);
+
+ case CL_MEM_OBJECT_IMAGE1D:
+ case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+ case CL_MEM_OBJECT_IMAGE1D_BUFFER:
+ case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+ // XXX - Not implemented.
+ throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
+
+ default:
+ throw error(CL_INVALID_IMAGE_DESCRIPTOR);
+ }
} catch (error &e) {
ret_error(r_errcode, e);
}
+///
+/// Create a 2D image by filling in a cl_image_desc of type
+/// CL_MEM_OBJECT_IMAGE2D from the individual arguments and forwarding
+/// everything to clCreateImage(), which performs all validation and
+/// reports the result through \a r_errcode.
+///
CLOVER_API cl_mem
+clCreateImage2D(cl_context d_ctx, cl_mem_flags d_flags,
+ const cl_image_format *format,
+ size_t width, size_t height, size_t row_pitch,
+ void *host_ptr, cl_int *r_errcode) {
+ // Positional initializer: only the type, width, height and row pitch
+ // are taken from the arguments, every other member is zero/NULL.
+ // NOTE(review): relies on the member order of cl_image_desc in the CL
+ // headers -- confirm against CL/cl.h.
+ const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0,
+ row_pitch, 0, 0, 0, NULL };
+
+ return clCreateImage(d_ctx, d_flags, format, &desc, host_ptr, r_errcode);
+}
+
+CLOVER_API cl_mem
clCreateImage3D(cl_context d_ctx, cl_mem_flags d_flags,
const cl_image_format *format,
size_t width, size_t height, size_t depth,
size_t row_pitch, size_t slice_pitch,
- void *host_ptr, cl_int *r_errcode) try {
- const cl_mem_flags flags = d_flags |
- (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
- auto &ctx = obj(d_ctx);
-
- validate_flags(d_flags, all_mem_flags);
-
- if (!any_of(std::mem_fn(&device::image_support), ctx.devices()))
- throw error(CL_INVALID_OPERATION);
+ void *host_ptr, cl_int *r_errcode) {
+ const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE3D, width, height, depth, 0,
+ row_pitch, slice_pitch, 0, 0, NULL };
- if (!format)
- throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
-
- if (width < 1 || height < 1 || depth < 2)
- throw error(CL_INVALID_IMAGE_SIZE);
-
- if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
- CL_MEM_COPY_HOST_PTR)))
- throw error(CL_INVALID_HOST_PTR);
-
- if (!supported_formats(ctx, CL_MEM_OBJECT_IMAGE3D).count(*format))
- throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
-
- ret_error(r_errcode, CL_SUCCESS);
- return new image3d(ctx, flags, format, width, height, depth,
- row_pitch, slice_pitch, host_ptr);
-
-} catch (error &e) {
- ret_error(r_errcode, e);
- return NULL;
+ return clCreateImage(d_ctx, d_flags, format, &desc, host_ptr, r_errcode);
}
CLOVER_API cl_int
auto &ctx = obj(d_ctx);
auto formats = supported_formats(ctx, type);
- validate_flags(flags, all_mem_flags);
+ validate_flags(NULL, flags);
if (r_buf && !r_count)
throw error(CL_INVALID_VALUE);
return e.get();
}
-CLOVER_API cl_mem
-clCreateImage(cl_context d_ctx, cl_mem_flags flags,
- const cl_image_format *format,
- const cl_image_desc *image_desc,
- void *host_ptr, cl_int *r_errcode) {
- CLOVER_NOT_SUPPORTED_UNTIL("1.2");
- ret_error(r_errcode, CL_INVALID_OPERATION);
- return NULL;
-}
-
CLOVER_API cl_int
clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer,
const void *pattern, size_t pattern_size,
const bool is_write_only = access_qual == "write_only";
const bool is_read_only = access_qual == "read_only";
- typename module::argument::type marg_type;
+ enum module::argument::type marg_type;
if (is_image2d && is_read_only) {
marg_type = module::argument::image2d_rd;
} else if (is_image2d && is_write_only) {
'#/src',
'#/src/mapi',
'#/src/mesa',
+ '#/include/HaikuGL',
])
sources = [
0, 0, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ONE},
};
-
/*
- * The alpha value stored in a luminance texture is read by the
- * hardware as color.
+ * The alpha value stored in a L8 texture is read by the
+ * hardware as color, and R8 is read as red. The source alpha value
+ * at the end of the fragment shader is stored in all color channels,
+ * so the correct approach is to blend using DST_COLOR instead of
+ * DST_ALPHA and then output any color channel (L8) or the red channel (R8).
*/
static unsigned
xa_convert_blend_for_luminance(unsigned factor)
return factor;
}
-
static boolean
blend_for_op(struct xa_composite_blend *blend,
enum xa_composite_op op,
if (!dst_pic->srf)
return supported;
- if (dst_pic->srf->tex->format == PIPE_FORMAT_L8_UNORM) {
- blend->rgb_src = xa_convert_blend_for_luminance(blend->rgb_src);
- blend->rgb_dst = xa_convert_blend_for_luminance(blend->rgb_dst);
+ if ((dst_pic->srf->tex->format == PIPE_FORMAT_L8_UNORM ||
+ dst_pic->srf->tex->format == PIPE_FORMAT_R8_UNORM)) {
+ blend->rgb_src = xa_convert_blend_for_luminance(blend->rgb_src);
+ blend->rgb_dst = xa_convert_blend_for_luminance(blend->rgb_dst);
}
/*
ret |= mask ? FS_MASK_SET_ALPHA : FS_SRC_SET_ALPHA;
if (src_hw_format == src_pic_format) {
- if (src->tex->format == PIPE_FORMAT_L8_UNORM)
+ if (src->tex->format == PIPE_FORMAT_L8_UNORM ||
+ src->tex->format == PIPE_FORMAT_R8_UNORM)
return ((mask) ? FS_MASK_LUMINANCE : FS_SRC_LUMINANCE);
return ret;
fs_traits |= picture_format_fixups(mask_pic, 1);
}
- if (ctx->srf->format == PIPE_FORMAT_L8_UNORM)
+ if (ctx->srf->format == PIPE_FORMAT_L8_UNORM ||
+ ctx->srf->format == PIPE_FORMAT_R8_UNORM)
fs_traits |= FS_DST_LUMINANCE;
shader = xa_shaders_get(ctx->shaders, vs_traits, fs_traits);
}
/* shaders */
- if (src_texture->format == PIPE_FORMAT_L8_UNORM)
+ if (src_texture->format == PIPE_FORMAT_L8_UNORM ||
+ src_texture->format == PIPE_FORMAT_R8_UNORM)
fs_traits |= FS_SRC_LUMINANCE;
- if (dst_surface->format == PIPE_FORMAT_L8_UNORM)
+ if (dst_surface->format == PIPE_FORMAT_L8_UNORM ||
+ dst_surface->format == PIPE_FORMAT_R8_UNORM)
fs_traits |= FS_DST_LUMINANCE;
if (xa_format_a(dst_xa_format) != 0 &&
xa_format_a(src_xa_format) == 0)
};
static struct xa_format_descriptor
-xa_get_pipe_format(enum xa_formats xa_format)
+xa_get_pipe_format(struct xa_tracker *xa, enum xa_formats xa_format)
{
struct xa_format_descriptor fdesc;
fdesc.format = PIPE_FORMAT_B5G5R5A1_UNORM;
break;
case xa_format_a8:
- fdesc.format = PIPE_FORMAT_L8_UNORM;
+ if (xa->screen->is_format_supported(xa->screen, PIPE_FORMAT_R8_UNORM,
+ PIPE_TEXTURE_2D, 0,
+ stype_bind[xa_type_a] |
+ PIPE_BIND_RENDER_TARGET))
+ fdesc.format = PIPE_FORMAT_R8_UNORM;
+ else
+ fdesc.format = PIPE_FORMAT_L8_UNORM;
break;
case xa_format_z24:
fdesc.format = PIPE_FORMAT_Z24X8_UNORM;
fdesc.format = PIPE_FORMAT_S8_UINT_Z24_UNORM;
break;
case xa_format_yuv8:
- fdesc.format = PIPE_FORMAT_L8_UNORM;
+ if (xa->screen->is_format_supported(xa->screen, PIPE_FORMAT_R8_UNORM,
+ PIPE_TEXTURE_2D, 0,
+ stype_bind[xa_type_yuv_component]))
+ fdesc.format = PIPE_FORMAT_R8_UNORM;
+ else
+ fdesc.format = PIPE_FORMAT_L8_UNORM;
break;
default:
fdesc.xa_format = xa_format_unknown;
for (i = 0; i < num_preferred[stype]; ++i) {
xa_format = preferred[stype][i];
- struct xa_format_descriptor fdesc = xa_get_pipe_format(xa_format);
+ struct xa_format_descriptor fdesc =
+ xa_get_pipe_format(xa, xa_format);
if (xa->screen->is_format_supported(xa->screen, fdesc.format,
PIPE_TEXTURE_2D, 0, bind)) {
int found = 0;
for (i = xa->format_map[stype][0]; i <= xa->format_map[stype][1]; ++i) {
- fdesc = xa_get_pipe_format(xa->supported_formats[i]);
+ fdesc = xa_get_pipe_format(xa, xa->supported_formats[i]);
if (fdesc.xa_format != xa_format_unknown &&
xa_format_depth(fdesc.xa_format) == depth) {
found = 1;
xa_format_check_supported(struct xa_tracker *xa,
enum xa_formats xa_format, unsigned int flags)
{
- struct xa_format_descriptor fdesc = xa_get_pipe_format(xa_format);
+ struct xa_format_descriptor fdesc = xa_get_pipe_format(xa, xa_format);
unsigned int bind;
if (fdesc.xa_format == xa_format_unknown)
return XA_ERR_NONE;
}
+/*
+ * Translate an XA handle type into the matching DRM_API_HANDLE_TYPE_*
+ * value. Anything other than the kms and fd types, including
+ * xa_handle_type_shared, yields DRM_API_HANDLE_TYPE_SHARED.
+ */
+static unsigned
+handle_type(enum xa_handle_type type)
+{
+    if (type == xa_handle_type_kms)
+        return DRM_API_HANDLE_TYPE_KMS;
+
+    if (type == xa_handle_type_fd)
+        return DRM_API_HANDLE_TYPE_FD;
+
+    /* xa_handle_type_shared and any unrecognized value. */
+    return DRM_API_HANDLE_TYPE_SHARED;
+}
+
static struct xa_surface *
surface_create(struct xa_tracker *xa,
int width,
if (xa_format == xa_format_unknown)
fdesc = xa_get_format_stype_depth(xa, stype, depth);
else
- fdesc = xa_get_pipe_format(xa_format);
+ fdesc = xa_get_pipe_format(xa, xa_format);
if (fdesc.xa_format == xa_format_unknown)
return NULL;
enum xa_formats xa_format, unsigned int flags,
uint32_t handle, uint32_t stride)
{
+ return xa_surface_from_handle2(xa, width, height, depth, stype, xa_format,
+ DRM_API_HANDLE_TYPE_SHARED, flags, handle,
+ stride);
+}
+
+XA_EXPORT struct xa_surface *
+xa_surface_from_handle2(struct xa_tracker *xa,
+ int width,
+ int height,
+ int depth,
+ enum xa_surface_type stype,
+ enum xa_formats xa_format, unsigned int flags,
+ enum xa_handle_type type,
+ uint32_t handle, uint32_t stride)
+{
struct winsys_handle whandle;
memset(&whandle, 0, sizeof(whandle));
- whandle.type = DRM_API_HANDLE_TYPE_SHARED;
+ whandle.type = handle_type(type);
whandle.handle = handle;
whandle.stride = stride;
return surface_create(xa, width, height, depth, stype, xa_format, flags, &whandle);
if (xa_format == xa_format_unknown)
fdesc = xa_get_format_stype_depth(xa, stype, depth);
else
- fdesc = xa_get_pipe_format(xa_format);
+ fdesc = xa_get_pipe_format(xa, xa_format);
if (width == template->width0 && height == template->height0 &&
template->format == fdesc.format &&
boolean res;
memset(&whandle, 0, sizeof(whandle));
- switch (type) {
- case xa_handle_type_kms:
- whandle.type = DRM_API_HANDLE_TYPE_KMS;
- break;
- case xa_handle_type_shared:
- default:
- whandle.type = DRM_API_HANDLE_TYPE_SHARED;
- break;
- }
+ whandle.type = handle_type(type);
res = screen->resource_get_handle(screen, srf->tex, &whandle);
if (!res)
return -XA_ERR_INVAL;
#include <stdint.h>
#define XA_TRACKER_VERSION_MAJOR 2
-#define XA_TRACKER_VERSION_MINOR 2
+#define XA_TRACKER_VERSION_MINOR 3
#define XA_TRACKER_VERSION_PATCH 0
#define XA_FLAG_SHARED (1 << 0)
enum xa_handle_type {
xa_handle_type_shared,
xa_handle_type_kms,
+ xa_handle_type_fd,
};
extern void xa_tracker_version(int *major, int *minor, int *patch);
enum xa_formats pform,
unsigned int flags,
uint32_t handle, uint32_t stride);
+extern struct xa_surface *
+xa_surface_from_handle2(struct xa_tracker *xa,
+ int width,
+ int height,
+ int depth,
+ enum xa_surface_type stype,
+ enum xa_formats xa_format,
+ unsigned int flags,
+ enum xa_handle_type type,
+ uint32_t handle,
+ uint32_t stride);
enum xa_formats xa_surface_format(const struct xa_surface *srf);
nouveau_drm_screen_create;
radeon_drm_winsys_create;
amdgpu_winsys_create;
+ fd_drm_screen_create;
local:
*;
};
xa_surface_dma;
xa_surface_format;
xa_surface_from_handle;
+ xa_surface_from_handle2;
xa_surface_handle;
xa_surface_map;
xa_surface_redefine;
enum amdgpu_bo_handle_type type;
int r;
+ if ((void*)bo != (void*)buffer)
+ pb_cache_manager_remove_buffer(buffer);
+
switch (whandle->type) {
case DRM_API_HANDLE_TYPE_SHARED:
type = amdgpu_bo_handle_type_gem_flink_name;
static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
{
- /* The maximum size is 4MB - 1B, which is unaligned.
- * Use aligned size 4MB - 16B. */
- const unsigned max_ib_size = (1024 * 1024 - 16) * 4;
- const unsigned min_ib_size = 24 * 1024 * 4;
+ /* Small IBs are better than big IBs, because the GPU goes idle quicker
+ * and there is less waiting for buffers and fences. Proof:
+ * http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
+ */
+ const unsigned buffer_size = 128 * 1024 * 4;
+ const unsigned ib_size = 20 * 1024 * 4;
cs->base.cdw = 0;
cs->base.buf = NULL;
/* Allocate a new buffer for IBs if the current buffer is all used. */
if (!cs->big_ib_buffer ||
- cs->used_ib_space + min_ib_size > cs->big_ib_buffer->size) {
+ cs->used_ib_space + ib_size > cs->big_ib_buffer->size) {
struct radeon_winsys *ws = &cs->ctx->ws->base;
struct radeon_winsys_cs_handle *winsys_bo;
cs->ib_mapped = NULL;
cs->used_ib_space = 0;
- cs->big_ib_buffer = ws->buffer_create(ws, max_ib_size,
+ cs->big_ib_buffer = ws->buffer_create(ws, buffer_size,
4096, true,
RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS);
cs->ib.ib_mc_address = cs->big_ib_winsys_buffer->va + cs->used_ib_space;
cs->base.buf = (uint32_t*)(cs->ib_mapped + cs->used_ib_space);
- cs->base.max_dw = (cs->big_ib_buffer->size - cs->used_ib_space) / 4;
+ cs->base.max_dw = ib_size / 4;
return true;
}
switch (cs->base.ring_type) {
case RING_DMA:
/* pad DMA ring to 8 DWs */
- if (ws->info.chip_class <= SI) {
- while (rcs->cdw & 7)
- OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
- } else {
- while (rcs->cdw & 7)
- OUT_CS(&cs->base, 0x00000000); /* NOP packet */
- }
+ while (rcs->cdw & 7)
+ OUT_CS(&cs->base, 0x00000000); /* NOP packet */
break;
case RING_GFX:
- /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements
- * r6xx, requires at least 4 dw alignment to avoid a hw bug.
- */
- if (ws->info.chip_class <= SI) {
- while (rcs->cdw & 7)
- OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
- } else {
- while (rcs->cdw & 7)
- OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
- }
+ /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
+ while (rcs->cdw & 7)
+ OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
break;
case RING_UVD:
while (rcs->cdw & 15)
struct amdgpu_heap_info vram, gtt;
struct drm_amdgpu_info_hw_ip dma = {}, uvd = {}, vce = {};
uint32_t vce_version = 0, vce_feature = 0;
- int r;
+ int r, i, j;
/* Query hardware and driver information. */
r = amdgpu_query_gpu_info(ws->dev, &ws->amdinfo);
ws->info.vram_size = vram.heap_size;
/* convert the shader clock from KHz to MHz */
ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
- ws->info.max_compute_units = 1; /* TODO */
ws->info.max_se = ws->amdinfo.num_shader_engines;
ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
ws->info.has_uvd = uvd.available_rings != 0;
ws->info.r600_virtual_address = TRUE;
ws->info.r600_has_dma = dma.available_rings != 0;
+ /* Guess what the maximum compute unit number is by looking at the mask
+ * of enabled CUs.
+ */
+ for (i = 0; i < ws->info.max_se; i++)
+ for (j = 0; j < ws->info.max_sh_per_se; j++) {
+ unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]);
+
+ if (ws->info.max_compute_units < max)
+ ws->info.max_compute_units = max;
+ }
+ ws->info.max_compute_units *= ws->info.max_se * ws->info.max_sh_per_se;
+
memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
sizeof(ws->amdinfo.gb_tile_mode));
ws->info.si_tile_mode_array_valid = TRUE;
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include <sys/stat.h>
+
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
+#include "util/u_hash_table.h"
+#include "os/os_thread.h"
#include "freedreno_drm_public.h"
#include "freedreno/freedreno_screen.h"
+static struct util_hash_table *fd_tab = NULL;
+
+pipe_static_mutex(fd_screen_mutex);
+
+/*
+ * Installed as pipe_screen::destroy by fd_drm_screen_create(). Drops one
+ * reference on the screen; when the count reaches zero the screen is
+ * removed from fd_tab and the destroy callback that was saved in
+ * winsys_priv is invoked.
+ */
+static void
+fd_drm_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct fd_screen *screen = fd_screen(pscreen);
+ boolean destroy;
+
+ /* Update the refcount and fd_tab with fd_screen_mutex held. */
+ pipe_mutex_lock(fd_screen_mutex);
+ destroy = --screen->refcnt == 0;
+ if (destroy) {
+ int fd = fd_device_fd(screen->dev);
+ util_hash_table_remove(fd_tab, intptr_to_pointer(fd));
+ }
+ pipe_mutex_unlock(fd_screen_mutex);
+
+ /* The actual teardown runs after the mutex has been released: put the
+ * saved callback back in place and call it.
+ */
+ if (destroy) {
+ pscreen->destroy = screen->winsys_priv;
+ pscreen->destroy(pscreen);
+ }
+}
+
+/*
+ * Hash callback for fd_tab. The key is a file descriptor stored in a
+ * pointer; the hash is derived from the identity of the file it refers
+ * to (st_dev, st_ino, st_rdev) rather than from the descriptor number.
+ *
+ * Returns 0 if the descriptor cannot be stat'ed.
+ */
+static unsigned hash_fd(void *key)
+{
+	int fd = pointer_to_intptr(key);
+	struct stat st;
+
+	/* On failure fstat() does not fill in the buffer, so it must not be
+	 * read; fall back to a fixed hash value instead.
+	 */
+	if (fstat(fd, &st) != 0)
+		return 0;
+
+	return st.st_dev ^ st.st_ino ^ st.st_rdev;
+}
+
+/*
+ * Key comparison callback for fd_tab. Returns 0 when both descriptors
+ * refer to the same file (equal st_dev, st_ino and st_rdev) and non-zero
+ * otherwise. Descriptors that cannot be stat'ed compare as different,
+ * unless they are the very same descriptor number.
+ */
+static int compare_fd(void *key1, void *key2)
+{
+	int fd1 = pointer_to_intptr(key1);
+	int fd2 = pointer_to_intptr(key2);
+	struct stat stat1, stat2;
+
+	/* The same descriptor trivially names the same file. */
+	if (fd1 == fd2)
+		return 0;
+
+	/* On failure fstat() does not fill in the buffer; report a mismatch
+	 * rather than comparing indeterminate data.
+	 */
+	if (fstat(fd1, &stat1) != 0 || fstat(fd2, &stat2) != 0)
+		return 1;
+
+	return stat1.st_dev != stat2.st_dev ||
+	       stat1.st_ino != stat2.st_ino ||
+	       stat1.st_rdev != stat2.st_rdev;
+}
+
+/*
+ * Return a pipe_screen for the device behind fd. Screens are kept in
+ * fd_tab: if one already exists for the same underlying file its refcnt
+ * is bumped and it is returned, otherwise a new device and screen are
+ * created and registered. Returns NULL if the table, the device or the
+ * screen cannot be created.
+ */
struct pipe_screen *
fd_drm_screen_create(int fd)
{
- struct fd_device *dev = fd_device_new_dup(fd);
- if (!dev)
- return NULL;
- return fd_screen_create(dev);
+ struct pipe_screen *pscreen = NULL;
+
+ pipe_mutex_lock(fd_screen_mutex);
+ /* The table is created on first use. */
+ if (!fd_tab) {
+ fd_tab = util_hash_table_create(hash_fd, compare_fd);
+ if (!fd_tab)
+ goto unlock;
+ }
+
+ pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
+ if (pscreen) {
+ fd_screen(pscreen)->refcnt++;
+ } else {
+ struct fd_device *dev = fd_device_new_dup(fd);
+ if (!dev)
+ goto unlock;
+
+ pscreen = fd_screen_create(dev);
+ if (pscreen) {
+ /* NOTE: this fd shadows the function parameter; the table is
+ * keyed on the descriptor reported by the device, which is
+ * also what fd_drm_screen_destroy() uses for removal.
+ * NOTE(review): presumably a dup of the caller's fd owned by
+ * dev -- confirm against libdrm_freedreno.
+ */
+ int fd = fd_device_fd(dev);
+
+ util_hash_table_set(fd_tab, intptr_to_pointer(fd), pscreen);
+
+ /* Bit of a hack, to avoid circular linkage dependency,
+ * ie. pipe driver having to call in to winsys, we
+ * override the pipe drivers screen->destroy():
+ */
+ fd_screen(pscreen)->winsys_priv = pscreen->destroy;
+ pscreen->destroy = fd_drm_screen_destroy;
+ }
+ }
+
+unlock:
+ pipe_mutex_unlock(fd_screen_mutex);
+ return pscreen;
}
memset(&flink, 0, sizeof(flink));
+ if ((void*)bo != (void*)buffer)
+ pb_cache_manager_remove_buffer(buffer);
+
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
if (!bo->flink_name) {
flink.handle = bo->handle;
}
break;
case RING_GFX:
- /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements
+ /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
* r6xx, requires at least 4 dw alignment to avoid a hw bug.
- * hawaii with old firmware needs type2 nop packet.
- * accel_working2 with value 3 indicates the new firmware.
*/
- if (cs->ws->info.chip_class <= SI ||
- (cs->ws->info.family == CHIP_HAWAII &&
- cs->ws->accel_working2 < 3)) {
+ if (cs->ws->info.gfx_ib_pad_with_type2) {
while (rcs->cdw & 7)
OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
} else {
ws->info.cik_macrotile_mode_array_valid = TRUE;
}
+ /* Hawaii with old firmware needs type2 nop packet.
+ * accel_working2 with value 3 indicates the new firmware.
+ */
+ ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= SI ||
+ (ws->info.family == CHIP_HAWAII &&
+ ws->accel_working2 < 3);
+
return TRUE;
}
vmw_surface.c \
vmw_surface.h \
vmw_shader.c \
- vmw_shader.h
+ vmw_shader.h \
+ vmw_query.c \
+ vmw_query.h
/**************************************************************************
*
- * Copyright 2007-2010 VMware, Inc.
+ * Copyright 2007-2015 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
/* TODO: remove consequents buffers with the same fence? */
assert(!destroyed);
+ (void) destroyed;
fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE;
boolean destroyed;
destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf);
assert(!destroyed);
+ (void) destroyed;
}
if (fence) {
ops->fence_reference(ops, &fenced_buf->fence, fence);
/**********************************************************
- * Copyright 2010 VMware, Inc. All rights reserved.
+ * Copyright 2010-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
#include "vmw_surface.h"
#include "vmw_fence.h"
#include "vmw_shader.h"
+#include "vmw_query.h"
#define VMW_COMMAND_SIZE (64*1024)
#define VMW_SURFACE_RELOCS (1024)
{
struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
struct vmw_buffer_relocation *reloc;
+ struct pb_buffer *pb_buffer = vmw_pb_buffer(buffer);
- assert(vswc->region.staged < vswc->region.reserved);
+ if (id) {
+ assert(vswc->region.staged < vswc->region.reserved);
- reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged];
- reloc->mob.id = id;
- reloc->mob.offset_into_mob = offset_into_mob;
+ reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged];
+ reloc->mob.id = id;
+ reloc->mob.offset_into_mob = offset_into_mob;
- /*
- * pb_validate holds a refcount to the buffer, so no need to
- * refcount it again in the relocation.
- */
- reloc->buffer = vmw_pb_buffer(buffer);
- reloc->offset = offset;
- reloc->is_mob = TRUE;
- ++vswc->region.staged;
+ /*
+ * pb_validate holds a refcount to the buffer, so no need to
+ * refcount it again in the relocation.
+ */
+ reloc->buffer = pb_buffer;
+ reloc->offset = offset;
+ reloc->is_mob = TRUE;
+ ++vswc->region.staged;
+ }
- if (vmw_swc_add_validate_buffer(vswc, reloc->buffer, flags)) {
- vswc->seen_mobs += reloc->buffer->size;
+ if (vmw_swc_add_validate_buffer(vswc, pb_buffer, flags)) {
+ vswc->seen_mobs += pb_buffer->size;
/* divide by 5, tested for best performance */
if (vswc->seen_mobs >= vswc->vws->ioctl.max_mob_memory / VMW_MAX_MOB_MEM_FACTOR)
vswc->preemptive_flush = TRUE;
p_atomic_inc(&vsurf->validated);
}
- *where = vsurf->sid;
+ if (where)
+ *where = vsurf->sid;
}
static void
assert(swc->have_gb_objects || mobid == NULL);
- if(!surface) {
+ if (!surface) {
*where = SVGA3D_INVALID_ID;
if (mobid)
*mobid = SVGA3D_INVALID_ID;
uint32 *shid,
uint32 *mobid,
uint32 *offset,
- struct svga_winsys_gb_shader *shader)
+ struct svga_winsys_gb_shader *shader,
+ unsigned flags)
{
struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+ struct vmw_winsys_screen *vws = vswc->vws;
struct vmw_svga_winsys_shader *vshader;
struct vmw_ctx_validate_item *ishader;
+
if(!shader) {
*shid = SVGA3D_INVALID_ID;
return;
}
- assert(vswc->shader.staged < vswc->shader.reserved);
vshader = vmw_svga_winsys_shader(shader);
- ishader = util_hash_table_get(vswc->hash, vshader);
- if (ishader == NULL) {
- ishader = &vswc->shader.items[vswc->shader.used + vswc->shader.staged];
- vmw_svga_winsys_shader_reference(&ishader->vshader, vshader);
- ishader->referenced = FALSE;
- /*
- * Note that a failure here may just fall back to unhashed behavior
- * and potentially cause unnecessary flushing, so ignore the
- * return code.
- */
- (void) util_hash_table_set(vswc->hash, vshader, ishader);
- ++vswc->shader.staged;
- }
+ if (!vws->base.have_vgpu10) {
+ assert(vswc->shader.staged < vswc->shader.reserved);
+ ishader = util_hash_table_get(vswc->hash, vshader);
+
+ if (ishader == NULL) {
+ ishader = &vswc->shader.items[vswc->shader.used + vswc->shader.staged];
+ vmw_svga_winsys_shader_reference(&ishader->vshader, vshader);
+ ishader->referenced = FALSE;
+ /*
+ * Note that a failure here may just fall back to unhashed behavior
+ * and potentially cause unnecessary flushing, so ignore the
+ * return code.
+ */
+ (void) util_hash_table_set(vswc->hash, vshader, ishader);
+ ++vswc->shader.staged;
+ }
- if (!ishader->referenced) {
- ishader->referenced = TRUE;
- p_atomic_inc(&vshader->validated);
+ if (!ishader->referenced) {
+ ishader->referenced = TRUE;
+ p_atomic_inc(&vshader->validated);
+ }
}
- *shid = vshader->shid;
+ if (shid)
+ *shid = vshader->shid;
- if (mobid != NULL && vshader->buf)
+ if (vshader->buf)
vmw_swc_mob_relocation(swc, mobid, offset, vshader->buf,
0, SVGA_RELOC_READ);
}
static void
+vmw_swc_query_relocation(struct svga_winsys_context *swc,
+ SVGAMobId *id,
+ struct svga_winsys_gb_query *query)
+{
+ /*
+ * Queries are backed by one big MOB, so relocating a query means
+ * relocating its backing buffer (query->buf). No offset pointer is
+ * passed (NULL) and the buffer is marked as both read and written.
+ */
+ vmw_swc_mob_relocation(swc, id, NULL, query->buf, 0,
+ SVGA_RELOC_READ | SVGA_RELOC_WRITE);
+}
+
+static void
vmw_swc_commit(struct svga_winsys_context *swc)
{
struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
- assert(vswc->command.reserved);
assert(vswc->command.used + vswc->command.reserved <= vswc->command.size);
vswc->command.used += vswc->command.reserved;
vswc->command.reserved = 0;
return (key1 == key2) ? 0 : 1;
}
+
+/**
+ * vmw_svga_winsys_vgpu10_shader_create - The winsys shader_create callback
+ *
+ * @swc: The winsys context.
+ * @shaderId: Previously allocated shader id.
+ * @shaderType: The shader type.
+ * @bytecode: The shader bytecode
+ * @bytecodeLen: The length of the bytecode.
+ *
+ * Creates an svga_winsys_gb_shader structure and allocates a buffer for the
+ * shader code and copies the shader code into the buffer. Shader
+ * resource creation is not done.
+ *
+ * The structure is obtained from vmw_svga_winsys_shader_create() on the
+ * context's screen; its shader id is then set to @shaderId.
+ *
+ * Returns the new shader, or NULL if vmw_svga_winsys_shader_create() fails.
+ */
+static struct svga_winsys_gb_shader *
+vmw_svga_winsys_vgpu10_shader_create(struct svga_winsys_context *swc,
+ uint32 shaderId,
+ SVGA3dShaderType shaderType,
+ const uint32 *bytecode,
+ uint32 bytecodeLen)
+{
+ struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+ struct vmw_svga_winsys_shader *shader;
+ struct svga_winsys_gb_shader *gb_shader =
+ vmw_svga_winsys_shader_create(&vswc->vws->base, shaderType, bytecode,
+ bytecodeLen);
+ if (!gb_shader)
+ return NULL;
+
+ shader = vmw_svga_winsys_shader(gb_shader);
+ shader->shid = shaderId;
+
+ return gb_shader;
+}
+
+/**
+ * vmw_svga_winsys_vgpu10_shader_destroy - The winsys shader_destroy callback.
+ *
+ * @swc: The winsys context the shader was created through.
+ * @shader: A shader structure previously returned by shader_create.
+ *
+ * Forwards to vmw_svga_winsys_shader_destroy() on the context's screen,
+ * which releases the shader structure and the buffer holding its code.
+ */
+static void
+vmw_svga_winsys_vgpu10_shader_destroy(struct svga_winsys_context *swc,
+                                      struct svga_winsys_gb_shader *shader)
+{
+   struct vmw_winsys_screen *screen = vmw_svga_winsys_context(swc)->vws;
+
+   vmw_svga_winsys_shader_destroy(&screen->base, shader);
+}
+
+/**
+ * vmw_svga_winsys_resource_rebind - The winsys resource_rebind callback
+ *
+ * @swc: The winsys context.
+ * @surface: The surface to be referenced. May be NULL.
+ * @shader: The shader to be referenced. Only used when @surface is NULL.
+ * @flags: Relocation flags.
+ *
+ * This callback is needed because shader backing buffers are sub-allocated, and
+ * hence the kernel fencing is not sufficient. The buffers need to be put on
+ * the context's validation list and fenced after command submission to avoid
+ * reuse of busy shader buffers. In addition, surfaces need to be put on the
+ * validation list in order for the driver to regard them as referenced
+ * by the command stream.
+ *
+ * Returns PIPE_ERROR_OUT_OF_MEMORY if the reservation fails, PIPE_OK
+ * otherwise (including when both @surface and @shader are NULL).
+ */
+static enum pipe_error
+vmw_svga_winsys_resource_rebind(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *surface,
+ struct svga_winsys_gb_shader *shader,
+ unsigned flags)
+{
+ /**
+ * Need to reserve one validation item for either the surface or
+ * the shader. Only one of the two is relocated per call: the surface
+ * takes precedence when both are given.
+ */
+ if (!vmw_swc_reserve(swc, 0, 1))
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ if (surface)
+ vmw_swc_surface_relocation(swc, NULL, NULL, surface, flags);
+ else if (shader)
+ vmw_swc_shader_relocation(swc, NULL, NULL, NULL, shader, flags);
+
+ vmw_swc_commit(swc);
+
+ return PIPE_OK;
+}
+
struct svga_winsys_context *
vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
{
vswc->base.surface_relocation = vmw_swc_surface_relocation;
vswc->base.region_relocation = vmw_swc_region_relocation;
vswc->base.mob_relocation = vmw_swc_mob_relocation;
+ vswc->base.query_relocation = vmw_swc_query_relocation;
+ vswc->base.query_bind = vmw_swc_query_bind;
vswc->base.context_relocation = vmw_swc_context_relocation;
vswc->base.shader_relocation = vmw_swc_shader_relocation;
vswc->base.commit = vmw_swc_commit;
vswc->base.surface_map = vmw_svga_winsys_surface_map;
vswc->base.surface_unmap = vmw_svga_winsys_surface_unmap;
- vswc->base.cid = vmw_ioctl_context_create(vws);
+ vswc->base.shader_create = vmw_svga_winsys_vgpu10_shader_create;
+ vswc->base.shader_destroy = vmw_svga_winsys_vgpu10_shader_destroy;
+
+ vswc->base.resource_rebind = vmw_svga_winsys_resource_rebind;
+
+ if (sws->have_vgpu10)
+ vswc->base.cid = vmw_ioctl_extended_context_create(vws, sws->have_vgpu10);
+ else
+ vswc->base.cid = vmw_ioctl_context_create(vws);
+
+ if (vswc->base.cid == -1)
+ goto out_no_context;
+
vswc->base.have_gb_objects = sws->have_gb_objects;
vswc->vws = vws;
out_no_hash:
pb_validate_destroy(vswc->validate);
out_no_validate:
+ vmw_ioctl_context_destroy(vws, vswc->base.cid);
+out_no_context:
FREE(vswc);
return NULL;
}
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
/**********************************************************
- * Copyright 2009-2011 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
--- /dev/null
+/**********************************************************
+ * Copyright 2015 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipebuffer/pb_bufmgr.h"
+#include "util/u_memory.h"
+
+#include "vmw_screen.h"
+#include "vmw_buffer.h"
+#include "vmw_query.h"
+
+
+
+/**
+ * vmw_svga_winsys_query_create - Allocate a guest-backed query object.
+ *
+ * @sws: The winsys screen.
+ * @queryResultLen: Size handed to the buffer provider for the backing buffer.
+ *
+ * Allocates the query structure and wraps a 4096-aligned buffer created
+ * from the screen's GMR pool. Returns NULL if the structure cannot be
+ * allocated or no wrapped buffer is obtained.
+ */
+struct svga_winsys_gb_query *
+vmw_svga_winsys_query_create(struct svga_winsys_screen *sws,
+                             uint32 queryResultLen)
+{
+   struct pb_manager *gmr_pool = vmw_winsys_screen(sws)->pools.gmr;
+   struct pb_desc buf_desc = {0};
+   struct svga_winsys_gb_query *gb_query;
+
+   gb_query = CALLOC_STRUCT(svga_winsys_gb_query);
+   if (gb_query == NULL)
+      return NULL;
+
+   /* A single buffer holds the queries for this context. */
+   buf_desc.alignment = 4096;
+   gb_query->buf = vmw_svga_winsys_buffer_wrap(
+      gmr_pool->create_buffer(gmr_pool, queryResultLen, &buf_desc));
+   if (gb_query->buf != NULL)
+      return gb_query;
+
+   debug_printf("Failed to allocate memory for queries\n");
+   FREE(gb_query);
+   return NULL;
+}
+
+
+
+/**
+ * vmw_svga_winsys_query_destroy - Release a guest-backed query object.
+ *
+ * @sws: The winsys screen.
+ * @query: The query to release; its backing buffer is destroyed first.
+ */
+void
+vmw_svga_winsys_query_destroy(struct svga_winsys_screen *sws,
+                              struct svga_winsys_gb_query *query)
+{
+   struct svga_winsys_buffer *backing = query->buf;
+
+   vmw_svga_winsys_buffer_destroy(sws, backing);
+   FREE(query);
+}
+
+
+
+/**
+ * vmw_svga_winsys_query_init - Write an initial state into a query slot.
+ *
+ * @sws: The winsys screen.
+ * @query: The query whose backing buffer is written.
+ * @offset: Byte offset of the slot inside the mapped buffer.
+ * @queryState: The state value to store at that slot.
+ *
+ * Returns 0 on success, or -1 if the buffer cannot be mapped.
+ */
+int
+vmw_svga_winsys_query_init(struct svga_winsys_screen *sws,
+                           struct svga_winsys_gb_query *query,
+                           unsigned offset,
+                           SVGA3dQueryState queryState)
+{
+   char *base = (char *) vmw_svga_winsys_buffer_map(sws, query->buf,
+                                                    PIPE_TRANSFER_WRITE);
+
+   if (base == NULL) {
+      debug_printf("Failed to map query result memory for initialization\n");
+      return -1;
+   }
+
+   /* Store the state at the start of the requested query slot. */
+   *(SVGA3dQueryState *)(base + offset) = queryState;
+
+   vmw_svga_winsys_buffer_unmap(sws, query->buf);
+   return 0;
+}
+
+
+
+/**
+ * vmw_svga_winsys_query_get_result - Read back a query slot.
+ *
+ * @sws: The winsys screen.
+ * @query: The query whose backing buffer is read.
+ * @offset: Byte offset of the slot inside the mapped buffer.
+ * @queryState: If non-NULL, receives the slot's state, or
+ *              SVGA3D_QUERYSTATE_FAILED when the buffer cannot be mapped.
+ * @result: If non-NULL, receives @resultLen bytes taken from directly
+ *          after the state value.
+ * @resultLen: Number of bytes to copy into @result.
+ */
+void
+vmw_svga_winsys_query_get_result(struct svga_winsys_screen *sws,
+                                 struct svga_winsys_gb_query *query,
+                                 unsigned offset,
+                                 SVGA3dQueryState *queryState,
+                                 void *result, uint32 resultLen)
+{
+   SVGA3dQueryState *slot;
+   char *base = (char *) vmw_svga_winsys_buffer_map(sws, query->buf,
+                                                    PIPE_TRANSFER_READ);
+
+   if (base == NULL) {
+      debug_printf("Failed to lock query result memory\n");
+
+      if (queryState != NULL)
+         *queryState = SVGA3D_QUERYSTATE_FAILED;
+
+      return;
+   }
+
+   slot = (SVGA3dQueryState *)(base + offset);
+
+   if (queryState != NULL)
+      *queryState = *slot;
+
+   /* The result payload starts right after the state value. */
+   if (result != NULL)
+      memcpy(result, slot + 1, resultLen);
+
+   vmw_svga_winsys_buffer_unmap(sws, query->buf);
+}
+
+
+/**
+ * vmw_swc_query_bind - The winsys query_bind callback.
+ *
+ * Nothing needs to be done on Linux, so all arguments are ignored and
+ * PIPE_OK is always returned.
+ */
+enum pipe_error
+vmw_swc_query_bind(struct svga_winsys_context *swc,
+                   struct svga_winsys_gb_query *query,
+                   unsigned flags)
+{
+   (void) swc;
+   (void) query;
+   (void) flags;
+
+   /* no-op on Linux */
+   return PIPE_OK;
+}
+
--- /dev/null
+/**********************************************************
+ * Copyright 2015 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef VMW_DRM_QUERY_H
+#define VMW_DRM_QUERY_H
+
+#include "svga3d_reg.h"
+
+
+
+/**
+ * Guest-backed query.
+ *
+ * @buf is the winsys buffer that holds the query state and results; it is
+ * allocated by vmw_svga_winsys_query_create().
+ */
+struct svga_winsys_gb_query
+{
+ struct svga_winsys_buffer *buf;
+};
+
+
+/**
+ * Allocate a query object and its backing buffer, passing queryResultLen
+ * as the buffer size. Returns NULL on failure.
+ */
+struct svga_winsys_gb_query *
+vmw_svga_winsys_query_create(struct svga_winsys_screen *sws,
+ uint32 queryResultLen);
+
+/** Destroy the query's backing buffer and free the query object. */
+void
+vmw_svga_winsys_query_destroy(struct svga_winsys_screen *sws,
+ struct svga_winsys_gb_query *query);
+
+/**
+ * Store queryState at byte offset 'offset' of the query's backing buffer.
+ * Returns 0 on success, -1 if the buffer cannot be mapped.
+ */
+int
+vmw_svga_winsys_query_init(struct svga_winsys_screen *sws,
+ struct svga_winsys_gb_query *query,
+ unsigned offset,
+ SVGA3dQueryState queryState);
+
+/**
+ * Read the slot at byte offset 'offset' of the query's backing buffer.
+ * If queryState is non-NULL it receives the slot's state, or
+ * SVGA3D_QUERYSTATE_FAILED when the buffer cannot be mapped. If result is
+ * non-NULL, resultLen bytes following the state are copied into it.
+ */
+void
+vmw_svga_winsys_query_get_result(struct svga_winsys_screen *sws,
+ struct svga_winsys_gb_query *query,
+ unsigned offset,
+ SVGA3dQueryState *queryState,
+ void *result, uint32 resultLen);
+
+/** Query bind callback; a no-op on Linux that always returns PIPE_OK. */
+enum pipe_error
+vmw_swc_query_bind(struct svga_winsys_context *swc,
+ struct svga_winsys_gb_query *query,
+ unsigned flags);
+
+#endif /* VMW_DRM_QUERY_H */
+
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
*/
struct vmw_winsys_screen *
-vmw_winsys_create( int fd, boolean use_old_scanout_flag )
+vmw_winsys_create( int fd )
{
struct vmw_winsys_screen *vws;
struct stat stat_buf;
vws->device = stat_buf.st_rdev;
vws->open_count = 1;
vws->ioctl.drm_fd = dup(fd);
- vws->use_old_scanout_flag = use_old_scanout_flag;
vws->base.have_gb_dma = TRUE;
+ vws->base.need_to_rebind_resources = FALSE;
if (!vmw_ioctl_init(vws))
goto out_no_ioctl;
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
{
struct svga_winsys_screen base;
- boolean use_old_scanout_flag;
-
struct {
int drm_fd;
uint32_t hwversion;
uint64_t max_surface_memory;
uint64_t max_texture_size;
boolean have_drm_2_6;
+ boolean have_drm_2_9;
+ uint32_t drm_execbuf_version;
} ioctl;
struct {
uint32
vmw_ioctl_context_create(struct vmw_winsys_screen *vws);
+uint32
+vmw_ioctl_extended_context_create(struct vmw_winsys_screen *vws,
+ boolean vgpu10);
+
void
vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws,
uint32 cid);
unsigned usage,
SVGA3dSize size,
uint32 numFaces,
- uint32 numMipLevels);
+ uint32 numMipLevels,
+ unsigned sampleCount);
uint32
vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws,
SVGA3dSurfaceFlags flags,
SVGA3dSize size,
uint32 numFaces,
uint32 numMipLevels,
+ unsigned sampleCount,
uint32 buffer_handle,
struct vmw_region **p_region);
void vmw_ioctl_cleanup(struct vmw_winsys_screen *vws);
void vmw_pools_cleanup(struct vmw_winsys_screen *vws);
-struct vmw_winsys_screen *vmw_winsys_create(int fd, boolean use_old_scanout_flag);
+struct vmw_winsys_screen *vmw_winsys_create(int fd);
void vmw_winsys_destroy(struct vmw_winsys_screen *sws);
void vmw_winsys_screen_set_throttling(struct pipe_screen *screen,
uint32_t throttle_us);
uint32_t emitted,
boolean has_emitted);
+struct svga_winsys_gb_shader *
+vmw_svga_winsys_shader_create(struct svga_winsys_screen *sws,
+ SVGA3dShaderType type,
+ const uint32 *bytecode,
+ uint32 bytecodeLen);
+void
+vmw_svga_winsys_shader_destroy(struct svga_winsys_screen *sws,
+ struct svga_winsys_gb_shader *shader);
+
#endif /* VMW_SCREEN_H_ */
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
&drm_compat, "vmwgfx drm driver"))
return NULL;
- vws = vmw_winsys_create( fd, FALSE );
+ vws = vmw_winsys_create(fd);
if (!vws)
goto out_no_vws;
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
uint32_t size;
};
-/* XXX: This isn't a real hardware flag, but just a hack for kernel to
- * know about primary surfaces. In newer versions of the kernel
- * interface the driver uses a special field.
- */
-#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
-
-
uint32_t
vmw_region_size(struct vmw_region *region)
{
return -1;
vmw_printf("Context id is %d\n", c_arg.cid);
-
return c_arg.cid;
}
+/**
+ * vmw_ioctl_extended_context_create - Create a host context of a given type.
+ *
+ * @vws: The winsys screen whose DRM file descriptor is used.
+ * @vgpu10: TRUE to request a vgpu10 context, FALSE for a legacy context.
+ *
+ * Issues the DRM_VMW_CREATE_EXTENDED_CONTEXT ioctl. Returns the context id
+ * reported in the reply, or -1 (as uint32) if the ioctl fails.
+ */
+uint32
+vmw_ioctl_extended_context_create(struct vmw_winsys_screen *vws,
+                                  boolean vgpu10)
+{
+   union drm_vmw_extended_context_arg c_arg;
+   int ret;
+
+   VMW_FUNC;
+   memset(&c_arg, 0, sizeof(c_arg));
+   c_arg.req = (vgpu10 ? drm_vmw_context_vgpu10 : drm_vmw_context_legacy);
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd,
+                             DRM_VMW_CREATE_EXTENDED_CONTEXT,
+                             &c_arg, sizeof(c_arg));
+
+   if (ret)
+      return -1;
+
+   /*
+    * The reply is the union's "rep" member; the union itself has no "cid"
+    * field, so the id must be read as c_arg.rep.cid here as well.
+    */
+   vmw_printf("Context id is %d\n", c_arg.rep.cid);
+   return c_arg.rep.cid;
+}
+
void
vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws, uint32 cid)
{
SVGA3dSurfaceFormat format,
unsigned usage,
SVGA3dSize size,
- uint32_t numFaces, uint32_t numMipLevels)
+ uint32_t numFaces, uint32_t numMipLevels,
+ unsigned sampleCount)
{
union drm_vmw_surface_create_arg s_arg;
struct drm_vmw_surface_create_req *req = &s_arg.req;
vmw_printf("%s flags %d format %d\n", __FUNCTION__, flags, format);
memset(&s_arg, 0, sizeof(s_arg));
- if (vws->use_old_scanout_flag &&
- (flags & SVGA3D_SURFACE_HINT_SCANOUT)) {
- req->flags = (uint32_t) flags;
- req->scanout = false;
- } else if (flags & SVGA3D_SURFACE_HINT_SCANOUT) {
- req->flags = (uint32_t) (flags & ~SVGA3D_SURFACE_HINT_SCANOUT);
- req->scanout = true;
- } else {
- req->flags = (uint32_t) flags;
- req->scanout = false;
- }
+ req->flags = (uint32_t) flags;
+ req->scanout = !!(usage & SVGA_SURFACE_USAGE_SCANOUT);
req->format = (uint32_t) format;
req->shareable = !!(usage & SVGA_SURFACE_USAGE_SHARED);
SVGA3dSize size,
uint32_t numFaces,
uint32_t numMipLevels,
+ unsigned sampleCount,
uint32_t buffer_handle,
struct vmw_region **p_region)
{
}
memset(&s_arg, 0, sizeof(s_arg));
- if (flags & SVGA3D_SURFACE_HINT_SCANOUT) {
- req->svga3d_flags = (uint32_t) (flags & ~SVGA3D_SURFACE_HINT_SCANOUT);
- req->drm_surface_flags = drm_vmw_surface_flag_scanout;
- } else {
- req->svga3d_flags = (uint32_t) flags;
- }
+ req->svga3d_flags = (uint32_t) flags;
+ if (usage & SVGA_SURFACE_USAGE_SCANOUT)
+ req->drm_surface_flags |= drm_vmw_surface_flag_scanout;
req->format = (uint32_t) format;
if (usage & SVGA_SURFACE_USAGE_SHARED)
req->drm_surface_flags |= drm_vmw_surface_flag_shareable;
req->drm_surface_flags |= drm_vmw_surface_flag_create_buffer;
-
- assert(numFaces * numMipLevels < DRM_VMW_MAX_SURFACE_FACES*
- DRM_VMW_MAX_MIP_LEVELS);
req->base_size.width = size.width;
req->base_size.height = size.height;
req->base_size.depth = size.depth;
req->mip_levels = numMipLevels;
req->multisample_count = 0;
req->autogen_filter = SVGA3D_TEX_FILTER_NONE;
+
+ if (vws->base.have_vgpu10) {
+ req->array_size = numFaces;
+ req->multisample_count = sampleCount;
+ } else {
+ assert(numFaces * numMipLevels < DRM_VMW_MAX_SURFACE_FACES*
+ DRM_VMW_MAX_MIP_LEVELS);
+ req->array_size = 0;
+ }
+
if (buffer_handle)
req->buffer_handle = buffer_handle;
else
struct drm_vmw_execbuf_arg arg;
struct drm_vmw_fence_rep rep;
int ret;
+ int argsize;
#ifdef DEBUG
{
arg.commands = (unsigned long)commands;
arg.command_size = size;
arg.throttle_us = throttle_us;
- arg.version = DRM_VMW_EXECBUF_VERSION;
-
+ arg.version = vws->ioctl.drm_execbuf_version;
+ arg.context_handle = (vws->base.have_vgpu10 ? cid : SVGA3D_INVALID_ID);
+
+ /* In DRM_VMW_EXECBUF_VERSION 1, the drm_vmw_execbuf_arg structure ends with
+ * the flags field. The structure size sent to drmCommandWrite must match
+ * the drm_execbuf_version. Otherwise, an invalid value will be returned.
+ */
+ argsize = vws->ioctl.drm_execbuf_version > 1 ? sizeof(arg) :
+ offsetof(struct drm_vmw_execbuf_arg, context_handle);
do {
- ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_EXECBUF, &arg, sizeof(arg));
+ ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_EXECBUF, &arg, argsize);
} while(ret == -ERESTART);
if (ret) {
vmw_error("%s error %s.\n", __FUNCTION__, strerror(-ret));
+ abort();
}
if (rep.error) {
int ret;
uint32_t *cap_buffer;
drmVersionPtr version;
+ boolean drm_gb_capable;
boolean have_drm_2_5;
VMW_FUNC;
(version->version_major == 2 && version->version_minor > 4);
vws->ioctl.have_drm_2_6 = version->version_major > 2 ||
(version->version_major == 2 && version->version_minor > 5);
+ vws->ioctl.have_drm_2_9 = version->version_major > 2 ||
+ (version->version_major == 2 && version->version_minor > 8);
+
+ vws->ioctl.drm_execbuf_version = vws->ioctl.have_drm_2_9 ? 2 : 1;
+
+ drm_gb_capable = have_drm_2_5;
memset(&gp_arg, 0, sizeof(gp_arg));
gp_arg.param = DRM_VMW_PARAM_3D;
vws->base.have_gb_objects =
!!(gp_arg.value & (uint64_t) SVGA_CAP_GBOBJECTS);
- if (vws->base.have_gb_objects && !have_drm_2_5)
+ if (vws->base.have_gb_objects && !drm_gb_capable)
goto out_no_3d;
+ vws->base.have_vgpu10 = FALSE;
if (vws->base.have_gb_objects) {
memset(&gp_arg, 0, sizeof(gp_arg));
gp_arg.param = DRM_VMW_PARAM_3D_CAPS_SIZE;
/* Never early flush surfaces, mobs do accounting. */
vws->ioctl.max_surface_memory = -1;
+
+ if (vws->ioctl.have_drm_2_9) {
+
+ memset(&gp_arg, 0, sizeof(gp_arg));
+ gp_arg.param = DRM_VMW_PARAM_VGPU10;
+ ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+ &gp_arg, sizeof(gp_arg));
+ if (ret == 0 && gp_arg.value != 0) {
+ const char *vgpu10_val;
+
+ debug_printf("Have VGPU10 interface and hardware.\n");
+ vws->base.have_vgpu10 = TRUE;
+ vgpu10_val = getenv("SVGA_VGPU10");
+ if (vgpu10_val && strcmp(vgpu10_val, "0") == 0) {
+ debug_printf("Disabling VGPU10 interface.\n");
+ vws->base.have_vgpu10 = FALSE;
+ } else {
+ debug_printf("Enabling VGPU10 interface.\n");
+ }
+ }
+ }
} else {
vws->ioctl.num_cap_3d = SVGA3D_DEVCAP_MAX;
size = SVGA_FIFO_3D_CAPS_SIZE * sizeof(uint32_t);
}
+ debug_printf("VGPU10 interface is %s.\n",
+ vws->base.have_vgpu10 ? "on" : "off");
+
cap_buffer = calloc(1, size);
if (!cap_buffer) {
debug_printf("Failed alloc fifo 3D caps buffer.\n");
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
#include "vmw_buffer.h"
#include "vmw_fence.h"
#include "vmw_shader.h"
+#include "vmw_query.h"
#include "svga3d_surfacedefs.h"
/**
SVGA3dSurfaceFormat format,
unsigned usage,
SVGA3dSize size,
- uint32 numFaces,
- uint32 numMipLevels)
+ uint32 numLayers,
+ uint32 numMipLevels,
+ unsigned sampleCount)
{
struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
struct vmw_svga_winsys_surface *surface;
struct pb_manager *provider;
uint32_t buffer_size;
-
memset(&desc, 0, sizeof(desc));
surface = CALLOC_STRUCT(vmw_svga_winsys_surface);
if(!surface)
* Used for the backing buffer GB surfaces, and to approximate
* when to flush on non-GB hosts.
*/
- buffer_size = svga3dsurface_get_serialized_size(format, size, numMipLevels, (numFaces == 6));
+ buffer_size = svga3dsurface_get_serialized_size(format, size, numMipLevels,
+ numLayers);
+ if (flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT)
+ buffer_size += sizeof(SVGA3dDXSOState);
+
if (buffer_size > vws->ioctl.max_texture_size) {
goto no_sid;
}
}
surface->sid = vmw_ioctl_gb_surface_create(vws, flags, format, usage,
- size, numFaces,
- numMipLevels, ptr.gmrId,
+ size, numLayers,
+ numMipLevels, sampleCount,
+ ptr.gmrId,
surface->buf ? NULL :
&desc.region);
vmw_svga_winsys_buffer_destroy(sws, surface->buf);
surface->buf = NULL;
surface->sid = vmw_ioctl_gb_surface_create(vws, flags, format, usage,
- size, numFaces,
- numMipLevels, 0,
- &desc.region);
+ size, numLayers,
+ numMipLevels, sampleCount,
+ 0, &desc.region);
if (surface->sid == SVGA3D_INVALID_ID)
goto no_sid;
}
}
} else {
surface->sid = vmw_ioctl_surface_create(vws, flags, format, usage,
- size, numFaces, numMipLevels);
+ size, numLayers, numMipLevels,
+ sampleCount);
if(surface->sid == SVGA3D_INVALID_ID)
goto no_sid;
vmw_svga_winsys_surface_can_create(struct svga_winsys_screen *sws,
SVGA3dSurfaceFormat format,
SVGA3dSize size,
- uint32 numFaces,
+ uint32 numLayers,
uint32 numMipLevels)
{
struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
buffer_size = svga3dsurface_get_serialized_size(format, size,
numMipLevels,
- (numFaces == 6));
+ numLayers);
if (buffer_size > vws->ioctl.max_texture_size) {
return FALSE;
}
{
struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
- if (index > vws->ioctl.num_cap_3d || !vws->ioctl.cap_3d[index].has_cap)
+ if (index > vws->ioctl.num_cap_3d ||
+ index >= SVGA3D_DEVCAP_MAX ||
+ !vws->ioctl.cap_3d[index].has_cap)
return FALSE;
*result = vws->ioctl.cap_3d[index].result;
return TRUE;
}
-static struct svga_winsys_gb_shader *
+struct svga_winsys_gb_shader *
vmw_svga_winsys_shader_create(struct svga_winsys_screen *sws,
SVGA3dShaderType type,
const uint32 *bytecode,
memcpy(code, bytecode, bytecodeLen);
vmw_svga_winsys_buffer_unmap(sws, shader->buf);
- shader->shid = vmw_ioctl_shader_create(vws, type, bytecodeLen);
- if(shader->shid == SVGA3D_INVALID_ID)
- goto out_no_shid;
+ if (!sws->have_vgpu10) {
+ shader->shid = vmw_ioctl_shader_create(vws, type, bytecodeLen);
+ if (shader->shid == SVGA3D_INVALID_ID)
+ goto out_no_shid;
+ }
return svga_winsys_shader(shader);
return NULL;
}
-static void
+void
vmw_svga_winsys_shader_destroy(struct svga_winsys_screen *sws,
struct svga_winsys_gb_shader *shader)
{
vws->base.shader_destroy = vmw_svga_winsys_shader_destroy;
vws->base.fence_finish = vmw_svga_winsys_fence_finish;
+ vws->base.query_create = vmw_svga_winsys_query_create;
+ vws->base.query_init = vmw_svga_winsys_query_init;
+ vws->base.query_destroy = vmw_svga_winsys_query_destroy;
+ vws->base.query_get_result = vmw_svga_winsys_query_get_result;
+
return TRUE;
}
/**********************************************************
- * Copyright 2009-2012 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
if (pipe_reference(dst_ref, src_ref)) {
struct svga_winsys_screen *sws = &dst->screen->base;
- vmw_ioctl_shader_destroy(dst->screen, dst->shid);
+ if (!sws->have_vgpu10)
+ vmw_ioctl_shader_destroy(dst->screen, dst->shid);
#ifdef DEBUG
/* to detect dangling pointers */
assert(p_atomic_read(&dst->validated) == 0);
/**********************************************************
- * Copyright 2009-2012 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
/**********************************************************
- * Copyright 2009 VMware, Inc. All rights reserved.
+ * Copyright 2009-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
/**************************************************************************
*
- * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA
+ * Copyright © 2009-2015 VMware, Inc., Palo Alto, CA., USA
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
#define DRM_VMW_GB_SURFACE_CREATE 23
#define DRM_VMW_GB_SURFACE_REF 24
#define DRM_VMW_SYNCCPU 25
+#define DRM_VMW_CREATE_EXTENDED_CONTEXT 26
/*************************************************************************/
/**
#define DRM_VMW_PARAM_3D_CAPS_SIZE 8
#define DRM_VMW_PARAM_MAX_MOB_MEMORY 9
#define DRM_VMW_PARAM_MAX_MOB_SIZE 10
+#define DRM_VMW_PARAM_SCREEN_TARGET 11
+#define DRM_VMW_PARAM_VGPU10 12
/**
* enum drm_vmw_handle_type - handle type for ref ioctls
* Argument to the DRM_VMW_EXECBUF Ioctl.
*/
-#define DRM_VMW_EXECBUF_VERSION 1
+#define DRM_VMW_EXECBUF_VERSION 2
struct drm_vmw_execbuf_arg {
uint64_t commands;
uint64_t fence_rep;
uint32_t version;
uint32_t flags;
+ uint32_t context_handle;
+ uint32_t pad64;
};
/**
enum drm_vmw_shader_type {
drm_vmw_shader_type_vs = 0,
drm_vmw_shader_type_ps,
- drm_vmw_shader_type_gs
};
* @buffer_handle Buffer handle of backup buffer. SVGA3D_INVALID_ID
* if none.
* @base_size Size of the base mip level for all faces.
+ * @array_size Must be zero for non-vgpu10 hardware, and if non-zero
+ * svga3d_flags must have proper bind flags setup.
*
* Input argument to the DRM_VMW_GB_SURFACE_CREATE Ioctl.
* Part of output argument for the DRM_VMW_GB_SURFACE_REF Ioctl.
uint32_t multisample_count;
uint32_t autogen_filter;
uint32_t buffer_handle;
- uint32_t pad64;
+ uint32_t array_size;
struct drm_vmw_size base_size;
};
uint32_t pad64;
};
+/*************************************************************************/
+/**
+ * DRM_VMW_CREATE_EXTENDED_CONTEXT - Create a host context.
+ *
+ * Allocates a device unique context id, and queues a create context command
+ * for the host. Does not wait for host completion.
+ *
+ * enum drm_vmw_extended_context selects the type of context to create:
+ * drm_vmw_context_legacy or drm_vmw_context_vgpu10.
+ */
+enum drm_vmw_extended_context {
+ drm_vmw_context_legacy,
+ drm_vmw_context_vgpu10
+};
+
+/**
+ * union drm_vmw_extended_context_arg
+ *
+ * @req: Context type, filled in by the caller before the ioctl.
+ * @rep: Context identifier, read back by the caller as rep.cid.
+ *
+ * Argument to the DRM_VMW_CREATE_EXTENDED_CONTEXT Ioctl.
+ */
+union drm_vmw_extended_context_arg {
+ enum drm_vmw_extended_context req;
+ struct drm_vmw_context_arg rep;
+};
#endif
{
struct gbm_import_fd_data *fd_data = buffer;
int stride = fd_data->stride, offset = 0;
+ int dri_format;
+
+ switch (fd_data->format) {
+ case GBM_BO_FORMAT_XRGB8888:
+ dri_format = GBM_FORMAT_XRGB8888;
+ break;
+ case GBM_BO_FORMAT_ARGB8888:
+ dri_format = GBM_FORMAT_ARGB8888;
+ break;
+ default:
+ dri_format = fd_data->format;
+ }
image = dri->image->createImageFromFds(dri->screen,
fd_data->width,
fd_data->height,
- fd_data->format,
+ dri_format,
&fd_data->fd, 1,
&stride, &offset,
NULL);
+ if (image == NULL) {
+ errno = EINVAL;
+ return NULL;
+ }
gbm_format = fd_data->format;
break;
}
intermediates := $(call local-generated-sources-dir)
-sources := \
- glsl_lexer.cpp \
- glsl_parser.cpp \
- glcpp/glcpp-lex.c \
- glcpp/glcpp-parse.c \
- nir/nir_builder_opcodes.h \
- nir/nir_constant_expressions.c \
- nir/nir_opcodes.c \
- nir/nir_opcodes.h \
- nir/nir_opt_algebraic.c
-
-LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+LOCAL_SRC_FILES := $(LOCAL_SRC_FILES)
LOCAL_C_INCLUDES += \
$(intermediates)/glcpp \
LOCAL_EXPORT_C_INCLUDE_DIRS += \
$(intermediates)/nir
-sources := $(addprefix $(intermediates)/, $(sources))
-LOCAL_GENERATED_SOURCES += $(sources)
+LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
+ $(LIBGLCPP_GENERATED_FILES) \
+ $(NIR_GENERATED_FILES) \
+ $(LIBGLSL_GENERATED_CXX_FILES))
define local-l-or-ll-to-c-or-cpp
@mkdir -p $(dir $@)
nir_constant_expressions_gen := $(LOCAL_PATH)/nir/nir_constant_expressions.py
nir_constant_expressions_deps := \
$(LOCAL_PATH)/nir/nir_opcodes.py \
- $(LOCAL_PATH)/nir/nir_constant_expressions.py \
- $(LOCAL_PATH)/nir/nir_constant_expressions.h
+ $(LOCAL_PATH)/nir/nir_constant_expressions.py
$(intermediates)/nir/nir_constant_expressions.c: $(nir_constant_expressions_deps)
@mkdir -p $(dir $@)
nir/nir_opcodes_c.py \
nir/nir_opcodes_h.py \
nir/nir_opt_algebraic.py \
+ nir/tests \
SConscript
include Makefile.sources
TESTS = glcpp/tests/glcpp-test \
glcpp/tests/glcpp-test-cr-lf \
+ nir/tests/control_flow_tests \
tests/blob-test \
tests/general-ir-test \
tests/optimization-test \
check_PROGRAMS = \
glcpp/glcpp \
glsl_test \
+ nir/tests/control_flow_tests \
tests/blob-test \
tests/general-ir-test \
tests/sampler-types-test \
glsl_parser.cpp \
glsl_parser.h \
$(LIBGLSL_FILES) \
- $(NIR_FILES)
+ $(NIR_FILES) \
+ $(NIR_GENERATED_FILES)
+
libnir_la_SOURCES = \
glsl_types.cpp \
builtin_types.cpp \
glsl_symbol_table.cpp \
- $(NIR_FILES)
+ $(NIR_FILES) \
+ $(NIR_GENERATED_FILES)
glsl_compiler_SOURCES = \
$(GLSL_COMPILER_CXX_FILES)
am__v_YACC_0 = @echo " YACC " $@;
am__v_YACC_1 =
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
+
glsl_parser.cpp glsl_parser.h: glsl_parser.yy
- $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $<
+ $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy
glsl_lexer.cpp: glsl_lexer.ll
- $(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+ $(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll
glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
- $(AM_V_at)$(MKDIR_P) glcpp
- $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $<
+ $(MKDIR_GEN)
+ $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y
glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
- $(AM_V_at)$(MKDIR_P) glcpp
- $(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+ $(MKDIR_GEN)
+ $(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l
# Only the parsers (specifically the header files generated at the same time)
# need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
glsl_lexer.cpp \
glcpp/glcpp-parse.c \
glcpp/glcpp-lex.c \
- nir/nir_builder_opcodes.h \
- nir/nir_constant_expressions.c \
- nir/nir_opcodes.c \
- nir/nir_opcodes.h \
- nir/nir_opt_algebraic.c
+ $(NIR_GENERATED_FILES)
CLEANFILES = \
glcpp/glcpp-parse.h \
glsl_parser.h \
$(RM) glcpp/tests/*.out
$(RM) glcpp/tests/subtest*/*.out
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
- $(AM_V_at)$(MKDIR_P) nir
- $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
-nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
- $(AM_V_at)$(MKDIR_P) nir
- $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@
+nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@
nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
- $(AM_V_at)$(MKDIR_P) nir
- $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@
nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
- $(AM_V_at)$(MKDIR_P) nir
- $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@
nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
- $(AM_V_at)$(MKDIR_P) nir
- $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@
+
+nir_tests_control_flow_tests_SOURCES = \
+ nir/tests/control_flow_tests.cpp
+nir_tests_control_flow_tests_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+nir_tests_control_flow_tests_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(top_builddir)/src/glsl/libnir.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(PTHREAD_LIBS)
nir/nir_control_flow_private.h \
nir/nir_dominance.c \
nir/nir_from_ssa.c \
+ nir/nir_gs_count_vertices.c \
nir/nir_intrinsics.c \
nir/nir_intrinsics.h \
nir/nir_live_variables.c \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics.c \
+ nir/nir_lower_clip.c \
nir/nir_lower_global_vars_to_local.c \
+ nir/nir_lower_gs_intrinsics.c \
nir/nir_lower_load_const_to_scalar.c \
nir/nir_lower_locals_to_regs.c \
nir/nir_lower_idiv.c \
nir/nir_lower_io.c \
nir/nir_lower_outputs_to_temporaries.c \
nir/nir_lower_phis_to_scalar.c \
- nir/nir_lower_samplers.cpp \
+ nir/nir_lower_samplers.c \
nir/nir_lower_system_values.c \
- nir/nir_lower_tex_projector.c \
+ nir/nir_lower_tex.c \
nir/nir_lower_to_source_mods.c \
+ nir/nir_lower_two_sided_color.c \
nir/nir_lower_vars_to_ssa.c \
nir/nir_lower_var_copies.c \
nir/nir_lower_vec_to_movs.c \
nir/nir_metadata.c \
+ nir/nir_move_vec_src_uses_to_dest.c \
nir/nir_normalize_cubemap_coords.c \
nir/nir_opt_constant_folding.c \
nir/nir_opt_copy_propagate.c \
nir/nir_opt_cse.c \
nir/nir_opt_dce.c \
+ nir/nir_opt_dead_cf.c \
nir/nir_opt_gcm.c \
nir/nir_opt_global_to_local.c \
nir/nir_opt_peephole_ffma.c \
nir/nir_worklist.h \
nir/nir_types.cpp \
nir/spirv_to_nir.c \
- nir/spirv_glsl450_to_nir.c \
- $(NIR_GENERATED_FILES)
+ nir/spirv_glsl450_to_nir.c
# libglsl
/** \name Layout qualifiers for GL_ARB_uniform_buffer_object */
/** \{ */
unsigned std140:1;
+ unsigned std430:1;
unsigned shared:1;
unsigned packed:1;
unsigned column_major:1;
*/
glsl_base_type image_base_type;
+ /** Flag to know if this represents a default value for a qualifier */
+ bool is_default_qualifier;
+
/**
* Return true if and only if an interpolation qualifier is present.
*/
check_builtin_array_max_size(const char *name, unsigned size,
YYLTYPE loc, struct _mesa_glsl_parse_state *state);
+extern void _mesa_ast_process_interface_block(YYLTYPE *locp,
+ _mesa_glsl_parse_state *state,
+ ast_interface_block *const block,
+ const struct ast_type_qualifier q);
+
#endif /* AST_H */
* by the linker.
*/
}
- else {
+ else if (array->variable_referenced()->data.mode !=
+ ir_var_shader_storage) {
_mesa_glsl_error(&loc, state, "unsized array index must be constant");
}
} else if (array->type->fields.array->is_interface()
return true;
}
+/**
+ * Check that the variable used as first argument of an SSBO atomic built-in
+ * is a buffer variable.
+ *
+ * \param loc    source location passed on to the error report
+ * \param state  parse state the error is reported against
+ * \param var    variable referenced by the first actual parameter (may be NULL)
+ *
+ * \return true when \c var is non-NULL and is in a shader storage block;
+ *         otherwise an error is emitted and false is returned.
+ */
+static bool
+verify_first_atomic_ssbo_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state,
+                                   ir_variable *var)
+{
+   const bool is_buffer_variable =
+      var != NULL && var->is_in_shader_storage_block();
+
+   if (is_buffer_variable)
+      return true;
+
+   _mesa_glsl_error(loc, state, "First argument to atomic function "
+                    "must be a buffer variable");
+   return false;
+}
+
+/**
+ * Tell whether \c func_name names one of the atomic built-ins checked here:
+ * atomicAdd, atomicMin, atomicMax, atomicAnd, atomicOr, atomicXor,
+ * atomicExchange or atomicCompSwap.  The comparison is an exact strcmp()
+ * match.
+ */
+static bool
+is_atomic_ssbo_function(const char *func_name)
+{
+   static const char *const atomic_names[] = {
+      "atomicAdd",
+      "atomicMin",
+      "atomicMax",
+      "atomicAnd",
+      "atomicOr",
+      "atomicXor",
+      "atomicExchange",
+      "atomicCompSwap",
+   };
+   const unsigned num_names = sizeof(atomic_names) / sizeof(atomic_names[0]);
+
+   for (unsigned i = 0; i < num_names; i++) {
+      if (strcmp(func_name, atomic_names[i]) == 0)
+         return true;
+   }
+
+   return false;
+}
+
/**
* Verify that 'out' and 'inout' actual parameters are lvalues. Also, verify
* that 'const_in' formal parameters (an extension in our IR) correspond to
actual_ir_node = actual_ir_node->next;
actual_ast_node = actual_ast_node->next;
}
+
+ /* The first parameter of atomic functions must be a buffer variable */
+ const char *func_name = sig->function_name();
+ bool is_atomic_ssbo = is_atomic_ssbo_function(func_name);
+ if (is_atomic_ssbo) {
+ const ir_rvalue *const actual = (ir_rvalue *) actual_ir_parameters.head;
+
+ const ast_expression *const actual_ast =
+ exec_node_data(ast_expression, actual_ast_parameters.head, link);
+ YYLTYPE loc = actual_ast->get_location();
+
+ if (!verify_first_atomic_ssbo_parameter(&loc, state,
+ actual->variable_referenced())) {
+ return false;
+ }
+ }
+
return true;
}
if (op->type->is_array()) {
if (op->type->is_unsized_array()) {
- _mesa_glsl_error(&loc, state, "length called on unsized array");
- goto fail;
+ if (!state->has_shader_storage_buffer_objects()) {
+ _mesa_glsl_error(&loc, state, "length called on unsized array"
+ " only available with "
+ "ARB_shader_storage_buffer_object");
+ }
+ /* Calculate length of an unsized array in run-time */
+ result = new(ctx) ir_expression(ir_unop_ssbo_unsized_array_length, op);
+ } else {
+ result = new(ctx) ir_constant(op->type->array_size());
}
-
- result = new(ctx) ir_constant(op->type->array_size());
} else if (op->type->is_vector()) {
if (state->ARB_shading_language_420pack_enable) {
/* .length() returns int. */
remove_per_vertex_blocks(exec_list *instructions,
_mesa_glsl_parse_state *state, ir_variable_mode mode);
+/**
+ * Hierarchical visitor that records the first write-only buffer variable
+ * that is dereferenced outside of an assignee position, if there is one.
+ */
+class read_from_write_only_variable_visitor : public ir_hierarchical_visitor
+{
+public:
+   read_from_write_only_variable_visitor()
+      : found(NULL)
+   {
+   }
+
+   virtual ir_visitor_status visit(ir_dereference_variable *ir)
+   {
+      /* Dereferences visited while in_assignee is set are not treated as
+       * reads by this check.
+       */
+      if (this->in_assignee)
+         return visit_continue;
+
+      ir_variable *const referenced = ir->variable_referenced();
+
+      /* image_write_only can be set on both images and buffer variables.
+       * For images there is a distinction between reading the variable
+       * itself (write_only) and reading the memory it points to
+       * (image_write_only); buffer variables have no such distinction,
+       * which is why only buffer variables are considered here.
+       */
+      const bool is_buffer_variable =
+         referenced != NULL &&
+         referenced->data.mode == ir_var_shader_storage;
+
+      if (is_buffer_variable && referenced->data.image_write_only) {
+         found = referenced;
+         return visit_stop;
+      }
+
+      return visit_continue;
+   }
+
+   /** Variable recorded by visit(), or NULL when none was found. */
+   ir_variable *get_variable()
+   {
+      return found;
+   }
+
+private:
+   ir_variable *found;
+};
void
_mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
*/
remove_per_vertex_blocks(instructions, state, ir_var_shader_in);
remove_per_vertex_blocks(instructions, state, ir_var_shader_out);
+
+ /* Check that we don't have reads from write-only variables */
+ read_from_write_only_variable_visitor v;
+ v.run(instructions);
+ ir_variable *error_var = v.get_variable();
+ if (error_var) {
+ /* It would be nice to have proper location information, but for that
+ * we would need to check this as we process each kind of AST node
+ */
+ YYLTYPE loc;
+ memset(&loc, 0, sizeof(loc));
+ _mesa_glsl_error(&loc, state, "Read from write-only variable `%s'",
+ error_var->name);
+ }
}
"assignment to %s",
non_lvalue_description);
error_emitted = true;
- } else if (lhs_var != NULL && lhs_var->data.read_only) {
+ } else if (lhs_var != NULL && (lhs_var->data.read_only ||
+ (lhs_var->data.mode == ir_var_shader_storage &&
+ lhs_var->data.image_read_only))) {
+ /* We can have image_read_only set on both images and buffer variables,
+ * but in the former there is a distinction between assignments to
+ * the variable itself (read_only) and to the memory they point to
+ * (image_read_only), while in the case of buffer variables there is
+ * no such distinction, that is why this check here is limited to
+ * buffer variables alone.
+ */
_mesa_glsl_error(&lhs_loc, state,
"assignment to read-only variable '%s'",
lhs_var->name);
}
const struct gl_context *const ctx = state->ctx;
- unsigned elements = type->is_array() ? type->length : 1;
+ unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1;
unsigned max_index = qual->binding + elements - 1;
const glsl_type *base_type = type->without_array();
var->data.depth_layout = ir_depth_layout_none;
if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
qual->flags.q.packed ||
qual->flags.q.shared) {
_mesa_glsl_error(loc, state,
- "uniform block layout qualifiers std140, packed, and "
- "shared can only be applied to uniform blocks, not "
+ "uniform and shader storage block layout qualifiers "
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform or shader storage blocks, not "
"members");
}
if (state->es_shader && state->language_version >= 300) {
/* Local shader has no exact candidates; check the built-ins. */
_mesa_glsl_initialize_builtin_functions();
- if (_mesa_glsl_find_builtin_function_by_name(state, name)) {
+ if (_mesa_glsl_find_builtin_function_by_name(name)) {
YYLTYPE loc = this->get_location();
_mesa_glsl_error(& loc, state,
"A shader cannot redefine or overload built-in "
bool is_interface,
enum glsl_matrix_layout matrix_layout,
bool allow_reserved_names,
- ir_variable_mode var_mode)
+ ir_variable_mode var_mode,
+ ast_type_qualifier *layout)
{
unsigned decl_count = 0;
+ /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+ * that we don't have incompatible qualifiers
+ */
+ if (layout && layout->flags.q.read_only && layout->flags.q.write_only) {
+ _mesa_glsl_error(&loc, state,
+ "Interface block sets both readonly and writeonly");
+ }
+
/* Make an initial pass over the list of fields to determine how
* many there are. Each element in this list is an ast_declarator_list.
* This means that we actually need to count the number of elements in the
* is_interface case, will have resulted in compilation having
* already halted due to a syntax error.
*/
- const struct glsl_type *field_type =
- decl_type != NULL ? decl_type : glsl_type::error_type;
+ assert(decl_type);
- if (is_interface && field_type->contains_opaque()) {
+ if (is_interface && decl_type->contains_opaque()) {
YYLTYPE loc = decl_list->get_location();
_mesa_glsl_error(&loc, state,
"uniform/buffer in non-default interface block contains "
"opaque variable");
}
- if (field_type->contains_atomic()) {
+ if (decl_type->contains_atomic()) {
/* From section 4.1.7.3 of the GLSL 4.40 spec:
*
* "Members of structures cannot be declared as atomic counter
"shader storage block or uniform block");
}
- if (field_type->contains_image()) {
+ if (decl_type->contains_image()) {
/* FINISHME: Same problem as with atomic counters.
* FINISHME: Request clarification from Khronos and add
* FINISHME: spec quotation here.
const struct ast_type_qualifier *const qual =
& decl_list->type->qualifier;
if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
qual->flags.q.packed ||
qual->flags.q.shared) {
_mesa_glsl_error(&loc, state,
"uniform/shader storage block layout qualifiers "
- "std140, packed, and shared can only be applied "
- "to uniform/shader storage blocks, not members");
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform/shader storage blocks, not "
+ "members");
}
if (qual->flags.q.constant) {
"to struct or interface block members");
}
- field_type = process_array_type(&loc, decl_type,
- decl->array_specifier, state);
+ const struct glsl_type *field_type =
+ process_array_type(&loc, decl_type, decl->array_specifier, state);
fields[i].type = field_type;
fields[i].name = decl->identifier;
fields[i].location = -1;
|| fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR);
}
+ /* Image qualifiers are allowed on buffer variables, which can only
+ * be defined inside shader storage buffer objects
+ */
+ if (layout && var_mode == ir_var_shader_storage) {
+ if (qual->flags.q.read_only && qual->flags.q.write_only) {
+ _mesa_glsl_error(&loc, state,
+ "buffer variable `%s' can't be "
+ "readonly and writeonly.", fields[i].name);
+ }
+
+ /* For readonly and writeonly qualifiers the field definition,
+ * if set, overrides the layout qualifier.
+ */
+ bool read_only = layout->flags.q.read_only;
+ bool write_only = layout->flags.q.write_only;
+
+ if (qual->flags.q.read_only) {
+ read_only = true;
+ write_only = false;
+ } else if (qual->flags.q.write_only) {
+ read_only = false;
+ write_only = true;
+ }
+
+ fields[i].image_read_only = read_only;
+ fields[i].image_write_only = write_only;
+
+ /* For other qualifiers, we set the flag if either the layout
+ * qualifier or the field qualifier are set
+ */
+ fields[i].image_coherent = qual->flags.q.coherent ||
+ layout->flags.q.coherent;
+ fields[i].image_volatile = qual->flags.q._volatile ||
+ layout->flags.q._volatile;
+ fields[i].image_restrict = qual->flags.q.restrict_flag ||
+ layout->flags.q.restrict_flag;
+ }
+
i++;
}
}
false,
GLSL_MATRIX_LAYOUT_INHERITED,
false /* allow_reserved_names */,
- ir_var_auto);
+ ir_var_auto,
+ NULL);
validate_identifier(this->name, loc, state);
bool found;
};
+/**
+ * Tell whether \c v is the last member of its interface block.
+ *
+ * The decision is made by comparing the name of \c v with the name of the
+ * last field of its interface type.  \c v is asserted to be an unsized
+ * array.
+ */
+static bool
+is_unsized_array_last_element(ir_variable *v)
+{
+   const glsl_type *iface = v->get_interface_type();
+   int num_fields = iface->length;
+
+   assert(v->type->is_unsized_array());
+
+   /* Check if it is the last element of the interface */
+   const char *last_field_name =
+      iface->fields.structure[num_fields - 1].name;
+
+   return strcmp(last_field_name, v->name) == 0;
+}
ir_rvalue *
ast_interface_block::hir(exec_list *instructions,
this->block_name);
}
+ if (!this->layout.flags.q.buffer &&
+ this->layout.flags.q.std430) {
+ _mesa_glsl_error(&loc, state,
+ "std430 storage block layout qualifier is supported "
+ "only for shader storage blocks");
+ }
+
/* The ast_interface_block has a list of ast_declarator_lists. We
* need to turn those into ir_variables with an association
* with this uniform block.
packing = GLSL_INTERFACE_PACKING_SHARED;
} else if (this->layout.flags.q.packed) {
packing = GLSL_INTERFACE_PACKING_PACKED;
+ } else if (this->layout.flags.q.std430) {
+ packing = GLSL_INTERFACE_PACKING_STD430;
} else {
/* The default layout is std140.
*/
true,
matrix_layout,
redeclaring_per_vertex,
- var_mode);
+ var_mode,
+ &this->layout);
state->struct_specifier_depth--;
else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out)
handle_tess_ctrl_shader_output_decl(state, loc, var);
+ for (unsigned i = 0; i < num_variables; i++) {
+ if (fields[i].type->is_unsized_array()) {
+ if (var_mode == ir_var_shader_storage) {
+ if (i != (num_variables - 1)) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ fields[i].name);
+ }
+ } else {
+ /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+ *
+ * "If an array is declared as the last member of a shader storage
+ * block and the size is not specified at compile-time, it is
+ * sized at run-time. In all other cases, arrays are sized only
+ * at compile-time."
+ */
+ if (state->es_shader) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ fields[i].name);
+ }
+ }
+ }
+ }
+
if (ir_variable *earlier =
state->symbols->get_variable(this->instance_name)) {
if (!redeclaring_per_vertex) {
var->data.stream = this->layout.stream;
+ if (var->data.mode == ir_var_shader_storage) {
+ var->data.image_read_only = fields[i].image_read_only;
+ var->data.image_write_only = fields[i].image_write_only;
+ var->data.image_coherent = fields[i].image_coherent;
+ var->data.image_volatile = fields[i].image_volatile;
+ var->data.image_restrict = fields[i].image_restrict;
+ }
+
/* Examine var name here since var may get deleted in the next call */
bool var_is_gl_id = is_gl_identifier(var->name);
var->data.explicit_binding = this->layout.flags.q.explicit_binding;
var->data.binding = this->layout.binding;
+ if (var->type->is_unsized_array()) {
+ if (var->is_in_shader_storage_block()) {
+ if (!is_unsized_array_last_element(var)) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ var->name);
+ }
+ var->data.from_ssbo_unsized_array = true;
+ } else {
+ /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+ *
+ * "If an array is declared as the last member of a shader storage
+ * block and the size is not specified at compile-time, it is
+ * sized at run-time. In all other cases, arrays are sized only
+ * at compile-time."
+ */
+ if (state->es_shader) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ var->name);
+ }
+ }
+ }
+
state->symbols->add_variable(var);
instructions->push_tail(var);
}
|| this->flags.q.depth_less
|| this->flags.q.depth_unchanged
|| this->flags.q.std140
+ || this->flags.q.std430
|| this->flags.q.shared
|| this->flags.q.column_major
|| this->flags.q.row_major
ubo_layout_mask.flags.q.std140 = 1;
ubo_layout_mask.flags.q.packed = 1;
ubo_layout_mask.flags.q.shared = 1;
+ ubo_layout_mask.flags.q.std430 = 1;
ast_type_qualifier ubo_binding_mask;
ubo_binding_mask.flags.i = 0;
}
+/**
+ * Availability predicate: true when the ARB_shader_storage_buffer_object
+ * enable flag is set in the parse state.
+ */
static bool
+shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
+{
+ return state->ARB_shader_storage_buffer_object_enable;
+}
+
+static bool
shader_trinary_minmax(const _mesa_glsl_parse_state *state)
{
return state->AMD_shader_trinary_minmax_enable;
}
+/**
+ * Availability predicate: true when state->is_version(450, 0) holds or the
+ * ARB_shader_texture_image_samples enable flag is set.
+ *
+ * NOTE(review): is_version(450, 0) presumably means "desktop GLSL 4.50, no
+ * ES version" -- confirm against the is_version() declaration.
+ */
static bool
+shader_samples(const _mesa_glsl_parse_state *state)
+{
+ return state->is_version(450, 0) ||
+ state->ARB_shader_texture_image_samples_enable;
+}
+
+static bool
gs_streams(const _mesa_glsl_parse_state *state)
{
return gpu_shader5(state) && gs_only(state);
void add_function(const char *name, ...);
typedef ir_function_signature *(builtin_builder::*image_prototype_ctr)(const glsl_type *image_type,
- const char *intrinsic_name,
unsigned num_arguments,
unsigned flags);
IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3),
IMAGE_FUNCTION_READ_ONLY = (1 << 4),
IMAGE_FUNCTION_WRITE_ONLY = (1 << 5),
- IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6)
+ IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6),
+ IMAGE_FUNCTION_MS_ONLY = (1 << 7),
};
/**
B1(all);
B1(not);
BA2(textureSize);
+ B1(textureSamples);
/** Flags to _texture() */
#define TEX_PROJECT 1
B1(interpolateAtOffset)
B1(interpolateAtSample)
- ir_function_signature *_atomic_intrinsic(builtin_available_predicate avail);
- ir_function_signature *_atomic_op(const char *intrinsic,
- builtin_available_predicate avail);
+ ir_function_signature *_atomic_counter_intrinsic(builtin_available_predicate avail);
+ ir_function_signature *_atomic_counter_op(const char *intrinsic,
+ builtin_available_predicate avail);
+
+ ir_function_signature *_atomic_ssbo_intrinsic2(builtin_available_predicate avail,
+ const glsl_type *type);
+ ir_function_signature *_atomic_ssbo_op2(const char *intrinsic,
+ builtin_available_predicate avail,
+ const glsl_type *type);
+ ir_function_signature *_atomic_ssbo_intrinsic3(builtin_available_predicate avail,
+ const glsl_type *type);
+ ir_function_signature *_atomic_ssbo_op3(const char *intrinsic,
+ builtin_available_predicate avail,
+ const glsl_type *type);
B1(min3)
B1(max3)
B1(mid3)
ir_function_signature *_image_prototype(const glsl_type *image_type,
- const char *intrinsic_name,
unsigned num_arguments,
unsigned flags);
ir_function_signature *_image_size_prototype(const glsl_type *image_type,
- const char *intrinsic_name,
unsigned num_arguments,
unsigned flags);
+ ir_function_signature *_image_samples_prototype(const glsl_type *image_type,
+ unsigned num_arguments,
+ unsigned flags);
ir_function_signature *_image(image_prototype_ctr prototype,
const glsl_type *image_type,
const char *intrinsic_name,
builtin_builder::create_intrinsics()
{
add_function("__intrinsic_atomic_read",
- _atomic_intrinsic(shader_atomic_counters),
+ _atomic_counter_intrinsic(shader_atomic_counters),
NULL);
add_function("__intrinsic_atomic_increment",
- _atomic_intrinsic(shader_atomic_counters),
+ _atomic_counter_intrinsic(shader_atomic_counters),
NULL);
add_function("__intrinsic_atomic_predecrement",
- _atomic_intrinsic(shader_atomic_counters),
+ _atomic_counter_intrinsic(shader_atomic_counters),
+ NULL);
+
+ add_function("__intrinsic_ssbo_atomic_add",
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_ssbo_atomic_min",
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_ssbo_atomic_max",
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_ssbo_atomic_and",
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_ssbo_atomic_or",
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_ssbo_atomic_xor",
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_ssbo_atomic_exchange",
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_ssbo_atomic_comp_swap",
+ _atomic_ssbo_intrinsic3(shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_intrinsic3(shader_storage_buffer_object,
+ glsl_type::int_type),
NULL);
add_image_functions(false);
_textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::usampler2DMSArray_type),
NULL);
+ add_function("textureSamples",
+ _textureSamples(glsl_type::sampler2DMS_type),
+ _textureSamples(glsl_type::isampler2DMS_type),
+ _textureSamples(glsl_type::usampler2DMS_type),
+
+ _textureSamples(glsl_type::sampler2DMSArray_type),
+ _textureSamples(glsl_type::isampler2DMSArray_type),
+ _textureSamples(glsl_type::usampler2DMSArray_type),
+ NULL);
+
add_function("texture",
_texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type),
_texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type),
NULL);
add_function("atomicCounter",
- _atomic_op("__intrinsic_atomic_read",
- shader_atomic_counters),
+ _atomic_counter_op("__intrinsic_atomic_read",
+ shader_atomic_counters),
NULL);
add_function("atomicCounterIncrement",
- _atomic_op("__intrinsic_atomic_increment",
- shader_atomic_counters),
+ _atomic_counter_op("__intrinsic_atomic_increment",
+ shader_atomic_counters),
NULL);
add_function("atomicCounterDecrement",
- _atomic_op("__intrinsic_atomic_predecrement",
- shader_atomic_counters),
+ _atomic_counter_op("__intrinsic_atomic_predecrement",
+ shader_atomic_counters),
+ NULL);
+
+ add_function("atomicAdd",
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_add",
+ shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_add",
+ shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("atomicMin",
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_min",
+ shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_min",
+ shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("atomicMax",
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_max",
+ shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_max",
+ shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("atomicAnd",
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_and",
+ shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_and",
+ shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("atomicOr",
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_or",
+ shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_or",
+ shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("atomicXor",
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_xor",
+ shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_xor",
+ shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("atomicExchange",
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_exchange",
+ shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_op2("__intrinsic_ssbo_atomic_exchange",
+ shader_storage_buffer_object,
+ glsl_type::int_type),
+ NULL);
+ add_function("atomicCompSwap",
+ _atomic_ssbo_op3("__intrinsic_ssbo_atomic_comp_swap",
+ shader_storage_buffer_object,
+ glsl_type::uint_type),
+ _atomic_ssbo_op3("__intrinsic_ssbo_atomic_comp_swap",
+ shader_storage_buffer_object,
+ glsl_type::int_type),
NULL);
add_function("min3",
ir_function *f = new(mem_ctx) ir_function(name);
for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) {
- if (types[i]->sampler_type != GLSL_TYPE_FLOAT ||
- (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE))
+ if ((types[i]->sampler_type != GLSL_TYPE_FLOAT ||
+ (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) &&
+ (types[i]->sampler_dimensionality == GLSL_SAMPLER_DIM_MS ||
+ !(flags & IMAGE_FUNCTION_MS_ONLY)))
f->add_signature(_image(prototype, types[i], intrinsic_name,
num_arguments, flags));
}
"__intrinsic_image_size",
&builtin_builder::_image_size_prototype, 1,
flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE);
+
+ add_image_function(glsl ? "imageSamples" : "__intrinsic_image_samples",
+ "__intrinsic_image_samples",
+ &builtin_builder::_image_samples_prototype, 1,
+ flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE |
+ IMAGE_FUNCTION_MS_ONLY);
}
ir_variable *
}
+/**
+ * Build the signature of textureSamples() for \c sampler_type.
+ *
+ * The signature takes a single sampler parameter, is declared with an int
+ * return type and is gated by the shader_samples predicate.  Its body emits
+ * a return of an ir_texture created with the ir_texture_samples opcode on
+ * that sampler.
+ */
ir_function_signature *
+builtin_builder::_textureSamples(const glsl_type *sampler_type)
+{
+ ir_variable *s = in_var(sampler_type, "sampler");
+ MAKE_SIG(glsl_type::int_type, shader_samples, 1, s);
+
+ ir_texture *tex = new(mem_ctx) ir_texture(ir_texture_samples);
+ tex->set_sampler(new(mem_ctx) ir_dereference_variable(s), glsl_type::int_type);
+ body.emit(ret(tex));
+
+ return sig;
+}
+
+ir_function_signature *
builtin_builder::_texture(ir_texture_opcode opcode,
builtin_available_predicate avail,
const glsl_type *return_type,
}
ir_function_signature *
-builtin_builder::_atomic_intrinsic(builtin_available_predicate avail)
+builtin_builder::_atomic_counter_intrinsic(builtin_available_predicate avail)
{
ir_variable *counter = in_var(glsl_type::atomic_uint_type, "counter");
MAKE_INTRINSIC(glsl_type::uint_type, avail, 1, counter);
}
+/**
+ * Build the signature of a two-parameter SSBO atomic intrinsic.
+ *
+ * Both parameters ("atomic" and "data") and the return value use \c type;
+ * \c avail is the availability predicate handed to MAKE_INTRINSIC.
+ */
ir_function_signature *
-builtin_builder::_atomic_op(const char *intrinsic,
- builtin_available_predicate avail)
+builtin_builder::_atomic_ssbo_intrinsic2(builtin_available_predicate avail,
+ const glsl_type *type)
+{
+ ir_variable *atomic = in_var(type, "atomic");
+ ir_variable *data = in_var(type, "data");
+ MAKE_INTRINSIC(type, avail, 2, atomic, data);
+ return sig;
+}
+
+/**
+ * Build the signature of a three-parameter SSBO atomic intrinsic.
+ *
+ * All three parameters ("atomic", "data1" and "data2") and the return value
+ * use \c type; \c avail is the availability predicate handed to
+ * MAKE_INTRINSIC.
+ */
+ir_function_signature *
+builtin_builder::_atomic_ssbo_intrinsic3(builtin_available_predicate avail,
+ const glsl_type *type)
+{
+ ir_variable *atomic = in_var(type, "atomic");
+ ir_variable *data1 = in_var(type, "data1");
+ ir_variable *data2 = in_var(type, "data2");
+ MAKE_INTRINSIC(type, avail, 3, atomic, data1, data2);
+ return sig;
+}
+
+ir_function_signature *
+builtin_builder::_atomic_counter_op(const char *intrinsic,
+ builtin_available_predicate avail)
{
ir_variable *counter = in_var(glsl_type::atomic_uint_type, "atomic_counter");
MAKE_SIG(glsl_type::uint_type, avail, 1, counter);
}
+/**
+ * Build a two-parameter atomic built-in that forwards to an intrinsic.
+ *
+ * The body looks up the function named \c intrinsic in the shader's symbol
+ * table, calls it with this signature's own parameters, stores the result
+ * in a temporary of \c type and returns that temporary.
+ */
ir_function_signature *
+builtin_builder::_atomic_ssbo_op2(const char *intrinsic,
+ builtin_available_predicate avail,
+ const glsl_type *type)
+{
+ ir_variable *atomic = in_var(type, "atomic_var");
+ ir_variable *data = in_var(type, "atomic_data");
+ MAKE_SIG(type, avail, 2, atomic, data);
+
+ ir_variable *retval = body.make_temp(type, "atomic_retval");
+ body.emit(call(shader->symbols->get_function(intrinsic), retval,
+ sig->parameters));
+ body.emit(ret(retval));
+ return sig;
+}
+
+/**
+ * Build a three-parameter atomic built-in that forwards to an intrinsic.
+ *
+ * The body looks up the function named \c intrinsic in the shader's symbol
+ * table, calls it with this signature's own parameters, stores the result
+ * in a temporary of \c type and returns that temporary.
+ */
+ir_function_signature *
+builtin_builder::_atomic_ssbo_op3(const char *intrinsic,
+ builtin_available_predicate avail,
+ const glsl_type *type)
+{
+ ir_variable *atomic = in_var(type, "atomic_var");
+ ir_variable *data1 = in_var(type, "atomic_data1");
+ ir_variable *data2 = in_var(type, "atomic_data2");
+ MAKE_SIG(type, avail, 3, atomic, data1, data2);
+
+ ir_variable *retval = body.make_temp(type, "atomic_retval");
+ body.emit(call(shader->symbols->get_function(intrinsic), retval,
+ sig->parameters));
+ body.emit(ret(retval));
+ return sig;
+}
+
+ir_function_signature *
builtin_builder::_min3(const glsl_type *type)
{
ir_variable *x = in_var(type, "x");
ir_function_signature *
builtin_builder::_image_prototype(const glsl_type *image_type,
- const char *intrinsic_name,
unsigned num_arguments,
unsigned flags)
{
ir_function_signature *
builtin_builder::_image_size_prototype(const glsl_type *image_type,
- const char *intrinsic_name,
- unsigned num_arguments,
- unsigned flags)
+ unsigned /* num_arguments */,
+ unsigned /* flags */)
{
const glsl_type *ret_type;
unsigned num_components = image_type->coordinate_components();
}
+/**
+ * Build the prototype of the image-samples built-in.
+ *
+ * The signature takes a single "image" parameter of \c image_type, returns
+ * an int and is gated by the shader_samples availability predicate.
+ *
+ * The last two parameters are unused; their names are commented out (as in
+ * _image_size_prototype) so the signature keeps the same parameter types as
+ * the other prototype builders without triggering unused-parameter warnings.
+ */
ir_function_signature *
+builtin_builder::_image_samples_prototype(const glsl_type *image_type,
+ unsigned /* num_arguments */,
+ unsigned /* flags */)
+{
+ ir_variable *image = in_var(image_type, "image");
+ ir_function_signature *sig =
+ new_sig(glsl_type::int_type, shader_samples, 1, image);
+
+ /* Set the maximal set of qualifiers allowed for this image
+ * built-in. Function calls with arguments having fewer
+ * qualifiers than present in the prototype are allowed by the
+ * spec, but not with more, i.e. this will make the compiler
+ * accept everything that needs to be accepted, and reject cases
+ * like loads from write-only or stores to read-only images.
+ */
+ image->data.image_read_only = true;
+ image->data.image_write_only = true;
+ image->data.image_coherent = true;
+ image->data.image_volatile = true;
+ image->data.image_restrict = true;
+
+ return sig;
+}
+
+ir_function_signature *
builtin_builder::_image(image_prototype_ctr prototype,
const glsl_type *image_type,
const char *intrinsic_name,
unsigned num_arguments,
unsigned flags)
{
- ir_function_signature *sig = (this->*prototype)(image_type, intrinsic_name,
+ ir_function_signature *sig = (this->*prototype)(image_type,
num_arguments, flags);
if (flags & IMAGE_FUNCTION_EMIT_STUB) {
}
ir_function *
-_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state,
- const char *name)
+_mesa_glsl_find_builtin_function_by_name(const char *name)
{
ir_function *f;
mtx_lock(&builtins_lock);
return builtins.shader;
}
+
+/**
+ * Get the function signature for main from a shader
+ *
+ * Looks up "main" in the shader's symbol table and asks it for the signature
+ * matching an empty parameter list.
+ *
+ * \return the matching signature if it exists and is defined (has a body),
+ * NULL otherwise -- including when the shader has no "main" at all.
+ */
+ir_function_signature *
+_mesa_get_main_function_signature(gl_shader *sh)
+{
+ ir_function *const f = sh->symbols->get_function("main");
+ if (f != NULL) {
+ exec_list void_parameters;
+
+ /* Look for the 'void main()' signature and ensure that it's defined.
+ * This keeps the linker from accidentally picking a shader that just
+ * contains a prototype for main.
+ *
+ * We don't have to check for multiple definitions of main (in multiple
+ * shaders) because that would have already been caught above.
+ *
+ * NOTE(review): nothing "above" in this function performs that check;
+ * the remark presumably refers to the caller's context -- confirm.
+ */
+ ir_function_signature *sig =
+ f->matching_signature(NULL, &void_parameters, false);
+ if ((sig != NULL) && sig->is_defined) {
+ return sig;
+ }
+ }
+
+ return NULL;
+}
+
/** @} */
#define T(TYPE, MIN_GL, MIN_ES) \
{ glsl_type::TYPE##_type, MIN_GL, MIN_ES },
-const static struct builtin_type_versions {
+static const struct builtin_type_versions {
const glsl_type *const type;
int min_gl;
int min_es;
*/
#include "ir.h"
+#include "ir_builder.h"
+#include "linker.h"
#include "glsl_parser_extras.h"
#include "glsl_symbol_table.h"
#include "main/core.h"
#include "program/prog_statevars.h"
#include "program/prog_instruction.h"
+using namespace ir_builder;
+
static const struct gl_builtin_uniform_element gl_NumSamples_elements[] = {
{NULL, {STATE_NUM_SAMPLES, 0, 0}, SWIZZLE_XXXX}
};
ir_variable *add_uniform(const glsl_type *type, const char *name);
ir_variable *add_const(const char *name, int value);
ir_variable *add_const_ivec3(const char *name, int x, int y, int z);
- void add_varying(int slot, const glsl_type *type, const char *name,
- const char *name_as_gs_input);
+ void add_varying(int slot, const glsl_type *type, const char *name);
exec_list * const instructions;
struct _mesa_glsl_parse_state * const state;
const glsl_type * const bool_t;
const glsl_type * const int_t;
+ const glsl_type * const uint_t;
const glsl_type * const float_t;
const glsl_type * const vec2_t;
const glsl_type * const vec3_t;
const glsl_type * const vec4_t;
+ const glsl_type * const uvec3_t;
const glsl_type * const mat3_t;
const glsl_type * const mat4_t;
: instructions(instructions), state(state), symtab(state->symbols),
compatibility(!state->is_version(140, 100)),
bool_t(glsl_type::bool_type), int_t(glsl_type::int_type),
+ uint_t(glsl_type::uint_type),
float_t(glsl_type::float_type), vec2_t(glsl_type::vec2_type),
vec3_t(glsl_type::vec3_type), vec4_t(glsl_type::vec4_type),
+ uvec3_t(glsl_type::uvec3_type),
mat3_t(glsl_type::mat3_type), mat4_t(glsl_type::mat4_type)
{
}
if (!state->es_shader) {
add_const("gl_MaxGeometryAtomicCounters",
state->Const.MaxGeometryAtomicCounters);
-
- if (state->is_version(400, 0) ||
- state->ARB_tessellation_shader_enable) {
- add_const("gl_MaxTessControlAtomicCounters",
- state->Const.MaxTessControlAtomicCounters);
- add_const("gl_MaxTessEvaluationAtomicCounters",
- state->Const.MaxTessEvaluationAtomicCounters);
- }
+ add_const("gl_MaxTessControlAtomicCounters",
+ state->Const.MaxTessControlAtomicCounters);
+ add_const("gl_MaxTessEvaluationAtomicCounters",
+ state->Const.MaxTessEvaluationAtomicCounters);
}
}
+/**
+ * Declare the compute shader built-in variables.
+ *
+ * gl_LocalInvocationID, gl_WorkGroupID and gl_NumWorkGroups are registered as
+ * uvec3 system values. gl_GlobalInvocationID (uvec3) and
+ * gl_LocalInvocationIndex (uint) are added as plain ir_var_auto variables
+ * instead; initialize_cs_derived_variables() assigns them from the other
+ * built-ins at the start of main.
+ */
void
builtin_variable_generator::generate_cs_special_vars()
{
- /* TODO: finish this. */
+ add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_ID, uvec3_t,
+ "gl_LocalInvocationID");
+ add_system_value(SYSTEM_VALUE_WORK_GROUP_ID, uvec3_t, "gl_WorkGroupID");
+ add_system_value(SYSTEM_VALUE_NUM_WORK_GROUPS, uvec3_t, "gl_NumWorkGroups");
+ add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
+ add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
}
/**
* Add a single "varying" variable. The variable's type and direction (input
* or output) are adjusted as appropriate for the type of shader being
- * compiled. For geometry shaders using {ARB,EXT}_geometry_shader4,
- * name_as_gs_input is used for the input (to avoid ambiguity).
+ * compiled.
*/
void
builtin_variable_generator::add_varying(int slot, const glsl_type *type,
- const char *name,
- const char *name_as_gs_input)
+ const char *name)
{
switch (state->stage) {
case MESA_SHADER_TESS_CTRL:
void
builtin_variable_generator::generate_varyings()
{
-#define ADD_VARYING(loc, type, name) \
- add_varying(loc, type, name, name "In")
-
/* gl_Position and gl_PointSize are not visible from fragment shaders. */
if (state->stage != MESA_SHADER_FRAGMENT) {
- ADD_VARYING(VARYING_SLOT_POS, vec4_t, "gl_Position");
- ADD_VARYING(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
+ add_varying(VARYING_SLOT_POS, vec4_t, "gl_Position");
+ add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
}
if (state->is_version(130, 0)) {
- ADD_VARYING(VARYING_SLOT_CLIP_DIST0, array(float_t, 0),
+ add_varying(VARYING_SLOT_CLIP_DIST0, array(float_t, 0),
"gl_ClipDistance");
}
if (compatibility) {
- ADD_VARYING(VARYING_SLOT_TEX0, array(vec4_t, 0), "gl_TexCoord");
- ADD_VARYING(VARYING_SLOT_FOGC, float_t, "gl_FogFragCoord");
+ add_varying(VARYING_SLOT_TEX0, array(vec4_t, 0), "gl_TexCoord");
+ add_varying(VARYING_SLOT_FOGC, float_t, "gl_FogFragCoord");
if (state->stage == MESA_SHADER_FRAGMENT) {
- ADD_VARYING(VARYING_SLOT_COL0, vec4_t, "gl_Color");
- ADD_VARYING(VARYING_SLOT_COL1, vec4_t, "gl_SecondaryColor");
+ add_varying(VARYING_SLOT_COL0, vec4_t, "gl_Color");
+ add_varying(VARYING_SLOT_COL1, vec4_t, "gl_SecondaryColor");
} else {
- ADD_VARYING(VARYING_SLOT_CLIP_VERTEX, vec4_t, "gl_ClipVertex");
- ADD_VARYING(VARYING_SLOT_COL0, vec4_t, "gl_FrontColor");
- ADD_VARYING(VARYING_SLOT_BFC0, vec4_t, "gl_BackColor");
- ADD_VARYING(VARYING_SLOT_COL1, vec4_t, "gl_FrontSecondaryColor");
- ADD_VARYING(VARYING_SLOT_BFC1, vec4_t, "gl_BackSecondaryColor");
+ add_varying(VARYING_SLOT_CLIP_VERTEX, vec4_t, "gl_ClipVertex");
+ add_varying(VARYING_SLOT_COL0, vec4_t, "gl_FrontColor");
+ add_varying(VARYING_SLOT_BFC0, vec4_t, "gl_BackColor");
+ add_varying(VARYING_SLOT_COL1, vec4_t, "gl_FrontSecondaryColor");
+ add_varying(VARYING_SLOT_BFC1, vec4_t, "gl_BackSecondaryColor");
}
}
break;
}
}
+
+
+/**
+ * Initialize compute shader variables with values that are derived from other
+ * compute shader variables.
+ *
+ * Builds the assignments for gl_GlobalInvocationID and
+ * gl_LocalInvocationIndex and inserts them at the head of \c main_sig's body.
+ * The shader must be a compute shader (asserted), and every built-in looked
+ * up here except gl_WorkGroupSize must already be in the symbol table
+ * (asserted).
+ */
+static void
+initialize_cs_derived_variables(gl_shader *shader,
+ ir_function_signature *const main_sig)
+{
+ assert(shader->Stage == MESA_SHADER_COMPUTE);
+
+ ir_variable *gl_GlobalInvocationID =
+ shader->symbols->get_variable("gl_GlobalInvocationID");
+ assert(gl_GlobalInvocationID);
+ ir_variable *gl_WorkGroupID =
+ shader->symbols->get_variable("gl_WorkGroupID");
+ assert(gl_WorkGroupID);
+ ir_variable *gl_WorkGroupSize =
+ shader->symbols->get_variable("gl_WorkGroupSize");
+ /* gl_WorkGroupSize may be absent from the symbol table. In that case
+ * create an implicitly declared, read-only uvec3 variable at the head of
+ * the shader's IR so the expressions below have something to reference.
+ * NOTE(review): no value is assigned to the variable here and it is not
+ * added to the symbol table -- presumably it is resolved later; confirm.
+ */
+ if (gl_WorkGroupSize == NULL) {
+ void *const mem_ctx = ralloc_parent(shader->ir);
+ gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type,
+ "gl_WorkGroupSize",
+ ir_var_auto);
+ gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly;
+ gl_WorkGroupSize->data.read_only = true;
+ shader->ir->push_head(gl_WorkGroupSize);
+ }
+ ir_variable *gl_LocalInvocationID =
+ shader->symbols->get_variable("gl_LocalInvocationID");
+ assert(gl_LocalInvocationID);
+
+ /* gl_GlobalInvocationID =
+ * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
+ */
+ ir_instruction *inst =
+ assign(gl_GlobalInvocationID,
+ add(mul(gl_WorkGroupID, gl_WorkGroupSize),
+ gl_LocalInvocationID));
+ main_sig->body.push_head(inst);
+
+ /* gl_LocalInvocationIndex =
+ * gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
+ * gl_LocalInvocationID.y * gl_WorkGroupSize.x +
+ * gl_LocalInvocationID.x;
+ */
+ ir_expression *index_z =
+ mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)),
+ swizzle_y(gl_WorkGroupSize));
+ ir_expression *index_y =
+ mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize));
+ ir_expression *index_y_plus_z = add(index_y, index_z);
+ operand index_x(swizzle_x(gl_LocalInvocationID));
+ ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
+ ir_variable *gl_LocalInvocationIndex =
+ shader->symbols->get_variable("gl_LocalInvocationIndex");
+ assert(gl_LocalInvocationIndex);
+ inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
+ /* Also pushed at the head, so this assignment ends up ahead of the
+ * gl_GlobalInvocationID one; the two do not depend on each other.
+ */
+ main_sig->body.push_head(inst);
+}
+
+
+/**
+ * Initialize builtin variables with values based on other builtin variables.
+ * These are initialized in the main function.
+ *
+ * This is a no-op for every stage other than compute, and also for compute
+ * shaders for which no defined main signature can be found.
+ */
+void
+_mesa_glsl_initialize_derived_variables(gl_shader *shader)
+{
+ /* We only need to set CS variables currently. */
+ if (shader->Stage != MESA_SHADER_COMPUTE)
+ return;
+
+ /* Without a defined main() there is no body to insert assignments into. */
+ ir_function_signature *const main_sig =
+ _mesa_get_main_function_signature(shader);
+ if (main_sig == NULL)
+ return;
+
+ initialize_cs_derived_variables(shader, main_sig);
+}
if (extensions->ARB_shader_image_size)
add_builtin_define(parser, "GL_ARB_shader_image_size", 1);
+ if (extensions->ARB_shader_texture_image_samples)
+ add_builtin_define(parser, "GL_ARB_shader_texture_image_samples", 1);
+
if (extensions->ARB_derivative_control)
add_builtin_define(parser, "GL_ARB_derivative_control", 1);
image1DArrayShadow KEYWORD(130, 300, 0, 0, IMAGE1DARRAYSHADOW);
image2DArrayShadow KEYWORD(130, 300, 0, 0, IMAGE2DARRAYSHADOW);
-coherent KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, COHERENT);
-volatile KEYWORD_WITH_ALT(110, 100, 420, 310, yyextra->ARB_shader_image_load_store_enable, VOLATILE);
-restrict KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, RESTRICT);
-readonly KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, READONLY);
-writeonly KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, WRITEONLY);
+coherent KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, COHERENT);
+volatile KEYWORD_WITH_ALT(110, 100, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, VOLATILE);
+restrict KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, RESTRICT);
+readonly KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, READONLY);
+writeonly KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, WRITEONLY);
atomic_uint KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT);
%token <identifier> IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER
%type <identifier> any_identifier
%type <interface_block> instance_name_opt
+%type <interface_block> buffer_instance_name_opt
%token <real> FLOATCONSTANT
%token <dreal> DOUBLECONSTANT
%token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT
%type <type_qualifier> subroutine_qualifier
%type <subroutine_list> subroutine_type_list
%type <type_qualifier> interface_qualifier
+%type <type_qualifier> buffer_interface_qualifier
%type <type_specifier> type_specifier
%type <type_specifier> type_specifier_nonarray
%type <array_specifier> array_specifier
$$.flags.q.std140 = 1;
} else if (match_layout_qualifier($1, "shared", state) == 0) {
$$.flags.q.shared = 1;
+ } else if (match_layout_qualifier($1, "std430", state) == 0) {
+ $$.flags.q.std430 = 1;
} else if (match_layout_qualifier($1, "column_major", state) == 0) {
$$.flags.q.column_major = 1;
/* "row_major" is a reserved word in GLSL 1.30+. Its token is parsed
{
$$ = $1;
}
- | layout_qualifier basic_interface_block
+ | layout_qualifier interface_block
{
- ast_interface_block *block = $2;
+ ast_interface_block *block = (ast_interface_block *) $2;
+
+ if (!state->has_420pack() && block->layout.has_layout() &&
+ !block->layout.is_default_qualifier) {
+ _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
+ YYERROR;
+ }
+
if (!block->layout.merge_qualifier(& @1, state, $1)) {
YYERROR;
}
+ block->layout.is_default_qualifier = false;
+
foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
ast_type_qualifier& qualifier = member->type->qualifier;
if (qualifier.flags.q.stream && qualifier.stream != block->layout.stream) {
}
$$ = block;
}
+ | memory_qualifier interface_block
+ {
+ ast_interface_block *block = (ast_interface_block *)$2;
+
+ if (!block->layout.flags.q.buffer) {
+ _mesa_glsl_error(& @1, state,
+ "memory qualifiers can only be used in the "
+ "declaration of shader storage blocks");
+ }
+ if (!block->layout.merge_qualifier(& @1, state, $1)) {
+ YYERROR;
+ }
+ $$ = block;
+ }
;
basic_interface_block:
block->block_name = $2;
block->declarations.push_degenerate_list_at_head(& $4->link);
- if ($1.flags.q.buffer) {
- if (!state->has_shader_storage_buffer_objects()) {
- _mesa_glsl_error(& @1, state,
- "#version 430 / GL_ARB_shader_storage_buffer_object "
- "required for defining shader storage blocks");
- } else if (state->ARB_shader_storage_buffer_object_warn) {
- _mesa_glsl_warning(& @1, state,
- "#version 430 / GL_ARB_shader_storage_buffer_object "
- "required for defining shader storage blocks");
- }
- } else if ($1.flags.q.uniform) {
- if (!state->has_uniform_buffer_objects()) {
- _mesa_glsl_error(& @1, state,
- "#version 140 / GL_ARB_uniform_buffer_object "
- "required for defining uniform blocks");
- } else if (state->ARB_uniform_buffer_object_warn) {
- _mesa_glsl_warning(& @1, state,
- "#version 140 / GL_ARB_uniform_buffer_object "
- "required for defining uniform blocks");
- }
- } else {
- if (state->es_shader || state->language_version < 150) {
- _mesa_glsl_error(& @1, state,
- "#version 150 required for using "
- "interface blocks");
- }
- }
-
- /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"):
- * "It is illegal to have an input block in a vertex shader
- * or an output block in a fragment shader"
- */
- if ((state->stage == MESA_SHADER_VERTEX) && $1.flags.q.in) {
- _mesa_glsl_error(& @1, state,
- "`in' interface block is not allowed for "
- "a vertex shader");
- } else if ((state->stage == MESA_SHADER_FRAGMENT) && $1.flags.q.out) {
- _mesa_glsl_error(& @1, state,
- "`out' interface block is not allowed for "
- "a fragment shader");
- }
-
- /* Since block arrays require names, and both features are added in
- * the same language versions, we don't have to explicitly
- * version-check both things.
- */
- if (block->instance_name != NULL) {
- state->check_version(150, 300, & @1, "interface blocks with "
- "an instance name are not allowed");
- }
-
- uint64_t interface_type_mask;
- struct ast_type_qualifier temp_type_qualifier;
-
- /* Get a bitmask containing only the in/out/uniform/buffer
- * flags, allowing us to ignore other irrelevant flags like
- * interpolation qualifiers.
- */
- temp_type_qualifier.flags.i = 0;
- temp_type_qualifier.flags.q.uniform = true;
- temp_type_qualifier.flags.q.buffer = true;
- temp_type_qualifier.flags.q.in = true;
- temp_type_qualifier.flags.q.out = true;
- interface_type_mask = temp_type_qualifier.flags.i;
-
- /* Get the block's interface qualifier. The interface_qualifier
- * production rule guarantees that only one bit will be set (and
- * it will be in/out/uniform).
- */
- uint64_t block_interface_qualifier = $1.flags.i;
-
- block->layout.flags.i |= block_interface_qualifier;
+ _mesa_ast_process_interface_block(& @1, state, block, $1);
- if (state->stage == MESA_SHADER_GEOMETRY &&
- state->has_explicit_attrib_stream()) {
- /* Assign global layout's stream value. */
- block->layout.flags.q.stream = 1;
- block->layout.flags.q.explicit_stream = 0;
- block->layout.stream = state->out_qualifier->stream;
- }
+ $$ = block;
+ }
+ | buffer_interface_qualifier NEW_IDENTIFIER '{' member_list '}' buffer_instance_name_opt ';'
+ {
+ ast_interface_block *const block = $6;
- foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
- ast_type_qualifier& qualifier = member->type->qualifier;
- if ((qualifier.flags.i & interface_type_mask) == 0) {
- /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
- * "If no optional qualifier is used in a member declaration, the
- * qualifier of the variable is just in, out, or uniform as declared
- * by interface-qualifier."
- */
- qualifier.flags.i |= block_interface_qualifier;
- } else if ((qualifier.flags.i & interface_type_mask) !=
- block_interface_qualifier) {
- /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
- * "If optional qualifiers are used, they can include interpolation
- * and storage qualifiers and they must declare an input, output,
- * or uniform variable consistent with the interface qualifier of
- * the block."
- */
- _mesa_glsl_error(& @1, state,
- "uniform/in/out qualifier on "
- "interface block member does not match "
- "the interface block");
- }
+ block->block_name = $2;
+ block->declarations.push_degenerate_list_at_head(& $4->link);
- /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
- *
- * "GLSL ES 3.0 does not support interface blocks for shader inputs or
- * outputs."
- *
- * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":.
- *
- * "Only variables output from a shader can be candidates for
- * invariance."
- *
- * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
- *
- * "If optional qualifiers are used, they can include interpolation
- * qualifiers, auxiliary storage qualifiers, and storage qualifiers
- * and they must declare an input, output, or uniform member
- * consistent with the interface qualifier of the block"
- */
- if (qualifier.flags.q.invariant)
- _mesa_glsl_error(&@1, state,
- "invariant qualifiers cannot be used "
- "with interface blocks members");
- }
+ _mesa_ast_process_interface_block(& @1, state, block, $1);
$$ = block;
}
memset(& $$, 0, sizeof($$));
$$.flags.q.uniform = 1;
}
- | BUFFER
+ ;
+
+/* Interface qualifier for shader storage blocks: yields a zeroed
+ * ast_type_qualifier with only the buffer flag set.
+ */
+buffer_interface_qualifier:
+ BUFFER
{
memset(& $$, 0, sizeof($$));
$$.flags.q.buffer = 1;
}
;
+/* Optional instance name (and array specifier) of a shader storage block.
+ * Every alternative creates the ast_interface_block itself, seeding its
+ * layout from the current default shader storage qualifier.
+ */
+buffer_instance_name_opt:
+ /* empty */
+ {
+ $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier,
+ NULL, NULL);
+ }
+ | NEW_IDENTIFIER
+ {
+ $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier,
+ $1, NULL);
+ $$->set_location(@1);
+ }
+ | NEW_IDENTIFIER array_specifier
+ {
+ $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier,
+ $1, $2);
+ $$->set_location_range(@1, @2);
+ }
+ ;
+
member_list:
member_declaration
{
$$ = NULL;
}
+ | layout_qualifier BUFFER ';'
+ {
+ if (!state->default_shader_storage_qualifier->merge_qualifier(& @1, state, $1)) {
+ YYERROR;
+ }
+ $$ = NULL;
+ }
+
| layout_qualifier IN_TOK ';'
{
$$ = NULL;
this->default_uniform_qualifier = new(this) ast_type_qualifier();
this->default_uniform_qualifier->flags.q.shared = 1;
this->default_uniform_qualifier->flags.q.column_major = 1;
+ this->default_uniform_qualifier->is_default_qualifier = true;
+
+ this->default_shader_storage_qualifier = new(this) ast_type_qualifier();
+ this->default_shader_storage_qualifier->flags.q.shared = 1;
+ this->default_shader_storage_qualifier->flags.q.column_major = 1;
+ this->default_shader_storage_qualifier->is_default_qualifier = true;
this->fs_uses_gl_fragcoord = false;
this->fs_redeclares_gl_fragcoord = false;
EXT(ARB_shader_image_size, true, false, ARB_shader_image_size),
EXT(ARB_shader_precision, true, false, ARB_shader_precision),
EXT(ARB_shader_stencil_export, true, false, ARB_shader_stencil_export),
- EXT(ARB_shader_storage_buffer_object, true, false, ARB_shader_storage_buffer_object),
+ EXT(ARB_shader_storage_buffer_object, true, true, ARB_shader_storage_buffer_object),
EXT(ARB_shader_subroutine, true, false, ARB_shader_subroutine),
+ EXT(ARB_shader_texture_image_samples, true, false, ARB_shader_texture_image_samples),
EXT(ARB_shader_texture_lod, true, false, ARB_shader_texture_lod),
EXT(ARB_shading_language_420pack, true, false, ARB_shading_language_420pack),
EXT(ARB_shading_language_packing, true, false, ARB_shading_language_packing),
}
}
+/**
+ * Validate an interface block against its interface qualifier and propagate
+ * that qualifier to the block and its members.
+ *
+ * \param locp source location used for every diagnostic emitted here
+ * \param state parser state (version / extension / stage queries)
+ * \param block interface block being processed; its layout flags and the
+ * qualifiers of its member declarations are updated in place
+ * \param q interface qualifier of the block (in/out/uniform/buffer)
+ *
+ * Problems are reported through _mesa_glsl_error / _mesa_glsl_warning /
+ * check_version; the function itself returns nothing.
+ */
+void
+_mesa_ast_process_interface_block(YYLTYPE *locp,
+ _mesa_glsl_parse_state *state,
+ ast_interface_block *const block,
+ const struct ast_type_qualifier q)
+{
+ /* Check that the kind of block being declared is available at all. */
+ if (q.flags.q.buffer) {
+ if (!state->has_shader_storage_buffer_objects()) {
+ _mesa_glsl_error(locp, state,
+ "#version 430 / GL_ARB_shader_storage_buffer_object "
+ "required for defining shader storage blocks");
+ } else if (state->ARB_shader_storage_buffer_object_warn) {
+ _mesa_glsl_warning(locp, state,
+ "#version 430 / GL_ARB_shader_storage_buffer_object "
+ "required for defining shader storage blocks");
+ }
+ } else if (q.flags.q.uniform) {
+ if (!state->has_uniform_buffer_objects()) {
+ _mesa_glsl_error(locp, state,
+ "#version 140 / GL_ARB_uniform_buffer_object "
+ "required for defining uniform blocks");
+ } else if (state->ARB_uniform_buffer_object_warn) {
+ _mesa_glsl_warning(locp, state,
+ "#version 140 / GL_ARB_uniform_buffer_object "
+ "required for defining uniform blocks");
+ }
+ } else {
+ if (state->es_shader || state->language_version < 150) {
+ _mesa_glsl_error(locp, state,
+ "#version 150 required for using "
+ "interface blocks");
+ }
+ }
+
+ /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"):
+ * "It is illegal to have an input block in a vertex shader
+ * or an output block in a fragment shader"
+ */
+ if ((state->stage == MESA_SHADER_VERTEX) && q.flags.q.in) {
+ _mesa_glsl_error(locp, state,
+ "`in' interface block is not allowed for "
+ "a vertex shader");
+ } else if ((state->stage == MESA_SHADER_FRAGMENT) && q.flags.q.out) {
+ _mesa_glsl_error(locp, state,
+ "`out' interface block is not allowed for "
+ "a fragment shader");
+ }
+
+ /* Since block arrays require names, and both features are added in
+ * the same language versions, we don't have to explicitly
+ * version-check both things.
+ */
+ if (block->instance_name != NULL) {
+ state->check_version(150, 300, locp, "interface blocks with "
+ "an instance name are not allowed");
+ }
+
+ uint64_t interface_type_mask;
+ struct ast_type_qualifier temp_type_qualifier;
+
+ /* Get a bitmask containing only the in/out/uniform/buffer
+ * flags, allowing us to ignore other irrelevant flags like
+ * interpolation qualifiers.
+ */
+ temp_type_qualifier.flags.i = 0;
+ temp_type_qualifier.flags.q.uniform = true;
+ temp_type_qualifier.flags.q.in = true;
+ temp_type_qualifier.flags.q.out = true;
+ temp_type_qualifier.flags.q.buffer = true;
+ interface_type_mask = temp_type_qualifier.flags.i;
+
+ /* Get the block's interface qualifier. The interface_qualifier
+ * production rule guarantees that only one bit will be set (and
+ * it will be in/out/uniform). Shader storage blocks arrive here
+ * through buffer_interface_qualifier instead, which sets only the
+ * buffer bit.
+ */
+ uint64_t block_interface_qualifier = q.flags.i;
+
+ block->layout.flags.i |= block_interface_qualifier;
+
+ if (state->stage == MESA_SHADER_GEOMETRY &&
+ state->has_explicit_attrib_stream()) {
+ /* Assign global layout's stream value. */
+ block->layout.flags.q.stream = 1;
+ block->layout.flags.q.explicit_stream = 0;
+ block->layout.stream = state->out_qualifier->stream;
+ }
+
+ foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
+ ast_type_qualifier& qualifier = member->type->qualifier;
+ if ((qualifier.flags.i & interface_type_mask) == 0) {
+ /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+ * "If no optional qualifier is used in a member declaration, the
+ * qualifier of the variable is just in, out, or uniform as declared
+ * by interface-qualifier."
+ */
+ qualifier.flags.i |= block_interface_qualifier;
+ } else if ((qualifier.flags.i & interface_type_mask) !=
+ block_interface_qualifier) {
+ /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+ * "If optional qualifiers are used, they can include interpolation
+ * and storage qualifiers and they must declare an input, output,
+ * or uniform variable consistent with the interface qualifier of
+ * the block."
+ */
+ _mesa_glsl_error(locp, state,
+ "uniform/in/out qualifier on "
+ "interface block member does not match "
+ "the interface block");
+ }
+
+ /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
+ *
+ * "GLSL ES 3.0 does not support interface blocks for shader inputs or
+ * outputs."
+ *
+ * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":.
+ *
+ * "Only variables output from a shader can be candidates for
+ * invariance."
+ *
+ * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
+ *
+ * "If optional qualifiers are used, they can include interpolation
+ * qualifiers, auxiliary storage qualifiers, and storage qualifiers
+ * and they must declare an input, output, or uniform member
+ * consistent with the interface qualifier of the block"
+ */
+ if (qualifier.flags.q.invariant)
+ _mesa_glsl_error(locp, state,
+ "invariant qualifiers cannot be used "
+ "with interface blocks members");
+ }
+}
void
_mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q)
}
}
+ _mesa_glsl_initialize_derived_variables(shader);
+
delete state->symbols;
ralloc_free(state);
}
+ /* True when shader storage blocks may be declared: either
+ * ARB_shader_storage_buffer_object is enabled or is_version(430, 310)
+ * holds. NOTE(review): the second is_version() argument is presumably the
+ * minimum GLSL ES version -- confirm against is_version().
+ */
bool has_shader_storage_buffer_objects() const
{
- return ARB_shader_storage_buffer_object_enable || is_version(430, 0);
+ return ARB_shader_storage_buffer_object_enable || is_version(430, 310);
}
bool has_separate_shader_objects() const
struct ast_type_qualifier *default_uniform_qualifier;
/**
+ * Default shader storage layout qualifiers tracked during parsing.
+ * Currently affects shader storage blocks and shader storage buffer
+ * variables in those blocks.
+ */
+ struct ast_type_qualifier *default_shader_storage_qualifier;
+
+ /**
* Variables to track different cases if a fragment shader redeclares
* built-in variable gl_FragCoord.
*
bool ARB_shader_storage_buffer_object_warn;
bool ARB_shader_subroutine_enable;
bool ARB_shader_subroutine_warn;
+ bool ARB_shader_texture_image_samples_enable;
+ bool ARB_shader_texture_image_samples_warn;
bool ARB_shader_texture_lod_enable;
bool ARB_shader_texture_lod_warn;
bool ARB_shading_language_420pack_enable;
}
glsl_type::glsl_type(GLenum gl_type,
- glsl_base_type base_type, unsigned vector_elements,
- unsigned matrix_columns, const char *name) :
+ glsl_base_type base_type, unsigned vector_elements,
+ unsigned matrix_columns, const char *name) :
gl_type(gl_type),
base_type(base_type),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
}
glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
- enum glsl_sampler_dim dim, bool shadow, bool array,
- unsigned type, const char *name) :
+ enum glsl_sampler_dim dim, bool shadow, bool array,
+ unsigned type, const char *name) :
gl_type(gl_type),
base_type(base_type),
sampler_dimensionality(dim), sampler_shadow(shadow),
}
glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
- const char *name) :
+ const char *name) :
gl_type(0),
base_type(GLSL_TYPE_STRUCT),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
assert(name != NULL);
this->name = ralloc_strdup(this->mem_ctx, name);
this->fields.structure = ralloc_array(this->mem_ctx,
- glsl_struct_field, length);
+ glsl_struct_field, length);
for (i = 0; i < length; i++) {
this->fields.structure[i].type = fields[i].type;
this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
- fields[i].name);
+ fields[i].name);
this->fields.structure[i].location = fields[i].location;
this->fields.structure[i].interpolation = fields[i].interpolation;
this->fields.structure[i].centroid = fields[i].centroid;
this->fields.structure[i].sample = fields[i].sample;
this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
this->fields.structure[i].patch = fields[i].patch;
+ this->fields.structure[i].image_read_only = fields[i].image_read_only;
+ this->fields.structure[i].image_write_only = fields[i].image_write_only;
+ this->fields.structure[i].image_coherent = fields[i].image_coherent;
+ this->fields.structure[i].image_volatile = fields[i].image_volatile;
+ this->fields.structure[i].image_restrict = fields[i].image_restrict;
}
mtx_unlock(&glsl_type::mutex);
}
glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
- enum glsl_interface_packing packing, const char *name) :
+ enum glsl_interface_packing packing, const char *name) :
gl_type(0),
base_type(GLSL_TYPE_INTERFACE),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
assert(name != NULL);
this->name = ralloc_strdup(this->mem_ctx, name);
this->fields.structure = ralloc_array(this->mem_ctx,
- glsl_struct_field, length);
+ glsl_struct_field, length);
for (i = 0; i < length; i++) {
this->fields.structure[i].type = fields[i].type;
this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
- fields[i].name);
+ fields[i].name);
this->fields.structure[i].location = fields[i].location;
this->fields.structure[i].interpolation = fields[i].interpolation;
this->fields.structure[i].centroid = fields[i].centroid;
return this->fields.array->contains_sampler();
} else if (this->is_record()) {
for (unsigned int i = 0; i < this->length; i++) {
- if (this->fields.structure[i].type->contains_sampler())
- return true;
+ if (this->fields.structure[i].type->contains_sampler())
+ return true;
}
return false;
} else {
return this->fields.array->contains_integer();
} else if (this->is_record()) {
for (unsigned int i = 0; i < this->length; i++) {
- if (this->fields.structure[i].type->contains_integer())
- return true;
+ if (this->fields.structure[i].type->contains_integer())
+ return true;
}
return false;
} else {
return this->fields.array->contains_double();
} else if (this->is_record()) {
for (unsigned int i = 0; i < this->length; i++) {
- if (this->fields.structure[i].type->contains_double())
- return true;
+ if (this->fields.structure[i].type->contains_double())
+ return true;
}
return false;
} else {
return this->fields.array->contains_subroutine();
} else if (this->is_record()) {
for (unsigned int i = 0; i < this->length; i++) {
- if (this->fields.structure[i].type->contains_subroutine())
- return true;
+ if (this->fields.structure[i].type->contains_subroutine())
+ return true;
}
return false;
} else {
return this->fields.array->contains_image();
} else if (this->is_record()) {
for (unsigned int i = 0; i < this->length; i++) {
- if (this->fields.structure[i].type->contains_image())
- return true;
+ if (this->fields.structure[i].type->contains_image())
+ return true;
}
return false;
} else {
if (columns == 1) {
switch (base_type) {
case GLSL_TYPE_UINT:
- return uvec(rows);
+ return uvec(rows);
case GLSL_TYPE_INT:
- return ivec(rows);
+ return ivec(rows);
case GLSL_TYPE_FLOAT:
- return vec(rows);
+ return vec(rows);
case GLSL_TYPE_DOUBLE:
- return dvec(rows);
+ return dvec(rows);
case GLSL_TYPE_BOOL:
- return bvec(rows);
+ return bvec(rows);
default:
- return error_type;
+ return error_type;
}
} else {
if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1))
- return error_type;
+ return error_type;
/* GLSL matrix types are named mat{COLUMNS}x{ROWS}. Only the following
* combinations are valid:
for (unsigned i = 0; i < this->length; i++) {
if (this->fields.structure[i].type != b->fields.structure[i].type)
- return false;
+ return false;
if (strcmp(this->fields.structure[i].name,
- b->fields.structure[i].name) != 0)
- return false;
+ b->fields.structure[i].name) != 0)
+ return false;
if (this->fields.structure[i].matrix_layout
!= b->fields.structure[i].matrix_layout)
return false;
if (this->fields.structure[i].patch
!= b->fields.structure[i].patch)
return false;
+ if (this->fields.structure[i].image_read_only
+ != b->fields.structure[i].image_read_only)
+ return false;
+ if (this->fields.structure[i].image_write_only
+ != b->fields.structure[i].image_write_only)
+ return false;
+ if (this->fields.structure[i].image_coherent
+ != b->fields.structure[i].image_coherent)
+ return false;
+ if (this->fields.structure[i].image_volatile
+ != b->fields.structure[i].image_volatile)
+ return false;
+ if (this->fields.structure[i].image_restrict
+ != b->fields.structure[i].image_restrict)
+ return false;
}
return true;
const glsl_type *
glsl_type::get_record_instance(const glsl_struct_field *fields,
- unsigned num_fields,
- const char *name)
+ unsigned num_fields,
+ const char *name)
{
const glsl_type key(fields, num_fields, name);
const glsl_type *
glsl_type::get_interface_instance(const glsl_struct_field *fields,
- unsigned num_fields,
- enum glsl_interface_packing packing,
- const char *block_name)
+ unsigned num_fields,
+ enum glsl_interface_packing packing,
+ const char *block_name)
{
const glsl_type key(fields, num_fields, packing, block_name);
for (unsigned i = 0; i < this->length; i++) {
if (strcmp(name, this->fields.structure[i].name) == 0)
- return this->fields.structure[i].type;
+ return this->fields.structure[i].type;
}
return error_type;
for (unsigned i = 0; i < this->length; i++) {
if (strcmp(name, this->fields.structure[i].name) == 0)
- return i;
+ return i;
}
return -1;
unsigned size = 0;
for (unsigned i = 0; i < this->length; i++)
- size += this->fields.structure[i].type->component_slots();
+ size += this->fields.structure[i].type->component_slots();
return size;
}
}
unsigned
+glsl_type::record_location_offset(unsigned length) const
+{
+ unsigned offset = 0; /* stays 0 when the array-stripped type is not a record */
+ const glsl_type *t = this->without_array();
+ if (t->is_record()) {
+ assert(length <= t->length);
+
+ for (unsigned i = 0; i < length; i++) { /* only the first `length` fields are summed */
+ const glsl_type *st = t->fields.structure[i].type;
+ const glsl_type *wa = st->without_array();
+ if (wa->is_record()) {
+ unsigned r_offset = wa->record_location_offset(wa->length); /* all fields of the nested record */
+ offset += st->is_array() ? st->length * r_offset : r_offset; /* one nested-record footprint per array element */
+ } else {
+ /* We don't worry about arrays here because unless the array
+ * contains a structure or another array it only takes up a single
+ * uniform slot.
+ *
+ * NOTE(review): this branch is also taken for arrays of arrays of
+ * non-record types, which are therefore counted as a single slot
+ * as well; confirm that this matches how locations are assigned
+ * for such fields.
+ */
+ offset += 1;
+ }
+ }
+ }
+ return offset;
+}
+
+unsigned
glsl_type::uniform_locations() const
{
unsigned size = 0;
if (this->is_scalar() || this->is_vector()) {
switch (this->vector_elements) {
case 1:
- return N;
+ return N;
case 2:
- return 2 * N;
+ return 2 * N;
case 3:
case 4:
- return 4 * N;
+ return 4 * N;
}
}
*/
if (this->is_array()) {
if (this->fields.array->is_scalar() ||
- this->fields.array->is_vector() ||
- this->fields.array->is_matrix()) {
- return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
+ this->fields.array->is_vector() ||
+ this->fields.array->is_matrix()) {
+ return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
} else {
- assert(this->fields.array->is_record() ||
+ assert(this->fields.array->is_record() ||
this->fields.array->is_array());
- return this->fields.array->std140_base_alignment(row_major);
+ return this->fields.array->std140_base_alignment(row_major);
}
}
int r = this->vector_elements;
if (row_major) {
- vec_type = get_instance(base_type, c, 1);
- array_type = glsl_type::get_array_instance(vec_type, r);
+ vec_type = get_instance(base_type, c, 1);
+ array_type = glsl_type::get_array_instance(vec_type, r);
} else {
- vec_type = get_instance(base_type, r, 1);
- array_type = glsl_type::get_array_instance(vec_type, c);
+ vec_type = get_instance(base_type, r, 1);
+ array_type = glsl_type::get_array_instance(vec_type, c);
}
return array_type->std140_base_alignment(false);
field_row_major = false;
}
- const struct glsl_type *field_type = this->fields.structure[i].type;
- base_alignment = MAX2(base_alignment,
- field_type->std140_base_alignment(field_row_major));
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ base_alignment = MAX2(base_alignment,
+ field_type->std140_base_alignment(field_row_major));
}
return base_alignment;
}
unsigned int array_len;
if (this->is_array()) {
- element_type = this->fields.array;
- array_len = this->length;
+ element_type = this->fields.array;
+ array_len = this->length;
} else {
- element_type = this;
- array_len = 1;
+ element_type = this;
+ array_len = 1;
}
if (row_major) {
vec_type = get_instance(element_type->base_type,
element_type->matrix_columns, 1);
- array_len *= element_type->vector_elements;
+ array_len *= element_type->vector_elements;
} else {
- vec_type = get_instance(element_type->base_type,
- element_type->vector_elements, 1);
- array_len *= element_type->matrix_columns;
+ vec_type = get_instance(element_type->base_type,
+ element_type->vector_elements, 1);
+ array_len *= element_type->matrix_columns;
}
const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
- array_len);
+ array_len);
return array_type->std140_size(false);
}
*/
if (this->is_array()) {
if (this->fields.array->is_record()) {
- return this->length * this->fields.array->std140_size(row_major);
+ return this->length * this->fields.array->std140_size(row_major);
} else {
- unsigned element_base_align =
- this->fields.array->std140_base_alignment(row_major);
- return this->length * MAX2(element_base_align, 16);
+ unsigned element_base_align =
+ this->fields.array->std140_base_alignment(row_major);
+ return this->length * MAX2(element_base_align, 16);
}
}
* rounded up to the next multiple of the base alignment of the
* structure.
*/
- if (this->is_record()) {
+ if (this->is_record() || this->is_interface()) {
unsigned size = 0;
unsigned max_align = 0;
field_row_major = false;
}
- const struct glsl_type *field_type = this->fields.structure[i].type;
- unsigned align = field_type->std140_base_alignment(field_row_major);
- size = glsl_align(size, align);
- size += field_type->std140_size(field_row_major);
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ unsigned align = field_type->std140_base_alignment(field_row_major);
+
+ /* Ignore unsized arrays when calculating size */
+ if (field_type->is_unsized_array())
+ continue;
+
+ size = glsl_align(size, align);
+ size += field_type->std140_size(field_row_major);
max_align = MAX2(align, max_align);
return -1;
}
+unsigned
+glsl_type::std430_base_alignment(bool row_major) const
+{
+
+ unsigned N = is_double() ? 8 : 4; /* size of one component: 8 for double-based types, 4 otherwise */
+
+ /* (1) If the member is a scalar consuming <N> basic machine units, the
+ * base alignment is <N>.
+ *
+ * (2) If the member is a two- or four-component vector with components
+ * consuming <N> basic machine units, the base alignment is 2<N> or
+ * 4<N>, respectively.
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_scalar() || this->is_vector()) {
+ switch (this->vector_elements) {
+ case 1:
+ return N;
+ case 2:
+ return 2 * N;
+ case 3:
+ case 4:
+ return 4 * N;
+ }
+ }
+
+ /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+ *
+ * "When using the std430 storage layout, shader storage blocks will be
+ * laid out in buffer storage identically to uniform and shader storage
+ * blocks using the std140 layout, except that the base alignment and
+ * stride of arrays of scalars and vectors in rule 4 and of structures
+ * in rule 9 are not rounded up a multiple of the base alignment of a vec4."
+ */
+
+ /* (4) std430 variant of the array rule: an array simply takes the base
+ * alignment of its element type, computed by recursing into the
+ * element. Unlike std140, the result is not rounded up to the base
+ * alignment of a vec4 (see the spec text quoted above).
+ */
+ if (this->is_array())
+ return this->fields.array->std430_base_alignment(row_major);
+
+ /* (5) If the member is a column-major matrix with <C> columns and
+ * <R> rows, the matrix is stored identically to an array of
+ * <C> column vectors with <R> components each, according to
+ * rule (4).
+ *
+ * (7) If the member is a row-major matrix with <C> columns and <R>
+ * rows, the matrix is stored identically to an array of <R>
+ * row vectors with <C> components each, according to rule (4).
+ *
+ * The equivalent array-of-vectors type is built below and the array
+ * rule is applied to it. The recursive call passes row_major = false
+ * because the layout has already been folded into the choice of
+ * vector size and array length.
+ */
+ if (this->is_matrix()) {
+ const struct glsl_type *vec_type, *array_type;
+ int c = this->matrix_columns;
+ int r = this->vector_elements;
+
+ if (row_major) {
+ vec_type = get_instance(base_type, c, 1);
+ array_type = glsl_type::get_array_instance(vec_type, r);
+ } else {
+ vec_type = get_instance(base_type, r, 1);
+ array_type = glsl_type::get_array_instance(vec_type, c);
+ }
+
+ return array_type->std430_base_alignment(false);
+ }
+
+ /* (9) If the member is a structure, the base alignment of the
+ * structure is <N>, where <N> is the largest base alignment
+ * value of any of its members. The std140 wording of this rule
+ * additionally rounds <N> up to the base alignment of a vec4; per
+ * the std430 text quoted above that rounding is not applied, so
+ * the loop below returns the plain maximum. The individual members
+ * of this sub-structure are then assigned offsets by applying this
+ * set of rules recursively, where the base offset of the first
+ * member of the sub-structure is equal to the aligned offset
+ * of the structure. The structure may have padding at the end;
+ * the base offset of the member following the sub-structure is
+ * rounded up to the next multiple of the base alignment of the
+ * structure.
+ *
+ * A field's own row-/column-major qualifier overrides the row_major
+ * value inherited from the caller; fields without an explicit
+ * qualifier keep the inherited value.
+ *
+ * NOTE(review): only is_record() is handled here, whereas
+ * std430_size() also accepts is_interface(); confirm that interface
+ * types never reach this function.
+ */
+ if (this->is_record()) {
+ unsigned base_alignment = 0;
+ for (unsigned i = 0; i < this->length; i++) {
+ bool field_row_major = row_major;
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+ if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+ field_row_major = true;
+ } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+ field_row_major = false;
+ }
+
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ base_alignment = MAX2(base_alignment,
+ field_type->std430_base_alignment(field_row_major));
+ }
+ return base_alignment;
+ }
+ assert(!"not reached");
+ return -1; /* unsigned return type: -1 converts to the largest unsigned value */
+}
+
+unsigned
+glsl_type::std430_array_stride(bool row_major) const
+{
+ unsigned N = is_double() ? 8 : 4; /* size of one component: 8 for double-based types, 4 otherwise */
+
+ /* Notice that the array stride of a vec3 is not 3 * N but 4 * N.
+ * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout"
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ *
+ * std430_size() returns vector_elements * N for a vector, i.e. 3 * N for
+ * a vec3, so it cannot be used as the stride in this one case.
+ */
+ if (this->is_vector() && this->vector_elements == 3)
+ return 4 * N;
+
+ /* Every other type uses its std430_size(row_major) as the stride. */
+ return this->std430_size(row_major);
+}
+
+/**
+ * Size in bytes of this type when laid out with the std430 rules.
+ *
+ * \param row_major  layout inherited from the enclosing block or struct;
+ *                   struct fields with an explicit matrix layout qualifier
+ *                   override it for themselves.
+ *
+ * Arrays of arrays are sized as the total number of innermost elements
+ * (arrays_of_arrays_size()) times the stride of one innermost element.
+ * For one-dimensional arrays this equals length * element stride.
+ */
+unsigned
+glsl_type::std430_size(bool row_major) const
+{
+   /* Size of one component: 8 for double-based types, 4 otherwise. */
+   unsigned N = is_double() ? 8 : 4;
+
+   /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+    *
+    * "When using the std430 storage layout, shader storage blocks will be
+    * laid out in buffer storage identically to uniform and shader storage
+    * blocks using the std140 layout, except that the base alignment and
+    * stride of arrays of scalars and vectors in rule 4 and of structures
+    * in rule 9 are not rounded up a multiple of the base alignment of a vec4."
+    */
+   if (this->is_scalar() || this->is_vector())
+      return this->vector_elements * N;
+
+   /* A matrix, or a (possibly nested) array of matrices, is sized as one
+    * flat array of row or column vectors.
+    */
+   if (this->without_array()->is_matrix()) {
+      const struct glsl_type *element_type;
+      const struct glsl_type *vec_type;
+      unsigned int array_len;
+
+      if (this->is_array()) {
+         /* Look through every array dimension: fields.array would itself
+          * be an array type for arrays of arrays and carries no matrix
+          * dimensions.
+          */
+         element_type = this->without_array();
+         array_len = this->arrays_of_arrays_size();
+      } else {
+         element_type = this;
+         array_len = 1;
+      }
+
+      if (row_major) {
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->matrix_columns, 1);
+
+         array_len *= element_type->vector_elements;
+      } else {
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->vector_elements, 1);
+         array_len *= element_type->matrix_columns;
+      }
+      const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
+                                                                  array_len);
+
+      /* The layout is already folded into vec_type/array_len. */
+      return array_type->std430_size(false);
+   }
+
+   if (this->is_array()) {
+      /* Stride of one innermost element: the full size for a struct, the
+       * base alignment for a scalar or vector (a vec3 occupies 4 * N).
+       */
+      const glsl_type *innermost = this->without_array();
+      unsigned stride;
+
+      if (innermost->is_record())
+         stride = innermost->std430_size(row_major);
+      else
+         stride = innermost->std430_base_alignment(row_major);
+
+      /* Multiply by the element count across all dimensions so that, for
+       * example, float[2][3] is 6 * 4 bytes and not 2 * 4 bytes.
+       */
+      return this->arrays_of_arrays_size() * stride;
+   }
+
+   if (this->is_record() || this->is_interface()) {
+      unsigned size = 0;
+      unsigned max_align = 0;
+
+      for (unsigned i = 0; i < this->length; i++) {
+         /* An explicit per-field qualifier overrides the inherited layout. */
+         bool field_row_major = row_major;
+         const enum glsl_matrix_layout matrix_layout =
+            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+            field_row_major = true;
+         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+            field_row_major = false;
+         }
+
+         /* Place the field at its aligned offset, then advance past it. */
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         unsigned align = field_type->std430_base_alignment(field_row_major);
+         size = glsl_align(size, align);
+         size += field_type->std430_size(field_row_major);
+
+         max_align = MAX2(align, max_align);
+      }
+      /* Pad the tail so the total is a multiple of the largest member
+       * alignment.
+       */
+      size = glsl_align(size, max_align);
+      return size;
+   }
+
+   assert(!"not reached");
+   return -1;
+}
unsigned
glsl_type::count_attribute_slots() const
enum glsl_interface_packing {
GLSL_INTERFACE_PACKING_STD140,
GLSL_INTERFACE_PACKING_SHARED,
- GLSL_INTERFACE_PACKING_PACKED
+ GLSL_INTERFACE_PACKING_PACKED,
+ GLSL_INTERFACE_PACKING_STD430
};
enum glsl_matrix_layout {
unsigned component_slots() const;
/**
+ * Calculate offset between the base location of the struct in
+ * uniform storage and a struct member.
+ * For the initial call, length is the index of the member to find the
+ * offset for.
+ */
+ unsigned record_location_offset(unsigned length) const;
+
+ /**
* Calculate the number of unique values from glGetUniformLocation for the
* elements of the type.
*
unsigned std140_size(bool row_major) const;
/**
+ * Alignment in bytes of the start of this type in a std430 shader
+ * storage block.
+ */
+ unsigned std430_base_alignment(bool row_major) const;
+
+ /**
+ * Calculate array stride in bytes of this type in a std430 shader storage
+ * block.
+ */
+ unsigned std430_array_stride(bool row_major) const;
+
+ /**
+ * Size in bytes of this type in a std430 shader storage block.
+ *
+ * Note that this is not GL_BUFFER_SIZE
+ */
+ unsigned std430_size(bool row_major) const;
+
+ /**
* \brief Can this type be implicitly converted to another?
*
* \return True if the types are identical or if this type can be converted
}
/**
+ * Return the total number of elements in an array including the elements
+ * in arrays of arrays.
+ *
+ * The result is the product of the lengths of every nested array
+ * dimension, starting with this type's own length. Returns 0 when this
+ * type is not an array.
+ */
+ unsigned arrays_of_arrays_size() const
+ {
+ if (!is_array())
+ return 0;
+
+ unsigned size = length;
+ const glsl_type *base_type = fields.array; /* local cursor over the nested element types; NOTE(review): appears to shadow the glsl_type::base_type member */
+
+ while (base_type->is_array()) {
+ size = size * base_type->length;
+ base_type = base_type->fields.array;
+ }
+ return size;
+ }
+
+ /**
* Return the amount of atomic counter storage required for a type.
*/
unsigned atomic_size() const
*/
int stream;
+ /**
+ * Image qualifiers, applicable to buffer variables defined in shader
+ * storage buffer objects (SSBOs)
+ */
+ unsigned image_read_only:1;
+ unsigned image_write_only:1;
+ unsigned image_coherent:1;
+ unsigned image_volatile:1;
+ unsigned image_restrict:1;
+
#ifdef __cplusplus
glsl_struct_field(const struct glsl_type *_type, const char *_name)
: type(_type), name(_name), location(-1), interpolation(0), centroid(0),
op0->type->vector_elements, 1);
break;
+ case ir_unop_get_buffer_size:
+ case ir_unop_ssbo_unsized_array_length:
+ this->type = glsl_type::int_type;
+ break;
+
default:
assert(!"not reached: missing automatic type setup for ir_expression");
this->type = op0->type;
"noise",
"subroutine_to_int",
"interpolate_at_centroid",
+ "get_buffer_size",
+ "ssbo_unsized_array_length",
"+",
"-",
"*",
}
-static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels" };
+static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples" };
const char *ir_texture::opcode_string()
{
this->sampler = sampler;
this->type = type;
- if (this->op == ir_txs || this->op == ir_query_levels) {
+ if (this->op == ir_txs || this->op == ir_query_levels ||
+ this->op == ir_texture_samples) {
assert(type->base_type == GLSL_TYPE_INT);
} else if (this->op == ir_lod) {
assert(type->vector_elements == 2);
this->data.image_coherent = false;
this->data.image_volatile = false;
this->data.image_restrict = false;
+ this->data.from_ssbo_unsized_array = false;
if (type != NULL) {
if (type->base_type == GLSL_TYPE_SAMPLER)
}
/**
+ * Determine whether or not a variable is part of a shader storage block.
+ *
+ * True only when the variable's mode is ir_var_shader_storage and it has
+ * a non-NULL interface type.
+ */
+ inline bool is_in_shader_storage_block() const
+ {
+ return this->data.mode == ir_var_shader_storage &&
+ this->interface_type != NULL;
+ }
+
+ /**
* Determine whether or not a variable is the declaration of an interface
* block
*
unsigned image_restrict:1;
/**
+ * ARB_shader_storage_buffer_object
+ */
+ unsigned from_ssbo_unsized_array:1; /**< unsized array buffer variable. */
+
+ /**
* Emit a warning if this variable is accessed.
*/
private:
* - Fragment shader output: one of the values from \c gl_frag_result.
* - Uniforms: Per-stage uniform slot number for default uniform block.
* - Uniforms: Index within the uniform block definition for UBO members.
+ * - Non-UBO Uniforms: explicit location until linking then reused to
+ * store uniform slot number.
* - Other: This field is not currently used.
*
* If the variable is a uniform, shader input, or shader output, and the
ir_unop_interpolate_at_centroid,
/**
+ * Ask the driver for the total size of a buffer block.
+ *
+ * operand0 is the ir_constant buffer block index in the linked shader.
+ */
+ ir_unop_get_buffer_size,
+
+ /**
+ * Calculate length of an unsized array inside a buffer block.
+ * This opcode is going to be replaced in a lowering pass inside
+ * the linker.
+ *
+ * operand0 is the unsized array's ir_value for the calculation
+ * of its length.
+ */
+ ir_unop_ssbo_unsized_array_length,
+
+ /**
* A sentinel marking the last of the unary operations.
*/
- ir_last_unop = ir_unop_interpolate_at_centroid,
+ ir_last_unop = ir_unop_ssbo_unsized_array_length,
ir_binop_add,
ir_binop_sub,
ir_txs, /**< Texture size */
ir_lod, /**< Texture lod query */
ir_tg4, /**< Texture gather */
- ir_query_levels /**< Texture levels query */
+ ir_query_levels, /**< Texture levels query */
+ ir_texture_samples, /**< Texture samples query */
};
struct _mesa_glsl_parse_state *state);
extern void
+_mesa_glsl_initialize_derived_variables(gl_shader *shader);
+
+extern void
_mesa_glsl_initialize_functions(_mesa_glsl_parse_state *state);
extern void
const char *name, exec_list *actual_parameters);
extern ir_function *
-_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state,
- const char *name);
+_mesa_glsl_find_builtin_function_by_name(const char *name);
extern gl_shader *
_mesa_glsl_get_builtin_function_shader(void);
+extern ir_function_signature *
+_mesa_get_main_function_signature(gl_shader *sh);
+
extern void
_mesa_glsl_release_functions(void);
case ir_tex:
case ir_lod:
case ir_query_levels:
+ case ir_texture_samples:
break;
case ir_txb:
new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
case ir_tex:
case ir_lod:
case ir_query_levels:
+ case ir_texture_samples:
break;
case ir_txb:
if (!lod_info.bias->equals(other->lod_info.bias, ignore))
case ir_tex:
case ir_lod:
case ir_query_levels:
+ case ir_texture_samples:
break;
case ir_txb:
s = this->lod_info.bias->accept(v);
ir->sampler->accept(this);
fprintf(f, " ");
- if (ir->op != ir_txs && ir->op != ir_query_levels) {
+ if (ir->op != ir_txs && ir->op != ir_query_levels &&
+ ir->op != ir_texture_samples) {
ir->coordinate->accept(this);
fprintf(f, " ");
if (ir->op != ir_txf && ir->op != ir_txf_ms &&
ir->op != ir_txs && ir->op != ir_tg4 &&
- ir->op != ir_query_levels) {
+ ir->op != ir_query_levels && ir->op != ir_texture_samples) {
if (ir->projector)
ir->projector->accept(this);
else
case ir_tex:
case ir_lod:
case ir_query_levels:
+ case ir_texture_samples:
break;
case ir_txb:
ir->lod_info.bias->accept(this);
}
void
-ir_print_visitor::visit(ir_barrier *ir)
+ir_print_visitor::visit(ir_barrier *) /* parameter left unnamed: the body never reads it */
{
fprintf(f, "(barrier)\n");
}
#include "glsl_types.h"
#include "s_expression.h"
-const static bool debug = false;
+static const bool debug = false;
namespace {
{ "tg4", s_type, s_sampler, s_coord, s_offset, s_component };
s_pattern query_levels_pattern[] =
{ "query_levels", s_type, s_sampler };
+ s_pattern texture_samples_pattern[] =
+ { "samples", s_type, s_sampler };
s_pattern other_pattern[] =
{ tag, s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow, s_lod };
op = ir_tg4;
} else if (MATCH(expr, query_levels_pattern)) {
op = ir_query_levels;
+ } else if (MATCH(expr, texture_samples_pattern)) {
+ op = ir_texture_samples;
} else if (MATCH(expr, other_pattern)) {
op = ir_texture::get_opcode(tag->value());
if (op == (ir_texture_opcode) -1)
if (op != ir_txf && op != ir_txf_ms &&
op != ir_txs && op != ir_lod && op != ir_tg4 &&
- op != ir_query_levels) {
+ op != ir_query_levels && op != ir_texture_samples) {
s_int *proj_as_int = SX_AS_INT(s_proj);
if (proj_as_int && proj_as_int->value() == 1) {
tex->projector = NULL;
case ir_tex:
case ir_lod:
case ir_query_levels:
+ case ir_texture_samples:
break;
case ir_txb:
handle_rvalue(&ir->lod_info.bias);
* This is a built-in uniform that should not be modified through any gl API.
*/
bool builtin;
+
+ /**
+ * This is a shader storage buffer variable, not a uniform.
+ */
+ bool is_shader_storage;
};
#ifdef __cplusplus
assert(ir->operands[0]->type->is_float());
break;
+ case ir_unop_get_buffer_size:
+ assert(ir->type == glsl_type::int_type);
+ assert(ir->operands[0]->type == glsl_type::uint_type);
+ break;
+
+ case ir_unop_ssbo_unsized_array_length:
+ assert(ir->type == glsl_type::int_type);
+ assert(ir->operands[0]->type->is_array());
+ assert(ir->operands[0]->type->is_unsized_array());
+ break;
+
case ir_unop_d2f:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
assert(ir->type->base_type == GLSL_TYPE_FLOAT);
if (!var->is_in_buffer_block())
return visit_continue;
- const glsl_type *const block_type = var->is_interface_instance()
- ? var->type : var->get_interface_type();
-
/* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec says:
*
* "All members of a named uniform block declared with a shared or
* also considered active, even if no member of the block is
* referenced."
*/
- if (block_type->interface_packing == GLSL_INTERFACE_PACKING_PACKED)
+ if (var->get_interface_type()->interface_packing ==
+ GLSL_INTERFACE_PACKING_PACKED)
return visit_continue;
/* Process the block. Bail if there was an error.
assert(b->num_array_elements == 0);
assert(b->array_elements == NULL);
assert(b->type != NULL);
+ assert(!b->type->is_array() || b->has_instance_name);
+
+ /* For uniform block arrays declared with a shared or std140 layout
+ * qualifier, mark all its instances as used.
+ */
+ if (b->type->is_array() && b->type->length > 0) {
+ b->num_array_elements = b->type->length;
+ b->array_elements = reralloc(this->mem_ctx,
+ b->array_elements,
+ unsigned,
+ b->num_array_elements);
+
+ for (unsigned i = 0; i < b->num_array_elements; i++) {
+ b->array_elements[i] = i;
+ }
+ }
return visit_continue;
}
assert((b->num_array_elements == 0) == (b->array_elements == NULL));
assert(b->type != NULL);
+ /* If the block array was declared with a shared or
+ * std140 layout qualifier, all its instances have been already marked
+ * as used in link_uniform_block_active_visitor::visit(ir_variable *).
+ */
+ if (var->get_interface_type()->interface_packing !=
+ GLSL_INTERFACE_PACKING_PACKED)
+ return visit_continue_with_parent;
+
ir_constant *c = ir->array_index->as_constant();
if (c) {
}
virtual void enter_record(const glsl_type *type, const char *,
- bool row_major) {
+ bool row_major, const unsigned packing) {
assert(type->is_record());
- this->offset = glsl_align(
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ this->offset = glsl_align(
+ this->offset, type->std430_base_alignment(row_major));
+ else
+ this->offset = glsl_align(
this->offset, type->std140_base_alignment(row_major));
}
virtual void leave_record(const glsl_type *type, const char *,
- bool row_major) {
+ bool row_major, const unsigned packing) {
assert(type->is_record());
/* If this is the last field of a structure, apply rule #9. The
* the member following the sub-structure is rounded up to the next
* multiple of the base alignment of the structure."
*/
- this->offset = glsl_align(
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ this->offset = glsl_align(
+ this->offset, type->std430_base_alignment(row_major));
+ else
+ this->offset = glsl_align(
this->offset, type->std140_base_alignment(row_major));
}
virtual void visit_field(const glsl_type *type, const char *name,
bool row_major, const glsl_type *,
+ const unsigned packing,
bool /* last_field */)
{
assert(this->index < this->num_variables);
v->IndexName = v->Name;
}
- const unsigned alignment = type->std140_base_alignment(v->RowMajor);
- unsigned size = type->std140_size(v->RowMajor);
+ unsigned alignment = 0;
+ unsigned size = 0;
+
+ if (packing == GLSL_INTERFACE_PACKING_STD430) {
+ alignment = type->std430_base_alignment(v->RowMajor);
+ size = type->std430_size(v->RowMajor);
+ } else {
+ alignment = type->std140_base_alignment(v->RowMajor);
+ size = type->std140_size(v->RowMajor);
+ }
this->offset = glsl_align(this->offset, alignment);
v->Offset = this->offset;
unsigned
link_uniform_blocks(void *mem_ctx,
+ struct gl_context *ctx,
struct gl_shader_program *prog,
struct gl_shader **shader_list,
unsigned num_shaders,
== unsigned(ubo_packing_shared));
STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED)
== unsigned(ubo_packing_packed));
-
+ STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD430)
+ == unsigned(ubo_packing_std430));
hash_table_foreach (block_hash, entry) {
const struct link_uniform_block_active *const b =
blocks[i].UniformBufferSize = parcel.buffer_size;
+ /* Check SSBO size is lower than maximum supported size for SSBO */
+ if (b->is_shader_storage &&
+ parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
+ linker_error(prog, "shader storage block `%s' has size %d, "
+ "which is larger than than the maximum allowed (%d)",
+ block_type->name,
+ parcel.buffer_size,
+ ctx->Const.MaxShaderStorageBlockSize);
+ }
blocks[i].NumUniforms =
(unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
blocks[i].UniformBufferSize = parcel.buffer_size;
+ /* Check SSBO size is lower than maximum supported size for SSBO */
+ if (b->is_shader_storage &&
+ parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
+ linker_error(prog, "shader storage block `%s' has size %d, "
+ "which is larger than than the maximum allowed (%d)",
+ block_type->name,
+ parcel.buffer_size,
+ ctx->Const.MaxShaderStorageBlockSize);
+ }
blocks[i].NumUniforms =
(unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
get_uniform_block_index(const gl_shader_program *shProg,
const char *uniformBlockName)
{
- for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
+ for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
if (!strcmp(shProg->UniformBlocks[i].Name, uniformBlockName))
return i;
}
#include "glsl_symbol_table.h"
#include "program/hash_table.h"
#include "program.h"
+#include "util/hash_table.h"
/**
* \file link_uniforms.cpp
assert(type->without_array()->is_record()
|| type->without_array()->is_interface());
+ unsigned record_array_count = 1;
char *name_copy = ralloc_strdup(NULL, name);
- recursion(type, &name_copy, strlen(name), false, NULL, false);
+ unsigned packing = type->interface_packing;
+
+ recursion(type, &name_copy, strlen(name), false, NULL, packing, false,
+ record_array_count);
ralloc_free(name_copy);
}
void
program_resource_visitor::process(ir_variable *var)
{
+ unsigned record_array_count = 1;
const glsl_type *t = var->type;
+ const glsl_type *t_without_array = var->type->without_array();
const bool row_major =
var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+ const unsigned packing = var->get_interface_type() ?
+ var->get_interface_type()->interface_packing :
+ var->type->interface_packing;
+
/* false is always passed for the row_major parameter to the other
* processing functions because no information is available to do
* otherwise. See the warning in linker.h.
* lowering is only applied to non-uniform interface blocks, so we
* can safely pass false for row_major.
*/
- recursion(var->type, &name, new_length, row_major, NULL, false);
+ recursion(var->type, &name, new_length, row_major, NULL, packing,
+ false, record_array_count);
}
ralloc_free(name);
} else if (var->data.from_named_ifc_block_nonarray) {
* is only applied to non-uniform interface blocks, so we can safely
* pass false for row_major.
*/
- recursion(var->type, &name, strlen(name), row_major, NULL, false);
+ recursion(var->type, &name, strlen(name), row_major, NULL, packing,
+ false, record_array_count);
ralloc_free(name);
} else if (t->without_array()->is_record()) {
char *name = ralloc_strdup(NULL, var->name);
- recursion(var->type, &name, strlen(name), row_major, NULL, false);
- ralloc_free(name);
- } else if (t->is_interface()) {
- char *name = ralloc_strdup(NULL, var->type->name);
- recursion(var->type, &name, strlen(name), row_major, NULL, false);
+ recursion(var->type, &name, strlen(name), row_major, NULL, packing,
+ false, record_array_count);
ralloc_free(name);
- } else if (t->is_array() && t->fields.array->is_interface()) {
- char *name = ralloc_strdup(NULL, var->type->fields.array->name);
- recursion(var->type, &name, strlen(name), row_major, NULL, false);
+ } else if (t_without_array->is_interface()) {
+ char *name = ralloc_strdup(NULL, t_without_array->name);
+ recursion(var->type, &name, strlen(name), row_major, NULL, packing,
+ false, record_array_count);
ralloc_free(name);
} else {
- this->visit_field(t, var->name, row_major, NULL, false);
+ this->visit_field(t, var->name, row_major, NULL, packing, false);
}
}
program_resource_visitor::recursion(const glsl_type *t, char **name,
size_t name_length, bool row_major,
const glsl_type *record_type,
- bool last_field)
+ const unsigned packing,
+ bool last_field,
+ unsigned record_array_count)
{
/* Records need to have each field processed individually.
*
record_type = t;
if (t->is_record())
- this->enter_record(t, *name, row_major);
+ this->enter_record(t, *name, row_major, packing);
for (unsigned i = 0; i < t->length; i++) {
const char *field = t->fields.structure[i].name;
recursion(t->fields.structure[i].type, name, new_length,
field_row_major,
record_type,
- (i + 1) == t->length);
+ packing,
+ (i + 1) == t->length, record_array_count);
/* Only the first leaf-field of the record gets called with the
* record type pointer.
if (t->is_record()) {
(*name)[name_length] = '\0';
- this->leave_record(t, *name, row_major);
+ this->leave_record(t, *name, row_major, packing);
}
- } else if (t->is_array() && (t->fields.array->is_record()
- || t->fields.array->is_interface())) {
+ } else if (t->without_array()->is_record() ||
+ t->without_array()->is_interface()) {
if (record_type == NULL && t->fields.array->is_record())
record_type = t->fields.array;
- for (unsigned i = 0; i < t->length; i++) {
+ unsigned length = t->length;
+ /* Shader storage block unsized arrays: add subscript [0] to variable
+ * names */
+ if (t->is_unsized_array())
+ length = 1;
+
+ record_array_count *= length;
+
+ for (unsigned i = 0; i < length; i++) {
size_t new_length = name_length;
/* Append the subscript to the current variable name */
recursion(t->fields.array, name, new_length, row_major,
record_type,
- (i + 1) == t->length);
+ packing,
+ (i + 1) == t->length, record_array_count);
/* Only the first leaf-field of the record gets called with the
* record type pointer.
record_type = NULL;
}
} else {
- this->visit_field(t, *name, row_major, record_type, last_field);
+ this->set_record_array_count(record_array_count);
+ this->visit_field(t, *name, row_major, record_type, packing, last_field);
}
}
program_resource_visitor::visit_field(const glsl_type *type, const char *name,
bool row_major,
const glsl_type *,
+ const unsigned,
bool /* last_field */)
{
visit_field(type, name, row_major);
}
void
-program_resource_visitor::enter_record(const glsl_type *, const char *, bool)
+program_resource_visitor::enter_record(const glsl_type *, const char *, bool,
+ const unsigned)
+{
+}
+
+void
+program_resource_visitor::leave_record(const glsl_type *, const char *, bool,
+ const unsigned)
{
}
void
-program_resource_visitor::leave_record(const glsl_type *, const char *, bool)
+program_resource_visitor::set_record_array_count(unsigned)
{
}
*/
class count_uniform_size : public program_resource_visitor {
public:
- count_uniform_size(struct string_to_uint_map *map)
- : num_active_uniforms(0), num_values(0), num_shader_samplers(0),
- num_shader_images(0), num_shader_uniform_components(0),
- num_shader_subroutines(0),
- is_ubo_var(false), map(map)
+ count_uniform_size(struct string_to_uint_map *map,
+ struct string_to_uint_map *hidden_map)
+ : num_active_uniforms(0), num_hidden_uniforms(0), num_values(0),
+ num_shader_samplers(0), num_shader_images(0),
+ num_shader_uniform_components(0), num_shader_subroutines(0),
+ is_ubo_var(false), is_shader_storage(false), map(map),
+ hidden_map(hidden_map)
{
/* empty */
}
void process(ir_variable *var)
{
+ this->current_var = var;
this->is_ubo_var = var->is_in_buffer_block();
+ this->is_shader_storage = var->is_in_shader_storage_block();
if (var->is_interface_instance())
program_resource_visitor::process(var->get_interface_type(),
var->get_interface_type()->name);
*/
unsigned num_active_uniforms;
+ unsigned num_hidden_uniforms;
+
/**
* Number of data values required to back the storage for the active uniforms
*/
unsigned num_shader_subroutines;
bool is_ubo_var;
+ bool is_shader_storage;
+
+ struct string_to_uint_map *map;
private:
virtual void visit_field(const glsl_type *type, const char *name,
* components in the default block. The spec allows image
* uniforms to use up no more than one scalar slot.
*/
- this->num_shader_uniform_components += values;
+ if(!is_shader_storage)
+ this->num_shader_uniform_components += values;
} else {
/* Accumulate the total number of uniform slots used by this shader.
* Note that samplers do not count against this limit because they
* don't use any storage on current hardware.
*/
- if (!is_ubo_var)
+ if (!is_ubo_var && !is_shader_storage)
this->num_shader_uniform_components += values;
}
if (this->map->get(id, name))
return;
- this->map->put(this->num_active_uniforms, name);
+ if (this->current_var->data.how_declared == ir_var_hidden) {
+ this->hidden_map->put(this->num_hidden_uniforms, name);
+ this->num_hidden_uniforms++;
+ } else {
+ this->map->put(this->num_active_uniforms-this->num_hidden_uniforms,
+ name);
+ }
/* Each leaf uniform occupies one entry in the list of active
* uniforms.
this->num_values += values;
}
- struct string_to_uint_map *map;
+ struct string_to_uint_map *hidden_map;
+
+ /**
+ * Current variable being processed.
+ */
+ ir_variable *current_var;
};
} /* anonymous namespace */
this->next_sampler = 0;
this->next_image = 0;
this->next_subroutine = 0;
+ this->record_array_count = 1;
memset(this->targets, 0, sizeof(this->targets));
}
{
current_var = var;
field_counter = 0;
+ this->record_next_sampler = new string_to_uint_map;
ubo_block_index = -1;
if (var->is_in_buffer_block()) {
if (var->is_interface_instance() && var->type->is_array()) {
unsigned l = strlen(var->get_interface_type()->name);
- for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
+ for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
if (strncmp(var->get_interface_type()->name,
prog->UniformBlocks[i].Name,
l) == 0
}
}
} else {
- for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
+ for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
if (strcmp(var->get_interface_type()->name,
prog->UniformBlocks[i].Name) == 0) {
ubo_block_index = i;
var->get_interface_type()->name);
else
process(var);
- } else
+ } else {
+ /* Store any explicit location and reset data location so we can
+ * reuse this variable for storing the uniform slot number.
+ */
+ this->explicit_location = current_var->data.location;
+ current_var->data.location = -1;
+
process(var);
+ }
+ delete this->record_next_sampler;
}
int ubo_block_index;
private:
void handle_samplers(const glsl_type *base_type,
- struct gl_uniform_storage *uniform)
+ struct gl_uniform_storage *uniform, const char *name)
{
if (base_type->is_sampler()) {
- uniform->sampler[shader_type].index = this->next_sampler;
uniform->sampler[shader_type].active = true;
- /* Increment the sampler by 1 for non-arrays and by the number of
- * array elements for arrays.
- */
- this->next_sampler +=
- MAX2(1, uniform->array_elements);
+ /* Handle multiple samplers inside struct arrays */
+ if (this->record_array_count > 1) {
+ unsigned inner_array_size = MAX2(1, uniform->array_elements);
+ char *name_copy = ralloc_strdup(NULL, name);
+
+ /* Remove all array subscripts from the sampler name */
+ char *str_start;
+ const char *str_end;
+ while((str_start = strchr(name_copy, '[')) &&
+ (str_end = strchr(name_copy, ']'))) {
+ /* Move the tail that follows ']' together with its terminating
+ * NUL. The length is measured from str_end + 1; measuring from
+ * str_end would read one byte past the end of name_copy.
+ */
+ memmove(str_start, str_end + 1, 1 + strlen(str_end + 1));
+ }
+
+ unsigned index = 0;
+ if (this->record_next_sampler->get(index, name_copy)) {
+ /* In this case, we've already seen this uniform so we just use
+ * the next sampler index recorded the last time we visited.
+ */
+ uniform->sampler[shader_type].index = index;
+ index = inner_array_size + uniform->sampler[shader_type].index;
+ this->record_next_sampler->put(index, name_copy);
+
+ ralloc_free(name_copy);
+ /* Return as everything else has already been initialised in a
+ * previous pass.
+ */
+ return;
+ } else {
+ /* We've never seen this uniform before so we need to allocate
+ * enough indices to store it.
+ *
+ * Nested struct arrays behave like arrays of arrays so we need
+ * to increase the index by the total number of elements of the
+ * sampler in case there is more than one sampler inside the
+ * structs. This allows the offset to be easily calculated for
+ * indirect indexing.
+ */
+ uniform->sampler[shader_type].index = this->next_sampler;
+ this->next_sampler +=
+ inner_array_size * this->record_array_count;
+
+ /* Store the next index for future passes over the struct array
+ */
+ index = uniform->sampler[shader_type].index + inner_array_size;
+ this->record_next_sampler->put(index, name_copy);
+ ralloc_free(name_copy);
+ }
+ } else {
+ /* Increment the sampler by 1 for non-arrays and by the number of
+ * array elements for arrays.
+ */
+ uniform->sampler[shader_type].index = this->next_sampler;
+ this->next_sampler += MAX2(1, uniform->array_elements);
+ }
const gl_texture_index target = base_type->sampler_index();
const unsigned shadow = base_type->sampler_shadow;
}
}
+ virtual void set_record_array_count(unsigned record_array_count)
+ {
+ this->record_array_count = record_array_count;
+ }
+
virtual void visit_field(const glsl_type *type, const char *name,
bool row_major)
{
}
virtual void enter_record(const glsl_type *type, const char *,
- bool row_major) {
+ bool row_major, const unsigned packing) {
assert(type->is_record());
if (this->ubo_block_index == -1)
return;
- this->ubo_byte_offset = glsl_align(
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ this->ubo_byte_offset = glsl_align(
+ this->ubo_byte_offset, type->std430_base_alignment(row_major));
+ else
+ this->ubo_byte_offset = glsl_align(
this->ubo_byte_offset, type->std140_base_alignment(row_major));
}
virtual void leave_record(const glsl_type *type, const char *,
- bool row_major) {
+ bool row_major, const unsigned packing) {
assert(type->is_record());
if (this->ubo_block_index == -1)
return;
- this->ubo_byte_offset = glsl_align(
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ this->ubo_byte_offset = glsl_align(
+ this->ubo_byte_offset, type->std430_base_alignment(row_major));
+ else
+ this->ubo_byte_offset = glsl_align(
this->ubo_byte_offset, type->std140_base_alignment(row_major));
}
virtual void visit_field(const glsl_type *type, const char *name,
bool row_major, const glsl_type *record_type,
+ const unsigned packing,
bool /* last_field */)
{
assert(!type->without_array()->is_record());
}
/* This assigns uniform indices to sampler and image uniforms. */
- handle_samplers(base_type, &this->uniforms[id]);
+ handle_samplers(base_type, &this->uniforms[id], name);
handle_images(base_type, &this->uniforms[id]);
handle_subroutines(base_type, &this->uniforms[id]);
+ /* For array of arrays or struct arrays the base location may have
+ * already been set so don't set it again.
+ */
+ if (ubo_block_index == -1 && current_var->data.location == -1) {
+ current_var->data.location = id;
+ }
+
/* If there is already storage associated with this uniform or if the
* uniform is set as builtin, it means that it was set while processing
* an earlier shader stage. For example, we may be processing the
if (record_type != NULL) {
const unsigned entries = MAX2(1, this->uniforms[id].array_elements);
this->uniforms[id].remap_location =
- current_var->data.location + field_counter;
+ this->explicit_location + field_counter;
field_counter += entries;
} else {
- this->uniforms[id].remap_location = current_var->data.location;
+ this->uniforms[id].remap_location = this->explicit_location;
}
} else {
/* Initialize to to indicate that no location is set */
if (!this->uniforms[id].builtin)
this->uniforms[id].storage = this->values;
- if (this->ubo_block_index != -1) {
- this->uniforms[id].block_index = this->ubo_block_index;
-
- const unsigned alignment = type->std140_base_alignment(row_major);
- this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment);
- this->uniforms[id].offset = this->ubo_byte_offset;
- this->ubo_byte_offset += type->std140_size(row_major);
+ this->uniforms[id].is_shader_storage =
+ current_var->is_in_shader_storage_block();
- if (type->is_array()) {
- this->uniforms[id].array_stride =
- glsl_align(type->fields.array->std140_size(row_major), 16);
+ if (this->ubo_block_index != -1) {
+ this->uniforms[id].block_index = this->ubo_block_index;
+
+ unsigned alignment = type->std140_base_alignment(row_major);
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ alignment = type->std430_base_alignment(row_major);
+ this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment);
+ this->uniforms[id].offset = this->ubo_byte_offset;
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ this->ubo_byte_offset += type->std430_size(row_major);
+ else
+ this->ubo_byte_offset += type->std140_size(row_major);
+
+ if (type->is_array()) {
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ this->uniforms[id].array_stride =
+ type->fields.array->std430_array_stride(row_major);
+ else
+ this->uniforms[id].array_stride =
+ glsl_align(type->fields.array->std140_size(row_major), 16);
} else {
this->uniforms[id].array_stride = 0;
}
const unsigned items = row_major ? matrix->matrix_columns : matrix->vector_elements;
assert(items <= 4);
- this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ this->uniforms[id].matrix_stride = items < 3 ? items * N :
+ glsl_align(items * N, 16);
+ else
+ this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
this->uniforms[id].row_major = row_major;
} else {
this->uniforms[id].matrix_stride = 0;
unsigned next_image;
unsigned next_subroutine;
-public:
- union gl_constant_value *values;
-
- gl_texture_index targets[MAX_SAMPLERS];
+ /**
+ * Field counter is used to take care that uniform structures
+ * with explicit locations get sequential locations.
+ */
+ unsigned field_counter;
/**
* Current variable being processed.
*/
ir_variable *current_var;
- /**
- * Field counter is used to take care that uniform structures
- * with explicit locations get sequential locations.
+ /* Used to store the explicit location from current_var so that we can
+ * reuse the location field for storing the uniform slot id.
*/
- unsigned field_counter;
+ int explicit_location;
+
+ /* Stores total struct array elements including nested structs */
+ unsigned record_array_count;
+
+ /* Map for temporarily storing next sampler index when handling samplers in
+ * struct arrays.
+ */
+ struct string_to_uint_map *record_next_sampler;
+
+public:
+ union gl_constant_value *values;
+
+ gl_texture_index targets[MAX_SAMPLERS];
/**
* Mask of samplers used by the current shader stage.
if (var->type->is_record()) {
sentinel = '.';
- } else if (var->type->is_array()
- && var->type->fields.array->is_record()) {
+ } else if (var->type->without_array()->is_record()) {
sentinel = '[';
}
}
/**
- * Sort the array of uniform storage so that the non-hidden uniforms are first
- *
- * This function sorts the list "in place." This is important because some of
- * the storage accessible from \c uniforms has \c uniforms as its \c ralloc
- * context. If \c uniforms is freed, some other storage will also be freed.
+ * Combine the hidden uniform hash map with the uniform hash map so that the
+ * hidden uniforms will be given indices at the end of the uniform storage
+ * array.
*/
-static unsigned
-move_hidden_uniforms_to_end(struct gl_shader_program *prog,
- struct gl_uniform_storage *uniforms,
- unsigned num_elements)
+static void
+assign_hidden_uniform_slot_id(const char *name, unsigned hidden_id,
+ void *closure)
{
- struct gl_uniform_storage *sorted_uniforms =
- ralloc_array(prog, struct gl_uniform_storage, num_elements);
- unsigned hidden_uniforms = 0;
- unsigned j = 0;
-
- /* Add the non-hidden uniforms. */
- for (unsigned i = 0; i < num_elements; i++) {
- if (!uniforms[i].hidden)
- sorted_uniforms[j++] = uniforms[i];
- }
+ count_uniform_size *uniform_size = (count_uniform_size *) closure;
+ unsigned hidden_uniform_start = uniform_size->num_active_uniforms -
+ uniform_size->num_hidden_uniforms;
- /* Add and count the hidden uniforms. */
- for (unsigned i = 0; i < num_elements; i++) {
- if (uniforms[i].hidden) {
- sorted_uniforms[j++] = uniforms[i];
- hidden_uniforms++;
- }
- }
-
- assert(prog->UniformHash != NULL);
- prog->UniformHash->clear();
- for (unsigned i = 0; i < num_elements; i++) {
- if (sorted_uniforms[i].name != NULL)
- prog->UniformHash->put(i, sorted_uniforms[i].name);
- }
-
- memcpy(uniforms, sorted_uniforms, sizeof(uniforms[0]) * num_elements);
- ralloc_free(sorted_uniforms);
-
- return hidden_uniforms;
+ uniform_size->map->put(hidden_uniform_start + hidden_id, name);
}
void
* Note: this is *NOT* the index that is returned to the application by
* glGetUniformLocation.
*/
- count_uniform_size uniform_size(prog->UniformHash);
+ struct string_to_uint_map *hiddenUniforms = new string_to_uint_map;
+ count_uniform_size uniform_size(prog->UniformHash, hiddenUniforms);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_shader *sh = prog->_LinkedShaders[i];
sh->num_combined_uniform_components = sh->num_uniform_components;
for (unsigned i = 0; i < sh->NumUniformBlocks; i++) {
- sh->num_combined_uniform_components +=
- sh->UniformBlocks[i].UniformBufferSize / 4;
+ if (!sh->UniformBlocks[i].IsShaderStorage) {
+ sh->num_combined_uniform_components +=
+ sh->UniformBlocks[i].UniformBufferSize / 4;
+ }
}
}
const unsigned num_uniforms = uniform_size.num_active_uniforms;
const unsigned num_data_slots = uniform_size.num_values;
+ const unsigned hidden_uniforms = uniform_size.num_hidden_uniforms;
+ /* Assign hidden uniforms a slot id. This is done before the early
+ * return below so that the temporary map is freed on every path.
+ */
+ hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size);
+ delete hiddenUniforms;
+
/* On the outside chance that there were no uniforms, bail out.
*/
if (num_uniforms == 0)
return;
struct gl_uniform_storage *uniforms =
rzalloc_array(prog, struct gl_uniform_storage, num_uniforms);
union gl_constant_value *data =
sizeof(prog->_LinkedShaders[i]->SamplerTargets));
}
- const unsigned hidden_uniforms =
- move_hidden_uniforms_to_end(prog, uniforms, num_uniforms);
-
/* Reserve all the explicit locations of the active uniforms. */
for (unsigned i = 0; i < num_uniforms; i++) {
if (uniforms[i].type->is_subroutine())
type = type->fields.array;
}
- slots = (type->is_array()
- ? (type->length * type->fields.array->matrix_columns)
- : type->matrix_columns);
+ if (type->is_array()) {
+ slots = 1;
+ while (type->is_array()) {
+ slots *= type->length;
+ type = type->fields.array;
+ }
+ slots *= type->matrix_columns;
+ } else {
+ slots = type->matrix_columns;
+ }
this->matches[this->num_matches].num_components = 4 * slots;
} else {
this->matches[this->num_matches].num_components
return visit_continue;
}
- virtual ir_visitor_status visit_leave(ir_function *ir)
+ virtual ir_visitor_status visit_leave(ir_function *)
{
in_main = false;
after_return = false;
return visit_continue;
}
- virtual ir_visitor_status visit_leave(ir_return *ir)
+ virtual ir_visitor_status visit_leave(ir_return *)
{
after_return = true;
return visit_continue;
}
- virtual ir_visitor_status visit_enter(ir_if *ir)
+ virtual ir_visitor_status visit_enter(ir_if *)
{
++control_flow;
return visit_continue;
}
- virtual ir_visitor_status visit_leave(ir_if *ir)
+ virtual ir_visitor_status visit_leave(ir_if *)
{
--control_flow;
return visit_continue;
}
- virtual ir_visitor_status visit_enter(ir_loop *ir)
+ virtual ir_visitor_status visit_enter(ir_loop *)
{
++control_flow;
return visit_continue;
}
- virtual ir_visitor_status visit_leave(ir_loop *ir)
+ virtual ir_visitor_status visit_leave(ir_loop *)
{
--control_flow;
return visit_continue;
* In addition, set the type of the linked variable to the
* explicitly sized array.
*/
- if (var->type->is_array() && existing->type->is_array() &&
- (var->type->fields.array == existing->type->fields.array) &&
- ((var->type->length == 0)|| (existing->type->length == 0))) {
- if (var->type->length != 0) {
- if (var->type->length <= existing->data.max_array_access) {
- linker_error(prog, "%s `%s' declared as type "
- "`%s' but outermost dimension has an index"
- " of `%i'\n",
- mode_string(var),
- var->name, var->type->name,
- existing->data.max_array_access);
- }
- existing->type = var->type;
- return true;
- } else if (existing->type->length != 0) {
- if(existing->type->length <= var->data.max_array_access) {
- linker_error(prog, "%s `%s' declared as type "
- "`%s' but outermost dimension has an index"
- " of `%i'\n",
- mode_string(var),
- var->name, existing->type->name,
- var->data.max_array_access);
+ if (var->type->is_array() && existing->type->is_array()) {
+ if ((var->type->fields.array == existing->type->fields.array) &&
+ ((var->type->length == 0)|| (existing->type->length == 0))) {
+ if (var->type->length != 0) {
+ if (var->type->length <= existing->data.max_array_access) {
+ linker_error(prog, "%s `%s' declared as type "
+ "`%s' but outermost dimension has an index"
+ " of `%i'\n",
+ mode_string(var),
+ var->name, var->type->name,
+ existing->data.max_array_access);
+ }
+ existing->type = var->type;
+ return true;
+ } else if (existing->type->length != 0) {
+ if(existing->type->length <= var->data.max_array_access &&
+ !existing->data.from_ssbo_unsized_array) {
+ linker_error(prog, "%s `%s' declared as type "
+ "`%s' but outermost dimension has an index"
+ " of `%i'\n",
+ mode_string(var),
+ var->name, existing->type->name,
+ var->data.max_array_access);
+ }
+ return true;
}
- return true;
+ } else {
+ /* The arrays of structs could have different glsl_type pointers but
+ * they are actually the same type. Use record_compare() to check that.
+ */
+ if (existing->type->fields.array->is_record() &&
+ var->type->fields.array->is_record() &&
+ existing->type->fields.array->record_compare(var->type->fields.array))
+ return true;
}
}
return false;
&& existing->type->record_compare(var->type)) {
existing->type = var->type;
} else {
- linker_error(prog, "%s `%s' declared as type "
- "`%s' and type `%s'\n",
- mode_string(var),
- var->name, var->type->name,
- existing->type->name);
- return;
+ /* If it is an unsized array in a Shader Storage Block,
+ * two different shaders can access different elements.
+ * Because of that, they might be converted to different
+ * sized arrays, then check that they are compatible but
+ * ignore the array size.
+ */
+ if (!(var->data.mode == ir_var_shader_storage &&
+ var->data.from_ssbo_unsized_array &&
+ existing->data.mode == ir_var_shader_storage &&
+ existing->data.from_ssbo_unsized_array &&
+ var->type->gl_type == existing->type->gl_type)) {
+ linker_error(prog, "%s `%s' declared as type "
+ "`%s' and type `%s'\n",
+ mode_string(var),
+ var->name, var->type->name,
+ existing->type->name);
+ return;
+ }
}
}
}
for (unsigned int j = 0; j < sh->NumUniformBlocks; j++) {
int index = link_cross_validate_uniform_block(prog,
&prog->UniformBlocks,
- &prog->NumUniformBlocks,
+ &prog->NumBufferInterfaceBlocks,
&sh->UniformBlocks[j]);
if (index == -1) {
return last;
}
-/**
- * Get the function signature for main from a shader
- */
-ir_function_signature *
-link_get_main_function_signature(gl_shader *sh)
-{
- ir_function *const f = sh->symbols->get_function("main");
- if (f != NULL) {
- exec_list void_parameters;
-
- /* Look for the 'void main()' signature and ensure that it's defined.
- * This keeps the linker from accidentally pick a shader that just
- * contains a prototype for main.
- *
- * We don't have to check for multiple definitions of main (in multiple
- * shaders) because that would have already been caught above.
- */
- ir_function_signature *sig =
- f->matching_signature(NULL, &void_parameters, false);
- if ((sig != NULL) && sig->is_defined) {
- return sig;
- }
- }
-
- return NULL;
-}
-
/**
* This class is only used in link_intrastage_shaders() below but declaring
virtual ir_visitor_status visit(ir_variable *var)
{
- fixup_type(&var->type, var->data.max_array_access);
+ fixup_type(&var->type, var->data.max_array_access,
+ var->data.from_ssbo_unsized_array);
if (var->type->is_interface()) {
if (interface_contains_unsized_arrays(var->type)) {
const glsl_type *new_type =
resize_interface_members(var->type,
- var->get_max_ifc_array_access());
+ var->get_max_ifc_array_access(),
+ var->is_in_shader_storage_block());
var->type = new_type;
var->change_interface_type(new_type);
}
if (interface_contains_unsized_arrays(var->type->fields.array)) {
const glsl_type *new_type =
resize_interface_members(var->type->fields.array,
- var->get_max_ifc_array_access());
+ var->get_max_ifc_array_access(),
+ var->is_in_shader_storage_block());
var->change_interface_type(new_type);
var->type = update_interface_members_array(var->type, new_type);
}
* If the type pointed to by \c type represents an unsized array, replace
* it with a sized array whose size is determined by max_array_access.
*/
- static void fixup_type(const glsl_type **type, unsigned max_array_access)
+ static void fixup_type(const glsl_type **type, unsigned max_array_access,
+ bool from_ssbo_unsized_array)
{
- if ((*type)->is_unsized_array()) {
+ if (!from_ssbo_unsized_array && (*type)->is_unsized_array()) {
*type = glsl_type::get_array_instance((*type)->fields.array,
max_array_access + 1);
assert(*type != NULL);
*/
static const glsl_type *
resize_interface_members(const glsl_type *type,
- const unsigned *max_ifc_array_access)
+ const unsigned *max_ifc_array_access,
+ bool is_ssbo)
{
unsigned num_fields = type->length;
glsl_struct_field *fields = new glsl_struct_field[num_fields];
memcpy(fields, type->fields.structure,
num_fields * sizeof(*fields));
for (unsigned i = 0; i < num_fields; i++) {
- fixup_type(&fields[i].type, max_ifc_array_access[i]);
+ /* If SSBO last member is unsized array, we don't replace it by a sized
+ * array.
+ */
+ if (is_ssbo && i == (num_fields - 1))
+ fixup_type(&fields[i].type, max_ifc_array_access[i],
+ true);
+ else
+ fixup_type(&fields[i].type, max_ifc_array_access[i],
+ false);
}
glsl_interface_packing packing =
(glsl_interface_packing) type->interface_packing;
/* Link up uniform blocks defined within this stage. */
const unsigned num_uniform_blocks =
- link_uniform_blocks(mem_ctx, prog, shader_list, num_shaders,
+ link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders,
&uniform_blocks);
if (!prog->LinkStatus)
return NULL;
*/
gl_shader *main = NULL;
for (unsigned i = 0; i < num_shaders; i++) {
- if (link_get_main_function_signature(shader_list[i]) != NULL) {
+ if (_mesa_get_main_function_signature(shader_list[i]) != NULL) {
main = shader_list[i];
break;
}
* copy of the original shader that contained the main function).
*/
ir_function_signature *const main_sig =
- link_get_main_function_signature(linked);
+ _mesa_get_main_function_signature(linked);
/* Move any instructions other than variable declarations or function
* declarations into main.
*/
unsigned used_locations = (max_index >= 32)
? ~0 : ~((1 << max_index) - 1);
+ unsigned double_storage_locations = 0;
assert((target_index == MESA_SHADER_VERTEX)
|| (target_index == MESA_SHADER_FRAGMENT));
} to_assign[16];
unsigned num_attr = 0;
- unsigned total_attribs_size = 0;
foreach_in_list(ir_instruction, node, sh->ir) {
ir_variable *const var = node->as_variable();
const unsigned slots = var->type->count_attribute_slots();
- /* From GL4.5 core spec, section 11.1.1 (Vertex Attributes):
- *
- * "A program with more than the value of MAX_VERTEX_ATTRIBS active
- * attribute variables may fail to link, unless device-dependent
- * optimizations are able to make the program fit within available
- * hardware resources. For the purposes of this test, attribute variables
- * of the type dvec3, dvec4, dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3,
- * and dmat4 may count as consuming twice as many attributes as equivalent
- * single-precision types. While these types use the same number of
- * generic attributes as their single-precision equivalents,
- * implementations are permitted to consume two single-precision vectors
- * of internal storage for each three- or four-component double-precision
- * vector."
- * Until someone has a good reason in Mesa, enforce that now.
- */
- if (target_index == MESA_SHADER_VERTEX) {
- total_attribs_size += slots;
- if (var->type->without_array() == glsl_type::dvec3_type ||
- var->type->without_array() == glsl_type::dvec4_type ||
- var->type->without_array() == glsl_type::dmat2x3_type ||
- var->type->without_array() == glsl_type::dmat2x4_type ||
- var->type->without_array() == glsl_type::dmat3_type ||
- var->type->without_array() == glsl_type::dmat3x4_type ||
- var->type->without_array() == glsl_type::dmat4x3_type ||
- var->type->without_array() == glsl_type::dmat4_type)
- total_attribs_size += slots;
- }
-
/* If the variable is not a built-in and has a location statically
* assigned in the shader (presumably via a layout qualifier), make sure
* that it doesn't collide with other assigned locations. Otherwise,
}
used_locations |= (use_mask << attr);
+
+ /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
+ *
+ * "A program with more than the value of MAX_VERTEX_ATTRIBS
+ * active attribute variables may fail to link, unless
+ * device-dependent optimizations are able to make the program
+ * fit within available hardware resources. For the purposes
+ * of this test, attribute variables of the type dvec3, dvec4,
+ * dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
+ * count as consuming twice as many attributes as equivalent
+ * single-precision types. While these types use the same number
+ * of generic attributes as their single-precision equivalents,
+ * implementations are permitted to consume two single-precision
+ * vectors of internal storage for each three- or four-component
+ * double-precision vector."
+ *
+ * Mark this attribute slot as taking up twice as much space
+ * so we can count it properly against limits. According to
+ * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
+ * is optional behavior, but it seems preferable.
+ */
+ const glsl_type *type = var->type->without_array();
+ if (type == glsl_type::dvec3_type ||
+ type == glsl_type::dvec4_type ||
+ type == glsl_type::dmat2x3_type ||
+ type == glsl_type::dmat2x4_type ||
+ type == glsl_type::dmat3_type ||
+ type == glsl_type::dmat3x4_type ||
+ type == glsl_type::dmat4x3_type ||
+ type == glsl_type::dmat4_type) {
+ double_storage_locations |= (use_mask << attr);
+ }
}
continue;
}
if (target_index == MESA_SHADER_VERTEX) {
+ unsigned total_attribs_size =
+ _mesa_bitcount(used_locations & ((1 << max_index) - 1)) +
+ _mesa_bitcount(double_storage_locations);
if (total_attribs_size > max_index) {
linker_error(prog,
"attempt to use %d vertex attribute slots only %d available ",
unsigned blocks[MESA_SHADER_STAGES] = {0};
unsigned total_uniform_blocks = 0;
+ unsigned shader_blocks[MESA_SHADER_STAGES] = {0};
+ unsigned total_shader_storage_blocks = 0;
- for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
- if (prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) {
+ for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
+ /* Don't check SSBOs for Uniform Block Size */
+ if (!prog->UniformBlocks[i].IsShaderStorage &&
+ prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) {
linker_error(prog, "Uniform block %s too big (%d/%d)\n",
prog->UniformBlocks[i].Name,
prog->UniformBlocks[i].UniformBufferSize,
ctx->Const.MaxUniformBlockSize);
}
+ if (prog->UniformBlocks[i].IsShaderStorage &&
+ prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxShaderStorageBlockSize) {
+ linker_error(prog, "Shader storage block %s too big (%d/%d)\n",
+ prog->UniformBlocks[i].Name,
+ prog->UniformBlocks[i].UniformBufferSize,
+ ctx->Const.MaxShaderStorageBlockSize);
+ }
+
for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
if (prog->UniformBlockStageIndex[j][i] != -1) {
- blocks[j]++;
- total_uniform_blocks++;
+ struct gl_shader *sh = prog->_LinkedShaders[j];
+ int stage_index = prog->UniformBlockStageIndex[j][i];
+ if (sh && sh->UniformBlocks[stage_index].IsShaderStorage) {
+ shader_blocks[j]++;
+ total_shader_storage_blocks++;
+ } else {
+ blocks[j]++;
+ total_uniform_blocks++;
+ }
}
}
if (total_uniform_blocks > ctx->Const.MaxCombinedUniformBlocks) {
linker_error(prog, "Too many combined uniform blocks (%d/%d)\n",
- prog->NumUniformBlocks,
+ total_uniform_blocks,
ctx->Const.MaxCombinedUniformBlocks);
} else {
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
}
}
}
+
+ if (total_shader_storage_blocks > ctx->Const.MaxCombinedShaderStorageBlocks) {
+ linker_error(prog, "Too many combined shader storage blocks (%d/%d)\n",
+ total_shader_storage_blocks,
+ ctx->Const.MaxCombinedShaderStorageBlocks);
+ } else {
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ const unsigned max_shader_storage_blocks =
+ ctx->Const.Program[i].MaxShaderStorageBlocks;
+ if (shader_blocks[i] > max_shader_storage_blocks) {
+ linker_error(prog, "Too many %s shader storage blocks (%d/%d)\n",
+ _mesa_shader_stage_to_string(i),
+ shader_blocks[i],
+ max_shader_storage_blocks);
+ break;
+ }
+ }
+ }
}
}
static void
-link_calculate_subroutine_compat(struct gl_context *ctx, struct gl_shader_program *prog)
+link_calculate_subroutine_compat(struct gl_shader_program *prog)
{
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_shader *sh = prog->_LinkedShaders[i];
}
static void
-check_subroutine_resources(struct gl_context *ctx, struct gl_shader_program *prog)
+check_subroutine_resources(struct gl_shader_program *prog)
{
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_shader *sh = prog->_LinkedShaders[i];
{
unsigned total_image_units = 0;
unsigned fragment_outputs = 0;
+ unsigned total_shader_storage_blocks = 0;
if (!ctx->Extensions.ARB_shader_image_load_store)
return;
total_image_units += sh->NumImages;
+ for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
+ int stage_index = prog->UniformBlockStageIndex[i][j];
+ if (stage_index != -1 && sh->UniformBlocks[stage_index].IsShaderStorage)
+ total_shader_storage_blocks++;
+ }
+
if (i == MESA_SHADER_FRAGMENT) {
foreach_in_list(ir_instruction, node, sh->ir) {
ir_variable *var = node->as_variable();
if (total_image_units > ctx->Const.MaxCombinedImageUniforms)
linker_error(prog, "Too many combined image uniforms\n");
- if (total_image_units + fragment_outputs >
+ if (total_image_units + fragment_outputs + total_shader_storage_blocks >
ctx->Const.MaxCombinedShaderOutputResources)
- linker_error(prog, "Too many combined image uniforms and fragment outputs\n");
+ linker_error(prog, "Too many combined image uniforms, shader storage "
+ " buffers and fragment outputs\n");
}
return true;
}
+/**
+ * Checks whether \p name is one of the variables listed in the name of
+ * packed varying \p var.
+ *
+ * A packed varying has a name like 'packed:a,b,c' where a, b and c are the
+ * names of the separate variables.
+ *
+ * \return true if the name of \p var starts with "packed:" and \p name
+ *         exactly matches one of the comma-separated entries that follow;
+ *         false otherwise, including when the temporary copy of the list
+ *         cannot be allocated.
+ */
+static bool
+included_in_packed_varying(ir_variable *var, const char *name)
+{
+   if (strncmp(var->name, "packed:", 7) != 0)
+      return false;
+
+   /* strtok_r() writes into the string it tokenizes, so work on a copy. */
+   char *list = strdup(var->name + 7);
+   assert(list);
+
+   /* assert() disappears under NDEBUG; without this guard a failed
+    * strdup() would hand NULL to strtok_r() together with an
+    * uninitialized save pointer.
+    */
+   if (!list)
+      return false;
+
+   bool found = false;
+   char *saveptr;
+   for (char *token = strtok_r(list, ",", &saveptr);
+        token != NULL;
+        token = strtok_r(NULL, ",", &saveptr)) {
+      if (strcmp(token, name) == 0) {
+         found = true;
+         break;
+      }
+   }
+
+   free(list);
+   return found;
+}
+
/**
* Function builds a stage reference bitmask from variable name.
*/
if (var) {
unsigned baselen = strlen(var->name);
+ if (included_in_packed_varying(var, name)) {
+ stages |= (1 << i);
+ break;
+ }
+
/* Type needs to match if specified, otherwise we might
* pick a variable with same name but different interface.
*/
static bool
add_interface_variables(struct gl_shader_program *shProg,
- struct gl_shader *sh, GLenum programInterface)
+ exec_list *ir, GLenum programInterface)
{
- foreach_in_list(ir_instruction, node, sh->ir) {
+ foreach_in_list(ir_instruction, node, ir) {
ir_variable *var = node->as_variable();
uint8_t mask = 0;
continue;
};
+ /* Skip packed varyings, packed varyings are handled separately
+ * by add_packed_varyings.
+ */
+ if (strncmp(var->name, "packed:", 7) == 0)
+ continue;
+
if (!add_program_resource(shProg, programInterface, var,
build_stageref(shProg, var->name,
var->data.mode) | mask))
return true;
}
+/**
+ * Adds every variable found in the packed_varyings list of the linked
+ * shader for \p stage to the program resource list of \p shProg.
+ *
+ * A variable is registered as GL_PROGRAM_INPUT or GL_PROGRAM_OUTPUT
+ * depending on its mode.
+ *
+ * \return false as soon as add_program_resource() fails; true otherwise,
+ *         including when the stage has no linked shader or no
+ *         packed_varyings list.
+ */
+static bool
+add_packed_varyings(struct gl_shader_program *shProg, int stage)
+{
+   struct gl_shader *linked = shProg->_LinkedShaders[stage];
+
+   if (linked == NULL || linked->packed_varyings == NULL)
+      return true;
+
+   foreach_in_list(ir_instruction, inst, linked->packed_varyings) {
+      ir_variable *varying = inst->as_variable();
+      if (varying == NULL)
+         continue;
+
+      GLenum resource_iface;
+      switch (varying->data.mode) {
+      case ir_var_shader_out:
+         resource_iface = GL_PROGRAM_OUTPUT;
+         break;
+      case ir_var_shader_in:
+         resource_iface = GL_PROGRAM_INPUT;
+         break;
+      default:
+         unreachable("unexpected type");
+      }
+
+      if (!add_program_resource(shProg, resource_iface, varying,
+                                build_stageref(shProg, varying->name,
+                                               varying->data.mode)))
+         return false;
+   }
+
+   return true;
+}
+
/**
* Builds up a list of program resources that point to existing
* resource data.
*/
void
-build_program_resource_list(struct gl_context *ctx,
- struct gl_shader_program *shProg)
+build_program_resource_list(struct gl_shader_program *shProg)
{
/* Rebuild resource list. */
if (shProg->ProgramResourceList) {
if (input_stage == MESA_SHADER_STAGES && output_stage == 0)
return;
+ if (!add_packed_varyings(shProg, input_stage))
+ return;
+ if (!add_packed_varyings(shProg, output_stage))
+ return;
+
/* Add inputs and outputs to the resource list. */
- if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage],
+ if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir,
GL_PROGRAM_INPUT))
return;
- if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage],
+ if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage]->ir,
GL_PROGRAM_OUTPUT))
return;
}
}
- if (!add_program_resource(shProg, GL_UNIFORM,
+ bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage;
+ GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM;
+ if (!add_program_resource(shProg, type,
&shProg->UniformStorage[i], stageref))
return;
}
- /* Add program uniform blocks. */
- for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
- if (!add_program_resource(shProg, GL_UNIFORM_BLOCK,
+ /* Add program uniform blocks and shader storage blocks. */
+ for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
+ bool is_shader_storage = shProg->UniformBlocks[i].IsShaderStorage;
+ GLenum type = is_shader_storage ? GL_SHADER_STORAGE_BLOCK : GL_UNIFORM_BLOCK;
+ if (!add_program_resource(shProg, type,
&shProg->UniformBlocks[i], 0))
return;
}
return true;
}
-void
-link_assign_subroutine_types(struct gl_context *ctx,
- struct gl_shader_program *prog)
+static void
+link_assign_subroutine_types(struct gl_shader_program *prog)
{
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
gl_shader *sh = prog->_LinkedShaders[i];
prog->Version = max_version;
prog->IsES = is_es_prog;
+ /* From OpenGL 4.5 Core specification (7.3 Program Objects):
+ * "Linking can fail for a variety of reasons as specified in the OpenGL
+ * Shading Language Specification, as well as any of the following
+ * reasons:
+ *
+ * * No shader objects are attached to program.
+ *
+ * ..."
+ *
+ * Same rule applies for OpenGL ES >= 3.1.
+ */
+
+ if (prog->NumShaders == 0 &&
+ ((ctx->API == API_OPENGL_CORE && ctx->Version >= 45) ||
+ (ctx->API == API_OPENGLES2 && ctx->Version >= 31))) {
+ linker_error(prog, "No shader objects are attached to program.\n");
+ goto done;
+ }
+
/* Some shaders have to be linked with some other shaders present.
*/
if (num_shaders[MESA_SHADER_GEOMETRY] > 0 &&
}
check_explicit_uniform_locations(ctx, prog);
- link_assign_subroutine_types(ctx, prog);
+ link_assign_subroutine_types(prog);
if (!prog->LinkStatus)
goto done;
link_assign_atomic_counter_resources(ctx, prog);
store_fragdepth_layout(prog);
- link_calculate_subroutine_compat(ctx, prog);
+ link_calculate_subroutine_compat(prog);
check_resources(ctx, prog);
- check_subroutine_resources(ctx, prog);
+ check_subroutine_resources(prog);
check_image_resources(ctx, prog);
link_check_atomic_counter_resources(ctx, prog);
* behavior specified in GLSL specification.
*/
if (!prog->SeparateShader && ctx->API == API_OPENGLES2) {
- if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
- linker_error(prog, "program lacks a vertex shader\n");
- } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
- linker_error(prog, "program lacks a fragment shader\n");
+ /* With ES < 3.1 one needs to have always vertex + fragment shader. */
+ if (ctx->Version < 31) {
+ if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
+ linker_error(prog, "program lacks a vertex shader\n");
+ } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
+ linker_error(prog, "program lacks a fragment shader\n");
+ }
+ } else {
+ /* From OpenGL ES 3.1 specification (7.3 Program Objects):
+ * "Linking can fail for a variety of reasons as specified in the
+ * OpenGL ES Shading Language Specification, as well as any of the
+ * following reasons:
+ *
+ * ...
+ *
+ * * program contains objects to form either a vertex shader or
+ * fragment shader, and program is not separable, and does not
+ * contain objects to form both a vertex shader and fragment
+ * shader."
+ */
+ if (!!prog->_LinkedShaders[MESA_SHADER_VERTEX] ^
+ !!prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
+ linker_error(prog, "Program needs to contain both vertex and "
+ "fragment shaders.\n");
+ }
}
}
#ifndef GLSL_LINKER_H
#define GLSL_LINKER_H
-ir_function_signature *
-link_get_main_function_signature(gl_shader *sh);
-
extern bool
link_function_calls(gl_shader_program *prog, gl_shader *main,
gl_shader **shader_list, unsigned num_shaders);
extern unsigned
link_uniform_blocks(void *mem_ctx,
+ struct gl_context *ctx,
struct gl_shader_program *prog,
struct gl_shader **shader_list,
unsigned num_shaders,
*/
virtual void visit_field(const glsl_type *type, const char *name,
bool row_major, const glsl_type *record_type,
+ const unsigned packing,
bool last_field);
/**
virtual void visit_field(const glsl_struct_field *field);
virtual void enter_record(const glsl_type *type, const char *name,
- bool row_major);
+ bool row_major, const unsigned packing);
virtual void leave_record(const glsl_type *type, const char *name,
- bool row_major);
+ bool row_major, const unsigned packing);
+
+ virtual void set_record_array_count(unsigned record_array_count);
private:
/**
*/
void recursion(const glsl_type *t, char **name, size_t name_length,
bool row_major, const glsl_type *record_type,
- bool last_field);
+ const unsigned packing,
+ bool last_field, unsigned record_array_count);
};
void
exec_list *out_instructions,
exec_list *out_variables);
- void run(exec_list *instructions);
+ void run(struct gl_shader *shader);
private:
void bitwise_assign_pack(ir_rvalue *lhs, ir_rvalue *rhs);
}
void
-lower_packed_varyings_visitor::run(exec_list *instructions)
+lower_packed_varyings_visitor::run(struct gl_shader *shader)
{
- foreach_in_list(ir_instruction, node, instructions) {
+ foreach_in_list(ir_instruction, node, shader->ir) {
ir_variable *var = node->as_variable();
if (var == NULL)
continue;
assert(var->data.interpolation == INTERP_QUALIFIER_FLAT ||
!var->type->contains_integer());
+ /* Clone the variable for program resource list before
+ * it gets modified and lost.
+ */
+ if (!shader->packed_varyings)
+ shader->packed_varyings = new (shader) exec_list;
+
+ shader->packed_varyings->push_tail(var->clone(shader, NULL));
+
/* Change the old varying into an ordinary global. */
assert(var->data.mode != ir_var_temporary);
var->data.mode = ir_var_auto;
gs_input_vertices,
&new_instructions,
&new_variables);
- visitor.run(instructions);
+ visitor.run(shader);
if (mode == ir_var_shader_out) {
if (shader->Stage == MESA_SHADER_GEOMETRY) {
/* For geometry shaders, outputs need to be lowered before each call
* thing referenced is row-major.
*/
static bool
-is_dereferenced_thing_row_major(const ir_dereference *deref)
+is_dereferenced_thing_row_major(const ir_rvalue *deref)
{
bool matrix = false;
const ir_rvalue *ir = deref;
ir_visitor_status visit_enter(ir_assignment *ir);
void setup_for_load_or_store(ir_variable *var,
- ir_dereference *deref,
+ ir_rvalue *deref,
ir_rvalue **offset,
unsigned *const_offset,
bool *row_major,
- int *matrix_columns);
+ int *matrix_columns,
+ unsigned packing);
ir_expression *ubo_load(const struct glsl_type *type,
ir_rvalue *offset);
ir_call *ssbo_load(const struct glsl_type *type,
void emit_access(bool is_write, ir_dereference *deref,
ir_variable *base_offset, unsigned int deref_offset,
bool row_major, int matrix_columns,
- unsigned write_mask);
+ unsigned packing, unsigned write_mask);
+
+ ir_visitor_status visit_enter(class ir_expression *);
+ ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
+ void check_ssbo_unsized_array_length_expression(class ir_expression *);
+ void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);
+
+ ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
+ ir_dereference *,
+ ir_variable *);
+ ir_expression *emit_ssbo_get_buffer_size();
+
+ unsigned calculate_unsized_array_stride(ir_dereference *deref,
+ unsigned packing);
+
+ ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
+ ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
+ ir_visitor_status visit_enter(ir_call *ir);
void *mem_ctx;
struct gl_shader *shader;
* \c UniformBlocks array.
*/
static const char *
-interface_field_name(void *mem_ctx, char *base_name, ir_dereference *d,
+interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
ir_rvalue **nonconst_block_index)
{
ir_rvalue *previous_index = NULL;
break;
}
+ case ir_type_swizzle: {
+ ir_swizzle *s = (ir_swizzle *) d;
+ d = s->val->as_dereference();
+ break;
+ }
default:
assert(!"Should not get here.");
break;
void
lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
- ir_dereference *deref,
+ ir_rvalue *deref,
ir_rvalue **offset,
unsigned *const_offset,
bool *row_major,
- int *matrix_columns)
+ int *matrix_columns,
+ unsigned packing)
{
/* Determine the name of the interface block */
ir_rvalue *nonconst_block_index;
const bool array_row_major =
is_dereferenced_thing_row_major(deref_array);
- array_stride = deref_array->type->std140_size(array_row_major);
- array_stride = glsl_align(array_stride, 16);
+ /* The array type will give the correct interface packing
+ * information
+ */
+ if (packing == GLSL_INTERFACE_PACKING_STD430) {
+ array_stride = deref_array->type->std430_array_stride(array_row_major);
+ } else {
+ array_stride = deref_array->type->std140_size(array_row_major);
+ array_stride = glsl_align(array_stride, 16);
+ }
}
ir_rvalue *array_index = deref_array->array_index;
ralloc_free(field_deref);
- unsigned field_align = type->std140_base_alignment(field_row_major);
+ unsigned field_align = 0;
+
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ field_align = type->std430_base_alignment(field_row_major);
+ else
+ field_align = type->std140_base_alignment(field_row_major);
intra_struct_offset = glsl_align(intra_struct_offset, field_align);
deref_record->field) == 0)
break;
- intra_struct_offset += type->std140_size(field_row_major);
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ intra_struct_offset += type->std430_size(field_row_major);
+ else
+ intra_struct_offset += type->std140_size(field_row_major);
/* If the field just examined was itself a structure, apply rule
* #9:
break;
}
+ case ir_type_swizzle: {
+ ir_swizzle *deref_swizzle = (ir_swizzle *) deref;
+
+ assert(deref_swizzle->mask.num_components == 1);
+
+ *const_offset += deref_swizzle->mask.x * sizeof(int);
+ deref = deref_swizzle->val->as_dereference();
+ break;
+ }
+
default:
assert(!"not reached");
deref = NULL;
unsigned const_offset;
bool row_major;
int matrix_columns;
+ unsigned packing = var->get_interface_type()->interface_packing;
/* Compute the offset to the start if the dereference as well as other
* information we need to configure the write
*/
setup_for_load_or_store(var, deref,
&offset, &const_offset,
- &row_major, &matrix_columns);
+ &row_major, &matrix_columns,
+ packing);
assert(offset);
/* Now that we've calculated the offset to the start of the
deref = new(mem_ctx) ir_dereference_variable(load_var);
emit_access(false, deref, load_offset, const_offset,
- row_major, matrix_columns, 0);
+ row_major, matrix_columns, packing, 0);
*rvalue = deref;
progress = true;
unsigned int deref_offset,
bool row_major,
int matrix_columns,
+ unsigned packing,
unsigned write_mask)
{
if (deref->type->is_record()) {
emit_access(is_write, field_deref, base_offset,
deref_offset + field_offset,
- row_major, 1,
+ row_major, 1, packing,
writemask_for_size(field_deref->type->vector_elements));
field_offset += field->type->std140_size(row_major);
}
if (deref->type->is_array()) {
- unsigned array_stride =
+ unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ?
+ deref->type->fields.array->std430_array_stride(row_major) :
glsl_align(deref->type->fields.array->std140_size(row_major), 16);
for (unsigned i = 0; i < deref->type->length; i++) {
element);
emit_access(is_write, element_deref, base_offset,
deref_offset + i * array_stride,
- row_major, 1,
+ row_major, 1, packing,
writemask_for_size(element_deref->type->vector_elements));
}
return;
int size_mul = deref->type->is_double() ? 8 : 4;
emit_access(is_write, col_deref, base_offset,
deref_offset + i * size_mul,
- row_major, deref->type->matrix_columns,
+ row_major, deref->type->matrix_columns, packing,
writemask_for_size(col_deref->type->vector_elements));
} else {
- /* std140 always rounds the stride of arrays (and matrices) to a
- * vec4, so matrices are always 16 between columns/rows. With
- * doubles, they will be 32 apart when there are more than 2 rows.
- */
- int size_mul = (deref->type->is_double() &&
- deref->type->vector_elements > 2) ? 32 : 16;
+ int size_mul;
+
+ /* std430 doesn't round up vec2 size to a vec4 size */
+ if (packing == GLSL_INTERFACE_PACKING_STD430 &&
+ deref->type->vector_elements == 2 &&
+ !deref->type->is_double()) {
+ size_mul = 8;
+ } else {
+ /* std140 always rounds the stride of arrays (and matrices) to a
+ * vec4, so matrices are always 16 between columns/rows. With
+ * doubles, they will be 32 apart when there are more than 2 rows.
+ *
+ * For both std140 and std430, if the member is a
+ * three-component vector with components consuming N basic
+ * machine units, the base alignment is 4N. For vec4, base
+ * alignment is 4N.
+ */
+ size_mul = (deref->type->is_double() &&
+ deref->type->vector_elements > 2) ? 32 : 16;
+ }
+
emit_access(is_write, col_deref, base_offset,
deref_offset + i * size_mul,
- row_major, deref->type->matrix_columns,
+ row_major, deref->type->matrix_columns, packing,
writemask_for_size(col_deref->type->vector_elements));
}
}
unsigned const_offset;
bool row_major;
int matrix_columns;
+ unsigned packing = var->get_interface_type()->interface_packing;
/* Compute the offset to the start if the dereference as well as other
* information we need to configure the write
*/
setup_for_load_or_store(var, deref,
&offset, &const_offset,
- &row_major, &matrix_columns);
+ &row_major, &matrix_columns,
+ packing);
assert(offset);
/* Now emit writes from the temporary to memory */
deref = new(mem_ctx) ir_dereference_variable(write_var);
emit_access(true, deref, write_offset, const_offset,
- row_major, matrix_columns, write_mask);
+ row_major, matrix_columns, packing, write_mask);
+}
+
+/* Expression hook: first hands \c ir to
+ * check_ssbo_unsized_array_length_expression(), then returns the result
+ * of rvalue_visit(ir).
+ */
+ir_visitor_status
+lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
+{
+ check_ssbo_unsized_array_length_expression(ir);
+ return rvalue_visit(ir);
+}
+
+/**
+ * Builds the replacement for an ir_unop_ssbo_unsized_array_length
+ * expression.
+ *
+ * \return the expression produced by process_ssbo_unsized_array_length(),
+ *         or NULL when \p expr is a different operation, when its first
+ *         operand is not a dereference of an unsized array, or when the
+ *         referenced variable is not in a shader storage block.
+ */
+ir_expression *
+lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
+{
+   const bool is_length_query = expr->operation ==
+      ir_expression_operation(ir_unop_ssbo_unsized_array_length);
+   if (!is_length_query)
+      return NULL;
+
+   ir_rvalue *array = expr->operands[0]->as_rvalue();
+   if (array == NULL)
+      return NULL;
+   if (!(array->type->is_array() && array->type->is_unsized_array()))
+      return NULL;
+
+   ir_dereference *array_deref = expr->operands[0]->as_dereference();
+   if (array_deref == NULL)
+      return NULL;
+
+   ir_variable *buffer_var = expr->operands[0]->variable_referenced();
+   if (buffer_var == NULL || !buffer_var->is_in_shader_storage_block())
+      return NULL;
+
+   return process_ssbo_unsized_array_length(&array, array_deref, buffer_var);
+}
+
+/**
+ * Replaces every operand of \p ir for which
+ * calculate_ssbo_unsized_array_length() yields a lowered expression.
+ * The operand that gets replaced is deleted.
+ */
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
+{
+   /* A length unop found on its own is left alone: it is either removed
+    * by the optimization passes or replaced when visited as part of an
+    * ir_assignment or of another ir_expression.
+    */
+   if (ir->operation ==
+       ir_expression_operation(ir_unop_ssbo_unsized_array_length))
+      return;
+
+   for (unsigned idx = 0; idx < ir->get_num_operands(); idx++) {
+      if (ir->operands[idx]->ir_type == ir_type_expression) {
+         ir_expression *old_expr = (ir_expression *) ir->operands[idx];
+         ir_expression *lowered =
+            calculate_ssbo_unsized_array_length(old_expr);
+
+         if (lowered != NULL) {
+            delete old_expr;
+            ir->operands[idx] = lowered;
+         }
+      }
+   }
+}
+
+/**
+ * If the right-hand side of assignment \p ir is an expression for which
+ * calculate_ssbo_unsized_array_length() yields a lowered expression,
+ * deletes the old right-hand side and installs the lowered one.
+ */
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
+{
+   if (ir->rhs != NULL && ir->rhs->ir_type == ir_type_expression) {
+      ir_expression *old_rhs = (ir_expression *) ir->rhs;
+      ir_expression *lowered = calculate_ssbo_unsized_array_length(old_rhs);
+
+      if (lowered != NULL) {
+         delete old_rhs;
+         ir->rhs = lowered;
+      }
+   }
+}
+
+/* Returns a new int-typed ir_unop_get_buffer_size expression whose operand
+ * is a clone of this->uniform_block. Both are allocated out of mem_ctx.
+ */
+ir_expression *
+lower_ubo_reference_visitor::emit_ssbo_get_buffer_size()
+{
+ ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
+ return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
+ glsl_type::int_type,
+ block_ref);
+}
+
+/**
+ * Computes the stride of the unsized array referenced by \p deref.
+ *
+ * \param deref    either a variable dereference of the array itself or a
+ *                 record dereference into the interface that holds it
+ * \param packing  interface packing; GLSL_INTERFACE_PACKING_STD430 selects
+ *                 std430_array_stride(), anything else uses the std140
+ *                 size aligned to 16
+ */
+unsigned
+lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
+                                                            unsigned packing)
+{
+   const struct glsl_type *element_type = NULL;
+   bool element_row_major = false;
+
+   switch (deref->ir_type) {
+   case ir_type_dereference_variable: {
+      ir_dereference_variable *var_deref = (ir_dereference_variable *) deref;
+
+      /* An unsized array can be sized by other lowering passes, so pick
+       * the first field of the array which has the data type of the
+       * unsized array.
+       */
+      element_type = var_deref->var->type->fields.array;
+
+      /* Whether or not the field is row-major (because it might be a
+       * bvec2 or something) does not affect the array itself.  We need
+       * to know whether an array element in its entirety is row-major.
+       */
+      element_row_major = is_dereferenced_thing_row_major(var_deref);
+      break;
+   }
+   case ir_type_dereference_record: {
+      ir_dereference_record *rec_deref = (ir_dereference_record *) deref;
+      ir_dereference *block_deref = rec_deref->record->as_dereference();
+      assert(block_deref != NULL);
+
+      const struct glsl_type *block_type = block_deref->type;
+      const unsigned last_field = block_type->length - 1;
+
+      /* Unsized array is always the last element of the interface */
+      element_type =
+         block_type->fields.structure[last_field].type->fields.array;
+
+      element_row_major = is_dereferenced_thing_row_major(rec_deref);
+      break;
+   }
+   default:
+      unreachable("Unsupported dereference type");
+   }
+
+   if (packing == GLSL_INTERFACE_PACKING_STD430)
+      return element_type->std430_array_stride(element_row_major);
+
+   return glsl_align(element_type->std140_size(element_row_major), 16);
+}
+
+/* Builds the expression that evaluates the length of an unsized SSBO
+ * array:
+ *
+ *    max((buffer_size - offset_of_array) / stride_of_array, 0)
+ *
+ * mem_ctx is set to the ralloc parent of *rvalue and all new IR is
+ * allocated out of it. deref and var are passed on to
+ * setup_for_load_or_store() to obtain the array offset; the packing comes
+ * from var's interface type.
+ */
+ir_expression *
+lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
+ ir_dereference *deref,
+ ir_variable *var)
+{
+ mem_ctx = ralloc_parent(*rvalue);
+
+ ir_rvalue *base_offset = NULL;
+ unsigned const_offset;
+ bool row_major;
+ int matrix_columns;
+ unsigned packing = var->get_interface_type()->interface_packing;
+ int unsized_array_stride = calculate_unsized_array_stride(deref, packing);
+
+ /* Compute the offset to the start of the dereference as well as other
+ * information we need to calculate the length.
+ */
+ setup_for_load_or_store(var, deref,
+ &base_offset, &const_offset,
+ &row_major, &matrix_columns,
+ packing);
+ /* array.length() =
+ * max((buffer_object_size - offset_of_array) / stride_of_array, 0)
+ */
+ ir_expression *buffer_size = emit_ssbo_get_buffer_size();
+
+ /* Total offset = variable part + constant part, then converted with
+ * ir_unop_u2i before it is subtracted from the buffer size.
+ */
+ ir_expression *offset_of_array = new(mem_ctx)
+ ir_expression(ir_binop_add, base_offset,
+ new(mem_ctx) ir_constant(const_offset));
+ ir_expression *offset_of_array_int = new(mem_ctx)
+ ir_expression(ir_unop_u2i, offset_of_array);
+
+ ir_expression *sub = new(mem_ctx)
+ ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
+ ir_expression *div = new(mem_ctx)
+ ir_expression(ir_binop_div, sub,
+ new(mem_ctx) ir_constant(unsized_array_stride));
+ ir_expression *max = new(mem_ctx)
+ ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));
+
+ return max;
}
void
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
{
+ check_ssbo_unsized_array_length_assignment(ir);
check_for_ssbo_store(ir);
return rvalue_visit(ir);
}
+/**
+ * Lowers the intrinsic call to a new internal intrinsic that swaps the
+ * access to the buffer variable in the first parameter by an offset
+ * and block index. This involves creating the new internal intrinsic
+ * (i.e. the new function signature), named "<callee name>_internal".
+ *
+ * \param ir  call with 2 parameters (buffer variable and an integer
+ *            argument), or 3 parameters for CompSwap
+ * \return    a newly allocated call to the internal intrinsic, built from
+ *            clones of the data parameters and of the return dereference
+ *            of \p ir
+ */
+ir_call *
+lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
+{
+   /* SSBO atomics usually have 2 parameters, the buffer variable and an
+    * integer argument. The exception is CompSwap, that has an additional
+    * integer parameter.
+    */
+   int param_count = ir->actual_parameters.length();
+   assert(param_count == 2 || param_count == 3);
+
+   /* First argument must be a scalar integer buffer variable */
+   exec_node *param = ir->actual_parameters.get_head();
+   ir_instruction *inst = (ir_instruction *) param;
+   assert(inst->ir_type == ir_type_dereference_variable ||
+          inst->ir_type == ir_type_dereference_array ||
+          inst->ir_type == ir_type_dereference_record ||
+          inst->ir_type == ir_type_swizzle);
+
+   ir_rvalue *deref = (ir_rvalue *) inst;
+   assert(deref->type->is_scalar() && deref->type->is_integer());
+
+   ir_variable *var = deref->variable_referenced();
+   assert(var);
+
+   /* Compute the offset to the start of the dereference and the
+    * block index
+    */
+   mem_ctx = ralloc_parent(shader->ir);
+
+   ir_rvalue *offset = NULL;
+   unsigned const_offset;
+   bool row_major;
+   int matrix_columns;
+   unsigned packing = var->get_interface_type()->interface_packing;
+
+   setup_for_load_or_store(var, deref,
+                           &offset, &const_offset,
+                           &row_major, &matrix_columns,
+                           packing);
+   assert(offset);
+   assert(!row_major);
+   assert(matrix_columns == 1);
+
+   ir_rvalue *deref_offset =
+      add(offset, new(mem_ctx) ir_constant(const_offset));
+   ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
+
+   /* Create the new internal function signature that will take a block
+    * index and offset instead of a buffer variable
+    */
+   exec_list sig_params;
+   ir_variable *sig_param = new(mem_ctx)
+      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
+   sig_params.push_tail(sig_param);
+
+   sig_param = new(mem_ctx)
+      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
+   sig_params.push_tail(sig_param);
+
+   /* The data parameters use the same base type (int or uint) as the
+    * buffer variable being accessed.
+    */
+   const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
+      glsl_type::int_type : glsl_type::uint_type;
+   sig_param = new(mem_ctx)
+      ir_variable(type, "data1", ir_var_function_in);
+   sig_params.push_tail(sig_param);
+
+   if (param_count == 3) {
+      sig_param = new(mem_ctx)
+         ir_variable(type, "data2", ir_var_function_in);
+      sig_params.push_tail(sig_param);
+   }
+
+   ir_function_signature *sig =
+      new(mem_ctx) ir_function_signature(deref->type,
+                                         shader_storage_buffer_object);
+   assert(sig);
+   sig->replace_parameters(&sig_params);
+   sig->is_intrinsic = true;
+
+   /* Bounded formatting: the name can never be written past the end of
+    * func_name, whatever the callee is called.
+    */
+   char func_name[64];
+   snprintf(func_name, sizeof(func_name), "%s_internal", ir->callee_name());
+   ir_function *f = new(mem_ctx) ir_function(func_name);
+   f->add_signature(sig);
+
+   /* Now, create the call to the internal intrinsic */
+   exec_list call_params;
+   call_params.push_tail(block_index);
+   call_params.push_tail(deref_offset);
+   param = ir->actual_parameters.get_head()->get_next();
+   ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
+   call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
+   if (param_count == 3) {
+      param = param->get_next();
+      param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
+      call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
+   }
+   ir_dereference_variable *return_deref =
+      ir->return_deref->clone(mem_ctx, NULL);
+   return new(mem_ctx) ir_call(sig, return_deref, &call_params);
+}
+
+/**
+ * Returns the result of lower_ssbo_atomic_intrinsic() when the callee of
+ * \p ir is one of the SSBO atomic intrinsics listed below; otherwise
+ * returns \p ir itself.
+ */
+ir_call *
+lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
+{
+   static const char *const ssbo_atomic_names[] = {
+      "__intrinsic_ssbo_atomic_add",
+      "__intrinsic_ssbo_atomic_min",
+      "__intrinsic_ssbo_atomic_max",
+      "__intrinsic_ssbo_atomic_and",
+      "__intrinsic_ssbo_atomic_or",
+      "__intrinsic_ssbo_atomic_xor",
+      "__intrinsic_ssbo_atomic_exchange",
+      "__intrinsic_ssbo_atomic_comp_swap",
+   };
+   const unsigned num_names =
+      sizeof(ssbo_atomic_names) / sizeof(ssbo_atomic_names[0]);
+
+   const char *callee = ir->callee_name();
+   for (unsigned n = 0; n < num_names; n++) {
+      if (strcmp(ssbo_atomic_names[n], callee) == 0)
+         return lower_ssbo_atomic_intrinsic(ir);
+   }
+
+   return ir;
+}
+
+
+/**
+ * Call hook.  When check_for_ssbo_atomic_intrinsic() returns a different
+ * call, base_ir is replaced with it, progress is flagged and
+ * visit_continue_with_parent is returned; otherwise the result of
+ * rvalue_visit(ir) is returned.
+ */
+ir_visitor_status
+lower_ubo_reference_visitor::visit_enter(ir_call *ir)
+{
+   ir_call *lowered = check_for_ssbo_atomic_intrinsic(ir);
+   if (lowered == ir)
+      return rvalue_visit(ir);
+
+   progress = true;
+   base_ir->replace_with(lowered);
+   return visit_continue_with_parent;
+}
+
+
} /* unnamed namespace */
void
return false;
ir_function_signature *const main_sig =
- link_get_main_function_signature(shader);
+ _mesa_get_main_function_signature(shader);
if (main_sig == NULL) {
assert(main_sig != NULL);
return false;
v2.run(sh->ir);
visit_exec_list(sh->ir, &v1);
+ nir_lower_outputs_to_temporaries(shader);
+
+ shader->gs.vertices_out = sh->Geom.VerticesOut;
+ shader->gs.invocations = sh->Geom.Invocations;
+
return shader;
}
op = nir_intrinsic_memory_barrier;
} else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) {
op = nir_intrinsic_image_size;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) {
+ op = nir_intrinsic_image_samples;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) {
+ op = nir_intrinsic_store_ssbo;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
+ op = nir_intrinsic_load_ssbo;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_add_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_and_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_or_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_xor_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_min_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_min;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_max_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_max;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_exchange_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_comp_swap;
} else {
unreachable("not reached");
}
nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
+ nir_dest *dest = &instr->dest;
switch (op) {
case nir_intrinsic_atomic_counter_read_var:
(ir_dereference *) ir->actual_parameters.get_head();
instr->variables[0] = evaluate_deref(&instr->instr, param);
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
break;
}
case nir_intrinsic_image_load:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_image_samples:
case nir_intrinsic_image_size: {
nir_ssa_undef_instr *instr_undef =
nir_ssa_undef_instr_create(shader, 1);
info->dest_components, NULL);
}
- if (op == nir_intrinsic_image_size)
+ if (op == nir_intrinsic_image_size ||
+ op == nir_intrinsic_image_samples) {
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
break;
+ }
/* Set the address argument, extending the coordinate vector to four
* components.
instr->src[3] = evaluate_rvalue((ir_dereference *)param);
param = param->get_next();
}
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
break;
}
case nir_intrinsic_memory_barrier:
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
break;
+ case nir_intrinsic_store_ssbo: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+
+ /* Check if we need the indirect version */
+ ir_constant *const_offset = offset->as_constant();
+ if (!const_offset) {
+ op = nir_intrinsic_store_ssbo_indirect;
+ ralloc_free(instr);
+ instr = nir_intrinsic_instr_create(shader, op);
+ instr->src[2] = evaluate_rvalue(offset);
+ instr->const_index[0] = 0;
+ dest = &instr->dest;
+ } else {
+ instr->const_index[0] = const_offset->value.u[0];
+ }
+
+ instr->const_index[1] = write_mask->value.u[0];
+
+ instr->src[0] = evaluate_rvalue(val);
+ instr->num_components = val->type->vector_elements;
+
+ instr->src[1] = evaluate_rvalue(block);
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_load_ssbo: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ /* Check if we need the indirect version */
+ ir_constant *const_offset = offset->as_constant();
+ if (!const_offset) {
+ op = nir_intrinsic_load_ssbo_indirect;
+ ralloc_free(instr);
+ instr = nir_intrinsic_instr_create(shader, op);
+ instr->src[1] = evaluate_rvalue(offset);
+ instr->const_index[0] = 0;
+ dest = &instr->dest;
+ } else {
+ instr->const_index[0] = const_offset->value.u[0];
+ }
+
+ instr->src[0] = evaluate_rvalue(block);
+
+ const glsl_type *type = ir->return_deref->var->type;
+ instr->num_components = type->vector_elements;
+
+ /* Setup destination register */
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ type->vector_elements, NULL);
+
+ /* Insert the created nir instruction now since in the case of boolean
+ * result we will need to emit another instruction after it
+ */
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+
+ /*
+ * In SSBO/UBO's, a true boolean value is any non-zero value, but we
+ * consider a true boolean to be ~0. Fix this up with a != 0
+ * comparison.
+ */
+ if (type->base_type == GLSL_TYPE_BOOL) {
+ nir_load_const_instr *const_zero =
+ nir_load_const_instr_create(shader, 1);
+ const_zero->value.u[0] = 0;
+ nir_instr_insert_after_cf_list(this->cf_node_list,
+ &const_zero->instr);
+
+ nir_alu_instr *load_ssbo_compare =
+ nir_alu_instr_create(shader, nir_op_ine);
+ load_ssbo_compare->src[0].src.is_ssa = true;
+ load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa;
+ load_ssbo_compare->src[1].src.is_ssa = true;
+ load_ssbo_compare->src[1].src.ssa = &const_zero->def;
+ for (unsigned i = 0; i < type->vector_elements; i++)
+ load_ssbo_compare->src[1].swizzle[i] = 0;
+ nir_ssa_dest_init(&load_ssbo_compare->instr,
+ &load_ssbo_compare->dest.dest,
+ type->vector_elements, NULL);
+ load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
+ nir_instr_insert_after_cf_list(this->cf_node_list,
+ &load_ssbo_compare->instr);
+ dest = &load_ssbo_compare->dest.dest;
+ }
+ break;
+ }
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_min:
+ case nir_intrinsic_ssbo_atomic_max:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap: {
+ int param_count = ir->actual_parameters.length();
+ assert(param_count == 3 || param_count == 4);
+
+ /* Block index */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_instruction *inst = (ir_instruction *) param;
+ instr->src[0] = evaluate_rvalue(inst->as_rvalue());
+
+ /* Offset */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[1] = evaluate_rvalue(inst->as_rvalue());
+
+ /* data1 parameter (this is always present) */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[2] = evaluate_rvalue(inst->as_rvalue());
+
+ /* data2 parameter (only with atomic_comp_swap) */
+ if (param_count == 4) {
+ assert(op == nir_intrinsic_ssbo_atomic_comp_swap);
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[3] = evaluate_rvalue(inst->as_rvalue());
+ }
+
+ /* Atomic result */
+ assert(ir->return_deref);
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ ir->return_deref->type->vector_elements, NULL);
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+ break;
+ }
default:
unreachable("not reached");
}
- nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
-
if (ir->return_deref) {
nir_intrinsic_instr *store_instr =
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
store_instr->variables[0] =
evaluate_deref(&store_instr->instr, ir->return_deref);
- store_instr->src[0] = nir_src_for_ssa(&instr->dest.ssa);
+ store_instr->src[0] = nir_src_for_ssa(&dest->ssa);
nir_instr_insert_after_cf_list(this->cf_node_list,
&store_instr->instr);
{
nir_dest *dest = get_instr_dest(instr);
- nir_ssa_dest_init(instr, dest, num_components, NULL);
+ if (dest)
+ nir_ssa_dest_init(instr, dest, num_components, NULL);
nir_instr_insert_after_cf_list(this->cf_node_list, instr);
this->result = instr;
nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
load->num_components = ir->type->vector_elements;
load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */
- load->const_index[1] = 1; /* number of vec4's */
load->src[0] = evaluate_rvalue(ir->operands[0]);
if (!const_index)
load->src[1] = evaluate_rvalue(ir->operands[1]);
unreachable("not reached");
}
break;
+ case ir_unop_get_buffer_size: {
+ nir_intrinsic_instr *load = nir_intrinsic_instr_create(
+ this->shader,
+ nir_intrinsic_get_buffer_size);
+ load->num_components = ir->type->vector_elements;
+ load->src[0] = evaluate_rvalue(ir->operands[0]);
+ add_instr(&load->instr, ir->type->vector_elements);
+ return;
+ }
+
case ir_binop_add:
case ir_binop_sub:
case ir_binop_mul:
num_srcs = 0;
break;
+ case ir_texture_samples:
+ op = nir_texop_texture_samples;
+ num_srcs = 0;
+ break;
+
default:
unreachable("not reached");
}
shader->stage = stage;
+ shader->gs.vertices_out = 0;
+ shader->gs.invocations = 0;
+
return shader;
}
}
}
-void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
{
/* Copying an SSA definition makes no sense whatsoever. */
assert(!src->is_ssa);
dest->reg.base_offset = src->reg.base_offset;
dest->reg.reg = src->reg.reg;
if (src->reg.indirect) {
- dest->reg.indirect = ralloc(mem_ctx, nir_src);
- nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
+ dest->reg.indirect = ralloc(instr, nir_src);
+ nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
} else {
dest->reg.indirect = NULL;
}
}
void
-nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
+nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+ nir_alu_instr *instr)
{
- nir_src_copy(&dest->src, &src->src, mem_ctx);
+ nir_src_copy(&dest->src, &src->src, &instr->instr);
dest->abs = src->abs;
dest->negate = src->negate;
for (unsigned i = 0; i < 4; i++)
}
void
-nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, void *mem_ctx)
+nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+ nir_alu_instr *instr)
{
- nir_dest_copy(&dest->dest, &src->dest, mem_ctx);
+ nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
dest->write_mask = src->write_mask;
dest->saturate = src->saturate;
}
}
+/**
+ * Returns true when the source refers to something: a non-NULL SSA def for
+ * an SSA source, or a non-NULL register for a register source.
+ */
static bool
+src_is_valid(const nir_src *src)
+{
+ return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
+}
+
+static bool
remove_use_cb(nir_src *src, void *state)
{
- list_del(&src->use_link);
+ if (src_is_valid(src))
+ list_del(&src->use_link);
return true;
}
}
}
-static bool
-src_is_valid(const nir_src *src)
-{
- return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
-}
-
static void
src_remove_all_uses(nir_src *src)
{
}
+/**
+ * Overwrites *dest, a destination belonging to instr, with a copy of the
+ * register destination new_dest.
+ *
+ * An SSA *dest may only be overwritten when it has no uses and no if-uses
+ * (asserted). A register *dest is first unlinked from its def list and its
+ * indirect source, if any, is passed to src_remove_all_uses(). new_dest
+ * itself must not be SSA (asserted).
+ */
void
+nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
+{
+ if (dest->is_ssa) {
+ /* We can only overwrite an SSA destination if it has no uses. */
+ assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
+ } else {
+ list_del(&dest->reg.def_link);
+ if (dest->reg.indirect)
+ src_remove_all_uses(dest->reg.indirect);
+ }
+
+ /* We can't re-write with an SSA def */
+ assert(!new_dest.is_ssa);
+
+ nir_dest_copy(dest, &new_dest, instr);
+
+ /* Hook the rewritten destination up to instr and to the def list of the
+ * register it now writes.
+ */
+ dest->reg.parent_instr = instr;
+ list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);
+
+ if (dest->reg.indirect)
+ src_add_all_uses(dest->reg.indirect, instr, NULL);
+}
+
+void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
unsigned num_components, const char *name)
{
}
void
-nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx)
+nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
{
assert(!new_src.is_ssa || def != new_src.ssa);
nir_foreach_use_safe(def, use_src) {
nir_instr *src_parent_instr = use_src->parent_instr;
list_del(&use_src->use_link);
- nir_src_copy(use_src, &new_src, mem_ctx);
+ nir_src_copy(use_src, &new_src, src_parent_instr);
src_add_all_uses(use_src, src_parent_instr, NULL);
}
nir_foreach_if_use_safe(def, use_src) {
nir_if *src_parent_if = use_src->parent_if;
list_del(&use_src->use_link);
- nir_src_copy(use_src, &new_src, mem_ctx);
+ nir_src_copy(use_src, &new_src, src_parent_if);
src_add_all_uses(use_src, NULL, src_parent_if);
}
}
}
+/**
+ * Runs cb with state over a single CF node by forwarding to
+ * foreach_cf_node() with its third argument set to false, and returns that
+ * call's result.
+ *
+ * NOTE(review): the false argument is presumably the "reverse" flag, i.e.
+ * blocks are visited in forward order -- confirm against foreach_cf_node().
+ */
bool
+nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ void *state)
+{
+ return foreach_cf_node(node, cb, false, state);
+}
+
+bool
nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, void *state)
{
foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
return nir_cf_node_as_if(next_node);
}
+/* If the CF node following this block is a loop, this function returns that
+ * loop. Otherwise (including when the block's node is the tail sentinel or
+ * the block is the last CF node), it returns NULL.
+ */
+nir_loop *
+nir_block_get_following_loop(nir_block *block)
+{
+ if (exec_node_is_tail_sentinel(&block->cf_node.node))
+ return NULL;
+
+ if (nir_cf_node_is_last(&block->cf_node))
+ return NULL;
+
+ nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
+
+ if (next_node->type != nir_cf_node_loop)
+ return NULL;
+
+ return nir_cf_node_as_loop(next_node);
+}
static bool
index_block(nir_block *block, void *state)
{
return true;
}
+/**
+ * The indices are applied top-to-bottom which has the very nice property
+ * that, if A dominates B, then A->index <= B->index.
+ */
void
nir_index_ssa_defs(nir_function_impl *impl)
{
nir_foreach_block(impl, index_ssa_block, &index);
impl->ssa_alloc = index;
}
+
+/**
+ * Per-block callback: state points to an unsigned counter. Each instruction
+ * in the block gets the current counter value as its index, and the counter
+ * is incremented after every assignment. Always returns true.
+ */
+static bool
+index_instrs_block(nir_block *block, void *state)
+{
+ unsigned *index = state;
+ nir_foreach_instr(block, instr)
+ instr->index = (*index)++;
+
+ return true;
+}
+
+/**
+ * The indices are applied top-to-bottom which has the very nice property
+ * that, if A dominates B, then A->index <= B->index.
+ */
+unsigned
+nir_index_instrs(nir_function_impl *impl)
+{
+ unsigned index = 0;
+ nir_foreach_block(impl, index_instrs_block, &index);
+ return index;
+}
+
+/**
+ * Maps a gl_system_value to the matching nir_intrinsic_load_* opcode.
+ *
+ * Only the values listed in the switch are handled; anything else,
+ * including the tessellation values named in the FINISHME below, reaches
+ * unreachable().
+ */
+nir_intrinsic_op
+nir_intrinsic_from_system_value(gl_system_value val)
+{
+ switch (val) {
+ case SYSTEM_VALUE_VERTEX_ID:
+ return nir_intrinsic_load_vertex_id;
+ case SYSTEM_VALUE_INSTANCE_ID:
+ return nir_intrinsic_load_instance_id;
+ case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+ return nir_intrinsic_load_vertex_id_zero_base;
+ case SYSTEM_VALUE_BASE_VERTEX:
+ return nir_intrinsic_load_base_vertex;
+ case SYSTEM_VALUE_INVOCATION_ID:
+ return nir_intrinsic_load_invocation_id;
+ case SYSTEM_VALUE_FRONT_FACE:
+ return nir_intrinsic_load_front_face;
+ case SYSTEM_VALUE_SAMPLE_ID:
+ return nir_intrinsic_load_sample_id;
+ case SYSTEM_VALUE_SAMPLE_POS:
+ return nir_intrinsic_load_sample_pos;
+ case SYSTEM_VALUE_SAMPLE_MASK_IN:
+ return nir_intrinsic_load_sample_mask_in;
+ case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
+ return nir_intrinsic_load_local_invocation_id;
+ case SYSTEM_VALUE_WORK_GROUP_ID:
+ return nir_intrinsic_load_work_group_id;
+ case SYSTEM_VALUE_NUM_WORK_GROUPS:
+ return nir_intrinsic_load_num_work_groups;
+ /* FINISHME: Add tessellation intrinsics.
+ case SYSTEM_VALUE_TESS_COORD:
+ case SYSTEM_VALUE_VERTICES_IN:
+ case SYSTEM_VALUE_PRIMITIVE_ID:
+ case SYSTEM_VALUE_TESS_LEVEL_OUTER:
+ case SYSTEM_VALUE_TESS_LEVEL_INNER:
+ */
+ default:
+ unreachable("system value does not directly correspond to intrinsic");
+ }
+}
+
+/**
+ * Maps a nir_intrinsic_load_* opcode back to the gl_system_value it reads;
+ * the pairs handled here are the same ones nir_intrinsic_from_system_value()
+ * handles in the other direction.
+ *
+ * Any intrinsic not listed in the switch reaches unreachable().
+ */
+gl_system_value
+nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
+{
+ switch (intrin) {
+ case nir_intrinsic_load_vertex_id:
+ return SYSTEM_VALUE_VERTEX_ID;
+ case nir_intrinsic_load_instance_id:
+ return SYSTEM_VALUE_INSTANCE_ID;
+ case nir_intrinsic_load_vertex_id_zero_base:
+ return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
+ case nir_intrinsic_load_base_vertex:
+ return SYSTEM_VALUE_BASE_VERTEX;
+ case nir_intrinsic_load_invocation_id:
+ return SYSTEM_VALUE_INVOCATION_ID;
+ case nir_intrinsic_load_front_face:
+ return SYSTEM_VALUE_FRONT_FACE;
+ case nir_intrinsic_load_sample_id:
+ return SYSTEM_VALUE_SAMPLE_ID;
+ case nir_intrinsic_load_sample_pos:
+ return SYSTEM_VALUE_SAMPLE_POS;
+ case nir_intrinsic_load_sample_mask_in:
+ return SYSTEM_VALUE_SAMPLE_MASK_IN;
+ case nir_intrinsic_load_local_invocation_id:
+ return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
+ case nir_intrinsic_load_num_work_groups:
+ return SYSTEM_VALUE_NUM_WORK_GROUPS;
+ case nir_intrinsic_load_work_group_id:
+ return SYSTEM_VALUE_WORK_GROUP_ID;
+ /* FINISHME: Add tessellation intrinsics.
+ return SYSTEM_VALUE_TESS_COORD;
+ return SYSTEM_VALUE_VERTICES_IN;
+ return SYSTEM_VALUE_PRIMITIVE_ID;
+ return SYSTEM_VALUE_TESS_LEVEL_OUTER;
+ return SYSTEM_VALUE_TESS_LEVEL_INNER;
+ */
+ default:
+ unreachable("intrinsic doesn't produce a system value");
+ }
+}
* - Fragment shader output: one of the values from \c gl_frag_result.
* - Uniforms: Per-stage uniform slot number for default uniform block.
* - Uniforms: Index within the uniform block definition for UBO members.
+ * - Non-UBO Uniforms: uniform slot number.
* - Other: This field is not currently used.
*
* If the variable is a uniform, shader input, or shader output, and the
nir_instr_type type;
struct nir_block *block;
+ /** generic instruction index. */
+ unsigned index;
+
/* A temporary for optimization and analysis passes to use for storing
* flags. For instance, DCE uses this to store the "dead/live" info.
*/
return dest;
}
-void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx);
-void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx);
+void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
typedef struct {
nir_src src;
unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
} nir_alu_dest;
-void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx);
-void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
- void *mem_ctx);
-
typedef enum {
nir_type_invalid = 0, /* Not a valid type */
nir_type_float,
nir_alu_src src[];
} nir_alu_instr;
+void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+ nir_alu_instr *instr);
+void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+ nir_alu_instr *instr);
+
/* is this source channel used? */
static inline bool
nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
nir_texop_txs, /**< Texture size */
nir_texop_lod, /**< Texture lod query */
nir_texop_tg4, /**< Texture gather */
- nir_texop_query_levels /**< Texture levels query */
+ nir_texop_query_levels, /**< Texture levels query */
+ nir_texop_texture_samples, /**< Texture samples query */
} nir_texop;
typedef struct {
case nir_texop_lod:
return 2;
+ case nir_texop_texture_samples:
case nir_texop_query_levels:
return 1;
/* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
bool lower_scmp;
+ /* Does the native fdot instruction replicate its result for four
+ * components? If so, then opt_algebraic_late will turn all fdotN
+ * instructions into fdot_replicatedN instructions.
+ */
+ bool fdot_replicates;
+
+ /** lowers ffract to fsub+ffloor: */
+ bool lower_ffract;
+
/**
* Does the driver support real 32-bit integers? (Otherwise, integers
* are simulated by floats.)
} nir_shader_compiler_options;
typedef struct nir_shader {
- /** hash table of name -> uniform nir_variable */
+ /** list of uniforms (nir_variable) */
struct exec_list uniforms;
- /** hash table of name -> input nir_variable */
+ /** list of inputs (nir_variable) */
struct exec_list inputs;
- /** hash table of name -> output nir_variable */
+ /** list of outputs (nir_variable) */
struct exec_list outputs;
/** Set of driver-specific options for the shader.
*/
const struct nir_shader_compiler_options *options;
- /** list of global variables in the shader */
+ /** list of global variables in the shader (nir_variable) */
struct exec_list globals;
- /** list of system value variables in the shader */
+ /** list of system value variables in the shader (nir_variable) */
struct exec_list system_values;
struct exec_list functions; /** < list of nir_function */
/** The shader stage, such as MESA_SHADER_VERTEX. */
gl_shader_stage stage;
+
+ struct {
+ /** The maximum number of vertices the geometry shader might write. */
+ unsigned vertices_out;
+
+ /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+ unsigned invocations;
+ } gs;
} nir_shader;
#define nir_foreach_overload(shader, overload) \
void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
+void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
+ nir_dest new_dest);
void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
unsigned num_components, const char *name);
void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
unsigned num_components, const char *name);
-void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx);
+void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
/* visits basic blocks in source-code order */
typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
void *state);
bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
void *state);
+bool nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ void *state);
/* If the following CF node is an if, this function returns that if.
* Otherwise, it returns NULL.
*/
nir_if *nir_block_get_following_if(nir_block *block);
+nir_loop *nir_block_get_following_loop(nir_block *block);
+
void nir_index_local_regs(nir_function_impl *impl);
void nir_index_global_regs(nir_shader *shader);
void nir_index_ssa_defs(nir_function_impl *impl);
+unsigned nir_index_instrs(nir_function_impl *impl);
void nir_index_blocks(nir_function_impl *impl);
void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_cfg(nir_shader *shader, FILE *fp);
-void nir_split_var_copies(nir_shader *shader);
+int nir_gs_count_vertices(nir_shader *shader);
+
+bool nir_split_var_copies(nir_shader *shader);
void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
void nir_lower_var_copies(nir_shader *shader);
-void nir_lower_global_vars_to_local(nir_shader *shader);
+bool nir_lower_global_vars_to_local(nir_shader *shader);
-void nir_lower_locals_to_regs(nir_shader *shader);
+bool nir_lower_locals_to_regs(nir_shader *shader);
+
+void nir_lower_outputs_to_temporaries(nir_shader *shader);
+/* type_size is a caller-supplied callback that takes a GLSL type and returns
+ * an int (presumably that type's size; the units are not visible here).
+ */
void nir_lower_io(nir_shader *shader,
int (*type_size)(const struct glsl_type *));
void nir_lower_vars_to_ssa(nir_shader *shader);
-void nir_remove_dead_variables(nir_shader *shader);
+bool nir_remove_dead_variables(nir_shader *shader);
-void nir_lower_vec_to_movs(nir_shader *shader);
+void nir_move_vec_src_uses_to_dest(nir_shader *shader);
+bool nir_lower_vec_to_movs(nir_shader *shader);
void nir_lower_alu_to_scalar(nir_shader *shader);
void nir_lower_load_const_to_scalar(nir_shader *shader);
const struct gl_shader_program *shader_program);
void nir_lower_samplers_for_vk(nir_shader *shader);
-void nir_lower_system_values(nir_shader *shader);
-void nir_lower_tex_projector(nir_shader *shader);
+bool nir_lower_system_values(nir_shader *shader);
+
+typedef struct nir_lower_tex_options {
+ /**
+ * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
+ * sampler types a texture projector is lowered.
+ */
+ unsigned lower_txp;
+
+ /**
+ * If true, lower rect textures to 2D, using txs to fetch the
+ * texture dimensions and dividing the texture coords by the
+ * texture dims to normalize.
+ */
+ bool lower_rect;
+
+ /**
+ * To emulate certain texture wrap modes, this can be used
+ * to saturate the specified tex coord to [0.0, 1.0]. The
+ * bits are according to sampler #, ie. if, for example:
+ *
+ * (conf->saturate_s & (1 << n))
+ *
+ * is true, then the s coord for sampler n is saturated.
+ *
+ * Note that clamping must happen *after* projector lowering
+ * so any projected texture sample instruction with a clamped
+ * coordinate gets automatically lowered, regardless of the
+ * 'lower_txp' setting.
+ */
+ unsigned saturate_s;
+ unsigned saturate_t;
+ unsigned saturate_r;
+} nir_lower_tex_options;
+
+void nir_lower_tex(nir_shader *shader,
+ const nir_lower_tex_options *options);
+
void nir_lower_idiv(nir_shader *shader);
+void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
+void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
+
+void nir_lower_two_sided_color(nir_shader *shader);
+
void nir_lower_atomics(nir_shader *shader);
void nir_lower_to_source_mods(nir_shader *shader);
-void nir_normalize_cubemap_coords(nir_shader *shader);
+bool nir_lower_gs_intrinsics(nir_shader *shader);
+
+bool nir_normalize_cubemap_coords(nir_shader *shader);
void nir_live_variables_impl(nir_function_impl *impl);
bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
bool nir_opt_dce_impl(nir_function_impl *impl);
bool nir_opt_dce(nir_shader *shader);
+bool nir_opt_dead_cf(nir_shader *shader);
+
void nir_opt_gcm(nir_shader *shader);
bool nir_opt_peephole_select(nir_shader *shader);
void nir_sweep(nir_shader *shader);
+nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
+gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
static inline nir_ssa_def *
nir_imm_float(nir_builder *build, float x)
{
- nir_const_value v = { { .f = {x, 0, 0, 0} } };
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.f[0] = x;
+
return nir_build_imm(build, 1, v);
}
static inline nir_ssa_def *
nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
{
- nir_const_value v = { { .f = {x, y, z, w} } };
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.f[0] = x;
+ v.f[1] = y;
+ v.f[2] = z;
+ v.f[3] = w;
+
return nir_build_imm(build, 4, v);
}
static inline nir_ssa_def *
nir_imm_int(nir_builder *build, int x)
{
- nir_const_value v = { { .i = {x, 0, 0, 0} } };
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.i[0] = x;
+
return nir_build_imm(build, 1, v);
}
#include "nir_builder_opcodes.h"
+/**
+ * Builds a vector from the first num_components entries of comp using
+ * nir_vec2/nir_vec3/nir_vec4. With a single component, comp[0] is returned
+ * unchanged. Any other count reaches unreachable().
+ */
+static inline nir_ssa_def *
+nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
+{
+ switch (num_components) {
+ case 4:
+ return nir_vec4(build, comp[0], comp[1], comp[2], comp[3]);
+ case 3:
+ return nir_vec3(build, comp[0], comp[1], comp[2]);
+ case 2:
+ return nir_vec2(build, comp[0], comp[1]);
+ case 1:
+ return comp[0];
+ default:
+ unreachable("bad component count");
+ return NULL;
+ }
+}
+
/**
* Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
*/
return NULL;
}
+/**
+ * Returns channel c of def as a one-component value, built by calling
+ * nir_swizzle() with the swizzle {c, c, c, c} and a component count of 1.
+ */
+static inline nir_ssa_def *
+nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
+{
+ unsigned swizzle[4] = {c, c, c, c};
+ return nir_swizzle(b, def, swizzle, 1, false);
+}
+
/**
* Turns a nir_src into a nir_ssa_def * so it can be passed to
* nir_build_alu()-based builder calls.
return nir_imov_alu(build, alu, num_components);
}
+/**
+ * Emits a load_var intrinsic for var through the builder and returns its SSA
+ * result. The component count of both the intrinsic and its destination is
+ * glsl_get_vector_elements(var->type).
+ */
+static inline nir_ssa_def *
+nir_load_var(nir_builder *build, nir_variable *var)
+{
+ const unsigned num_components = glsl_get_vector_elements(var->type);
+
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var);
+ load->num_components = num_components;
+ load->variables[0] = nir_deref_var_create(load, var);
+ nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+ nir_builder_instr_insert(build, &load->instr);
+ return &load->dest.ssa;
+}
+
+/**
+ * Emits a store_var intrinsic through the builder that writes value to var.
+ * The intrinsic's component count is glsl_get_vector_elements(var->type).
+ */
+static inline void
+nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value)
+{
+ const unsigned num_components = glsl_get_vector_elements(var->type);
+
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
+ store->num_components = num_components;
+ store->variables[0] = nir_deref_var_create(store, var);
+ store->src[0] = nir_src_for_ssa(value);
+ nir_builder_instr_insert(build, &store->instr);
+}
+
#endif /* NIR_BUILDER_H */
_mesa_set_add(block->predecessors, pred);
}
+/**
+ * Removes pred from block's predecessor set. pred must currently be in the
+ * set; this is asserted.
+ */
+static inline void
+block_remove_pred(nir_block *block, nir_block *pred)
+{
+ struct set_entry *entry = _mesa_set_search(block->predecessors, pred);
+
+ assert(entry);
+
+ _mesa_set_remove(block->predecessors, entry);
+}
+
static void
link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2)
{
pred->successors[1] = NULL;
}
- struct set_entry *entry = _mesa_set_search(succ->predecessors, pred);
-
- assert(entry);
-
- _mesa_set_remove(succ->predecessors, entry);
+ block_remove_pred(succ, pred);
}
static void
unlink_block_successors(nir_block *block)
{
- if (block->successors[0] != NULL)
- unlink_blocks(block, block->successors[0]);
if (block->successors[1] != NULL)
unlink_blocks(block, block->successors[1]);
+ if (block->successors[0] != NULL)
+ unlink_blocks(block, block->successors[0]);
}
static void
}
/**
+ * Replace a block's successor with a different one.
+ */
+static void
+replace_successor(nir_block *block, nir_block *old_succ, nir_block *new_succ)
+{
+ if (block->successors[0] == old_succ) {
+ block->successors[0] = new_succ;
+ } else {
+ assert(block->successors[1] == old_succ);
+ block->successors[1] = new_succ;
+ }
+
+ block_remove_pred(old_succ, block);
+ block_add_pred(new_succ, block);
+}
+
+/**
* Takes a basic block and inserts a new empty basic block before it, making its
* predecessors point to the new block. This essentially splits the block into
* an empty header and a body so that another non-block CF node can be inserted
struct set_entry *entry;
set_foreach(block->predecessors, entry) {
nir_block *pred = (nir_block *) entry->key;
-
- unlink_blocks(pred, block);
- link_blocks(pred, new_block, NULL);
+ replace_successor(pred, block, new_block);
}
/* Any phi nodes must stay part of the new block, or else their
* infinite loops. Note that the jump to be eliminated may be free-floating.
*/
-static
-void unlink_jump(nir_block *block, nir_jump_type type)
+static void
+unlink_jump(nir_block *block, nir_jump_type type, bool add_normal_successors)
{
+ nir_block *next = block->successors[0];
+
if (block->successors[0])
remove_phi_src(block->successors[0], block);
if (block->successors[1])
remove_phi_src(block->successors[1], block);
- if (type == nir_jump_break) {
- nir_block *next = block->successors[0];
+ unlink_block_successors(block);
+ if (add_normal_successors)
+ block_add_normal_succs(block);
- if (next->predecessors->entries == 1) {
- nir_loop *loop =
- nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node));
+ /* If we've just removed a break, and the block we were jumping to (after
+ * the loop) now has zero predecessors, we've created a new infinite loop.
+ *
+ * NIR doesn't allow blocks (other than the start block) to have zero
+ * predecessors. In particular, dominance assumes all blocks are reachable.
+ * So, we insert a "fake link" by making successors[1] point after the loop.
+ *
+ * Note that we have to do this after unlinking/recreating the block's
+ * successors. If we removed a "break" at the end of the loop, then
+ * block == last_block, so block->successors[0] would already be "next",
+ * and adding a fake link would create two identical successors. Doing
+ * this afterward works, as we'll have changed block->successors[0] to
+ * be the top of the loop.
+ */
+ if (type == nir_jump_break && next->predecessors->entries == 0) {
+ nir_loop *loop =
+ nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node));
- /* insert fake link */
- nir_cf_node *last = nir_loop_last_cf_node(loop);
- assert(last->type == nir_cf_node_block);
- nir_block *last_block = nir_cf_node_as_block(last);
+ /* insert fake link */
+ nir_cf_node *last = nir_loop_last_cf_node(loop);
+ assert(last->type == nir_cf_node_block);
+ nir_block *last_block = nir_cf_node_as_block(last);
- last_block->successors[1] = next;
- block_add_pred(next, last_block);
- }
+ last_block->successors[1] = next;
+ block_add_pred(next, last_block);
}
-
- unlink_block_successors(block);
}
void
nir_handle_remove_jump(nir_block *block, nir_jump_type type)
{
- unlink_jump(block, type);
-
- block_add_normal_succs(block);
+ unlink_jump(block, type, true);
nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
nir_metadata_preserve(impl, nir_metadata_none);
nir_ssa_undef_instr *undef =
nir_ssa_undef_instr_create(mem_ctx, def->num_components);
nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
- nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def), mem_ctx);
+ nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def));
return true;
}
nir_foreach_instr_safe(block, instr) {
if (instr->type == nir_instr_type_jump) {
nir_jump_type jump_type = nir_instr_as_jump(instr)->type;
- unlink_jump(block, jump_type);
+ unlink_jump(block, jump_type, false);
} else {
nir_foreach_ssa_def(instr, replace_ssa_def_uses, impl);
nir_instr_remove(instr);
extracted->impl = nir_cf_node_get_function(&block_begin->cf_node);
exec_list_make_empty(&extracted->list);
+ /* Dominance and other block-related information is toast. */
+ nir_metadata_preserve(extracted->impl, nir_metadata_none);
+
nir_cf_node *cf_node = &block_begin->cf_node;
nir_cf_node *cf_node_end = &block_end->cf_node;
while (true) {
exec_list_push_tail(&block_pcopy->entries, &entry->node);
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
- nir_src_for_ssa(&entry->dest.ssa),
- state->mem_ctx);
+ nir_src_for_ssa(&entry->dest.ssa));
nir_instr_rewrite_src(&block_pcopy->instr, &entry->src,
nir_src_for_ssa(&phi->dest.ssa));
reg->num_array_elems = 0;
}
- nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg), state->mem_ctx);
+ nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg));
assert(list_empty(&def->uses) && list_empty(&def->if_uses));
if (def->parent_instr->type == nir_instr_type_ssa_undef) {
*/
nir_dest *dest = exec_node_data(nir_dest, def, ssa);
- *dest = nir_dest_for_reg(reg);
- dest->reg.parent_instr = state->instr;
- list_addtail(&dest->reg.def_link, ®->defs);
+ nir_instr_rewrite_dest(state->instr, dest, nir_dest_for_reg(reg));
return true;
}
assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components);
nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
- nir_src_copy(&mov->src[0].src, &src, mem_ctx);
+ nir_src_copy(&mov->src[0].src, &src, mov);
mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg);
mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1;
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Return instr viewed as an intrinsic instruction when it is an intrinsic
+ * whose opcode equals op; return NULL in every other case.
+ */
+static nir_intrinsic_instr *
+as_intrinsic(nir_instr *instr, nir_intrinsic_op op)
+{
+   if (instr->type == nir_instr_type_intrinsic) {
+      nir_intrinsic_instr *candidate = nir_instr_as_intrinsic(instr);
+
+      if (candidate->intrinsic == op)
+         return candidate;
+   }
+
+   return NULL;
+}
+
+/* Shorthand for as_intrinsic() with the set_vertex_count opcode. */
+static nir_intrinsic_instr *
+as_set_vertex_count(nir_instr *instr)
+{
+   nir_intrinsic_instr *set_count =
+      as_intrinsic(instr, nir_intrinsic_set_vertex_count);
+
+   return set_count;
+}
+
+/**
+ * Work out whether a geometry shader emits a constant number of vertices.
+ *
+ * Returns that number, or -1 (unknown) when a set_vertex_count intrinsic
+ * has a non-constant source, when two set_vertex_count intrinsics disagree,
+ * or when no set_vertex_count intrinsic is found.
+ *
+ * This only works if you've used nir_lower_gs_intrinsics() to do vertex
+ * counting at the NIR level.
+ */
+int
+nir_gs_count_vertices(nir_shader *shader)
+{
+   int vertices = -1;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl == NULL)
+         continue;
+
+      /* set_vertex_count intrinsics only appear in predecessors of the
+       * end block, so only those blocks are searched.
+       */
+      struct set_entry *pred_entry;
+      set_foreach(overload->impl->end_block->predecessors, pred_entry) {
+         nir_block *pred = (nir_block *) pred_entry->key;
+
+         nir_foreach_instr_reverse(pred, instr) {
+            nir_intrinsic_instr *set_count = as_set_vertex_count(instr);
+            if (set_count == NULL)
+               continue;
+
+            nir_const_value *constant =
+               nir_src_as_const_value(set_count->src[0]);
+
+            /* A non-constant vertex count: give up. */
+            if (constant == NULL)
+               return -1;
+
+            if (vertices == -1) {
+               /* First count seen so far; remember it. */
+               vertices = constant->i[0];
+            } else if (vertices != constant->i[0]) {
+               /* Contradictory set_vertex_count intrinsics.  This can
+                * happen if there are early-returns in main() and
+                * different paths emit different numbers of vertices.
+                */
+               return -1;
+            }
+         }
+      }
+   }
+
+   return vertices;
+}
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/*
+ * Ask the driver for the size of a given buffer. It takes the buffer index
+ * as source.
+ */
+INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+/*
* a barrier is an intrinsic with no inputs/outputs but which can't be moved
* around/optimized in general
*/
/** A conditional discard, with a single boolean source. */
INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
+/**
+ * Basic Geometry Shader intrinsics.
+ *
+ * emit_vertex implements GLSL's EmitStreamVertex() built-in. It takes a single
+ * index, which is the stream ID to write to.
+ *
+ * end_primitive implements GLSL's EndPrimitive() built-in.
+ */
INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0)
INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
+/**
+ * Geometry Shader intrinsics with a vertex count.
+ *
+ * Alternatively, drivers may implement these intrinsics, and use
+ * nir_lower_gs_intrinsics() to convert from the basic intrinsics.
+ *
+ * These maintain a count of the number of vertices emitted, as an additional
+ * unsigned integer source.
+ */
+INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
+
/*
* Atomic counters
*
INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-#define SYSTEM_VALUE(name, components) \
- INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \
+/*
+ * SSBO atomic intrinsics
+ *
+ * All of the SSBO atomic memory operations read a value from memory,
+ * compute a new value using one of the operations below, write the new
+ * value to memory, and return the original value read.
+ *
+ * All operations take 3 sources except CompSwap that takes 4. These
+ * sources represent:
+ *
+ * 0: The SSBO buffer index.
+ * 1: The offset into the SSBO buffer of the variable that the atomic
+ * operation will operate on.
+ * 2: The data parameter to the atomic function (i.e. the value to add
+ * in ssbo_atomic_add, etc).
+ * 3: For CompSwap only: the second data parameter.
+ */
+INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_min, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_max, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
+
+#define SYSTEM_VALUE(name, components, num_indices) \
+ INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-SYSTEM_VALUE(front_face, 1)
-SYSTEM_VALUE(vertex_id, 1)
-SYSTEM_VALUE(vertex_id_zero_base, 1)
-SYSTEM_VALUE(base_vertex, 1)
-SYSTEM_VALUE(instance_id, 1)
-SYSTEM_VALUE(sample_id, 1)
-SYSTEM_VALUE(sample_pos, 2)
-SYSTEM_VALUE(sample_mask_in, 1)
-SYSTEM_VALUE(invocation_id, 1)
+SYSTEM_VALUE(front_face, 1, 0)
+SYSTEM_VALUE(vertex_id, 1, 0)
+SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
+SYSTEM_VALUE(base_vertex, 1, 0)
+SYSTEM_VALUE(instance_id, 1, 0)
+SYSTEM_VALUE(sample_id, 1, 0)
+SYSTEM_VALUE(sample_pos, 2, 0)
+SYSTEM_VALUE(sample_mask_in, 1, 0)
+SYSTEM_VALUE(invocation_id, 1, 0)
+SYSTEM_VALUE(local_invocation_id, 3, 0)
+SYSTEM_VALUE(work_group_id, 3, 0)
+SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
+SYSTEM_VALUE(num_work_groups, 3, 0)
/*
* The format of the indices depends on the type of the load. For uniforms,
LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-/* LOAD(ssbo, 1, 0) */
+LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
/*
* Stores work the same way as loads, except now the first register input is
* the value or array to store and the optional second input is the indirect
- * offset.
+ * offset. SSBO stores are similar, but they accept an extra source for the
+ * block index and an extra index with the writemask to use.
*/
-#define STORE(name, num_indices, flags) \
- INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
- INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
- num_indices, flags) \
+#define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \
+ INTRINSIC(store_##name, 1 + extra_srcs, \
+ ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \
+ false, 0, 0, 1 + extra_indices, flags) \
+ INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \
+ ARR(0, 1, extra_srcs_size, extra_srcs_size), \
+ false, 0, 0, 1 + extra_indices, flags)
-STORE(output, 1, 0)
-/* STORE(ssbo, 2, 0) */
+STORE(output, 0, 0, 0, 0)
+STORE(ssbo, 1, 1, 1, 0)
-LAST_INTRINSIC(store_output_indirect)
+LAST_INTRINSIC(store_ssbo_indirect)
*/
#include "nir.h"
+#include "nir_builder.h"
/** @file nir_lower_alu_to_scalar.c
*
static void
lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
- void *mem_ctx)
+ nir_builder *builder)
{
unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
nir_ssa_def *last = NULL;
for (unsigned i = 0; i < num_components; i++) {
- nir_alu_instr *chan = nir_alu_instr_create(mem_ctx, chan_op);
+ nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op);
nir_alu_ssa_dest_init(chan, 1);
- nir_alu_src_copy(&chan->src[0], &instr->src[0], mem_ctx);
+ nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
if (nir_op_infos[chan_op].num_inputs > 1) {
assert(nir_op_infos[chan_op].num_inputs == 2);
- nir_alu_src_copy(&chan->src[1], &instr->src[1], mem_ctx);
+ nir_alu_src_copy(&chan->src[1], &instr->src[1], chan);
chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
}
- nir_instr_insert_before(&instr->instr, &chan->instr);
+ nir_builder_instr_insert(builder, &chan->instr);
if (i == 0) {
last = &chan->dest.dest.ssa;
} else {
- nir_alu_instr *merge = nir_alu_instr_create(mem_ctx, merge_op);
- nir_alu_ssa_dest_init(merge, 1);
- merge->dest.write_mask = 1;
- merge->src[0].src = nir_src_for_ssa(last);
- merge->src[1].src = nir_src_for_ssa(&chan->dest.dest.ssa);
- nir_instr_insert_before(&instr->instr, &merge->instr);
- last = &merge->dest.dest.ssa;
+ last = nir_build_alu(builder, merge_op,
+ last, &chan->dest.dest.ssa, NULL, NULL);
}
}
assert(instr->dest.write_mask == 1);
- nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last),
- mem_ctx);
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last));
nir_instr_remove(&instr->instr);
}
static void
-lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
+lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
{
unsigned num_src = nir_op_infos[instr->op].num_inputs;
unsigned i, chan;
assert(instr->dest.dest.is_ssa);
assert(instr->dest.write_mask != 0);
+ b->cursor = nir_before_instr(&instr->instr);
+
#define LOWER_REDUCTION(name, chan, merge) \
case name##2: \
case name##3: \
case name##4: \
- lower_reduction(instr, chan, merge, mem_ctx); \
- break;
+ lower_reduction(instr, chan, merge, b); \
+ return;
switch (instr->op) {
case nir_op_vec4:
*/
return;
+ case nir_op_fdph: {
+ nir_ssa_def *sum[4];
+ for (unsigned i = 0; i < 3; i++) {
+ sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa,
+ instr->src[0].swizzle[i]),
+ nir_channel(b, instr->src[1].src.ssa,
+ instr->src[1].swizzle[i]));
+ }
+ sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]);
+
+ nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]),
+ nir_fadd(b, sum[2], sum[3]));
+
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
+ nir_instr_remove(&instr->instr);
+ return;
+ }
+
LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
return;
unsigned num_components = instr->dest.dest.ssa.num_components;
- static const nir_op nir_op_map[] = {nir_op_vec2, nir_op_vec3, nir_op_vec4};
- nir_alu_instr *vec_instr =
- nir_alu_instr_create(mem_ctx, nir_op_map[num_components - 2]);
- nir_alu_ssa_dest_init(vec_instr, num_components);
+ nir_ssa_def *comps[] = { NULL, NULL, NULL, NULL };
for (chan = 0; chan < 4; chan++) {
if (!(instr->dest.write_mask & (1 << chan)))
continue;
- nir_alu_instr *lower = nir_alu_instr_create(mem_ctx, instr->op);
+ nir_alu_instr *lower = nir_alu_instr_create(b->shader, instr->op);
for (i = 0; i < num_src; i++) {
/* We only handle same-size-as-dest (input_sizes[] == 0) or scalar
* args (input_sizes[] == 1).
unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?
0 : chan);
- nir_alu_src_copy(&lower->src[i], &instr->src[i], mem_ctx);
+ nir_alu_src_copy(&lower->src[i], &instr->src[i], lower);
for (int j = 0; j < 4; j++)
lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
}
nir_alu_ssa_dest_init(lower, 1);
lower->dest.saturate = instr->dest.saturate;
- vec_instr->src[chan].src = nir_src_for_ssa(&lower->dest.dest.ssa);
+ comps[chan] = &lower->dest.dest.ssa;
- nir_instr_insert_before(&instr->instr, &lower->instr);
+ nir_builder_instr_insert(b, &lower->instr);
}
- nir_instr_insert_before(&instr->instr, &vec_instr->instr);
+ nir_ssa_def *vec = nir_vec(b, comps, num_components);
- nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
- nir_src_for_ssa(&vec_instr->dest.dest.ssa),
- mem_ctx);
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
nir_instr_remove(&instr->instr);
}
static bool
-lower_alu_to_scalar_block(nir_block *block, void *data)
+lower_alu_to_scalar_block(nir_block *block, void *builder)
{
nir_foreach_instr_safe(block, instr) {
if (instr->type == nir_instr_type_alu)
- lower_alu_instr_scalar(nir_instr_as_alu(instr), data);
+ lower_alu_instr_scalar(nir_instr_as_alu(instr), builder);
}
return true;
static void
nir_lower_alu_to_scalar_impl(nir_function_impl *impl)
{
- nir_foreach_block(impl, lower_alu_to_scalar_block, ralloc_parent(impl));
+ nir_builder builder;
+ nir_builder_init(&builder, impl);
+
+ nir_foreach_block(impl, lower_alu_to_scalar_block, &builder);
}
void
nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
mul->dest.write_mask = 0x1;
- nir_src_copy(&mul->src[0].src, &deref_array->indirect, mem_ctx);
+ nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
mul->src[1].src.is_ssa = true;
mul->src[1].src.ssa = &atomic_counter_size->def;
nir_instr_insert_before(&instr->instr, &mul->instr);
nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
instr->dest.ssa.num_components, NULL);
nir_ssa_def_rewrite_uses(&instr->dest.ssa,
- nir_src_for_ssa(&new_instr->dest.ssa),
- mem_ctx);
+ nir_src_for_ssa(&new_instr->dest.ssa));
} else {
nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
}
--- /dev/null
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#define MAX_CLIP_PLANES 8
+
+/* Generates the lowering code for user-clip-planes, generating CLIPDIST
+ * from UCP[n] + CLIPVERTEX or POSITION.  Additionally provides an optional
+ * pass for fragment shaders that inserts conditional kills based on the
+ * interpolated CLIPDIST.
+ *
+ * NOTE: should be run after nir_lower_outputs_to_temporaries() (or at
+ * least in scenarios where you can count on each output being written
+ * once and only once).
+ */
+
+
+/* Allocate a vec4 variable named "clipdist_<drvloc>" and append it to the
+ * shader's output list (output == true) or input list (output == false).
+ *
+ * drvloc becomes the variable's driver_location and slot its location.
+ * Returns the new variable.
+ */
+static nir_variable *
+create_clipdist_var(nir_shader *shader, unsigned drvloc,
+                    bool output, gl_varying_slot slot)
+{
+   nir_variable *var = rzalloc(shader, nir_variable);
+   struct exec_list *var_list = output ? &shader->outputs : &shader->inputs;
+
+   var->data.driver_location = drvloc;
+   var->type = glsl_vec4_type();
+   var->data.mode = output ? nir_var_shader_out : nir_var_shader_in;
+   /* drvloc is unsigned, so format it with %u rather than %d. */
+   var->name = ralloc_asprintf(var, "clipdist_%u", drvloc);
+   var->data.index = 0;
+   var->data.location = slot;
+
+   exec_list_push_tail(var_list, &var->node);
+
+   return var;
+}
+
+/* Emit a 4-component store_output intrinsic that writes val[0..3], packed
+ * into one vec4, at the driver location of the variable out.
+ */
+static void
+store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val)
+{
+   nir_intrinsic_instr *intrin =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+
+   intrin->num_components = 4;
+   intrin->const_index[0] = out->data.driver_location;
+
+   nir_ssa_def *packed = nir_vec4(b, val[0], val[1], val[2], val[3]);
+   intrin->src[0].ssa = packed;
+   intrin->src[0].is_ssa = true;
+
+   nir_builder_instr_insert(b, &intrin->instr);
+}
+
+/* Emit a 4-component load_input intrinsic for the driver location of the
+ * variable in, and hand back its four channels in val[0..3].
+ */
+static void
+load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val)
+{
+   nir_intrinsic_instr *intrin =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+
+   intrin->num_components = 4;
+   intrin->const_index[0] = in->data.driver_location;
+   nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL);
+   nir_builder_instr_insert(b, &intrin->instr);
+
+   /* Split the loaded vec4 into one value per channel, x first. */
+   for (unsigned chan = 0; chan < 4; chan++)
+      val[chan] = nir_channel(b, &intrin->dest.ssa, chan);
+}
+
+/* Search state handed to find_output_in_block(). */
+struct find_output_state
+{
+   unsigned drvloc;   /* const_index[0] value of the store_output wanted */
+   nir_ssa_def *def;  /* src[0] of the matching store_output, else NULL */
+};
+
+/* Per-block callback: look for a store_output intrinsic whose
+ * const_index[0] equals state->drvloc and record its SSA source in
+ * state->def.
+ *
+ * Returns false to stop the block walk, true to keep going.
+ */
+static bool
+find_output_in_block(nir_block *block, void *void_state)
+{
+   struct find_output_state *state = void_state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
+      if (store->intrinsic != nir_intrinsic_store_output ||
+          store->const_index[0] != state->drvloc)
+         continue;
+
+      /* The output is expected to be written only once, from an SSA
+       * value.
+       */
+      assert(state->def == NULL);
+      assert(store->src[0].is_ssa);
+      state->def = store->src[0].ssa;
+
+#if !defined(DEBUG)
+      /* for debug builds, scan entire shader to assert
+       * if output is written multiple times.  For release
+       * builds just assume all is well and bail when we
+       * find first:
+       */
+      return false;
+#endif
+   }
+
+   return true;
+}
+
+/* Return the SSA value stored to the output at driver location drvloc, or
+ * NULL when no matching store_output is found.
+ *
+ * TODO: maybe this would be a useful helper?
+ * NOTE: assumes each output is written exactly once (and unconditionally),
+ * so run nir_lower_outputs_to_temporaries() first if needed.
+ */
+static nir_ssa_def *
+find_output(nir_shader *shader, unsigned drvloc)
+{
+   struct find_output_state state;
+
+   state.drvloc = drvloc;
+   state.def = NULL;
+
+   nir_foreach_overload(shader, overload) {
+      if (!overload->impl)
+         continue;
+
+      nir_foreach_block_reverse(overload->impl,
+                                find_output_in_block, &state);
+   }
+
+   return state.def;
+}
+
+/*
+ * VS lowering
+ */
+
+/* Append the clip-distance computation to the end of impl's body: for every
+ * plane enabled in ucp_enables, dot(ucp[plane], cv); for every disabled
+ * plane, 0.0.  The results are stored to the CLIPDIST variables in out[]
+ * (out[0] for planes 0-3, out[1] for planes 4-7).
+ */
+static void
+lower_clip_vs(nir_function_impl *impl, unsigned ucp_enables,
+              nir_ssa_def *cv, nir_variable **out)
+{
+   nir_builder b;
+   nir_ssa_def *clipdist[MAX_CLIP_PLANES];
+
+   nir_builder_init(&b, impl);
+
+   /* NIR should ensure that, even in case of loops/if-else, there
+    * should be only a single predecessor block to end_block, which
+    * makes the perfect place to insert the clipdist calculations.
+    *
+    * NOTE: in case of early returns, these would have to be lowered
+    * to jumps to end_block predecessor in a previous pass.  Not sure
+    * if there is a good way to sanity check this, but for now the
+    * users of this pass don't support sub-routines.
+    */
+   assert(impl->end_block->predecessors->entries == 1);
+   b.cursor = nir_after_cf_list(&impl->body);
+
+   for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
+      if (!(ucp_enables & (1 << plane))) {
+         /* 0.0 == don't-clip == disabled: */
+         clipdist[plane] = nir_imm_float(&b, 0.0);
+         continue;
+      }
+
+      /* insert intrinsic to fetch ucp[plane]: */
+      nir_intrinsic_instr *load_ucp =
+         nir_intrinsic_instr_create(b.shader,
+                                    nir_intrinsic_load_user_clip_plane);
+      load_ucp->num_components = 4;
+      load_ucp->const_index[0] = plane;
+      nir_ssa_dest_init(&load_ucp->instr, &load_ucp->dest, 4, NULL);
+      nir_builder_instr_insert(&b, &load_ucp->instr);
+
+      /* calculate clipdist[plane] - dot(ucp, cv): */
+      clipdist[plane] = nir_fdot4(&b, &load_ucp->dest.ssa, cv);
+   }
+
+   /* One vec4 output per group of four planes, written only when at least
+    * one plane of the group is enabled.
+    */
+   if (ucp_enables & 0x0f)
+      store_clipdist_output(&b, out[0], &clipdist[0]);
+   if (ucp_enables & 0xf0)
+      store_clipdist_output(&b, out[1], &clipdist[4]);
+
+   nir_metadata_preserve(impl, nir_metadata_dominance);
+}
+
+/**
+ * Lower user clip planes in a vertex shader to CLIPDIST outputs.
+ *
+ * ucp_enables is a bitmask of the enabled user clip planes.  The actual
+ * plane values are passed in to the shader via user_clip_plane
+ * system-values.
+ *
+ * The pass does nothing when no plane is enabled, when the shader already
+ * declares a CLIP_DIST0/CLIP_DIST1 output, when it declares neither a
+ * CLIP_VERTEX nor a POS output, or when no store to the chosen output is
+ * found.
+ */
+void
+nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables)
+{
+   int clipvertex = -1;
+   int position = -1;
+   int maxloc = -1;
+   nir_ssa_def *cv;
+   /* Only the entries for enabled plane groups are filled in below; the
+    * others stay NULL instead of holding indeterminate pointers.
+    */
+   nir_variable *out[2] = { NULL, NULL };
+
+   if (!ucp_enables)
+      return;
+
+   /* find clipvertex/position outputs: */
+   foreach_list_typed(nir_variable, var, node, &shader->outputs) {
+      int loc = var->data.driver_location;
+
+      /* keep track of last used driver-location.. we'll be
+       * appending CLIP_DIST0/CLIP_DIST1 after last existing
+       * output:
+       */
+      maxloc = MAX2(maxloc, loc);
+
+      switch (var->data.location) {
+      case VARYING_SLOT_POS:
+         position = loc;
+         break;
+      case VARYING_SLOT_CLIP_VERTEX:
+         clipvertex = loc;
+         break;
+      case VARYING_SLOT_CLIP_DIST0:
+      case VARYING_SLOT_CLIP_DIST1:
+         /* if shader is already writing CLIPDIST, then
+          * there should be no user-clip-planes to deal
+          * with.
+          */
+         return;
+      default:
+         break;
+      }
+   }
+
+   /* CLIP_VERTEX takes precedence over POS as the vertex to clip. */
+   if (clipvertex != -1)
+      cv = find_output(shader, clipvertex);
+   else if (position != -1)
+      cv = find_output(shader, position);
+   else
+      return;
+
+   /* find_output() returns NULL when nothing stores to the slot.  There is
+    * then no value to take the dot product with, so leave the shader
+    * untouched rather than building instructions on a NULL source.
+    */
+   if (!cv)
+      return;
+
+   /* insert CLIPDIST outputs: */
+   if (ucp_enables & 0x0f)
+      out[0] =
+         create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST0);
+   if (ucp_enables & 0xf0)
+      out[1] =
+         create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST1);
+
+   nir_foreach_overload(shader, overload) {
+      /* An overload without an implementation has no body to lower. */
+      if (overload->impl && !strcmp(overload->function->name, "main"))
+         lower_clip_vs(overload->impl, ucp_enables, cv, out);
+   }
+}
+
+/*
+ * FS lowering
+ */
+
+/* Insert, at the top of impl's body, loads of the CLIPDIST input variables
+ * in in[] followed by one discard_if(clipdist[plane] < 0.0) for each plane
+ * enabled in ucp_enables.
+ */
+static void
+lower_clip_fs(nir_function_impl *impl, unsigned ucp_enables,
+              nir_variable **in)
+{
+   nir_builder b;
+   nir_ssa_def *clipdist[MAX_CLIP_PLANES];
+
+   nir_builder_init(&b, impl);
+   b.cursor = nir_before_cf_list(&impl->body);
+
+   /* in[0] carries planes 0-3 and in[1] planes 4-7; each is loaded only
+    * when at least one of its planes is enabled.
+    */
+   if (ucp_enables & 0x0f)
+      load_clipdist_input(&b, in[0], &clipdist[0]);
+   if (ucp_enables & 0xf0)
+      load_clipdist_input(&b, in[1], &clipdist[4]);
+
+   for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
+      if (!(ucp_enables & (1 << plane)))
+         continue;
+
+      nir_ssa_def *zero = nir_imm_float(&b, 0.0);
+      nir_ssa_def *is_clipped = nir_flt(&b, clipdist[plane], zero);
+
+      nir_intrinsic_instr *kill =
+         nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);
+      kill->src[0] = nir_src_for_ssa(is_clipped);
+      nir_builder_instr_insert(&b, &kill->instr);
+   }
+}
+
+/**
+ * Lower user clip planes in a fragment shader: insert conditional kills
+ * based on the interpolated CLIPDIST.
+ *
+ * ucp_enables is a bitmask of the enabled user clip planes; nothing is done
+ * when it is zero.  New CLIP_DIST0/CLIP_DIST1 input variables are appended
+ * after the last driver location already in use.
+ */
+void
+nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables)
+{
+   /* Only the entries for enabled plane groups are filled in below; the
+    * others stay NULL instead of holding indeterminate pointers.
+    */
+   nir_variable *in[2] = { NULL, NULL };
+   int maxloc = -1;
+
+   if (!ucp_enables)
+      return;
+
+   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+      int loc = var->data.driver_location;
+
+      /* keep track of last used driver-location.. we'll be
+       * appending CLIP_DIST0/CLIP_DIST1 after last existing
+       * input:
+       */
+      maxloc = MAX2(maxloc, loc);
+   }
+
+   /* The shader won't normally have CLIPDIST inputs, so we
+    * must add our own:
+    */
+   /* insert CLIPDIST inputs: */
+   if (ucp_enables & 0x0f)
+      in[0] =
+         create_clipdist_var(shader, ++maxloc, false,
+                             VARYING_SLOT_CLIP_DIST0);
+   if (ucp_enables & 0xf0)
+      in[1] =
+         create_clipdist_var(shader, ++maxloc, false,
+                             VARYING_SLOT_CLIP_DIST1);
+
+   nir_foreach_overload(shader, overload) {
+      /* An overload without an implementation has no body to lower. */
+      if (overload->impl && !strcmp(overload->function->name, "main"))
+         lower_clip_fs(overload->impl, ucp_enables, in);
+   }
+}
return true;
}
-void
+bool
nir_lower_global_vars_to_local(nir_shader *shader)
{
struct global_to_local_state state;
+ bool progress = false;
state.var_func_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
exec_node_remove(&var->node);
var->data.mode = nir_var_local;
exec_list_push_tail(&impl->locals, &var->node);
+ progress = true;
}
}
_mesa_hash_table_destroy(state.var_func_table, NULL);
+
+ return progress;
}
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * \file nir_lower_gs_intrinsics.c
+ *
+ * Geometry Shaders can call EmitVertex()/EmitStreamVertex() to output an
+ * arbitrary number of vertices. However, the shader must declare the maximum
+ * number of vertices that it will ever output - further attempts to emit
+ * vertices result in undefined behavior according to the GLSL specification.
+ *
+ * Drivers might use this maximum number of vertices to allocate enough space
+ * to hold the geometry shader's output. Some drivers (such as i965) need to
+ * implement "safety checks" which ensure that the shader hasn't emitted too
+ * many vertices, to avoid overflowing that space and trashing other memory.
+ *
+ * The count of emitted vertices can also be useful in buffer offset
+ * calculations, so drivers know where to write the GS output.
+ *
+ * However, for simple geometry shaders that emit a statically determinable
+ * number of vertices, this extra bookkeeping is unnecessary and inefficient.
+ * By tracking the vertex count in NIR, we allow constant folding/propagation
+ * and dead control flow optimizations to eliminate most of it where possible.
+ *
+ * This pass introduces a new global variable which stores the current vertex
+ * count (initialized to 0), and converts emit_vertex/end_primitive intrinsics
+ * to their *_with_counter variants. emit_vertex is also wrapped in a safety
+ * check to avoid buffer overflows. Finally, it adds a set_vertex_count
+ * intrinsic at the end of the program, informing the driver of the final
+ * vertex count.
+ */
+
+/* State shared by the rewrite callbacks of the GS intrinsics lowering pass. */
+struct state {
+ nir_builder *builder; /* builder for the function impl currently being processed */
+ nir_variable *vertex_count_var; /* global variable holding the running vertex count */
+ bool progress; /* set once an emit_vertex/end_primitive has been rewritten */
+};
+
+/**
+ * Replace emit_vertex intrinsics with:
+ *
+ * if (vertex_count < max_vertices) {
+ * emit_vertex_with_counter vertex_count ...
+ * vertex_count += 1
+ * }
+ *
+ * max_vertices is the shader's gs.vertices_out, emitted as an immediate.
+ * const_index[0] of the original intrinsic is carried over to the new one.
+ *
+ * \param intrin the emit_vertex intrinsic; it is removed from the
+ * instruction stream after the replacement has been built
+ * \param state pass state supplying the builder and counter variable;
+ * its progress flag is set
+ */
+static void
+rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
+{
+ nir_builder *b = state->builder;
+
+ /* Load the vertex count */
+ b->cursor = nir_before_instr(&intrin->instr);
+ nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+ nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->gs.vertices_out);
+
+ /* Create: if (vertex_count < max_vertices) and insert it.
+ *
+ * The new if statement needs to be hooked up to the control flow graph
+ * before we start inserting instructions into it.
+ */
+ nir_if *if_stmt = nir_if_create(b->shader);
+ if_stmt->condition = nir_src_for_ssa(nir_ilt(b, count, max_vertices));
+ nir_builder_cf_insert(b, &if_stmt->cf_node);
+
+ /* Fill out the new then-block */
+ b->cursor = nir_after_cf_list(&if_stmt->then_list);
+
+ nir_intrinsic_instr *lowered =
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_emit_vertex_with_counter);
+ lowered->const_index[0] = intrin->const_index[0];
+ lowered->src[0] = nir_src_for_ssa(count);
+ nir_builder_instr_insert(b, &lowered->instr);
+
+ /* Increment the vertex count by 1 */
+ nir_store_var(b, state->vertex_count_var,
+ nir_iadd(b, count, nir_imm_int(b, 1)));
+
+ nir_instr_remove(&intrin->instr);
+
+ state->progress = true;
+}
+
+/**
+ * Swap an end_primitive intrinsic for end_primitive_with_counter.
+ *
+ * The replacement is inserted just before the original, takes the current
+ * value of the vertex counter as its source and inherits const_index[0].
+ * The original intrinsic is then removed and progress is recorded.
+ */
+static void
+rewrite_end_primitive(nir_intrinsic_instr *intrin, struct state *state)
+{
+   nir_builder *bld = state->builder;
+   nir_intrinsic_instr *replacement;
+   nir_ssa_def *vertex_count;
+
+   bld->cursor = nir_before_instr(&intrin->instr);
+   vertex_count = nir_load_var(bld, state->vertex_count_var);
+
+   replacement =
+      nir_intrinsic_instr_create(bld->shader,
+                                 nir_intrinsic_end_primitive_with_counter);
+   replacement->src[0] = nir_src_for_ssa(vertex_count);
+   replacement->const_index[0] = intrin->const_index[0];
+   nir_builder_instr_insert(bld, &replacement->instr);
+
+   nir_instr_remove(&intrin->instr);
+   state->progress = true;
+}
+
+/**
+ * Per-block callback: dispatch every emit_vertex / end_primitive intrinsic
+ * in the block to its rewrite helper.  Always returns true.
+ */
+static bool
+rewrite_intrinsics(nir_block *block, void *closure)
+{
+   struct state *pass_state = closure;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+         if (intr->intrinsic == nir_intrinsic_emit_vertex)
+            rewrite_emit_vertex(intr, pass_state);
+         else if (intr->intrinsic == nir_intrinsic_end_primitive)
+            rewrite_end_primitive(intr, pass_state);
+         /* any other intrinsic is of no interest here */
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Emit a set_vertex_count intrinsic (carrying the final vertex count) in
+ * every predecessor of the end block, ahead of any trailing jump such as a
+ * return.
+ */
+static void
+append_set_vertex_count(nir_block *end_block, struct state *state)
+{
+   nir_builder *bld = state->builder;
+   struct set_entry *pred_entry;
+
+   set_foreach(end_block->predecessors, pred_entry) {
+      nir_block *exit_block = (nir_block *) pred_entry->key;
+      nir_ssa_def *final_count;
+      nir_intrinsic_instr *set_count;
+
+      bld->cursor = nir_after_block_before_jump(exit_block);
+      final_count = nir_load_var(bld, state->vertex_count_var);
+
+      set_count = nir_intrinsic_instr_create(bld->shader,
+                                             nir_intrinsic_set_vertex_count);
+      set_count->src[0] = nir_src_for_ssa(final_count);
+      nir_builder_instr_insert(bld, &set_count->instr);
+   }
+}
+
+/**
+ * Entry point of the pass.
+ *
+ * Adds a global uint variable named "vertex_count" to the shader, rewrites
+ * the emit_vertex/end_primitive intrinsics of every function implementation
+ * to their counter-carrying variants, and appends set_vertex_count before
+ * the end block of each implementation.
+ *
+ * \return true if at least one emit_vertex or end_primitive was rewritten.
+ * Note that adding the variable and the set_vertex_count intrinsics does
+ * not by itself set the progress flag.
+ */
+bool
+nir_lower_gs_intrinsics(nir_shader *shader)
+{
+ struct state state;
+ state.progress = false;
+
+ /* Create the counter variable */
+ nir_variable *var = rzalloc(shader, nir_variable);
+ var->data.mode = nir_var_global;
+ var->type = glsl_uint_type();
+ var->name = "vertex_count";
+ var->constant_initializer = rzalloc(shader, nir_constant); /* initialize to 0 */
+
+ exec_list_push_tail(&shader->globals, &var->node);
+ state.vertex_count_var = var;
+
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl) {
+ nir_builder b;
+ nir_builder_init(&b, overload->impl);
+ state.builder = &b;
+
+ nir_foreach_block(overload->impl, rewrite_intrinsics, &state);
+
+ /* This only works because we have a single main() function. */
+ append_set_vertex_count(overload->impl->end_block, &state);
+
+ nir_metadata_preserve(overload->impl, 0);
+ }
+ }
+
+ return state.progress;
+}
}
assert(alu->dest.dest.is_ssa);
- nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
- nir_src_for_ssa(q),
- ralloc_parent(alu));
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(q));
}
static bool
nir_ssa_dest_init(&load->instr, &load->dest,
intrin->num_components, NULL);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_src_for_ssa(&load->dest.ssa),
- state->mem_ctx);
+ nir_src_for_ssa(&load->dest.ssa));
} else {
nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx);
}
store->const_index[0] = offset;
- nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
+ nir_src_copy(&store->src[0], &intrin->src[0], store);
if (has_indirect)
store->src[1] = indirect;
}
/* Batch things back together into a vector. */
- nir_ssa_def *vec;
- switch (lower->def.num_components) {
- case 2:
- vec = nir_vec2(&b, loads[0], loads[1]);
- break;
- case 3:
- vec = nir_vec3(&b, loads[0], loads[1], loads[2]);
- break;
- case 4:
- vec = nir_vec4(&b, loads[0], loads[1], loads[2], loads[3]);
- break;
- default:
- unreachable("Unknown load_const component count.");
- }
+ nir_ssa_def *vec = nir_vec(&b, loads, lower->def.num_components);
/* Replace the old load with a reference to our reconstructed vector. */
- nir_ssa_def_rewrite_uses(&lower->def, nir_src_for_ssa(vec),
- ralloc_parent(b.impl));
+ nir_ssa_def_rewrite_uses(&lower->def, nir_src_for_ssa(vec));
nir_instr_remove(&lower->instr);
}
* used to make adding register initialization code deterministic.
*/
nir_array derefs_array;
+
+ bool progress;
};
/* The following two functions implement a hash and equality check for
nir_alu_instr *add = nir_alu_instr_create(state->shader,
nir_op_iadd);
add->src[0].src = *src.reg.indirect;
- nir_src_copy(&add->src[1].src, &deref_array->indirect,
- state->shader);
+ nir_src_copy(&add->src[1].src, &deref_array->indirect, add);
add->dest.write_mask = 1;
nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
nir_instr_insert_before(instr, &add->instr);
nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
intrin->num_components, NULL);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_src_for_ssa(&mov->dest.dest.ssa),
- state->shader);
+ nir_src_for_ssa(&mov->dest.dest.ssa));
} else {
- nir_dest_copy(&mov->dest.dest, &intrin->dest, state->shader);
+ nir_dest_copy(&mov->dest.dest, &intrin->dest, &mov->instr);
}
nir_instr_insert_before(&intrin->instr, &mov->instr);
nir_instr_remove(&intrin->instr);
+ state->progress = true;
break;
}
&intrin->instr, state);
nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
- nir_src_copy(&mov->src[0].src, &intrin->src[0], state->shader);
+ nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
mov->dest.write_mask = (1 << intrin->num_components) - 1;
mov->dest.dest.is_ssa = false;
mov->dest.dest.reg.reg = reg_src.reg.reg;
nir_instr_insert_before(&intrin->instr, &mov->instr);
nir_instr_remove(&intrin->instr);
+ state->progress = true;
break;
}
mov->dest.dest.reg.indirect = reg_src.reg.indirect;
nir_instr_insert_after(&load->instr, &mov->instr);
+ state->progress = true;
}
-static void
+static bool
nir_lower_locals_to_regs_impl(nir_function_impl *impl)
{
struct locals_to_regs_state state;
state.shader = impl->overload->function->shader;
state.impl = impl;
+ state.progress = false;
state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal);
nir_array_init(&state.derefs_array, NULL);
nir_array_fini(&state.derefs_array);
_mesa_hash_table_destroy(state.regs_table, NULL);
+
+ return state.progress;
}
-void
+bool
nir_lower_locals_to_regs(nir_shader *shader)
{
+ bool progress = false;
+
nir_foreach_overload(shader, overload) {
if (overload->impl)
- nir_lower_locals_to_regs_impl(overload->impl);
+ progress = nir_lower_locals_to_regs_impl(overload->impl) || progress;
}
+
+ return progress;
}
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
- *
- * Authors:
- * Jason Ekstrand (jason@jlekstrand.net)
- *
*/
/*
#include "nir.h"
+/* State handed to the helpers that emit the temporary-to-output copies. */
+struct lower_outputs_state {
+ nir_shader *shader; /* shader being lowered */
+ struct exec_list old_outputs; /* variables moved off shader->outputs; they act as the temporaries */
+};
+
static void
-emit_output_copies(nir_shader *shader, nir_variable *temp, nir_variable *output)
+emit_output_copies(nir_cursor cursor, struct lower_outputs_state *state)
{
- nir_foreach_overload(shader, overload) {
- if (!overload->impl || strcmp(overload->function->name, "main"))
- continue;
+ assert(exec_list_length(&state->shader->outputs) ==
+ exec_list_length(&state->old_outputs));
- struct set_entry *block_entry;
- set_foreach(overload->impl->end_block->predecessors, block_entry) {
- struct nir_block *block = (void *)block_entry->key;
+ foreach_two_lists(out_node, &state->shader->outputs,
+ temp_node, &state->old_outputs) {
+ nir_variable *output = exec_node_data(nir_variable, out_node, node);
+ nir_variable *temp = exec_node_data(nir_variable, temp_node, node);
- nir_intrinsic_instr *copy =
- nir_intrinsic_instr_create(shader, nir_intrinsic_copy_var);
- copy->variables[0] = nir_deref_var_create(copy, output);
- copy->variables[1] = nir_deref_var_create(copy, temp);
+ nir_intrinsic_instr *copy =
+ nir_intrinsic_instr_create(state->shader, nir_intrinsic_copy_var);
+ copy->variables[0] = nir_deref_var_create(copy, output);
+ copy->variables[1] = nir_deref_var_create(copy, temp);
- nir_instr_insert(nir_after_block_before_jump(block), ©->instr);
- }
+ nir_instr_insert(cursor, ©->instr);
}
}
+/* Per-block callback: emit the output copies immediately before every
+ * emit_vertex intrinsic found in the block.  Always returns true.
+ */
+static bool
+emit_output_copies_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+         if (intr->intrinsic != nir_intrinsic_emit_vertex)
+            continue;
+
+         emit_output_copies(nir_before_instr(&intr->instr), state);
+      }
+   }
+
+   return true;
+}
+
void
nir_lower_outputs_to_temporaries(nir_shader *shader)
{
- struct exec_list old_outputs;
+ struct lower_outputs_state state;
- exec_list_move_nodes_to(&shader->outputs, &old_outputs);
+ state.shader = shader;
+ exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
/* Walk over all of the outputs turn each output into a temporary and
* make a new variable for the actual output.
*/
- foreach_list_typed(nir_variable, var, node, &old_outputs) {
+ foreach_list_typed(nir_variable, var, node, &state.old_outputs) {
nir_variable *output = ralloc(shader, nir_variable);
memcpy(output, var, sizeof *output);
/* The orignal is now the temporary */
nir_variable *temp = var;
- /* Move the original name over to the new output */
- if (output->name)
- ralloc_steal(output, output->name);
+ /* Reparent the name to the new variable */
+ ralloc_steal(output, output->name);
/* Give the output a new name with @out-temp appended */
temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
temp->constant_initializer = NULL;
exec_list_push_tail(&shader->outputs, &output->node);
+ }
+
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl == NULL)
+ continue;
+
+ if (shader->stage == MESA_SHADER_GEOMETRY) {
+ /* For geometry shaders, we have to emit the output copies right
+ * before each EmitVertex call.
+ */
+ nir_foreach_block(overload->impl, emit_output_copies_block, &state);
+ } else if (strcmp(overload->function->name, "main") == 0) {
+ /* For all other shader types, we need to do the copies right before
+ * the jumps to the end block.
+ */
+ struct set_entry *block_entry;
+ set_foreach(overload->impl->end_block->predecessors, block_entry) {
+ struct nir_block *block = (void *)block_entry->key;
+ emit_output_copies(nir_after_block_before_jump(block), &state);
+ }
+ }
- emit_output_copies(shader, temp, output);
+ nir_metadata_preserve(overload->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
}
- exec_list_append(&shader->globals, &old_outputs);
+ exec_list_append(&shader->globals, &state.old_outputs);
}
case nir_intrinsic_load_uniform_indirect:
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ubo_indirect:
+ case nir_intrinsic_load_ssbo:
+ case nir_intrinsic_load_ssbo_indirect:
case nir_intrinsic_load_input:
case nir_intrinsic_load_input_indirect:
return true;
nir_instr_insert_after(&last_phi->instr, &vec->instr);
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
- nir_src_for_ssa(&vec->dest.dest.ssa),
- state->mem_ctx);
+ nir_src_for_ssa(&vec->dest.dest.ssa));
ralloc_steal(state->dead_ctx, phi);
nir_instr_remove(&phi->instr);
--- /dev/null
+/*
+ * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
+ * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "../program.h"
+#include "program/hash_table.h"
+#include "ir_uniform.h"
+
+#include "main/compiler.h"
+#include "main/mtypes.h"
+#include "program/prog_parameter.h"
+#include "program/program.h"
+
+/* Append `indirect` to a texture instruction as an additional source of
+ * type nir_tex_src_sampler_offset.
+ *
+ * A source array one entry larger is allocated with `instr` as its ralloc
+ * context, the existing sources are moved into it, the old array is freed,
+ * and instr->num_srcs is incremented.
+ */
+static void
+add_indirect_to_tex(nir_tex_instr *instr, nir_src indirect)
+{
+ /* First, we have to resize the array of texture sources */
+ nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
+ instr->num_srcs + 1);
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ new_srcs[i].src_type = instr->src[i].src_type;
+ nir_instr_move_src(&instr->instr, &new_srcs[i].src, &instr->src[i].src);
+ }
+
+ ralloc_free(instr->src);
+ instr->src = new_srcs;
+
+ /* Now we can go ahead and move the source over to being a
+ * first-class texture source.
+ */
+ instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+ instr->num_srcs++;
+ nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src,
+ indirect);
+}
+
+/* Calculate the sampler index based on array indices and also
+ * calculate the base uniform location for struct members.
+ *
+ * Walks the deref chain below `tail` recursively:
+ * - array derefs recurse into the child first, then add
+ * base_offset * *array_elements to instr->sampler_index; an indirect
+ * index is multiplied by *array_elements, accumulated into *indirect
+ * (created if still NULL) and cleared from the deref; finally
+ * *array_elements is multiplied by the length of tail->type.
+ * - struct derefs add the member's record location offset to *location
+ * and then recurse into the child.
+ * Wildcard array derefs are not allowed (asserted).
+ *
+ * New ALU instructions are emitted through `b` at its current cursor.
+ */
+static void
+calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr,
+ unsigned *array_elements, nir_ssa_def **indirect,
+ nir_builder *b, unsigned *location)
+{
+ if (tail->child == NULL)
+ return;
+
+ switch (tail->child->deref_type) {
+ case nir_deref_type_array: {
+ nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+
+ assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
+
+ calc_sampler_offsets(tail->child, instr, array_elements,
+ indirect, b, location);
+ instr->sampler_index += deref_array->base_offset * *array_elements;
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_ssa_def *mul =
+ nir_imul(b, nir_imm_int(b, *array_elements),
+ nir_ssa_for_src(b, deref_array->indirect, 1));
+
+ nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
+ NIR_SRC_INIT);
+
+ if (*indirect) {
+ *indirect = nir_iadd(b, *indirect, mul);
+ } else {
+ *indirect = mul;
+ }
+ }
+
+ *array_elements *= glsl_get_length(tail->type);
+ break;
+ }
+
+ case nir_deref_type_struct: {
+ nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child);
+ *location += glsl_get_record_location_offset(tail->type, deref_struct->index);
+ calc_sampler_offsets(tail->child, instr, array_elements,
+ indirect, b, location);
+ break;
+ }
+
+ default:
+ unreachable("Invalid deref type");
+ break;
+ }
+}
+
+/* Lower the sampler variable dereference of a texture instruction to a
+ * plain sampler index.
+ *
+ * Instructions without a sampler deref are left alone.  Otherwise the
+ * constant part of the deref chain is folded into instr->sampler_index, a
+ * dynamic part (if any) becomes an extra nir_tex_src_sampler_offset source
+ * with instr->sampler_array_size set, and the index assigned to the uniform
+ * for `stage` in shader_program->UniformStorage is added on top.  On
+ * success instr->sampler is cleared.
+ *
+ * If the computed uniform location is out of range or the sampler is not
+ * active for `stage`, the function asserts and returns without clearing
+ * instr->sampler.
+ */
+static void
+lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+              gl_shader_stage stage, nir_builder *builder)
+{
+   if (instr->sampler == NULL)
+      return;
+
+   instr->sampler_index = 0;
+   unsigned location = instr->sampler->var->data.location;
+   unsigned array_elements = 1;
+   nir_ssa_def *indirect = NULL;
+
+   builder->cursor = nir_before_instr(&instr->instr);
+   calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements,
+                        &indirect, builder, &location);
+
+   if (indirect) {
+      /* Grow the source array and attach the dynamic offset; this is the
+       * same sequence add_indirect_to_tex() implements.
+       */
+      add_indirect_to_tex(instr, nir_src_for_ssa(indirect));
+
+      instr->sampler_array_size = array_elements;
+   }
+
+   /* Compare with >= rather than "> NumUniformStorage - 1": the latter
+    * wraps around when NumUniformStorage is 0 and lets every location
+    * through to the array access below.
+    */
+   if (location >= shader_program->NumUniformStorage ||
+       !shader_program->UniformStorage[location].sampler[stage].active) {
+      assert(!"cannot return a sampler");
+      return;
+   }
+
+   instr->sampler_index +=
+      shader_program->UniformStorage[location].sampler[stage].index;
+
+   instr->sampler = NULL;
+}
+
+/* State passed to lower_block_cb() while lowering one function impl. */
+typedef struct {
+ nir_builder builder; /* builder initialised for the impl being lowered */
+ const struct gl_shader_program *shader_program; /* source of the UniformStorage lookups */
+ gl_shader_stage stage; /* stage whose sampler info is consulted */
+} lower_state;
+
+/* Per-block callback: run lower_sampler() on every texture instruction in
+ * the block.  Always returns true.
+ */
+static bool
+lower_block_cb(nir_block *block, void *_state)
+{
+   lower_state *ctx = (lower_state *) _state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_tex)
+         continue;
+
+      lower_sampler(nir_instr_as_tex(instr), ctx->shader_program,
+                    ctx->stage, &ctx->builder);
+   }
+
+   return true;
+}
+
+/* Lower the samplers of every texture instruction in one function impl. */
+static void
+lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+           gl_shader_stage stage)
+{
+   lower_state ctx;
+
+   ctx.stage = stage;
+   ctx.shader_program = shader_program;
+   nir_builder_init(&ctx.builder, impl);
+
+   nir_foreach_block(impl, lower_block_cb, &ctx);
+}
+
+/* Lower sampler derefs to sampler indices in every function implementation
+ * of the shader, using shader_program for the uniform lookups.
+ */
+void
+nir_lower_samplers(nir_shader *shader,
+                   const struct gl_shader_program *shader_program)
+{
+   nir_foreach_overload(shader, overload) {
+      if (!overload->impl)
+         continue;
+
+      lower_impl(overload->impl, shader_program, shader->stage);
+   }
+}
+
+/* Per-block callback for the Vulkan flavour of sampler lowering.
+ *
+ * For every texture instruction (which must have a sampler deref):
+ * sampler_set is taken from the variable's descriptor_set and sampler_index
+ * from its binding.  A single-level array deref adds its base_offset to
+ * sampler_index; if that deref is indirect, the index becomes an extra
+ * texture source, is cleared from the deref, and sampler_array_size is set
+ * to the array length.  The sampler deref is then dropped.
+ *
+ * `data` is unused.  Always returns true.
+ */
+static bool
+lower_samplers_for_vk_block(nir_block *block, void *data)
+{
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_tex)
+ continue;
+
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+ assert(tex->sampler);
+
+ tex->sampler_set = tex->sampler->var->data.descriptor_set;
+ tex->sampler_index = tex->sampler->var->data.binding;
+
+ if (tex->sampler->deref.child) {
+ assert(tex->sampler->deref.child->deref_type == nir_deref_type_array);
+ nir_deref_array *arr = nir_deref_as_array(tex->sampler->deref.child);
+
+ /* Only one-level arrays are allowed in vulkan */
+ assert(arr->deref.child == NULL);
+
+ tex->sampler_index += arr->base_offset;
+ if (arr->deref_array_type == nir_deref_array_type_indirect) {
+ add_indirect_to_tex(tex, arr->indirect);
+ nir_instr_rewrite_src(instr, &arr->indirect, NIR_SRC_INIT);
+
+ tex->sampler_array_size = glsl_get_length(tex->sampler->deref.type);
+ }
+ }
+
+ tex->sampler = NULL;
+ }
+
+ return true;
+}
+
+/* Run the Vulkan sampler lowering over every block of every function
+ * implementation in the shader.
+ */
+void
+nir_lower_samplers_for_vk(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (!overload->impl)
+         continue;
+
+      nir_foreach_block(overload->impl, lower_samplers_for_vk_block, NULL);
+   }
+}
#include "nir.h"
#include "main/mtypes.h"
-static void
+static bool
convert_instr(nir_intrinsic_instr *instr)
{
if (instr->intrinsic != nir_intrinsic_load_var)
- return;
+ return false;
nir_variable *var = instr->variables[0]->var;
if (var->data.mode != nir_var_system_value)
- return;
+ return false;
void *mem_ctx = ralloc_parent(instr);
- nir_intrinsic_op op;
-
- switch (var->data.location) {
- case SYSTEM_VALUE_FRONT_FACE:
- op = nir_intrinsic_load_front_face;
- break;
- case SYSTEM_VALUE_VERTEX_ID:
- op = nir_intrinsic_load_vertex_id;
- break;
- case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
- op = nir_intrinsic_load_vertex_id_zero_base;
- break;
- case SYSTEM_VALUE_BASE_VERTEX:
- op = nir_intrinsic_load_base_vertex;
- break;
- case SYSTEM_VALUE_INSTANCE_ID:
- op = nir_intrinsic_load_instance_id;
- break;
- case SYSTEM_VALUE_SAMPLE_ID:
- op = nir_intrinsic_load_sample_id;
- break;
- case SYSTEM_VALUE_SAMPLE_POS:
- op = nir_intrinsic_load_sample_pos;
- break;
- case SYSTEM_VALUE_SAMPLE_MASK_IN:
- op = nir_intrinsic_load_sample_mask_in;
- break;
- case SYSTEM_VALUE_INVOCATION_ID:
- op = nir_intrinsic_load_invocation_id;
- break;
- default:
- unreachable("not reached");
- }
-
+ nir_intrinsic_op op = nir_intrinsic_from_system_value(var->data.location);
nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
if (instr->dest.is_ssa) {
nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
instr->dest.ssa.num_components, NULL);
nir_ssa_def_rewrite_uses(&instr->dest.ssa,
- nir_src_for_ssa(&new_instr->dest.ssa),
- mem_ctx);
+ nir_src_for_ssa(&new_instr->dest.ssa));
} else {
nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
}
nir_instr_insert_before(&instr->instr, &new_instr->instr);
nir_instr_remove(&instr->instr);
+
+ return true;
}
static bool
convert_block(nir_block *block, void *state)
{
- (void) state;
+ bool *progress = state;
nir_foreach_instr_safe(block, instr) {
if (instr->type == nir_instr_type_intrinsic)
- convert_instr(nir_instr_as_intrinsic(instr));
+ *progress = convert_instr(nir_instr_as_intrinsic(instr)) || *progress;
}
return true;
}
-static void
+static bool
convert_impl(nir_function_impl *impl)
{
- nir_foreach_block(impl, convert_block, NULL);
+ bool progress;
+
+ nir_foreach_block(impl, convert_block, &progress);
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
+ return progress;
}
-void
+bool
nir_lower_system_values(nir_shader *shader)
{
+ bool progress = false;
+
nir_foreach_overload(shader, overload) {
if (overload->impl)
- convert_impl(overload->impl);
+ progress = convert_impl(overload->impl) || progress;
}
exec_list_make_empty(&shader->system_values);
+
+ return progress;
}
--- /dev/null
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * This lowering pass supports (as configured via nir_lower_tex_options)
+ * various texture related conversions:
+ * + texture projector lowering: converts the coordinate division for
+ * texture projection to be done in ALU instructions instead of
+ * asking the texture operation to do so.
+ * + lowering RECT: converts the un-normalized RECT texture coordinates
+ * to normalized coordinates with txs plus ALU instructions
+ * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
+ * inserts instructions to clamp specified coordinates to [0.0, 1.0].
+ * Note that this automatically triggers texture projector lowering if
+ * needed, since clamping must happen after projector lowering.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* State passed to the per-block callback of the texture lowering pass. */
+typedef struct {
+ nir_builder b; /* builder used to emit the replacement ALU/tex instructions */
+ const nir_lower_tex_options *options; /* which lowerings are requested */
+} lower_tex_state;
+
+/* Lower the texture projector to ALU instructions.
+ *
+ * If `tex` has a nir_tex_src_projector source, its reciprocal is computed
+ * before the instruction and multiplied into the coordinate and comparitor
+ * sources.  For array textures the last coordinate component (the array
+ * index) is taken from the unprojected value.  The projector source is then
+ * removed by shifting the later sources down and decrementing num_srcs.
+ *
+ * Does nothing when the instruction has no projector source.
+ */
+static void
+project_src(nir_builder *b, nir_tex_instr *tex)
+{
+ /* Find the projector in the srcs list, if present. */
+ unsigned proj_index;
+ for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
+ if (tex->src[proj_index].src_type == nir_tex_src_projector)
+ break;
+ }
+ if (proj_index == tex->num_srcs)
+ return;
+
+ b->cursor = nir_before_instr(&tex->instr);
+
+ nir_ssa_def *inv_proj =
+ nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
+
+ /* Walk through the sources projecting the arguments. */
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ switch (tex->src[i].src_type) {
+ case nir_tex_src_coord:
+ case nir_tex_src_comparitor:
+ break;
+ default:
+ continue;
+ }
+ nir_ssa_def *unprojected =
+ nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+ nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
+
+ /* Array indices don't get projected, so make a new vector with the
+ * coordinate's array index untouched.
+ */
+ if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
+ switch (tex->coord_components) {
+ case 4:
+ projected = nir_vec4(b,
+ nir_channel(b, projected, 0),
+ nir_channel(b, projected, 1),
+ nir_channel(b, projected, 2),
+ nir_channel(b, unprojected, 3));
+ break;
+ case 3:
+ projected = nir_vec3(b,
+ nir_channel(b, projected, 0),
+ nir_channel(b, projected, 1),
+ nir_channel(b, unprojected, 2));
+ break;
+ case 2:
+ projected = nir_vec2(b,
+ nir_channel(b, projected, 0),
+ nir_channel(b, unprojected, 1));
+ break;
+ default:
+ unreachable("bad texture coord count for array");
+ break;
+ }
+ }
+
+ nir_instr_rewrite_src(&tex->instr,
+ &tex->src[i].src,
+ nir_src_for_ssa(projected));
+ }
+
+ /* Now move the later tex sources down the array so that the projector
+ * disappears.
+ */
+ nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
+ NIR_SRC_INIT);
+ for (unsigned i = proj_index + 1; i < tex->num_srcs; i++) {
+ tex->src[i-1].src_type = tex->src[i].src_type;
+ nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
+ }
+ tex->num_srcs--;
+}
+
+/* Emit a txs instruction (LOD 0, RECT sampler dim, same sampler_index as
+ * `tex`) immediately before `tex` and return its 2-component result after
+ * nir_i2f conversion.
+ *
+ * Moves the builder cursor to just before `tex`.  Must not be used on array
+ * textures (asserted).
+ */
+static nir_ssa_def *
+get_texture_size(nir_builder *b, nir_tex_instr *tex)
+{
+ b->cursor = nir_before_instr(&tex->instr);
+
+ /* RECT textures should not be array: */
+ assert(!tex->is_array);
+
+ nir_tex_instr *txs;
+
+ txs = nir_tex_instr_create(b->shader, 1);
+ txs->op = nir_texop_txs;
+ txs->sampler_dim = GLSL_SAMPLER_DIM_RECT;
+ txs->sampler_index = tex->sampler_index;
+
+ /* only single src, the lod: */
+ txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
+ txs->src[0].src_type = nir_tex_src_lod;
+
+ nir_ssa_dest_init(&txs->instr, &txs->dest, 2, NULL);
+ nir_builder_instr_insert(b, &txs->instr);
+
+ return nir_i2f(b, &txs->dest.ssa);
+}
+
+/* Lower a RECT texture access to a 2D one: every coordinate source is
+ * multiplied by the reciprocal of the texture size (queried with
+ * get_texture_size()), and the instruction's sampler_dim is switched to
+ * GLSL_SAMPLER_DIM_2D.
+ */
+static void
+lower_rect(nir_builder *b, nir_tex_instr *tex)
+{
+ nir_ssa_def *txs = get_texture_size(b, tex);
+ nir_ssa_def *scale = nir_frcp(b, txs);
+
+ /* Walk through the sources normalizing the requested arguments. */
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ if (tex->src[i].src_type != nir_tex_src_coord)
+ continue;
+
+ nir_ssa_def *coords =
+ nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
+ nir_instr_rewrite_src(&tex->instr,
+ &tex->src[i].src,
+ nir_src_for_ssa(nir_fmul(b, coords, scale)));
+ }
+
+ tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+}
+
+/* Clamp selected components of the coordinate sources of `tex`.
+ *
+ * Bit j of sat_mask requests clamping of coordinate component j.  The
+ * array index of array textures (the last component) is never clamped.
+ * Normalized coordinates are clamped with nir_fsat; for RECT textures the
+ * component is clamped to [0.0, texture size] instead.
+ *
+ * NOTE(review): in the RECT case get_texture_size() is called once per
+ * clamped component, so a separate txs is emitted for each of them.
+ */
+static void
+saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
+{
+ b->cursor = nir_before_instr(&tex->instr);
+
+ /* Walk through the sources saturating the requested arguments. */
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ if (tex->src[i].src_type != nir_tex_src_coord)
+ continue;
+
+ nir_ssa_def *src =
+ nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
+
+ /* split src into components: */
+ nir_ssa_def *comp[4];
+
+ for (unsigned j = 0; j < tex->coord_components; j++)
+ comp[j] = nir_channel(b, src, j);
+
+ /* clamp requested components, array index does not get clamped: */
+ unsigned ncomp = tex->coord_components;
+ if (tex->is_array)
+ ncomp--;
+
+ for (unsigned j = 0; j < ncomp; j++) {
+ if ((1 << j) & sat_mask) {
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
+ /* non-normalized texture coords, so clamp to texture
+ * size rather than [0.0, 1.0]
+ */
+ nir_ssa_def *txs = get_texture_size(b, tex);
+ comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
+ comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
+ } else {
+ comp[j] = nir_fsat(b, comp[j]);
+ }
+ }
+ }
+
+ /* and move the result back into a single vecN: */
+ src = nir_vec(b, comp, tex->coord_components);
+
+ nir_instr_rewrite_src(&tex->instr,
+ &tex->src[i].src,
+ nir_src_for_ssa(src));
+ }
+}
+
+/* Per-block callback: apply the lowerings requested in state->options to
+ * every texture instruction in 'block'.  The order matters: the projector
+ * is lowered first, then RECT coordinates are normalized, and finally the
+ * requested coordinates are clamped.  Always returns true.
+ */
+static bool
+nir_lower_tex_block(nir_block *block, void *void_state)
+{
+   lower_tex_state *state = void_state;
+   nir_builder *b = &state->b;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_tex)
+         continue;
+
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      bool lower_txp =
+         !!(state->options->lower_txp & (1u << tex->sampler_dim));
+
+      /* Bit of this instruction's sampler in the per-sampler saturate
+       * masks.  The shift is done unsigned: with a signed 1 the shift is
+       * undefined for sampler index 31.
+       */
+      const unsigned sampler_bit = 1u << tex->sampler_index;
+
+      /* mask of src coords to saturate (clamp): */
+      unsigned sat_mask = 0;
+
+      if (sampler_bit & state->options->saturate_r)
+         sat_mask |= (1 << 2);    /* .z */
+      if (sampler_bit & state->options->saturate_t)
+         sat_mask |= (1 << 1);    /* .y */
+      if (sampler_bit & state->options->saturate_s)
+         sat_mask |= (1 << 0);    /* .x */
+
+      /* If we are clamping any coords, we must lower projector first
+       * as clamping happens *after* projection:
+       */
+      if (lower_txp || sat_mask)
+         project_src(b, tex);
+
+      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) &&
+          state->options->lower_rect)
+         lower_rect(b, tex);
+
+      if (sat_mask)
+         saturate_src(b, tex, sat_mask);
+   }
+
+   return true;
+}
+
+/* Run the texture lowering over every block of one function
+ * implementation, using the builder embedded in 'state'.
+ */
+static void
+nir_lower_tex_impl(nir_function_impl *impl, lower_tex_state *state)
+{
+   nir_builder *b = &state->b;
+
+   nir_builder_init(b, impl);
+
+   nir_foreach_block(impl, nir_lower_tex_block, state);
+
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+}
+
+/* Entry point: apply the texture lowerings selected by 'options' to every
+ * function overload of 'shader' that has an implementation.
+ */
+void
+nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
+{
+   lower_tex_state state = { .options = options };
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_lower_tex_impl(overload->impl, &state);
+   }
+}
+++ /dev/null
-/*
- * Copyright © 2015 Broadcom
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/*
- * This lowering pass converts the coordinate division for texture projection
- * to be done in ALU instructions instead of asking the texture operation to
- * do so.
- */
-
-#include "nir.h"
-#include "nir_builder.h"
-
-static nir_ssa_def *
-channel(nir_builder *b, nir_ssa_def *def, int c)
-{
- return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
-}
-
-static bool
-nir_lower_tex_projector_block(nir_block *block, void *void_state)
-{
- nir_builder *b = void_state;
-
- nir_foreach_instr_safe(block, instr) {
- if (instr->type != nir_instr_type_tex)
- continue;
-
- nir_tex_instr *tex = nir_instr_as_tex(instr);
- b->cursor = nir_before_instr(&tex->instr);
-
- /* Find the projector in the srcs list, if present. */
- int proj_index;
- for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
- if (tex->src[proj_index].src_type == nir_tex_src_projector)
- break;
- }
- if (proj_index == tex->num_srcs)
- continue;
- nir_ssa_def *inv_proj =
- nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
-
- /* Walk through the sources projecting the arguments. */
- for (int i = 0; i < tex->num_srcs; i++) {
- switch (tex->src[i].src_type) {
- case nir_tex_src_coord:
- case nir_tex_src_comparitor:
- break;
- default:
- continue;
- }
- nir_ssa_def *unprojected =
- nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
- nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
-
- /* Array indices don't get projected, so make an new vector with the
- * coordinate's array index untouched.
- */
- if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
- switch (tex->coord_components) {
- case 4:
- projected = nir_vec4(b,
- channel(b, projected, 0),
- channel(b, projected, 1),
- channel(b, projected, 2),
- channel(b, unprojected, 3));
- break;
- case 3:
- projected = nir_vec3(b,
- channel(b, projected, 0),
- channel(b, projected, 1),
- channel(b, unprojected, 2));
- break;
- case 2:
- projected = nir_vec2(b,
- channel(b, projected, 0),
- channel(b, unprojected, 1));
- break;
- default:
- unreachable("bad texture coord count for array");
- break;
- }
- }
-
- nir_instr_rewrite_src(&tex->instr,
- &tex->src[i].src,
- nir_src_for_ssa(projected));
- }
-
- /* Now move the later tex sources down the array so that the projector
- * disappears.
- */
- nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
- NIR_SRC_INIT);
- for (int i = proj_index + 1; i < tex->num_srcs; i++) {
- tex->src[i-1].src_type = tex->src[i].src_type;
- nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
- }
- tex->num_srcs--;
- }
-
- return true;
-}
-
-static void
-nir_lower_tex_projector_impl(nir_function_impl *impl)
-{
- nir_builder b;
- nir_builder_init(&b, impl);
-
- nir_foreach_block(impl, nir_lower_tex_projector_block, &b);
-
- nir_metadata_preserve(impl, nir_metadata_block_index |
- nir_metadata_dominance);
-}
-
-void
-nir_lower_tex_projector(nir_shader *shader)
-{
- nir_foreach_overload(shader, overload) {
- if (overload->impl)
- nir_lower_tex_projector_impl(overload->impl);
- }
-}
--- /dev/null
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#define MAX_COLORS 2 /* VARYING_SLOT_COL0/COL1 */
+
+/* State for one run of the two-sided-color lowering over a shader. */
+typedef struct {
+ nir_builder b;
+ nir_shader *shader;
+ /* FACE input: an existing one is reused, otherwise one is created */
+ nir_variable *face;
+ struct {
+ nir_variable *front; /* COLn */
+ nir_variable *back; /* BFCn */
+ } colors[MAX_COLORS];
+ /* number of entries of colors[] that are in use */
+ int colors_count;
+} lower_2side_state;
+
+
+/* Lowering pass for fragment shaders to emulate two-sided-color. For
+ * each COLOR input, a corresponding BCOLOR input is created, and a bcsel
+ * instruction is used to select front or back color based on FACE.
+ */
+
+/* Allocate a new vec4 shader input at driver location 'drvloc' for varying
+ * 'slot', name it "in_<drvloc>" and append it to the shader's input list.
+ */
+static nir_variable *
+create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot)
+{
+   nir_variable *input = rzalloc(shader, nir_variable);
+
+   input->name = ralloc_asprintf(input, "in_%d", drvloc);
+   input->type = glsl_vec4_type();
+
+   input->data.mode = nir_var_shader_in;
+   input->data.location = slot;
+   input->data.driver_location = drvloc;
+   input->data.index = 0;
+
+   exec_list_push_tail(&shader->inputs, &input->node);
+
+   return input;
+}
+
+/* Emit a four-component load_input intrinsic for the driver location of
+ * 'in' at the builder's cursor and return its SSA result.
+ */
+static nir_ssa_def *
+load_input(nir_builder *b, nir_variable *in)
+{
+   nir_intrinsic_instr *load =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+
+   load->const_index[0] = in->data.driver_location;
+   load->num_components = 4;
+   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+
+   nir_builder_instr_insert(b, &load->instr);
+
+   return &load->dest.ssa;
+}
+
+/* Scan the shader inputs for the front colors (COL0/COL1) and FACE, then
+ * create whatever is missing: a flat-interpolated FACE input and one BFCn
+ * input per front color, each placed after the last driver location in
+ * use.  Returns 0 on success, or -1 if the shader has no color inputs and
+ * there is nothing to lower.
+ */
+static int
+setup_inputs(lower_2side_state *state)
+{
+   /* highest driver location seen so far; new inputs are appended after */
+   int last_loc = -1;
+
+   foreach_list_typed(nir_variable, var, node, &state->shader->inputs) {
+      int loc = var->data.driver_location;
+
+      if (loc > last_loc)
+         last_loc = loc;
+
+      if (var->data.location == VARYING_SLOT_COL0 ||
+          var->data.location == VARYING_SLOT_COL1) {
+         assert(state->colors_count < ARRAY_SIZE(state->colors));
+         state->colors[state->colors_count++].front = var;
+      } else if (var->data.location == VARYING_SLOT_FACE) {
+         state->face = var;
+      }
+   }
+
+   /* without color inputs there is nothing to do */
+   if (state->colors_count == 0)
+      return -1;
+
+   /* reuse an existing FACE input, otherwise add one */
+   if (!state->face) {
+      state->face = create_input(state->shader, ++last_loc, VARYING_SLOT_FACE);
+      state->face->data.interpolation = INTERP_QUALIFIER_FLAT;
+   }
+
+   /* pair every front color with a freshly created back-face color */
+   for (int c = 0; c < state->colors_count; c++) {
+      const bool is_col0 =
+         state->colors[c].front->data.location == VARYING_SLOT_COL0;
+      gl_varying_slot slot = is_col0 ? VARYING_SLOT_BFC0 : VARYING_SLOT_BFC1;
+
+      state->colors[c].back = create_input(state->shader, ++last_loc, slot);
+   }
+
+   return 0;
+}
+
+/* Per-block callback: every load_input of a tracked front color has its
+ * uses redirected to a select between the back and front color, keyed on
+ * the sign of FACE.  Always returns true.
+ */
+static bool
+nir_lower_two_sided_color_block(nir_block *block, void *void_state)
+{
+   lower_2side_state *state = void_state;
+   nir_builder *b = &state->b;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+      if (intr->intrinsic != nir_intrinsic_load_input)
+         continue;
+
+      /* does this load read one of the front colors we track? */
+      int match = -1;
+      for (int c = 0; c < state->colors_count && match < 0; c++) {
+         unsigned drvloc = state->colors[c].front->data.driver_location;
+         if (intr->const_index[0] == drvloc)
+            match = c;
+      }
+
+      if (match < 0)
+         continue;
+
+      /* replace load_input(COLn) with
+       *   bcsel(load_input(FACE).x < 0.0, load_input(BFCn), load_input(COLn))
+       */
+      b->cursor = nir_before_instr(&intr->instr);
+
+      nir_ssa_def *face_vec = load_input(b, state->face);
+      nir_ssa_def *face = nir_channel(b, face_vec, 0);
+      nir_ssa_def *front = load_input(b, state->colors[match].front);
+      nir_ssa_def *back = load_input(b, state->colors[match].back);
+      nir_ssa_def *zero = nir_imm_float(b, 0.0);
+      nir_ssa_def *is_back = nir_flt(b, face, zero);
+      nir_ssa_def *color = nir_bcsel(b, is_back, back, front);
+
+      assert(intr->dest.is_ssa);
+      nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(color));
+   }
+
+   return true;
+}
+
+/* Run the two-sided-color lowering over every block of one function
+ * implementation, using the builder embedded in 'state'.
+ */
+static void
+nir_lower_two_sided_color_impl(nir_function_impl *impl,
+                               lower_2side_state *state)
+{
+   nir_builder_init(&state->b, impl);
+
+   nir_foreach_block(impl, nir_lower_two_sided_color_block, state);
+
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+}
+
+/* Entry point of the pass.  Does nothing unless 'shader' is a fragment
+ * shader that has at least one color input.
+ */
+void
+nir_lower_two_sided_color(nir_shader *shader)
+{
+   lower_2side_state state = {
+      .shader = shader,
+   };
+
+   /* setup_inputs() is only reached for fragment shaders */
+   if (shader->stage != MESA_SHADER_FRAGMENT || setup_inputs(&state) != 0)
+      return;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_lower_two_sided_color_impl(overload->impl, &state);
+   }
+}
nir_instr_remove(&intrin->instr);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_src_for_ssa(&undef->def),
- state->shader);
+ nir_src_for_ssa(&undef->def));
continue;
}
nir_instr_remove(&intrin->instr);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_src_for_ssa(&mov->dest.dest.ssa),
- state->shader);
+ nir_src_for_ssa(&mov->dest.dest.ssa));
break;
}
* moves with partial writes.
*/
+/* Callback state for lower_vec_to_movs_block(). */
+struct vec_to_movs_state {
+ /* function implementation being lowered; used to create registers */
+ nir_function_impl *impl;
+ /* set to true once a vecN instruction has been replaced */
+ bool progress;
+};
+
static bool
src_matches_dest_reg(nir_dest *dest, nir_src *src)
{
* which ones have been processed.
*/
static unsigned
-insert_mov(nir_alu_instr *vec, unsigned start_channel,
- unsigned start_src_idx, void *mem_ctx)
+insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
{
- unsigned src_idx = start_src_idx;
- assert(src_idx < nir_op_infos[vec->op].num_inputs);
+ /* Builds one imov that writes channel start_idx of the vec destination
+ * plus every later written channel fed by the same source with the same
+ * negate/abs modifiers, and inserts it before the vec unless it would
+ * do nothing. Returns the mask of destination channels that have been
+ * dealt with, including channels dropped because they would only copy a
+ * register channel onto itself.
+ */
+ assert(start_idx < nir_op_infos[vec->op].num_inputs);
- nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
- nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mem_ctx);
- nir_alu_dest_copy(&mov->dest, &vec->dest, mem_ctx);
+ nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
+ nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
+ nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
- mov->dest.write_mask = (1u << start_channel);
- mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0];
- src_idx++;
+ mov->dest.write_mask = (1u << start_idx);
+ mov->src[0].swizzle[start_idx] = vec->src[start_idx].swizzle[0];
+ mov->src[0].negate = vec->src[start_idx].negate;
+ mov->src[0].abs = vec->src[start_idx].abs;
- for (unsigned i = start_channel + 1; i < 4; i++) {
+ for (unsigned i = start_idx + 1; i < 4; i++) {
if (!(vec->dest.write_mask & (1 << i)))
continue;
- if (nir_srcs_equal(vec->src[src_idx].src, vec->src[start_src_idx].src)) {
+ if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
+ vec->src[i].negate == vec->src[start_idx].negate &&
+ vec->src[i].abs == vec->src[start_idx].abs) {
mov->dest.write_mask |= (1 << i);
- mov->src[0].swizzle[i] = vec->src[src_idx].swizzle[0];
+ mov->src[0].swizzle[i] = vec->src[i].swizzle[0];
}
- src_idx++;
}
- nir_instr_insert_before(&vec->instr, &mov->instr);
+ /* Stash the channels this mov accounts for before the no-op channels
+ * are stripped below: those channels are handled too (there is nothing
+ * to do for them), and the mask must remain readable after the mov has
+ * been freed.
+ */
+ unsigned channels_handled = mov->dest.write_mask;
+
+ /* In some situations (if the vecN is involved in a phi-web), we can end
+ * up with a mov from a register to itself. Some of those channels may end
+ * up doing nothing and there's no reason to have them as part of the mov.
+ */
+ if (src_matches_dest_reg(&mov->dest.dest, &mov->src[0].src) &&
+ !mov->src[0].abs && !mov->src[0].negate) {
+ for (unsigned i = 0; i < 4; i++) {
+ if (mov->src[0].swizzle[i] == i) {
+ mov->dest.write_mask &= ~(1 << i);
+ }
+ }
+ }
+
+ /* Only emit the instruction if it actually does something */
+ if (mov->dest.write_mask) {
+ nir_instr_insert_before(&vec->instr, &mov->instr);
+ } else {
+ ralloc_free(mov);
+ }
- return mov->dest.write_mask;
+ /* mov may have been freed above, so it must not be dereferenced here */
+ return channels_handled;
}
static bool
-lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
+has_replicated_dest(nir_alu_instr *alu)
+{
+ /* True for the fdot_replicated2/3/4 and fdph_replicated opcodes, whose
+ * result is replicated to all components.
+ */
+ return alu->op == nir_op_fdot_replicated2 ||
+ alu->op == nir_op_fdot_replicated3 ||
+ alu->op == nir_op_fdot_replicated4 ||
+ alu->op == nir_op_fdph_replicated;
+}
+
+/* Attempts to coalesce the "move" from the given source of the vec to the
+ * destination of the instruction generating the value. If, for whatever
+ * reason, we cannot coalesce the move, it does nothing and returns 0. We
+ * can then call insert_mov as normal.
+ *
+ * On success the generating ALU instruction is retargeted to write the
+ * vec's destination directly, and the mask of vec destination channels it
+ * now writes is returned.
+ */
+static unsigned
+try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
+{
+ assert(start_idx < nir_op_infos[vec->op].num_inputs);
+
+ /* We will only even try if the source is SSA */
+ if (!vec->src[start_idx].src.is_ssa)
+ return 0;
+
+ assert(vec->src[start_idx].src.ssa);
+
+ /* If we are going to do a reswizzle, then the vecN operation must be the
+ * only use of the source value. We also can't have any source modifiers.
+ */
+ nir_foreach_use(vec->src[start_idx].src.ssa, src) {
+ if (src->parent_instr != &vec->instr)
+ return 0;
+
+ nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
+ if (alu_src->abs || alu_src->negate)
+ return 0;
+ }
+
+ /* The value must not be used as an if-condition either */
+ if (!list_empty(&vec->src[start_idx].src.ssa->if_uses))
+ return 0;
+
+ /* Only values produced by ALU instructions can be retargeted */
+ if (vec->src[start_idx].src.ssa->parent_instr->type != nir_instr_type_alu)
+ return 0;
+
+ nir_alu_instr *src_alu =
+ nir_instr_as_alu(vec->src[start_idx].src.ssa->parent_instr);
+
+ if (has_replicated_dest(src_alu)) {
+ /* The fdot instruction is special: It replicates its result to all
+ * components. This means that we can always rewrite its destination
+ * and we don't need to swizzle anything.
+ */
+ } else {
+ /* We only care about being able to re-swizzle the instruction if it is
+ * something that we can reswizzle. It must be per-component. The one
+ * exception to this is the fdotN instructions which implicitly splat
+ * their result out to all channels.
+ */
+ if (nir_op_infos[src_alu->op].output_size != 0)
+ return 0;
+
+ /* If we are going to reswizzle the instruction, we can't have any
+ * non-per-component sources either.
+ */
+ for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+ if (nir_op_infos[src_alu->op].input_sizes[j] != 0)
+ return 0;
+ }
+
+ /* Stash off all of the ALU instruction's swizzles. */
+ uint8_t swizzles[4][4];
+ for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+ for (unsigned i = 0; i < 4; i++)
+ swizzles[j][i] = src_alu->src[j].swizzle[i];
+
+ unsigned write_mask = 0;
+ for (unsigned i = start_idx; i < 4; i++) {
+ if (!(vec->dest.write_mask & (1 << i)))
+ continue;
+
+ if (!vec->src[i].src.is_ssa ||
+ vec->src[i].src.ssa != &src_alu->dest.dest.ssa)
+ continue;
+
+ /* At this point, the given vec source matches up with the ALU
+ * instruction so we can re-swizzle that component to match.
+ */
+ write_mask |= 1 << i;
+ if (has_replicated_dest(src_alu)) {
+ /* Since the destination is a single replicated value, we don't need
+ * to do any reswizzling
+ */
+ } else {
+ /* Index the stashed copy, since src_alu's own swizzles are being
+ * overwritten as we go.
+ */
+ for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+ src_alu->src[j].swizzle[i] = swizzles[j][vec->src[i].swizzle[0]];
+ }
+
+ /* Clear the no longer needed vec source */
+ nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, NIR_SRC_INIT);
+ }
+
+ /* Make the source instruction write the vec's destination directly */
+ nir_instr_rewrite_dest(&src_alu->instr, &src_alu->dest.dest, vec->dest.dest);
+ src_alu->dest.write_mask = write_mask;
+
+ return write_mask;
+}
+
+static bool
+lower_vec_to_movs_block(nir_block *block, void *void_state)
{
+ struct vec_to_movs_state *state = void_state;
+ nir_function_impl *impl = state->impl;
+ nir_shader *shader = impl->overload->function->shader;
+
nir_foreach_instr_safe(block, instr) {
if (instr->type != nir_instr_type_alu)
continue;
continue; /* The loop */
}
- /* Since we insert multiple MOVs, we have to be non-SSA. */
- assert(!vec->dest.dest.is_ssa);
+ if (vec->dest.dest.is_ssa) {
+ /* Since we insert multiple MOVs, we have a register destination. */
+ nir_register *reg = nir_local_reg_create(impl);
+ reg->num_components = vec->dest.dest.ssa.num_components;
+
+ nir_ssa_def_rewrite_uses(&vec->dest.dest.ssa, nir_src_for_reg(reg));
+
+ nir_instr_rewrite_dest(&vec->instr, &vec->dest.dest,
+ nir_dest_for_reg(reg));
+ }
unsigned finished_write_mask = 0;
* destination reg, in case other values we're populating in the dest
* might overwrite them.
*/
- for (unsigned i = 0, src_idx = 0; i < 4; i++) {
+ for (unsigned i = 0; i < 4; i++) {
if (!(vec->dest.write_mask & (1 << i)))
continue;
- if (src_matches_dest_reg(&vec->dest.dest, &vec->src[src_idx].src)) {
- finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
+ if (src_matches_dest_reg(&vec->dest.dest, &vec->src[i].src)) {
+ finished_write_mask |= insert_mov(vec, i, shader);
break;
}
- src_idx++;
}
/* Now, emit MOVs for all the other src channels. */
- for (unsigned i = 0, src_idx = 0; i < 4; i++) {
+ for (unsigned i = 0; i < 4; i++) {
if (!(vec->dest.write_mask & (1 << i)))
continue;
if (!(finished_write_mask & (1 << i)))
- finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
+ finished_write_mask |= try_coalesce(vec, i, shader);
- src_idx++;
+ if (!(finished_write_mask & (1 << i)))
+ finished_write_mask |= insert_mov(vec, i, shader);
}
nir_instr_remove(&vec->instr);
ralloc_free(vec);
+ state->progress = true;
}
return true;
}
-static void
+static bool
nir_lower_vec_to_movs_impl(nir_function_impl *impl)
{
- nir_foreach_block(impl, lower_vec_to_movs_block, ralloc_parent(impl));
+ /* Lower every block of this implementation and report whether any
+ * vecN instruction was replaced.
+ */
+ struct vec_to_movs_state state = { impl, false };
+
+ nir_foreach_block(impl, lower_vec_to_movs_block, &state);
+
+ return state.progress;
}
-void
+bool
nir_lower_vec_to_movs(nir_shader *shader)
{
+ /* Becomes true if any overload's implementation made progress */
+ bool progress = false;
+
nir_foreach_overload(shader, overload) {
if (overload->impl)
- nir_lower_vec_to_movs_impl(overload->impl);
+ /* call first so the impl always runs, whatever progress holds */
+ progress = nir_lower_vec_to_movs_impl(overload->impl) || progress;
}
+
+ return progress;
}
--- /dev/null
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a pass that tries to move uses of vecN sources to their
+ * destinations. This is kind of like an inverse copy-propagation pass.
+ * For instance, if you have
+ *
+ * ssa_1 = vec4(a, b, c, d)
+ * ssa_2 = fadd(a, b)
+ *
+ * This will be turned into
+ *
+ * ssa_1 = vec4(a, b, c, d)
+ * ssa_2 = fadd(ssa_1.x, ssa_1.y)
+ *
+ * While this is "worse" because it adds a bunch of unneeded dependencies, it
+ * actually makes it much easier for vec4-based backends to coalesce the MOV's
+ * that result from the vec4 operation because it doesn't have to worry about
+ * quite as many reads.
+ */
+
+/* Tells whether the instruction defining 'def' dominates 'instr', based on
+ * the instruction indices and, across blocks, on block dominance.  A def
+ * never dominates its own defining instruction.
+ */
+static bool
+ssa_def_dominates_instr(nir_ssa_def *def, nir_instr *instr)
+{
+   nir_instr *def_instr = def->parent_instr;
+
+   /* Anything indexed at or before the definition cannot be dominated. */
+   if (def_instr->index >= instr->index)
+      return false;
+
+   /* Same block: the index check above already proved the def is earlier. */
+   if (def_instr->block == instr->block)
+      return true;
+
+   return nir_block_dominates(def_instr->block, instr->block);
+}
+
+/* Per-block callback: for every SSA vec2/vec3/vec4 in 'block', rewrite the
+ * ALU uses of the vec's sources that the vec dominates so that they read
+ * the matching channel of the vec's destination instead.  Only sources
+ * that are SSA and carry no abs/negate modifier take part.  The 'shader'
+ * argument is not used.  Always returns true.
+ */
+static bool
+move_vec_src_uses_to_dest_block(nir_block *block, void *shader)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *vec = nir_instr_as_alu(instr);
+
+      switch (vec->op) {
+      case nir_op_vec2:
+      case nir_op_vec3:
+      case nir_op_vec4:
+         break;
+      default:
+         continue; /* The loop */
+      }
+
+      /* Can't handle non-SSA vec operations */
+      if (!vec->dest.dest.is_ssa)
+         continue;
+
+      /* Can't handle saturation */
+      if (vec->dest.saturate)
+         continue;
+
+      /* First, mark all of the sources we are going to consider for rewriting
+       * to the destination
+       */
+      int srcs_remaining = 0;
+      for (unsigned i = 0; i < nir_op_infos[vec->op].num_inputs; i++) {
+         /* We can't rewrite a source if it's not in SSA form */
+         if (!vec->src[i].src.is_ssa)
+            continue;
+
+         /* We can't rewrite a source if it has modifiers */
+         if (vec->src[i].abs || vec->src[i].negate)
+            continue;
+
+         srcs_remaining |= 1 << i;
+      }
+
+      /* We can't actually do anything with this instruction */
+      if (srcs_remaining == 0)
+         continue;
+
+      /* Each iteration handles the lowest remaining source together with
+       * every other remaining source that reads the same SSA value.
+       */
+      for (unsigned i; i = ffs(srcs_remaining) - 1, srcs_remaining;) {
+         /* swizzle[c] is the vec destination channel that holds channel c
+          * of the source value, or -1 if no destination channel does.
+          */
+         int8_t swizzle[4] = { -1, -1, -1, -1 };
+
+         for (unsigned j = i; j < nir_op_infos[vec->op].num_inputs; j++) {
+            /* Only sources that passed the checks above may contribute a
+             * channel.  A source with abs/negate makes its destination
+             * channel hold a modified value, and a non-SSA source has no
+             * SSA def to compare against.
+             */
+            if (!(srcs_remaining & (1 << j)))
+               continue;
+
+            if (vec->src[j].src.ssa != vec->src[i].src.ssa)
+               continue;
+
+            /* Mark the given channel as having been handled */
+            srcs_remaining &= ~(1 << j);
+
+            /* Mark the appropriate channel as coming from src j */
+            swizzle[vec->src[j].swizzle[0]] = j;
+         }
+
+         nir_foreach_use_safe(vec->src[i].src.ssa, use) {
+            if (use->parent_instr == &vec->instr)
+               continue;
+
+            /* We need to dominate the use if we are going to rewrite it */
+            if (!ssa_def_dominates_instr(&vec->dest.dest.ssa, use->parent_instr))
+               continue;
+
+            /* For now, we'll just rewrite ALU instructions */
+            if (use->parent_instr->type != nir_instr_type_alu)
+               continue;
+
+            assert(use->is_ssa);
+
+            nir_alu_instr *use_alu = nir_instr_as_alu(use->parent_instr);
+
+            /* Figure out which source we're actually looking at */
+            nir_alu_src *use_alu_src = exec_node_data(nir_alu_src, use, src);
+            unsigned src_idx = use_alu_src - use_alu->src;
+            assert(src_idx < nir_op_infos[use_alu->op].num_inputs);
+
+            /* Every channel the use reads must be available in the vec's
+             * destination.
+             */
+            bool can_reswizzle = true;
+            for (unsigned j = 0; j < 4; j++) {
+               if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
+                  continue;
+
+               if (swizzle[use_alu_src->swizzle[j]] == -1) {
+                  can_reswizzle = false;
+                  break;
+               }
+            }
+
+            if (!can_reswizzle)
+               continue;
+
+            /* At this point, we have determined that the given use can be
+             * reswizzled to actually use the destination of the vecN operation.
+             * Go ahead and rewrite it as needed.
+             */
+            nir_instr_rewrite_src(use->parent_instr, use,
+                                  nir_src_for_ssa(&vec->dest.dest.ssa));
+            for (unsigned j = 0; j < 4; j++) {
+               if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
+                  continue;
+
+               use_alu_src->swizzle[j] = swizzle[use_alu_src->swizzle[j]];
+            }
+         }
+      }
+   }
+
+   return true;
+}
+
+/* Run the pass over one function implementation.  Dominance metadata is
+ * requested and the instructions are (re)indexed before the walk, since the
+ * per-block callback compares instruction indices and block dominance.
+ */
+static void
+nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl)
+{
+   nir_metadata_require(impl, nir_metadata_dominance);
+   nir_index_instrs(impl);
+
+   nir_foreach_block(impl, move_vec_src_uses_to_dest_block, shader);
+
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+}
+
+/* Entry point: run the pass on every function overload of 'shader' that
+ * has an implementation.
+ */
+void
+nir_move_vec_src_uses_to_dest(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      nir_function_impl *impl = overload->impl;
+
+      if (impl)
+         nir_move_vec_src_uses_to_dest_impl(shader, impl);
+   }
+}
* or 1.0. This is based on the old GLSL IR based pass by Eric.
*/
-static nir_ssa_def *
-channel(nir_builder *b, nir_ssa_def *def, int c)
-{
- return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
-}
+/* Callback state for normalize_cubemap_coords_block(). */
+struct normalize_cubemap_state {
+ nir_builder b;
+ /* set to true once a texture coordinate source has been rewritten */
+ bool progress;
+};
static bool
normalize_cubemap_coords_block(nir_block *block, void *void_state)
{
- nir_builder *b = void_state;
+ struct normalize_cubemap_state *state = void_state;
+ nir_builder *b = &state->b;
nir_foreach_instr(block, instr) {
if (instr->type != nir_instr_type_tex)
assert(orig_coord->num_components >= 3);
nir_ssa_def *abs = nir_fabs(b, orig_coord);
- nir_ssa_def *norm = nir_fmax(b, channel(b, abs, 0),
- nir_fmax(b, channel(b, abs, 1),
- channel(b, abs, 2)));
+ nir_ssa_def *norm = nir_fmax(b, nir_channel(b, abs, 0),
+ nir_fmax(b, nir_channel(b, abs, 1),
+ nir_channel(b, abs, 2)));
nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm));
*/
if (tex->coord_components == 4) {
normalized = nir_vec4(b,
- channel(b, normalized, 0),
- channel(b, normalized, 1),
- channel(b, normalized, 2),
- channel(b, orig_coord, 3));
+ nir_channel(b, normalized, 0),
+ nir_channel(b, normalized, 1),
+ nir_channel(b, normalized, 2),
+ nir_channel(b, orig_coord, 3));
}
nir_instr_rewrite_src(&tex->instr,
&tex->src[i].src,
nir_src_for_ssa(normalized));
+
+ state->progress = true;
}
}
return true;
}
-static void
+static bool
normalize_cubemap_coords_impl(nir_function_impl *impl)
{
- nir_builder b;
- nir_builder_init(&b, impl);
+ struct normalize_cubemap_state state;
+ nir_builder_init(&state.b, impl);
+ state.progress = false;
- nir_foreach_block(impl, normalize_cubemap_coords_block, &b);
+ nir_foreach_block(impl, normalize_cubemap_coords_block, &state);
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
+
+ return state.progress;
}
-void
+bool
nir_normalize_cubemap_coords(nir_shader *shader)
{
- nir_foreach_overload(shader, overload)
+ bool progress = false;
+
+ nir_foreach_overload(shader, overload) {
if (overload->impl)
- normalize_cubemap_coords_impl(overload->impl);
+ progress = normalize_cubemap_coords_impl(overload->impl) || progress;
+ }
+
+ return progress;
}
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
"{src}")
+binop_reduce("fdot_replicated", 4, tfloat, tfloat,
+ "{src0} * {src1}", "{src0} + {src1}", "{src}")
+
+opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], "",
+ "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "",
+ "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+
binop("fmin", tfloat, "", "fminf(src0, src1)")
binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
(('flrp', a, a, b), a),
(('flrp', 0.0, a, b), ('fmul', a, b)),
(('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+ (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
(('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
(('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
(('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
(('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
(('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+ (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+ (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+ (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
+ (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
]
print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
nir_instr_insert_before(&instr->instr, &new_instr->instr);
- nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(&new_instr->def),
- mem_ctx);
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+ nir_src_for_ssa(&new_instr->def));
nir_instr_remove(&instr->instr);
ralloc_free(instr);
if (nir_instrs_equal(instr, other)) {
nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
- nir_src_for_ssa(other_def),
- state->mem_ctx);
+ nir_src_for_ssa(other_def));
nir_instr_remove(instr);
state->progress = true;
return;
if (nir_instrs_equal(instr, other)) {
nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
- nir_src_for_ssa(other_def),
- state->mem_ctx);
+ nir_src_for_ssa(other_def));
nir_instr_remove(instr);
state->progress = true;
return;
--- /dev/null
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include "nir_control_flow.h"
+
+/*
+ * This file implements an optimization that deletes statically
+ * unreachable/dead code. In NIR, one way this can happen is if an if
+ * statement has a constant condition:
+ *
+ * if (true) {
+ * ...
+ * }
+ *
+ * We delete the if statement and paste the contents of the always-executed
+ * branch into the surrounding control flow, possibly removing more code if
+ * the branch had a jump at the end.
+ *
+ * Another way is that control flow can end in a jump so that code after it
+ * never gets executed. In particular, this can happen after optimizing
+ * something like:
+ *
+ * if (true) {
+ * ...
+ * break;
+ * }
+ * ...
+ *
+ * We also consider the case where both branches of an if end in a jump, e.g.:
+ *
+ * if (...) {
+ * break;
+ * } else {
+ * continue;
+ * }
+ * ...
+ *
+ * Finally, we also handle removing useless loops, i.e. loops with no side
+ * effects and without any definitions that are used elsewhere. This case is a
+ * little different from the first two in that the code is actually run (it
+ * just never does anything), but there are similar issues with needing to
+ * be careful with restarting after deleting the cf_node (see dead_cf_list())
+ * so this is a convenient place to remove them.
+ */
+
+/*
+ * Deletes every control-flow node that follows "node" in its parent list.
+ * "node" itself is kept.
+ */
+static void
+remove_after_cf_node(nir_cf_node *node)
+{
+   /* Advance to the last sibling in the list that contains "node". */
+   nir_cf_node *last = node;
+   for (; !nir_cf_node_is_last(last); last = nir_cf_node_next(last))
+      ;
+
+   /* Cut out everything between the end of "node" and the end of the last
+    * sibling, then destroy the extracted chunk.
+    */
+   nir_cf_list unreachable_tail;
+   nir_cf_extract(&unreachable_tail, nir_after_cf_node(node),
+                  nir_after_cf_node(last));
+   nir_cf_delete(&unreachable_tail);
+}
+
+/*
+ * Replaces an if statement whose condition is known with the contents of
+ * the branch that is taken: the then-list when "condition" is true, the
+ * else-list otherwise. Phis at the top of the block following the if are
+ * resolved to the value coming from the taken branch, and the if node itself
+ * is removed at the end.
+ */
+static void
+opt_constant_if(nir_if *if_stmt, bool condition)
+{
+ /* First, we need to remove any phi nodes after the if by rewriting uses to
+ * point to the correct source.
+ */
+ nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&if_stmt->cf_node));
+ nir_block *last_block =
+ nir_cf_node_as_block(condition ? nir_if_last_then_node(if_stmt)
+ : nir_if_last_else_node(if_stmt));
+
+ nir_foreach_instr_safe(after, instr) {
+ /* Only the leading run of phi instructions is processed. */
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_ssa_def *def = NULL;
+ /* Pick the phi source whose predecessor is the last block of the branch
+ * being kept.
+ */
+ nir_foreach_phi_src(phi, phi_src) {
+ if (phi_src->pred != last_block)
+ continue;
+
+ assert(phi_src->src.is_ssa);
+ def = phi_src->src.ssa;
+ }
+
+ assert(def);
+ assert(phi->dest.is_ssa);
+ nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
+ nir_instr_remove(instr);
+ }
+
+ /* The control flow list we're about to paste in may include a jump at the
+ * end, and in that case we have to delete the rest of the control flow
+ * list after the if since it's unreachable and the validator will balk if
+ * we don't.
+ */
+
+ if (!exec_list_is_empty(&last_block->instr_list)) {
+ nir_instr *last_instr = nir_block_last_instr(last_block);
+ if (last_instr->type == nir_instr_type_jump)
+ remove_after_cf_node(&if_stmt->cf_node);
+ }
+
+ /* Finally, actually paste in the then or else branch and delete the if. */
+ struct exec_list *cf_list = condition ? &if_stmt->then_list
+ : &if_stmt->else_list;
+
+ nir_cf_list list;
+ nir_cf_extract(&list, nir_before_cf_list(cf_list),
+ nir_after_cf_list(cf_list));
+ nir_cf_reinsert(&list, nir_after_cf_node(&if_stmt->cf_node));
+ nir_cf_node_remove(&if_stmt->cf_node);
+}
+
+/*
+ * Block callback used with nir_foreach_block_in_cf_node(): returns true when
+ * nothing in the block counts as a side effect, i.e. the block contains no
+ * call, no return jump, and no intrinsic lacking the
+ * NIR_INTRINSIC_CAN_ELIMINATE flag. Returning false stops the walk.
+ *
+ * "state" is unused.
+ */
+static bool
+block_has_no_side_effects(nir_block *block, void *state)
+{
+   (void) state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_call)
+         return false;
+
+      /* Return instructions can cause us to skip over other side-effecting
+       * instructions after the loop, so consider them to have side effects
+       * here.
+       */
+
+      if (instr->type == nir_instr_type_jump &&
+          nir_instr_as_jump(instr)->type == nir_jump_return)
+         return false;
+
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      /* The mask test must be parenthesised before it is negated: without
+       * the parentheses "!flags & BIT" parses as "(!flags) & BIT", which is
+       * zero for every non-zero flags word, so an intrinsic that sets some
+       * other flag but not CAN_ELIMINATE would wrongly pass as removable.
+       */
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (!(nir_intrinsic_infos[intrin->intrinsic].flags &
+            NIR_INTRINSIC_CAN_ELIMINATE))
+         return false;
+   }
+
+   return true;
+}
+
+/*
+ * nir_foreach_ssa_def() callback. "state" is the block to test against;
+ * returns true when "def" is not in that block's live-in set.
+ */
+static bool
+def_not_live_out(nir_ssa_def *def, void *state)
+{
+   nir_block *following = state;
+
+   if (BITSET_TEST(following->live_in, def->live_index))
+      return false;
+
+   return true;
+}
+
+/*
+ * Test if a loop is dead. A loop is dead if:
+ *
+ * 1) It has no side effects (i.e. intrinsics which could possibly affect the
+ * state of the program aside from producing an SSA value, indicated by a lack
+ * of NIR_INTRINSIC_CAN_ELIMINATE).
+ *
+ * 2) It has no phi nodes after it, since those indicate values inside the
+ * loop being used after the loop.
+ *
+ * 3) If there are no phi nodes after the loop, then the only way a value
+ * defined inside the loop can be used outside the loop is if its definition
+ * dominates the block after the loop. If none of the definitions that
+ * dominate the loop exit are used outside the loop, then the loop is dead
+ * and it can be deleted.
+ */
+
+static bool
+loop_is_dead(nir_loop *loop)
+{
+   nir_cf_node *loop_node = &loop->cf_node;
+   nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(loop_node));
+   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(loop_node));
+
+   /* A phi at the top of the block following the loop rules it out. */
+   if (!exec_list_is_empty(&after->instr_list)) {
+      if (nir_block_first_instr(after)->type == nir_instr_type_phi)
+         return false;
+   }
+
+   /* Every block inside the loop has to pass the side-effect check. */
+   bool side_effect_free =
+      nir_foreach_block_in_cf_node(loop_node, block_has_no_side_effects, NULL);
+   if (!side_effect_free)
+      return false;
+
+   /* imm_dom is read below and live_in is read by def_not_live_out(), so
+    * request both pieces of metadata first.
+    */
+   nir_function_impl *impl = nir_cf_node_get_function(loop_node);
+   nir_metadata_require(impl, nir_metadata_live_variables |
+                              nir_metadata_dominance);
+
+   /* Climb the immediate-dominator chain from the block after the loop up
+    * to, but not including, the block before it. Any definition found on
+    * the way that is live into "after" keeps the loop alive.
+    */
+   nir_block *dom = after->imm_dom;
+   while (dom != before) {
+      nir_foreach_instr(dom, instr) {
+         if (!nir_foreach_ssa_def(instr, def_not_live_out, after))
+            return false;
+      }
+
+      dom = dom->imm_dom;
+   }
+
+   return true;
+}
+
+/*
+ * Looks at the control-flow node that follows "block". An if with a constant
+ * condition is replaced by its taken branch; a loop that loop_is_dead()
+ * accepts is removed. Returns true when something was removed, false when
+ * nothing changed.
+ */
+static bool
+dead_cf_block(nir_block *block)
+{
+   nir_if *next_if = nir_block_get_following_if(block);
+   if (next_if) {
+      nir_const_value *cond = nir_src_as_const_value(next_if->condition);
+
+      /* A non-constant condition leaves the if alone; the loop case is not
+       * considered in that situation.
+       */
+      if (cond) {
+         opt_constant_if(next_if, cond->u[0] != 0);
+         return true;
+      }
+
+      return false;
+   }
+
+   nir_loop *next_loop = nir_block_get_following_loop(block);
+   if (next_loop && loop_is_dead(next_loop)) {
+      nir_cf_node_remove(&next_loop->cf_node);
+      return true;
+   }
+
+   return false;
+}
+
+/*
+ * Returns true when "block" is non-empty and its last instruction is a jump.
+ */
+static bool
+ends_in_jump(nir_block *block)
+{
+   /* The emptiness test comes first so the last instruction is only fetched
+    * from a non-empty block.
+    */
+   return !exec_list_is_empty(&block->instr_list) &&
+          nir_block_last_instr(block)->type == nir_instr_type_jump;
+}
+
+/*
+ * Walks one control-flow list, recursing into the lists of nested ifs and
+ * loops, and removes dead control flow as it goes.
+ *
+ * list: the control-flow list to process.
+ * list_ends_in_jump: output; reset to false on entry and set to true when a
+ * block ending in a jump is found, or when both branches of an if report
+ * ending in a jump.
+ *
+ * Returns true if anything was removed. Note that the function returns
+ * immediately after deleting the unreachable tail of the list, without
+ * visiting any further nodes.
+ */
+static bool
+dead_cf_list(struct exec_list *list, bool *list_ends_in_jump)
+{
+ bool progress = false;
+ *list_ends_in_jump = false;
+
+ /* Last node fully processed; used to re-find our position after a removal. */
+ nir_cf_node *prev = NULL;
+
+ foreach_list_typed(nir_cf_node, cur, node, list) {
+ switch (cur->type) {
+ case nir_cf_node_block: {
+ nir_block *block = nir_cf_node_as_block(cur);
+ if (dead_cf_block(block)) {
+ /* We just deleted the if or loop after this block, so we may have
+ * deleted the block before or after it -- which one is an
+ * implementation detail. Therefore, to recover the place we were
+ * at, we have to use the previous cf_node.
+ */
+
+ if (prev) {
+ cur = nir_cf_node_next(prev);
+ } else {
+ cur = exec_node_data(nir_cf_node, exec_list_get_head(list),
+ node);
+ }
+
+ block = nir_cf_node_as_block(cur);
+
+ progress = true;
+ }
+
+ if (ends_in_jump(block)) {
+ *list_ends_in_jump = true;
+
+ /* Anything after a block that ends in a jump is removed. */
+ if (!exec_node_is_tail_sentinel(cur->node.next)) {
+ remove_after_cf_node(cur);
+ return true;
+ }
+ }
+
+ break;
+ }
+
+ case nir_cf_node_if: {
+ nir_if *if_stmt = nir_cf_node_as_if(cur);
+ bool then_ends_in_jump, else_ends_in_jump;
+ progress |= dead_cf_list(&if_stmt->then_list, &then_ends_in_jump);
+ progress |= dead_cf_list(&if_stmt->else_list, &else_ends_in_jump);
+
+ if (then_ends_in_jump && else_ends_in_jump) {
+ *list_ends_in_jump = true;
+ nir_block *next = nir_cf_node_as_block(nir_cf_node_next(cur));
+ /* Only remove the tail if there is something in it: either the
+ * following block has instructions or more nodes come after it.
+ */
+ if (!exec_list_is_empty(&next->instr_list) ||
+ !exec_node_is_tail_sentinel(next->cf_node.node.next)) {
+ remove_after_cf_node(cur);
+ return true;
+ }
+ }
+
+ break;
+ }
+
+ case nir_cf_node_loop: {
+ nir_loop *loop = nir_cf_node_as_loop(cur);
+ /* Whether the loop body ends in a jump is not used here. */
+ bool dummy;
+ progress |= dead_cf_list(&loop->body, &dummy);
+
+ break;
+ }
+
+ default:
+ unreachable("unknown cf node type");
+ }
+
+ prev = cur;
+ }
+
+ return progress;
+}
+
+/*
+ * Runs dead control-flow removal over the body of one function
+ * implementation. Returns true if anything changed; in that case
+ * nir_metadata_preserve() is called with nir_metadata_none.
+ */
+static bool
+opt_dead_cf_impl(nir_function_impl *impl)
+{
+   /* The "ends in jump" result for the top-level body is not needed. */
+   bool body_ends_in_jump;
+
+   if (!dead_cf_list(&impl->body, &body_ends_in_jump))
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_none);
+   return true;
+}
+
+/*
+ * Pass entry point: runs dead control-flow removal on every overload in the
+ * shader that has an implementation. Returns true if any implementation was
+ * changed.
+ */
+bool
+nir_opt_dead_cf(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) {
+         /* Always run the pass; only the accumulated flag is conditional. */
+         if (opt_dead_cf_impl(overload->impl))
+            any_progress = true;
+      }
+   }
+
+   return any_progress;
+}
* If we reuse swizzle in the loop, then output swizzle would be zyzz.
*/
memcpy(swizzle_tmp, swizzle, 4*sizeof(uint8_t));
- for (unsigned i = 0; i < num_components; i++)
+ for (int i = 0; i < num_components; i++)
swizzle[i] = swizzle_tmp[src->swizzle[i]];
return alu;
for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
}
- nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src],
- state->mem_ctx);
+ nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src], ffma);
assert(add->dest.dest.is_ssa);
add->dest.dest.ssa.num_components,
add->dest.dest.ssa.name);
nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
- nir_src_for_ssa(&ffma->dest.dest.ssa),
- state->mem_ctx);
+ nir_src_for_ssa(&ffma->dest.dest.ssa));
nir_instr_insert_before(&add->instr, &ffma->instr);
assert(list_empty(&add->dest.dest.ssa.uses));
nir_phi_instr *phi = nir_instr_as_phi(instr);
nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel);
- nir_src_copy(&sel->src[0].src, &if_stmt->condition, state->mem_ctx);
+ nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel);
/* Splat the condition to all channels */
memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
assert(src->src.is_ssa);
unsigned idx = src->pred == then_block ? 1 : 2;
- nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx);
+ nir_src_copy(&sel->src[idx].src, &src->src, sel);
}
nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
- nir_src_for_ssa(&sel->dest.dest.ssa),
- state->mem_ctx);
+ nir_src_for_ssa(&sel->dest.dest.ssa));
nir_instr_insert_before(&phi->instr, &sel->instr);
nir_instr_remove(&phi->instr);
{
bool *progress = state;
- void *mem_ctx = ralloc_parent(block);
-
nir_foreach_instr_safe(block, instr) {
if (instr->type != nir_instr_type_phi)
break;
continue;
assert(phi->dest.is_ssa);
- nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def),
- mem_ctx);
+ nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
nir_instr_remove(instr);
*progress = true;
*/
#include "nir.h"
+#include "shader_enums.h"
#include <stdio.h>
#include <stdlib.h>
}
typedef struct {
+ FILE *fp;
+ nir_shader *shader;
/** map from nir_variable -> printable name */
struct hash_table *ht;
/* an index used to make new non-conflicting names */
unsigned index;
-} print_var_state;
+} print_state;
static void
-print_register(nir_register *reg, FILE *fp)
+print_register(nir_register *reg, print_state *state)
{
+ FILE *fp = state->fp;
if (reg->name != NULL)
fprintf(fp, "/* %s */ ", reg->name);
if (reg->is_global)
static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" };
static void
-print_register_decl(nir_register *reg, FILE *fp)
+print_register_decl(nir_register *reg, print_state *state)
{
+ FILE *fp = state->fp;
fprintf(fp, "decl_reg %s ", sizes[reg->num_components]);
if (reg->is_packed)
fprintf(fp, "(packed) ");
- print_register(reg, fp);
+ print_register(reg, state);
if (reg->num_array_elems != 0)
fprintf(fp, "[%u]", reg->num_array_elems);
fprintf(fp, "\n");
}
static void
-print_ssa_def(nir_ssa_def *def, FILE *fp)
+print_ssa_def(nir_ssa_def *def, print_state *state)
{
+ FILE *fp = state->fp;
if (def->name != NULL)
fprintf(fp, "/* %s */ ", def->name);
fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index);
}
static void
-print_ssa_use(nir_ssa_def *def, FILE *fp)
+print_ssa_use(nir_ssa_def *def, print_state *state)
{
+ FILE *fp = state->fp;
if (def->name != NULL)
fprintf(fp, "/* %s */ ", def->name);
fprintf(fp, "ssa_%u", def->index);
}
-static void print_src(nir_src *src, FILE *fp);
+static void print_src(nir_src *src, print_state *state);
static void
-print_reg_src(nir_reg_src *src, FILE *fp)
+print_reg_src(nir_reg_src *src, print_state *state)
{
- print_register(src->reg, fp);
+ FILE *fp = state->fp;
+ print_register(src->reg, state);
if (src->reg->num_array_elems != 0) {
fprintf(fp, "[%u", src->base_offset);
if (src->indirect != NULL) {
fprintf(fp, " + ");
- print_src(src->indirect, fp);
+ print_src(src->indirect, state);
}
fprintf(fp, "]");
}
}
static void
-print_reg_dest(nir_reg_dest *dest, FILE *fp)
+print_reg_dest(nir_reg_dest *dest, print_state *state)
{
- print_register(dest->reg, fp);
+ FILE *fp = state->fp;
+ print_register(dest->reg, state);
if (dest->reg->num_array_elems != 0) {
fprintf(fp, "[%u", dest->base_offset);
if (dest->indirect != NULL) {
fprintf(fp, " + ");
- print_src(dest->indirect, fp);
+ print_src(dest->indirect, state);
}
fprintf(fp, "]");
}
}
static void
-print_src(nir_src *src, FILE *fp)
+print_src(nir_src *src, print_state *state)
{
if (src->is_ssa)
- print_ssa_use(src->ssa, fp);
+ print_ssa_use(src->ssa, state);
else
- print_reg_src(&src->reg, fp);
+ print_reg_src(&src->reg, state);
}
static void
-print_dest(nir_dest *dest, FILE *fp)
+print_dest(nir_dest *dest, print_state *state)
{
if (dest->is_ssa)
- print_ssa_def(&dest->ssa, fp);
+ print_ssa_def(&dest->ssa, state);
else
- print_reg_dest(&dest->reg, fp);
+ print_reg_dest(&dest->reg, state);
}
static void
-print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp)
+print_alu_src(nir_alu_instr *instr, unsigned src, print_state *state)
{
+ FILE *fp = state->fp;
+
if (instr->src[src].negate)
fprintf(fp, "-");
if (instr->src[src].abs)
fprintf(fp, "abs(");
- print_src(&instr->src[src].src, fp);
+ print_src(&instr->src[src].src, state);
bool print_swizzle = false;
for (unsigned i = 0; i < 4; i++) {
}
static void
-print_alu_dest(nir_alu_dest *dest, FILE *fp)
+print_alu_dest(nir_alu_dest *dest, print_state *state)
{
+ FILE *fp = state->fp;
/* we're going to print the saturate modifier later, after the opcode */
- print_dest(&dest->dest, fp);
+ print_dest(&dest->dest, state);
if (!dest->dest.is_ssa &&
dest->write_mask != (1 << dest->dest.reg.reg->num_components) - 1) {
}
static void
-print_alu_instr(nir_alu_instr *instr, FILE *fp)
+print_alu_instr(nir_alu_instr *instr, print_state *state)
{
- print_alu_dest(&instr->dest, fp);
+ FILE *fp = state->fp;
+
+ print_alu_dest(&instr->dest, state);
fprintf(fp, " = %s", nir_op_infos[instr->op].name);
if (instr->dest.saturate)
if (i != 0)
fprintf(fp, ", ");
- print_alu_src(instr, i, fp);
+ print_alu_src(instr, i, state);
}
}
static void
-print_var_decl(nir_variable *var, print_var_state *state, FILE *fp)
+print_var_decl(nir_variable *var, print_state *state)
{
+ FILE *fp = state->fp;
+
fprintf(fp, "decl_var ");
const char *const cent = (var->data.centroid) ? "centroid " : "";
const char *const inv = (var->data.invariant) ? "invariant " : "";
const char *const mode[] = { "shader_in ", "shader_out ", "", "",
"uniform ", "shader_storage", "system " };
- const char *const interp[] = { "", "smooth", "flat", "noperspective" };
fprintf(fp, "%s%s%s%s%s ",
- cent, samp, inv, mode[var->data.mode], interp[var->data.interpolation]);
+ cent, samp, inv, mode[var->data.mode],
+ glsl_interp_qualifier_name(var->data.interpolation));
glsl_print_type(var->type, fp);
struct set_entry *entry = NULL;
- if (state)
+ if (state->syms)
entry = _mesa_set_search(state->syms, var->name);
char *name;
var->data.mode == nir_var_shader_out ||
var->data.mode == nir_var_uniform ||
var->data.mode == nir_var_shader_storage) {
- fprintf(fp, " (%u, %u)", var->data.location, var->data.driver_location);
+ const char *loc = NULL;
+ char buf[4];
+
+ switch (state->shader->stage) {
+ case MESA_SHADER_VERTEX:
+ if (var->data.mode == nir_var_shader_in)
+ loc = gl_vert_attrib_name(var->data.location);
+ else if (var->data.mode == nir_var_shader_out)
+ loc = gl_varying_slot_name(var->data.location);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if ((var->data.mode == nir_var_shader_in) ||
+ (var->data.mode == nir_var_shader_out))
+ loc = gl_varying_slot_name(var->data.location);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ if (var->data.mode == nir_var_shader_in)
+ loc = gl_varying_slot_name(var->data.location);
+ else if (var->data.mode == nir_var_shader_out)
+ loc = gl_frag_result_name(var->data.location);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_COMPUTE:
+ default:
+ /* TODO */
+ break;
+ }
+
+ if (!loc) {
+ snprintf(buf, sizeof(buf), "%u", var->data.location);
+ loc = buf;
+ }
+
+ fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
}
fprintf(fp, "\n");
- if (state) {
+ if (state->syms) {
_mesa_set_add(state->syms, name);
_mesa_hash_table_insert(state->ht, var, name);
}
}
static void
-print_var(nir_variable *var, print_var_state *state, FILE *fp)
+print_var(nir_variable *var, print_state *state)
{
+ FILE *fp = state->fp;
const char *name;
- if (state) {
+ if (state->ht) {
struct hash_entry *entry = _mesa_hash_table_search(state->ht, var);
assert(entry != NULL);
}
static void
-print_deref_var(nir_deref_var *deref, print_var_state *state, FILE *fp)
+print_deref_var(nir_deref_var *deref, print_state *state)
{
- print_var(deref->var, state, fp);
+ print_var(deref->var, state);
}
static void
-print_deref_array(nir_deref_array *deref, print_var_state *state, FILE *fp)
+print_deref_array(nir_deref_array *deref, print_state *state)
{
+ FILE *fp = state->fp;
fprintf(fp, "[");
switch (deref->deref_array_type) {
case nir_deref_array_type_direct:
case nir_deref_array_type_indirect:
if (deref->base_offset != 0)
fprintf(fp, "%u + ", deref->base_offset);
- print_src(&deref->indirect, fp);
+ print_src(&deref->indirect, state);
break;
case nir_deref_array_type_wildcard:
fprintf(fp, "*");
static void
print_deref_struct(nir_deref_struct *deref, const struct glsl_type *parent_type,
- print_var_state *state, FILE *fp)
+ print_state *state)
{
+ FILE *fp = state->fp;
fprintf(fp, ".%s", glsl_get_struct_elem_name(parent_type, deref->index));
}
static void
-print_deref(nir_deref_var *deref, print_var_state *state, FILE *fp)
+print_deref(nir_deref_var *deref, print_state *state)
{
nir_deref *tail = &deref->deref;
nir_deref *pretail = NULL;
case nir_deref_type_var:
assert(pretail == NULL);
assert(tail == &deref->deref);
- print_deref_var(deref, state, fp);
+ print_deref_var(deref, state);
break;
case nir_deref_type_array:
assert(pretail != NULL);
- print_deref_array(nir_deref_as_array(tail), state, fp);
+ print_deref_array(nir_deref_as_array(tail), state);
break;
case nir_deref_type_struct:
assert(pretail != NULL);
print_deref_struct(nir_deref_as_struct(tail),
- pretail->type, state, fp);
+ pretail->type, state);
break;
default:
}
static void
-print_intrinsic_instr(nir_intrinsic_instr *instr, print_var_state *state,
- FILE *fp)
+print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
{
unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+ FILE *fp = state->fp;
if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
- print_dest(&instr->dest, fp);
+ print_dest(&instr->dest, state);
fprintf(fp, " = ");
}
if (i != 0)
fprintf(fp, ", ");
- print_src(&instr->src[i], fp);
+ print_src(&instr->src[i], state);
}
fprintf(fp, ") (");
if (i != 0)
fprintf(fp, ", ");
- print_deref(instr->variables[i], state, fp);
+ print_deref(instr->variables[i], state);
}
fprintf(fp, ") (");
}
fprintf(fp, ")");
+
+ if (!state->shader)
+ return;
+
+ struct exec_list *var_list = NULL;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_uniform:
+ case nir_intrinsic_load_uniform_indirect:
+ var_list = &state->shader->uniforms;
+ break;
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_input_indirect:
+ var_list = &state->shader->inputs;
+ break;
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_output_indirect:
+ var_list = &state->shader->outputs;
+ break;
+ default:
+ return;
+ }
+
+ foreach_list_typed(nir_variable, var, node, var_list) {
+ if ((var->data.driver_location == instr->const_index[0]) &&
+ var->name) {
+ fprintf(fp, "\t/* %s */", var->name);
+ break;
+ }
+ }
}
static void
-print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
+print_tex_instr(nir_tex_instr *instr, print_state *state)
{
- print_dest(&instr->dest, fp);
+ FILE *fp = state->fp;
+
+ print_dest(&instr->dest, state);
fprintf(fp, " = ");
case nir_texop_query_levels:
fprintf(fp, "query_levels ");
break;
+ case nir_texop_texture_samples:
+ fprintf(fp, "texture_samples ");
+ break;
default:
unreachable("Invalid texture operation");
}
for (unsigned i = 0; i < instr->num_srcs; i++) {
- print_src(&instr->src[i].src, fp);
+ print_src(&instr->src[i].src, state);
fprintf(fp, " ");
}
if (instr->sampler) {
- print_deref(instr->sampler, state, fp);
+ print_deref(instr->sampler, state);
} else {
fprintf(fp, "%u", instr->sampler_index);
}
}
static void
-print_call_instr(nir_call_instr *instr, print_var_state *state, FILE *fp)
+print_call_instr(nir_call_instr *instr, print_state *state)
{
+ FILE *fp = state->fp;
+
fprintf(fp, "call %s ", instr->callee->function->name);
for (unsigned i = 0; i < instr->num_params; i++) {
if (i != 0)
fprintf(fp, ", ");
- print_deref(instr->params[i], state, fp);
+ print_deref(instr->params[i], state);
}
if (instr->return_deref != NULL) {
if (instr->num_params != 0)
fprintf(fp, ", ");
fprintf(fp, "returning ");
- print_deref(instr->return_deref, state, fp);
+ print_deref(instr->return_deref, state);
}
}
static void
-print_load_const_instr(nir_load_const_instr *instr, unsigned tabs, FILE *fp)
+print_load_const_instr(nir_load_const_instr *instr, print_state *state)
{
- print_ssa_def(&instr->def, fp);
+ FILE *fp = state->fp;
+
+ print_ssa_def(&instr->def, state);
fprintf(fp, " = load_const (");
}
static void
-print_jump_instr(nir_jump_instr *instr, FILE *fp)
+print_jump_instr(nir_jump_instr *instr, print_state *state)
{
+ FILE *fp = state->fp;
+
switch (instr->type) {
case nir_jump_break:
fprintf(fp, "break");
}
static void
-print_ssa_undef_instr(nir_ssa_undef_instr* instr, FILE *fp)
+print_ssa_undef_instr(nir_ssa_undef_instr* instr, print_state *state)
{
- print_ssa_def(&instr->def, fp);
+ FILE *fp = state->fp;
+ print_ssa_def(&instr->def, state);
fprintf(fp, " = undefined");
}
static void
-print_phi_instr(nir_phi_instr *instr, FILE *fp)
+print_phi_instr(nir_phi_instr *instr, print_state *state)
{
- print_dest(&instr->dest, fp);
+ FILE *fp = state->fp;
+ print_dest(&instr->dest, state);
fprintf(fp, " = phi ");
nir_foreach_phi_src(instr, src) {
if (&src->node != exec_list_get_head(&instr->srcs))
fprintf(fp, ", ");
fprintf(fp, "block_%u: ", src->pred->index);
- print_src(&src->src, fp);
+ print_src(&src->src, state);
}
}
static void
-print_parallel_copy_instr(nir_parallel_copy_instr *instr, FILE *fp)
+print_parallel_copy_instr(nir_parallel_copy_instr *instr, print_state *state)
{
+ FILE *fp = state->fp;
nir_foreach_parallel_copy_entry(instr, entry) {
if (&entry->node != exec_list_get_head(&instr->entries))
fprintf(fp, "; ");
- print_dest(&entry->dest, fp);
+ print_dest(&entry->dest, state);
fprintf(fp, " = ");
- print_src(&entry->src, fp);
+ print_src(&entry->src, state);
}
}
static void
-print_instr(const nir_instr *instr, print_var_state *state, unsigned tabs, FILE *fp)
+print_instr(const nir_instr *instr, print_state *state, unsigned tabs)
{
+ FILE *fp = state->fp;
print_tabs(tabs, fp);
switch (instr->type) {
case nir_instr_type_alu:
- print_alu_instr(nir_instr_as_alu(instr), fp);
+ print_alu_instr(nir_instr_as_alu(instr), state);
break;
case nir_instr_type_call:
- print_call_instr(nir_instr_as_call(instr), state, fp);
+ print_call_instr(nir_instr_as_call(instr), state);
break;
case nir_instr_type_intrinsic:
- print_intrinsic_instr(nir_instr_as_intrinsic(instr), state, fp);
+ print_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
break;
case nir_instr_type_tex:
- print_tex_instr(nir_instr_as_tex(instr), state, fp);
+ print_tex_instr(nir_instr_as_tex(instr), state);
break;
case nir_instr_type_load_const:
- print_load_const_instr(nir_instr_as_load_const(instr), tabs, fp);
+ print_load_const_instr(nir_instr_as_load_const(instr), state);
break;
case nir_instr_type_jump:
- print_jump_instr(nir_instr_as_jump(instr), fp);
+ print_jump_instr(nir_instr_as_jump(instr), state);
break;
case nir_instr_type_ssa_undef:
- print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), fp);
+ print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
break;
case nir_instr_type_phi:
- print_phi_instr(nir_instr_as_phi(instr), fp);
+ print_phi_instr(nir_instr_as_phi(instr), state);
break;
case nir_instr_type_parallel_copy:
- print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), fp);
+ print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), state);
break;
default:
return (int) block1->index - (int) block2->index;
}
-static void print_cf_node(nir_cf_node *node, print_var_state *state,
- unsigned tabs, FILE *fp);
+static void print_cf_node(nir_cf_node *node, print_state *state,
+ unsigned tabs);
static void
-print_block(nir_block *block, print_var_state *state, unsigned tabs, FILE *fp)
+print_block(nir_block *block, print_state *state, unsigned tabs)
{
+ FILE *fp = state->fp;
+
print_tabs(tabs, fp);
fprintf(fp, "block block_%u:\n", block->index);
free(preds);
nir_foreach_instr(block, instr) {
- print_instr(instr, state, tabs, fp);
+ print_instr(instr, state, tabs);
fprintf(fp, "\n");
}
}
static void
-print_if(nir_if *if_stmt, print_var_state *state, unsigned tabs, FILE *fp)
+print_if(nir_if *if_stmt, print_state *state, unsigned tabs)
{
+ FILE *fp = state->fp;
+
print_tabs(tabs, fp);
fprintf(fp, "if ");
- print_src(&if_stmt->condition, fp);
+ print_src(&if_stmt->condition, state);
fprintf(fp, " {\n");
foreach_list_typed(nir_cf_node, node, node, &if_stmt->then_list) {
- print_cf_node(node, state, tabs + 1, fp);
+ print_cf_node(node, state, tabs + 1);
}
print_tabs(tabs, fp);
fprintf(fp, "} else {\n");
foreach_list_typed(nir_cf_node, node, node, &if_stmt->else_list) {
- print_cf_node(node, state, tabs + 1, fp);
+ print_cf_node(node, state, tabs + 1);
}
print_tabs(tabs, fp);
fprintf(fp, "}\n");
}
static void
-print_loop(nir_loop *loop, print_var_state *state, unsigned tabs, FILE *fp)
+print_loop(nir_loop *loop, print_state *state, unsigned tabs)
{
+ FILE *fp = state->fp;
+
print_tabs(tabs, fp);
fprintf(fp, "loop {\n");
foreach_list_typed(nir_cf_node, node, node, &loop->body) {
- print_cf_node(node, state, tabs + 1, fp);
+ print_cf_node(node, state, tabs + 1);
}
print_tabs(tabs, fp);
fprintf(fp, "}\n");
}
static void
-print_cf_node(nir_cf_node *node, print_var_state *state, unsigned int tabs,
- FILE *fp)
+print_cf_node(nir_cf_node *node, print_state *state, unsigned int tabs)
{
switch (node->type) {
case nir_cf_node_block:
- print_block(nir_cf_node_as_block(node), state, tabs, fp);
+ print_block(nir_cf_node_as_block(node), state, tabs);
break;
case nir_cf_node_if:
- print_if(nir_cf_node_as_if(node), state, tabs, fp);
+ print_if(nir_cf_node_as_if(node), state, tabs);
break;
case nir_cf_node_loop:
- print_loop(nir_cf_node_as_loop(node), state, tabs, fp);
+ print_loop(nir_cf_node_as_loop(node), state, tabs);
break;
default:
}
static void
-print_function_impl(nir_function_impl *impl, print_var_state *state, FILE *fp)
+print_function_impl(nir_function_impl *impl, print_state *state)
{
+ FILE *fp = state->fp;
+
fprintf(fp, "\nimpl %s ", impl->overload->function->name);
for (unsigned i = 0; i < impl->num_params; i++) {
if (i != 0)
fprintf(fp, ", ");
- print_var(impl->params[i], state, fp);
+ print_var(impl->params[i], state);
}
if (impl->return_var != NULL) {
if (impl->num_params != 0)
fprintf(fp, ", ");
fprintf(fp, "returning ");
- print_var(impl->return_var, state, fp);
+ print_var(impl->return_var, state);
}
fprintf(fp, "{\n");
foreach_list_typed(nir_variable, var, node, &impl->locals) {
fprintf(fp, "\t");
- print_var_decl(var, state, fp);
+ print_var_decl(var, state);
}
foreach_list_typed(nir_register, reg, node, &impl->registers) {
fprintf(fp, "\t");
- print_register_decl(reg, fp);
+ print_register_decl(reg, state);
}
nir_index_blocks(impl);
foreach_list_typed(nir_cf_node, node, node, &impl->body) {
- print_cf_node(node, state, 1, fp);
+ print_cf_node(node, state, 1);
}
fprintf(fp, "\tblock block_%u:\n}\n\n", impl->end_block->index);
static void
print_function_overload(nir_function_overload *overload,
- print_var_state *state, FILE *fp)
+ print_state *state)
{
+ FILE *fp = state->fp;
+
fprintf(fp, "decl_overload %s ", overload->function->name);
for (unsigned i = 0; i < overload->num_params; i++) {
fprintf(fp, "\n");
if (overload->impl != NULL) {
- print_function_impl(overload->impl, state, fp);
+ print_function_impl(overload->impl, state);
return;
}
}
static void
-print_function(nir_function *func, print_var_state *state, FILE *fp)
+print_function(nir_function *func, print_state *state)
{
foreach_list_typed(nir_function_overload, overload, node, &func->overload_list) {
- print_function_overload(overload, state, fp);
+ print_function_overload(overload, state);
}
}
static void
-init_print_state(print_var_state *state)
+init_print_state(print_state *state, nir_shader *shader, FILE *fp)
{
+ state->fp = fp;
+ state->shader = shader;
state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
state->syms = _mesa_set_create(NULL, _mesa_key_hash_string,
}
static void
-destroy_print_state(print_var_state *state)
+destroy_print_state(print_state *state)
{
_mesa_hash_table_destroy(state->ht, NULL);
_mesa_set_destroy(state->syms, NULL);
void
nir_print_shader(nir_shader *shader, FILE *fp)
{
- print_var_state state;
- init_print_state(&state);
+ print_state state;
+ init_print_state(&state, shader, fp);
+
+ fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage));
foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
- print_var_decl(var, &state, fp);
+ print_var_decl(var, &state);
}
foreach_list_typed(nir_variable, var, node, &shader->inputs) {
- print_var_decl(var, &state, fp);
+ print_var_decl(var, &state);
}
foreach_list_typed(nir_variable, var, node, &shader->outputs) {
- print_var_decl(var, &state, fp);
+ print_var_decl(var, &state);
}
foreach_list_typed(nir_variable, var, node, &shader->globals) {
- print_var_decl(var, &state, fp);
+ print_var_decl(var, &state);
}
foreach_list_typed(nir_variable, var, node, &shader->system_values) {
- print_var_decl(var, &state, fp);
+ print_var_decl(var, &state);
}
foreach_list_typed(nir_register, reg, node, &shader->registers) {
- print_register_decl(reg, fp);
+ print_register_decl(reg, &state);
}
foreach_list_typed(nir_function, func, node, &shader->functions) {
- print_function(func, &state, fp);
+ print_function(func, &state);
}
destroy_print_state(&state);
void
nir_print_instr(const nir_instr *instr, FILE *fp)
{
- print_instr(instr, NULL, 0, fp);
+ print_state state = {
+ .fp = fp,
+ };
+ print_instr(instr, &state, 0);
+
}
}
}
-static void
+static bool
remove_dead_vars(struct exec_list *var_list, struct set *live)
{
+ bool progress = false;
+
foreach_list_typed_safe(nir_variable, var, node, var_list) {
struct set_entry *entry = _mesa_set_search(live, var);
if (entry == NULL) {
exec_node_remove(&var->node);
ralloc_free(var);
+ progress = true;
}
}
+
+ return progress;
}
-void
+bool
nir_remove_dead_variables(nir_shader *shader)
{
+ bool progress = false;
struct set *live =
_mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
add_var_use_shader(shader, live);
- remove_dead_vars(&shader->globals, live);
+ progress = remove_dead_vars(&shader->globals, live) || progress;
nir_foreach_overload(shader, overload) {
if (overload->impl)
- remove_dead_vars(&overload->impl->locals, live);
+ progress = remove_dead_vars(&overload->impl->locals, live) || progress;
}
_mesa_set_destroy(live, NULL);
+ return progress;
}
swizzle = identity_swizzle;
}
- for (int i = 0; i < num_components; ++i)
+ for (unsigned i = 0; i < num_components; ++i)
new_swizzle[i] = instr->src[src].swizzle[swizzle[i]];
switch (value->type) {
assert(!instr->src[src].abs && !instr->src[src].negate);
- for (int i = 0; i < num_components; ++i) {
+ for (unsigned i = 0; i < num_components; ++i) {
if (state->variables[var->variable].swizzle[i] != new_swizzle[i])
return false;
}
state->variables[var->variable].abs = false;
state->variables[var->variable].negate = false;
- for (int i = 0; i < 4; ++i) {
+ for (unsigned i = 0; i < 4; ++i) {
if (i < num_components)
state->variables[var->variable].swizzle[i] = new_swizzle[i];
else
nir_instr_insert_before(&instr->instr, &mov->instr);
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
- nir_src_for_ssa(&mov->dest.dest.ssa), mem_ctx);
+ nir_src_for_ssa(&mov->dest.dest.ssa));
/* We know this one has no more uses because we just rewrote them all,
* so we can remove it. The rest of the matched expression, however, we
struct split_var_copies_state {
void *mem_ctx;
void *dead_ctx;
+ bool progress;
};
/* Recursively constructs deref chains to split a copy instruction into
* remove the old one later.
*/
nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
+ state->progress = true;
}
break;
return true;
}
-static void
+static bool
split_var_copies_impl(nir_function_impl *impl)
{
struct split_var_copies_state state;
state.mem_ctx = ralloc_parent(impl);
state.dead_ctx = ralloc_context(NULL);
+ state.progress = false;
nir_foreach_block(impl, split_var_copies_block, &state);
ralloc_free(state.dead_ctx);
+
+ return state.progress;
}
-void
+bool
nir_split_var_copies(nir_shader *shader)
{
+ bool progress = false;
+
nir_foreach_overload(shader, overload) {
if (overload->impl)
- split_var_copies_impl(overload->impl);
+ progress = split_var_copies_impl(overload->impl) || progress;
}
+
+ return progress;
}
return (glsl_base_type)type->sampler_type;
}
+unsigned
+glsl_get_record_location_offset(const struct glsl_type *type,
+ unsigned length)
+{
+ return type->record_location_offset(length);
+}
+
bool
glsl_type_is_void(const glsl_type *type)
{
#pragma once
+#include <stdio.h>
+
/* C wrapper around glsl_types.h */
#include "../glsl_types.h"
struct glsl_type;
#endif
-#include <stdio.h>
-
void glsl_print_type(const struct glsl_type *type, FILE *fp);
void glsl_print_struct(const struct glsl_type *type, FILE *fp);
enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type);
enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type);
+unsigned glsl_get_record_location_offset(const struct glsl_type *type,
+ unsigned length);
+
bool glsl_type_is_void(const struct glsl_type *type);
bool glsl_type_is_vector(const struct glsl_type *type);
bool glsl_type_is_scalar(const struct glsl_type *type);
}
assert(block->successors[0] != NULL);
+ assert(block->successors[0] != block->successors[1]);
for (unsigned i = 0; i < 2; i++) {
if (block->successors[i] != NULL) {
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <gtest/gtest.h>
+#include "nir.h"
+#include "nir_builder.h"
+
+class nir_cf_test : public ::testing::Test {
+protected:
+ nir_cf_test();
+ ~nir_cf_test();
+
+ nir_builder b;
+ nir_shader *shader;
+ nir_function_impl *impl;
+};
+
+nir_cf_test::nir_cf_test()
+{
+ static const nir_shader_compiler_options options = { };
+ shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, &options);
+ nir_function *func = nir_function_create(shader, "main");
+ nir_function_overload *overload = nir_function_overload_create(func);
+ impl = nir_function_impl_create(overload);
+
+ nir_builder_init(&b, impl);
+}
+
+nir_cf_test::~nir_cf_test()
+{
+ ralloc_free(shader);
+}
+
+TEST_F(nir_cf_test, delete_break_in_loop)
+{
+ /* Create IR:
+ *
+ * while (...) { break; }
+ */
+ nir_loop *loop = nir_loop_create(shader);
+ nir_cf_node_insert(nir_after_cf_list(&impl->body), &loop->cf_node);
+
+ b.cursor = nir_after_cf_list(&loop->body);
+
+ nir_jump_instr *jump = nir_jump_instr_create(shader, nir_jump_break);
+ nir_builder_instr_insert(&b, &jump->instr);
+
+ /* At this point, we should have:
+ *
+ * impl main {
+ * block block_0:
+ * // preds:
+ * // succs: block_1
+ * loop {
+ * block block_1:
+ * // preds: block_0
+ * break
+ * // succs: block_2
+ * }
+ * block block_2:
+ * // preds: block_1
+ * // succs: block_3
+ * block block_3:
+ * }
+ */
+ nir_block *block_0 = nir_start_block(impl);
+ nir_block *block_1 = nir_cf_node_as_block(nir_loop_first_cf_node(loop));
+ nir_block *block_2 = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
+ nir_block *block_3 = impl->end_block;
+ ASSERT_EQ(nir_cf_node_block, block_0->cf_node.type);
+ ASSERT_EQ(nir_cf_node_block, block_1->cf_node.type);
+ ASSERT_EQ(nir_cf_node_block, block_2->cf_node.type);
+ ASSERT_EQ(nir_cf_node_block, block_3->cf_node.type);
+
+ /* Verify the successors and predecessors. */
+ EXPECT_EQ(block_1, block_0->successors[0]);
+ EXPECT_EQ(NULL, block_0->successors[1]);
+ EXPECT_EQ(block_2, block_1->successors[0]);
+ EXPECT_EQ(NULL, block_1->successors[1]);
+ EXPECT_EQ(block_3, block_2->successors[0]);
+ EXPECT_EQ(NULL, block_2->successors[1]);
+ EXPECT_EQ(NULL, block_3->successors[0]);
+ EXPECT_EQ(NULL, block_3->successors[1]);
+ EXPECT_EQ(0, block_0->predecessors->entries);
+ EXPECT_EQ(1, block_1->predecessors->entries);
+ EXPECT_EQ(1, block_2->predecessors->entries);
+ EXPECT_EQ(1, block_3->predecessors->entries);
+ EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_0));
+ EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1));
+ EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2));
+
+ nir_print_shader(shader, stderr);
+
+ /* Now remove the break. */
+ nir_instr_remove(&jump->instr);
+
+ nir_print_shader(shader, stderr);
+
+ /* At this point, we should have:
+ *
+ * impl main {
+ * block block_0:
+ * // preds:
+ * // succs: block_1
+ * loop {
+ * block block_1:
+ * // preds: block_0 block_1
+ * // succs: block_1 block_2 (block_2 is a fake successor)
+ * }
+ * block block_2:
+ * // preds: block_1
+ * // succs: block_3
+ * block block_3:
+ * }
+ *
+ * Re-verify the predecessors and successors.
+ */
+ EXPECT_EQ(block_1, block_0->successors[0]);
+ EXPECT_EQ(NULL, block_0->successors[1]);
+ EXPECT_EQ(block_1, block_1->successors[0]); /* back to itself */
+ EXPECT_EQ(block_2, block_1->successors[1]); /* fake successor */
+ EXPECT_EQ(block_3, block_2->successors[0]);
+ EXPECT_EQ(NULL, block_2->successors[1]);
+ EXPECT_EQ(NULL, block_3->successors[0]);
+ EXPECT_EQ(NULL, block_3->successors[1]);
+ EXPECT_EQ(0, block_0->predecessors->entries);
+ EXPECT_EQ(2, block_1->predecessors->entries);
+ EXPECT_EQ(1, block_2->predecessors->entries);
+ EXPECT_EQ(1, block_3->predecessors->entries);
+ EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_0));
+ EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_1));
+ EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1));
+ EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2));
+
+ nir_metadata_require(impl, nir_metadata_dominance);
+}
#include "ir_basic_block.h"
#include "ir_optimization.h"
#include "glsl_types.h"
+#include "util/hash_table.h"
namespace {
killed_all = false;
mem_ctx = ralloc_context(0);
this->acp = new(mem_ctx) exec_list;
- this->kills = new(mem_ctx) exec_list;
+ this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
}
~ir_constant_propagation_visitor()
{
* Hash table of kill_entry, keyed by variable: The masks of variables
* whose values were killed in this block.
*/
- exec_list *kills;
+ hash_table *kills;
bool progress;
* main() at link time, so they're irrelevant to us.
*/
exec_list *orig_acp = this->acp;
- exec_list *orig_kills = this->kills;
+ hash_table *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
this->acp = new(mem_ctx) exec_list;
- this->kills = new(mem_ctx) exec_list;
+ this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
this->killed_all = false;
visit_list_elements(this, &ir->body);
ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
{
exec_list *orig_acp = this->acp;
- exec_list *orig_kills = this->kills;
+ hash_table *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
this->acp = new(mem_ctx) exec_list;
- this->kills = new(mem_ctx) exec_list;
+ this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
this->killed_all = false;
/* Populate the initial acp with a constant of the original */
orig_acp->make_empty();
}
- exec_list *new_kills = this->kills;
+ hash_table *new_kills = this->kills;
this->kills = orig_kills;
this->acp = orig_acp;
this->killed_all = this->killed_all || orig_killed_all;
- foreach_in_list(kill_entry, k, new_kills) {
+ hash_entry *htk;
+ hash_table_foreach(new_kills, htk) {
+ kill_entry *k = (kill_entry *) htk->data;
kill(k->var, k->write_mask);
}
}
ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
{
exec_list *orig_acp = this->acp;
- exec_list *orig_kills = this->kills;
+ hash_table *orig_kills = this->kills;
bool orig_killed_all = this->killed_all;
/* FINISHME: For now, the initial acp for loops is totally empty.
* cloned minus the killed entries after the first run through.
*/
this->acp = new(mem_ctx) exec_list;
- this->kills = new(mem_ctx) exec_list;
+ this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
this->killed_all = false;
visit_list_elements(this, &ir->body_instructions);
orig_acp->make_empty();
}
- exec_list *new_kills = this->kills;
+ hash_table *new_kills = this->kills;
this->kills = orig_kills;
this->acp = orig_acp;
this->killed_all = this->killed_all || orig_killed_all;
- foreach_in_list(kill_entry, k, new_kills) {
+ hash_entry *htk;
+ hash_table_foreach(new_kills, htk) {
+ kill_entry *k = (kill_entry *) htk->data;
kill(k->var, k->write_mask);
}
/* Add this writemask of the variable to the list of killed
* variables in this block.
*/
- foreach_in_list(kill_entry, entry, this->kills) {
- if (entry->var == var) {
- entry->write_mask |= write_mask;
- return;
- }
+ hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var);
+ if (kill_hash_entry) {
+ kill_entry *entry = (kill_entry *) kill_hash_entry->data;
+ entry->write_mask |= write_mask;
+ return;
}
/* Not already in the hash table. Make new entry. */
- this->kills->push_tail(new(this->mem_ctx) kill_entry(var, write_mask));
+ _mesa_hash_table_insert(this->kills, var,
+ new(this->mem_ctx) kill_entry(var, write_mask));
}
/**
#include "ir_visitor.h"
#include "ir_optimization.h"
#include "glsl_types.h"
+#include "util/hash_table.h"
namespace {
struct assignment_entry {
- exec_node link;
int assignment_count;
ir_variable *var;
ir_constant *constval;
virtual ir_visitor_status visit_enter(ir_assignment *);
virtual ir_visitor_status visit_enter(ir_call *);
- exec_list list;
+ struct hash_table *ht;
};
} /* unnamed namespace */
static struct assignment_entry *
-get_assignment_entry(ir_variable *var, exec_list *list)
+get_assignment_entry(ir_variable *var, struct hash_table *ht)
{
+ struct hash_entry *hte = _mesa_hash_table_search(ht, var);
struct assignment_entry *entry;
- foreach_list_typed(struct assignment_entry, entry, link, list) {
- if (entry->var == var)
- return entry;
+ if (hte) {
+ entry = (struct assignment_entry *) hte->data;
+ } else {
+ entry = (struct assignment_entry *) calloc(1, sizeof(*entry));
+ entry->var = var;
+ _mesa_hash_table_insert(ht, var, entry);
}
- entry = (struct assignment_entry *)calloc(1, sizeof(*entry));
- entry->var = var;
- list->push_head(&entry->link);
return entry;
}
ir_visitor_status
ir_constant_variable_visitor::visit(ir_variable *ir)
{
- struct assignment_entry *entry = get_assignment_entry(ir, &this->list);
+ struct assignment_entry *entry = get_assignment_entry(ir, this->ht);
entry->our_scope = true;
return visit_continue;
}
ir_constant *constval;
struct assignment_entry *entry;
- entry = get_assignment_entry(ir->lhs->variable_referenced(), &this->list);
+ entry = get_assignment_entry(ir->lhs->variable_referenced(), this->ht);
assert(entry);
entry->assignment_count++;
struct assignment_entry *entry;
assert(var);
- entry = get_assignment_entry(var, &this->list);
+ entry = get_assignment_entry(var, this->ht);
entry->assignment_count++;
}
}
struct assignment_entry *entry;
assert(var);
- entry = get_assignment_entry(var, &this->list);
+ entry = get_assignment_entry(var, this->ht);
entry->assignment_count++;
}
bool progress = false;
ir_constant_variable_visitor v;
+ v.ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
v.run(instructions);
- while (!v.list.is_empty()) {
-
- struct assignment_entry *entry;
- entry = exec_node_data(struct assignment_entry, v.list.head, link);
+ struct hash_entry *hte;
+ hash_table_foreach(v.ht, hte) {
+ struct assignment_entry *entry = (struct assignment_entry *) hte->data;
if (entry->assignment_count == 1 && entry->constval && entry->our_scope) {
entry->var->constant_value = entry->constval;
progress = true;
}
- entry->link.remove();
+ hte->data = NULL;
free(entry);
}
+ _mesa_hash_table_destroy(v.ht, NULL);
return progress;
}
* information, so removing these variables from the user shader will
* cause problems later.
*
+ * For compute shaders, gl_GlobalInvocationID has some dependencies, so
+ * we avoid removing these dependencies.
+ *
+ * We also avoid removing gl_GlobalInvocationID at this stage because it
+ * might be used by a linked shader. In this case it still needs to be
+ * initialized by the main function.
+ *
+ * gl_GlobalInvocationID =
+ * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
+ *
+ * Similarly, we initialize gl_LocalInvocationIndex in the main function:
+ *
+ * gl_LocalInvocationIndex =
+ * gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
+ * gl_LocalInvocationID.y * gl_WorkGroupSize.x +
+ * gl_LocalInvocationID.x;
+ *
* Matrix uniforms with "Transpose" are not eliminated because there's
* an optimization pass that can turn references to the regular matrix
* into references to the transpose matrix. Eliminating the transpose
*/
if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0
|| strcmp(var->name, "gl_Vertex") == 0
+ || strcmp(var->name, "gl_WorkGroupID") == 0
+ || strcmp(var->name, "gl_WorkGroupSize") == 0
+ || strcmp(var->name, "gl_LocalInvocationID") == 0
+ || strcmp(var->name, "gl_GlobalInvocationID") == 0
+ || strcmp(var->name, "gl_LocalInvocationIndex") == 0
|| strstr(var->name, "Transpose") != NULL)
continue;
* layouts, do not eliminate it.
*/
if (entry->var->is_in_buffer_block()) {
- const glsl_type *const block_type =
- entry->var->is_interface_instance()
- ? entry->var->type : entry->var->get_interface_type();
-
- if (block_type->interface_packing != GLSL_INTERFACE_PACKING_PACKED)
+ if (entry->var->get_interface_type()->interface_packing !=
+ GLSL_INTERFACE_PACKING_PACKED)
continue;
}
case ir_tex:
case ir_lod:
case ir_query_levels:
+ case ir_texture_samples:
break;
case ir_txb:
if (do_graft(&ir->lod_info.bias))
link_shaders(struct gl_context *ctx, struct gl_shader_program *prog);
extern void
-build_program_resource_list(struct gl_context *ctx,
- struct gl_shader_program *shProg);
+build_program_resource_list(struct gl_shader_program *shProg);
extern void
linker_error(struct gl_shader_program *prog, const char *fmt, ...)
--- /dev/null
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "glsl/shader_enums.h"
+#include "util/macros.h"
+
+#define ENUM(x) [x] = #x
+#define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? names[(val)] : "UNKNOWN")
+
+const char * gl_shader_stage_name(gl_shader_stage stage)
+{
+ static const char *names[] = {
+ ENUM(MESA_SHADER_VERTEX),
+ ENUM(MESA_SHADER_TESS_CTRL),
+ ENUM(MESA_SHADER_TESS_EVAL),
+ ENUM(MESA_SHADER_GEOMETRY),
+ ENUM(MESA_SHADER_FRAGMENT),
+ ENUM(MESA_SHADER_COMPUTE),
+ };
+ return NAME(stage);
+}
+
+const char * gl_vert_attrib_name(gl_vert_attrib attrib)
+{
+ static const char *names[] = {
+ ENUM(VERT_ATTRIB_POS),
+ ENUM(VERT_ATTRIB_WEIGHT),
+ ENUM(VERT_ATTRIB_NORMAL),
+ ENUM(VERT_ATTRIB_COLOR0),
+ ENUM(VERT_ATTRIB_COLOR1),
+ ENUM(VERT_ATTRIB_FOG),
+ ENUM(VERT_ATTRIB_COLOR_INDEX),
+ ENUM(VERT_ATTRIB_EDGEFLAG),
+ ENUM(VERT_ATTRIB_TEX0),
+ ENUM(VERT_ATTRIB_TEX1),
+ ENUM(VERT_ATTRIB_TEX2),
+ ENUM(VERT_ATTRIB_TEX3),
+ ENUM(VERT_ATTRIB_TEX4),
+ ENUM(VERT_ATTRIB_TEX5),
+ ENUM(VERT_ATTRIB_TEX6),
+ ENUM(VERT_ATTRIB_TEX7),
+ ENUM(VERT_ATTRIB_POINT_SIZE),
+ ENUM(VERT_ATTRIB_GENERIC0),
+ ENUM(VERT_ATTRIB_GENERIC1),
+ ENUM(VERT_ATTRIB_GENERIC2),
+ ENUM(VERT_ATTRIB_GENERIC3),
+ ENUM(VERT_ATTRIB_GENERIC4),
+ ENUM(VERT_ATTRIB_GENERIC5),
+ ENUM(VERT_ATTRIB_GENERIC6),
+ ENUM(VERT_ATTRIB_GENERIC7),
+ ENUM(VERT_ATTRIB_GENERIC8),
+ ENUM(VERT_ATTRIB_GENERIC9),
+ ENUM(VERT_ATTRIB_GENERIC10),
+ ENUM(VERT_ATTRIB_GENERIC11),
+ ENUM(VERT_ATTRIB_GENERIC12),
+ ENUM(VERT_ATTRIB_GENERIC13),
+ ENUM(VERT_ATTRIB_GENERIC14),
+ ENUM(VERT_ATTRIB_GENERIC15),
+ };
+ return NAME(attrib);
+}
+
+const char * gl_varying_slot_name(gl_varying_slot slot)
+{
+ static const char *names[] = {
+ ENUM(VARYING_SLOT_POS),
+ ENUM(VARYING_SLOT_COL0),
+ ENUM(VARYING_SLOT_COL1),
+ ENUM(VARYING_SLOT_FOGC),
+ ENUM(VARYING_SLOT_TEX0),
+ ENUM(VARYING_SLOT_TEX1),
+ ENUM(VARYING_SLOT_TEX2),
+ ENUM(VARYING_SLOT_TEX3),
+ ENUM(VARYING_SLOT_TEX4),
+ ENUM(VARYING_SLOT_TEX5),
+ ENUM(VARYING_SLOT_TEX6),
+ ENUM(VARYING_SLOT_TEX7),
+ ENUM(VARYING_SLOT_PSIZ),
+ ENUM(VARYING_SLOT_BFC0),
+ ENUM(VARYING_SLOT_BFC1),
+ ENUM(VARYING_SLOT_EDGE),
+ ENUM(VARYING_SLOT_CLIP_VERTEX),
+ ENUM(VARYING_SLOT_CLIP_DIST0),
+ ENUM(VARYING_SLOT_CLIP_DIST1),
+ ENUM(VARYING_SLOT_PRIMITIVE_ID),
+ ENUM(VARYING_SLOT_LAYER),
+ ENUM(VARYING_SLOT_VIEWPORT),
+ ENUM(VARYING_SLOT_FACE),
+ ENUM(VARYING_SLOT_PNTC),
+ ENUM(VARYING_SLOT_TESS_LEVEL_OUTER),
+ ENUM(VARYING_SLOT_TESS_LEVEL_INNER),
+ ENUM(VARYING_SLOT_VAR0),
+ ENUM(VARYING_SLOT_VAR1),
+ ENUM(VARYING_SLOT_VAR2),
+ ENUM(VARYING_SLOT_VAR3),
+ ENUM(VARYING_SLOT_VAR4),
+ ENUM(VARYING_SLOT_VAR5),
+ ENUM(VARYING_SLOT_VAR6),
+ ENUM(VARYING_SLOT_VAR7),
+ ENUM(VARYING_SLOT_VAR8),
+ ENUM(VARYING_SLOT_VAR9),
+ ENUM(VARYING_SLOT_VAR10),
+ ENUM(VARYING_SLOT_VAR11),
+ ENUM(VARYING_SLOT_VAR12),
+ ENUM(VARYING_SLOT_VAR13),
+ ENUM(VARYING_SLOT_VAR14),
+ ENUM(VARYING_SLOT_VAR15),
+ ENUM(VARYING_SLOT_VAR16),
+ ENUM(VARYING_SLOT_VAR17),
+ ENUM(VARYING_SLOT_VAR18),
+ ENUM(VARYING_SLOT_VAR19),
+ ENUM(VARYING_SLOT_VAR20),
+ ENUM(VARYING_SLOT_VAR21),
+ ENUM(VARYING_SLOT_VAR22),
+ ENUM(VARYING_SLOT_VAR23),
+ ENUM(VARYING_SLOT_VAR24),
+ ENUM(VARYING_SLOT_VAR25),
+ ENUM(VARYING_SLOT_VAR26),
+ ENUM(VARYING_SLOT_VAR27),
+ ENUM(VARYING_SLOT_VAR28),
+ ENUM(VARYING_SLOT_VAR29),
+ ENUM(VARYING_SLOT_VAR30),
+ ENUM(VARYING_SLOT_VAR31),
+ };
+ return NAME(slot);
+}
+
+/**
+ * Return the enumerator spelling of a gl_system_value, for debug output.
+ *
+ * The table is indexed by enum value through designated initializers, so
+ * every enumerator below SYSTEM_VALUE_MAX needs an entry here; a value with
+ * no entry, or one past the end of the table, is reported as "UNKNOWN" by
+ * the NAME() macro.
+ */
+const char * gl_system_value_name(gl_system_value sysval)
+{
+ static const char *names[] = {
+ ENUM(SYSTEM_VALUE_VERTEX_ID),
+ ENUM(SYSTEM_VALUE_INSTANCE_ID),
+ ENUM(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE),
+ ENUM(SYSTEM_VALUE_BASE_VERTEX),
+ ENUM(SYSTEM_VALUE_INVOCATION_ID),
+ ENUM(SYSTEM_VALUE_FRONT_FACE),
+ ENUM(SYSTEM_VALUE_SAMPLE_ID),
+ ENUM(SYSTEM_VALUE_SAMPLE_POS),
+ ENUM(SYSTEM_VALUE_SAMPLE_MASK_IN),
+ ENUM(SYSTEM_VALUE_TESS_COORD),
+ ENUM(SYSTEM_VALUE_VERTICES_IN),
+ ENUM(SYSTEM_VALUE_PRIMITIVE_ID),
+ ENUM(SYSTEM_VALUE_TESS_LEVEL_OUTER),
+ ENUM(SYSTEM_VALUE_TESS_LEVEL_INNER),
+ ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID),
+ ENUM(SYSTEM_VALUE_WORK_GROUP_ID),
+ /* Added to the enum alongside the other compute values; without this
+  * entry it would print as "UNKNOWN".
+  */
+ ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS),
+ ENUM(SYSTEM_VALUE_VERTEX_CNT),
+ };
+ return NAME(sysval);
+}
+
+const char * glsl_interp_qualifier_name(enum glsl_interp_qualifier qual)
+{
+ static const char *names[] = {
+ ENUM(INTERP_QUALIFIER_NONE),
+ ENUM(INTERP_QUALIFIER_SMOOTH),
+ ENUM(INTERP_QUALIFIER_FLAT),
+ ENUM(INTERP_QUALIFIER_NOPERSPECTIVE),
+ };
+ return NAME(qual);
+}
+
+const char * gl_frag_result_name(gl_frag_result result)
+{
+ static const char *names[] = {
+ ENUM(FRAG_RESULT_DEPTH),
+ ENUM(FRAG_RESULT_STENCIL),
+ ENUM(FRAG_RESULT_COLOR),
+ ENUM(FRAG_RESULT_SAMPLE_MASK),
+ ENUM(FRAG_RESULT_DATA0),
+ ENUM(FRAG_RESULT_DATA1),
+ ENUM(FRAG_RESULT_DATA2),
+ ENUM(FRAG_RESULT_DATA3),
+ ENUM(FRAG_RESULT_DATA4),
+ ENUM(FRAG_RESULT_DATA5),
+ ENUM(FRAG_RESULT_DATA6),
+ ENUM(FRAG_RESULT_DATA7),
+ };
+ return NAME(result);
+}
MESA_SHADER_COMPUTE = 5,
} gl_shader_stage;
+const char * gl_shader_stage_name(gl_shader_stage stage);
+
#define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1)
VERT_ATTRIB_MAX = 33
} gl_vert_attrib;
+const char * gl_vert_attrib_name(gl_vert_attrib attrib);
+
/**
* Symbolic constants to help iterating over
* specific blocks of vertex attributes.
VARYING_SLOT_TESS_LEVEL_OUTER, /* Only appears as TCS output. */
VARYING_SLOT_TESS_LEVEL_INNER, /* Only appears as TCS output. */
VARYING_SLOT_VAR0, /* First generic varying slot */
+ /* the remaining are simply for the benefit of gl_varying_slot_name()
+ * and not to be construed as an upper bound:
+ */
+ VARYING_SLOT_VAR1,
+ VARYING_SLOT_VAR2,
+ VARYING_SLOT_VAR3,
+ VARYING_SLOT_VAR4,
+ VARYING_SLOT_VAR5,
+ VARYING_SLOT_VAR6,
+ VARYING_SLOT_VAR7,
+ VARYING_SLOT_VAR8,
+ VARYING_SLOT_VAR9,
+ VARYING_SLOT_VAR10,
+ VARYING_SLOT_VAR11,
+ VARYING_SLOT_VAR12,
+ VARYING_SLOT_VAR13,
+ VARYING_SLOT_VAR14,
+ VARYING_SLOT_VAR15,
+ VARYING_SLOT_VAR16,
+ VARYING_SLOT_VAR17,
+ VARYING_SLOT_VAR18,
+ VARYING_SLOT_VAR19,
+ VARYING_SLOT_VAR20,
+ VARYING_SLOT_VAR21,
+ VARYING_SLOT_VAR22,
+ VARYING_SLOT_VAR23,
+ VARYING_SLOT_VAR24,
+ VARYING_SLOT_VAR25,
+ VARYING_SLOT_VAR26,
+ VARYING_SLOT_VAR27,
+ VARYING_SLOT_VAR28,
+ VARYING_SLOT_VAR29,
+ VARYING_SLOT_VAR30,
+ VARYING_SLOT_VAR31,
} gl_varying_slot;
+const char * gl_varying_slot_name(gl_varying_slot slot);
/**
* Bitflags for varying slots.
#define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID)
#define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS)
#define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_MASK_IN)
+#define SYSTEM_BIT_LOCAL_INVOCATION_ID ((uint64_t)1 << SYSTEM_VALUE_LOCAL_INVOCATION_ID)
+
/**
* If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index will be
* one of these values. If a NIR variable's mode is nir_var_system_value, it
SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */
/*@}*/
+ /**
+ * \name Compute shader system values
+ */
+ /*@{*/
+ SYSTEM_VALUE_LOCAL_INVOCATION_ID,
+ SYSTEM_VALUE_WORK_GROUP_ID,
+ SYSTEM_VALUE_NUM_WORK_GROUPS,
+ /*@}*/
+
+ /**
+ * Driver internal vertex-count, used (for example) for drivers to
+ * calculate stride for stream-out outputs. Not externally visible.
+ */
+ SYSTEM_VALUE_VERTEX_CNT,
+
SYSTEM_VALUE_MAX /**< Number of values */
} gl_system_value;
+const char * gl_system_value_name(gl_system_value sysval);
/**
* The possible interpolation qualifiers that can be applied to a fragment
INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */
};
+const char * glsl_interp_qualifier_name(enum glsl_interp_qualifier qual);
+
/**
* Fragment program results
*/
/* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n]
* or ARB_fragment_program fragment.color[n]) color results. If
* any are written, FRAG_RESULT_COLOR will not be written.
+ * FRAG_RESULT_DATA1 and up are simply for the benefit of
+ * gl_frag_result_name() and not to be construed as an upper bound
*/
FRAG_RESULT_DATA0 = 4,
+ FRAG_RESULT_DATA1,
+ FRAG_RESULT_DATA2,
+ FRAG_RESULT_DATA3,
+ FRAG_RESULT_DATA4,
+ FRAG_RESULT_DATA5,
+ FRAG_RESULT_DATA6,
+ FRAG_RESULT_DATA7,
} gl_frag_result;
+const char * gl_frag_result_name(gl_frag_result result);
+
#endif /* SHADER_ENUMS_H */
}
void
-_mesa_shader_debug(struct gl_context *, GLenum, GLuint *id,
+_mesa_shader_debug(struct gl_context *, GLenum, GLuint *,
const char *, int)
{
}
ralloc_free(shProg->UniformBlocks);
shProg->UniformBlocks = NULL;
- shProg->NumUniformBlocks = 0;
+ shProg->NumBufferInterfaceBlocks = 0;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
ralloc_free(shProg->UniformBlockStageIndex[i]);
shProg->UniformBlockStageIndex[i] = NULL;
-I$(top_srcdir)/include/GL/internal \
$(DEFINES) \
$(LIBDRM_CFLAGS) \
- $(X11_CFLAGS)
+ $(X11_INCLUDES)
TESTS = glx-test
check_PROGRAMS = glx-test
include Makefile.sources
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
glapi_gen_mapi_deps := \
mapi_abi.py \
$(wildcard glapi/gen/*.xml) \
$(wildcard glapi/gen/*.py)
-# $(1): path to an XML file
-# $(2): name of the printer
-define glapi_gen_mapi
-@$(MKDIR_P) $(dir $@)
-$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/mapi_abi.py \
- --mode lib --printer $(2) $(1) > $@
-endef
-
if HAVE_SHARED_GLAPI
BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h
endif
shared-glapi/glapi_mapi_tmp.h : glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
- $(call glapi_gen_mapi,$<,shared-glapi)
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer shared-glapi \
+ $(srcdir)/glapi/gen/gl_and_es_API.xml > $@
if HAVE_OPENGL
noinst_LTLIBRARIES = glapi/libglapi.la
endif
es1api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
- $(call glapi_gen_mapi,$<,es1api)
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer es1api \
+ $(srcdir)/glapi/gen/gl_and_es_API.xml > $@
if HAVE_OPENGL_ES2
TESTS += es2api/ABI-check
endif
es2api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
- $(call glapi_gen_mapi,$<,es2api)
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer es2api \
+ $(srcdir)/glapi/gen/gl_and_es_API.xml > $@
include $(top_srcdir)/install-lib-links.mk
#ifndef MAPI_MODE_BRIDGE
-__asm__("x86_64_current_tls:\n\t"
- "movq " ENTRY_CURRENT_TABLE "@GOTTPOFF(%rip), %rax\n\t"
- "ret");
-
-extern unsigned long
-x86_64_current_tls();
-
#include <string.h>
#include "u_execmem.h"
char *code;
mapi_func entry;
- addr = x86_64_current_tls();
+ __asm__("movq " ENTRY_CURRENT_TABLE "@GOTTPOFF(%%rip), %0"
+ : "=r" (addr));
if ((addr >> 32) != 0xffffffff)
return NULL;
addr &= 0xffffffff;
<enum name="MAX_FRAMEBUFFER_LAYERS" value="0x9317" />
<enum name="MAX_FRAMEBUFFER_SAMPLES" value="0x9318" />
- <function name="FramebufferParameteri">
+ <function name="FramebufferParameteri" es2="3.1">
<param name="target" type="GLenum"/>
<param name="pname" type="GLenum"/>
<param name="param" type="GLint" />
</function>
- <function name="GetFramebufferParameteriv">
+ <function name="GetFramebufferParameteriv" es2="3.1">
<param name="target" type="GLenum" />
<param name="pname" type="GLenum" />
<param name="params" type="GLint *" output="true" />
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
+
+<OpenGLAPI>
+
+<!-- ARB extension #137. The category name is the full registry name of the
+     extension, including the "_object" suffix. -->
+<category name="GL_ARB_shader_storage_buffer_object" number="137">
+
+<enum name="SHADER_STORAGE_BUFFER" value="0x90D2" />
+<enum name="SHADER_STORAGE_BUFFER_BINDING" value="0x90D3" />
+<enum name="SHADER_STORAGE_BUFFER_START" value="0x90D4" />
+<enum name="SHADER_STORAGE_BUFFER_SIZE" value="0x90D5" />
+<enum name="MAX_VERTEX_SHADER_STORAGE_BLOCKS" value="0x90D6" />
+<enum name="MAX_GEOMETRY_SHADER_STORAGE_BLOCKS" value="0x90D7" />
+<enum name="MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS" value="0x90D8" />
+<enum name="MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS" value="0x90D9" />
+<enum name="MAX_FRAGMENT_SHADER_STORAGE_BLOCKS" value="0x90DA" />
+<enum name="MAX_COMPUTE_SHADER_STORAGE_BLOCKS" value="0x90DB" />
+<enum name="MAX_COMBINED_SHADER_STORAGE_BLOCKS" value="0x90DC" />
+<enum name="MAX_SHADER_STORAGE_BUFFER_BINDINGS" value="0x90DD" />
+<enum name="MAX_SHADER_STORAGE_BLOCK_SIZE" value="0x90DE" />
+<enum name="SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT" value="0x90DF" />
+<enum name="SHADER_STORAGE_BARRIER_BIT" value="0x2000" />
+<enum name="MAX_COMBINED_SHADER_OUTPUT_RESOURCES" value="0x8F39" />
+
+<!-- Duplicated with GL3x.xml: BindBufferRange, BindBufferBase,
+ GetIntegeri_v -->
+
+<function name="ShaderStorageBlockBinding">
+ <param name="program" type="GLuint" />
+ <param name="shaderStorageBlockIndex" type="GLuint" />
+ <param name="shaderStorageBlockBinding" type="GLuint" />
+</function>
+
+</category>
+
+</OpenGLAPI>
</category>
<category name="4.3">
- <enum name="DEPTH_STENCIL_TEXTURE_MODE" value="0x90EA"/>
+ <enum name="SHADER_STORAGE_BARRIER_BIT" value="0x2000" />
+ <enum name="MAX_COMBINED_SHADER_OUTPUT_RESOURCES" value="0x8F39" />
+ <enum name="SHADER_STORAGE_BUFFER" value="0x90D2"/>
+ <enum name="SHADER_STORAGE_BUFFER_BINDING" value="0x90D3"/>
+ <enum name="SHADER_STORAGE_BUFFER_START" value="0x90D4"/>
+ <enum name="SHADER_STORAGE_BUFFER_SIZE" value="0x90D5"/>
+ <enum name="MAX_VERTEX_SHADER_STORAGE_BLOCKS" value="0x90D6" />
+ <enum name="MAX_GEOMETRY_SHADER_STORAGE_BLOCKS" value="0x90D7" />
+ <enum name="MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS" value="0x90D8" />
+ <enum name="MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS" value="0x90D9" />
+ <enum name="MAX_FRAGMENT_SHADER_STORAGE_BLOCKS" value="0x90DA" />
+ <enum name="MAX_COMPUTE_SHADER_STORAGE_BLOCKS" value="0x90DB" />
+ <enum name="MAX_COMBINED_SHADER_STORAGE_BLOCKS" value="0x90DC" />
+ <enum name="MAX_SHADER_STORAGE_BUFFER_BINDINGS" value="0x90DD" />
+ <enum name="MAX_SHADER_STORAGE_BLOCK_SIZE" value="0x90DE" />
+ <enum name="SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT" value="0x90DF" />
+ <enum name="DEPTH_STENCIL_TEXTURE_MODE" value="0x90EA"/>
</category>
<category name="4.5">
MESA_DIR = $(top_builddir)/src/mesa
MESA_GLAPI_DIR = $(top_builddir)/src/mapi/glapi
-MESA_MAPI_DIR = $(top_builddir)/src/mapi
+MESA_MAPI_DIR = $(top_srcdir)/src/mapi
MESA_GLX_DIR = $(top_builddir)/src/glx
MESA_GLAPI_OUTPUTS = \
ARB_shader_atomic_counters.xml \
ARB_shader_image_load_store.xml \
ARB_shader_subroutine.xml \
+ ARB_shader_storage_buffer_object.xml \
ARB_sync.xml \
ARB_tessellation_shader.xml \
ARB_texture_barrier.xml \
COMMON_GLX = $(COMMON) glX_API.xml glX_XML.py glX_proto_common.py
-PYTHON_GEN = $(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
######################################################################
######################################################################
$(MESA_GLAPI_DIR)/glapi_mapi_tmp.h: $(MESA_MAPI_DIR)/mapi_abi.py $(COMMON)
- $(PYTHON_GEN) $< \
+ $(PYTHON_GEN) $(MESA_MAPI_DIR)/mapi_abi.py \
--printer glapi --mode lib $(srcdir)/gl_and_es_API.xml > $@
$(MESA_GLAPI_DIR)/glprocs.h: gl_procs.py $(COMMON)
- $(PYTHON_GEN) $< -c -f $(srcdir)/gl_and_es_API.xml > $@
+ $(PYTHON_GEN) $(srcdir)/gl_procs.py -c -f $(srcdir)/gl_and_es_API.xml > $@
$(MESA_GLAPI_DIR)/glapitemp.h: gl_apitemp.py $(COMMON)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+ $(PYTHON_GEN) $(srcdir)/gl_apitemp.py -f $(srcdir)/gl_and_es_API.xml > $@
$(MESA_GLAPI_DIR)/glapitable.h: gl_table.py $(COMMON)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+ $(PYTHON_GEN) $(srcdir)/gl_table.py -f $(srcdir)/gl_and_es_API.xml > $@
$(MESA_GLAPI_DIR)/glapi_gentable.c: gl_gentable.py $(COMMON)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+ $(PYTHON_GEN) $(srcdir)/gl_gentable.py -f $(srcdir)/gl_and_es_API.xml > $@
######################################################################
$(MESA_GLAPI_DIR)/glapi_x86.S: gl_x86_asm.py $(COMMON)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+ $(PYTHON_GEN) $(srcdir)/gl_x86_asm.py -f $(srcdir)/gl_and_es_API.xml > $@
$(MESA_GLAPI_DIR)/glapi_x86-64.S: gl_x86-64_asm.py $(COMMON)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+ $(PYTHON_GEN) $(srcdir)/gl_x86-64_asm.py -f $(srcdir)/gl_and_es_API.xml > $@
$(MESA_GLAPI_DIR)/glapi_sparc.S: gl_SPARC_asm.py $(COMMON)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+ $(PYTHON_GEN) $(srcdir)/gl_SPARC_asm.py -f $(srcdir)/gl_and_es_API.xml > $@
######################################################################
$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+ $(PYTHON_GEN) $(srcdir)/gl_enums.py -f $(srcdir)/gl_and_es_API.xml > $@
$(MESA_DIR)/main/api_exec.c: gl_genexec.py apiexec.py $(COMMON)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+ $(PYTHON_GEN) $(srcdir)/gl_genexec.py -f $(srcdir)/gl_and_es_API.xml > $@
$(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml -m remap_table > $@
+ $(PYTHON_GEN) $(srcdir)/gl_table.py -f $(srcdir)/gl_and_es_API.xml -m remap_table > $@
$(MESA_DIR)/main/remap_helper.h: remap_helper.py $(COMMON)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+ $(PYTHON_GEN) $(srcdir)/remap_helper.py -f $(srcdir)/gl_and_es_API.xml > $@
######################################################################
$(MESA_GLX_DIR)/indirect.c: glX_proto_send.py $(COMMON_GLX)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m proto \
+ $(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m proto \
| $(INDENT) $(INDENT_FLAGS) > $@
$(MESA_GLX_DIR)/indirect.h: glX_proto_send.py $(COMMON_GLX)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m init_h > $@
+ $(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m init_h > $@
$(MESA_GLX_DIR)/indirect_init.c: glX_proto_send.py $(COMMON_GLX)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m init_c > $@
+ $(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m init_c > $@
$(MESA_GLX_DIR)/indirect_size.h $(XORG_GLX_DIR)/indirect_size.h: glX_proto_size.py $(COMMON_GLX)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_h --only-set \
+ $(PYTHON_GEN) $(srcdir)/glX_proto_size.py -f $(srcdir)/gl_API.xml -m size_h --only-set \
--header-tag _INDIRECT_SIZE_H_ \
| $(INDENT) $(INDENT_FLAGS) > $@
$(MESA_GLX_DIR)/indirect_size.c: glX_proto_size.py $(COMMON_GLX)
- $(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_c --only-set \
+ $(PYTHON_GEN) $(srcdir)/glX_proto_size.py -f $(srcdir)/gl_API.xml -m size_c --only-set \
| $(INDENT) $(INDENT_FLAGS) > $@
######################################################################
# OpenGL 4.3 / GL_ARB_framebuffer_no_attachments. Mesa can expose the
# extension with OpenGL 3.0.
- "FramebufferParameteri": exec_info(compatibility=30, core=31),
- "GetFramebufferParameteri": exec_info(compatibility=30, core=31),
+ "FramebufferParameteri": exec_info(compatibility=30, core=31, es2=31),
+ "GetFramebufferParameteri": exec_info(compatibility=30, core=31, es2=31),
# OpenGL 4.5 / GL_ARB_direct_state_access. Mesa can expose the extension
# with core profile.
<xi:include href="ARB_program_interface_query.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-<!-- ARB extensions #135...#138 -->
+<!-- ARB extensions #135...#136 -->
+
+<xi:include href="ARB_shader_storage_buffer_object.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+
+<!-- ARB extensions #138 -->
<xi:include href="ARB_texture_buffer_range.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
#include "main/matrix.h"
#include "main/multisample.h"
#include "main/objectlabel.h"
+#include "main/objectpurge.h"
#include "main/performance_monitor.h"
#include "main/pipelineobj.h"
#include "main/pixel.h"
program/program_parse.tab.h \
main/git_sha1.h.tmp
-GET_HASH_GEN = main/get_hash_generator.py
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
-main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py \
- $(GET_HASH_GEN)
- $(AM_V_GEN)set -e; \
- $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/$(GET_HASH_GEN) \
- -f $< > $@.tmp; \
- mv $@.tmp $@;
+main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py \
+ main/get_hash_generator.py
+ $(PYTHON_GEN) $(srcdir)/main/get_hash_generator.py \
+ -f $(srcdir)/../mapi/glapi/gen/gl_and_es_API.xml > $@
-main/format_info.h: main/formats.csv \
+main/format_info.h: main/formats.csv \
main/format_parser.py main/format_info.py
- $(AM_V_GEN)set -e; \
- $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/main/format_info.py \
- $< > $@.tmp; \
- mv $@.tmp $@;
+ $(PYTHON_GEN) $(srcdir)/main/format_info.py $(srcdir)/main/formats.csv > $@
-main/format_pack.c: main/format_pack.py main/formats.csv \
+main/format_pack.c: main/format_pack.py main/formats.csv \
main/format_parser.py
- $(AM_V_GEN)set -e; \
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/main/format_pack.py \
- $(srcdir)/main/formats.csv \
- | $(INDENT) $(INDENT_FLAGS) > $@;
+ $(PYTHON_GEN) $(srcdir)/main/format_pack.py $(srcdir)/main/formats.csv > $@
main/format_unpack.c: main/format_unpack.py main/formats.csv \
main/format_parser.py
- $(AM_V_GEN)set -e; \
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/main/format_unpack.py \
- $(srcdir)/main/formats.csv \
- | $(INDENT) $(INDENT_FLAGS) > $@;
+ $(PYTHON_GEN) $(srcdir)/main/format_unpack.py $(srcdir)/main/formats.csv > $@
main/formats.c: main/format_info.h
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = gl.pc
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+YACC_GEN = $(AM_V_GEN)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_GEN)$(LEX) $(LFLAGS)
+
program/lex.yy.c: program/program_lexer.l
- $(AM_V_at)$(MKDIR_P) program
- $(AM_V_GEN) $(LEX) --never-interactive --outfile=$@ $<
+ $(MKDIR_GEN)
+ $(LEX_GEN) -o $@ $(srcdir)/program/program_lexer.l
program/program_parse.tab.c program/program_parse.tab.h: program/program_parse.y
- $(AM_V_at)$(MKDIR_P) program
- $(AM_V_GEN) $(YACC) -p "_mesa_program_" -v -d --output=program/program_parse.tab.c $<
+ $(MKDIR_GEN)
+ $(YACC_GEN) -o $@ -p "_mesa_program_" --defines=$(builddir)/program/program_parse.tab.h $(srcdir)/program/program_parse.y
if GEN_ASM_OFFSETS
matypes.h: $(gen_matypes_SOURCES)
main/multisample.h \
main/objectlabel.c \
main/objectlabel.h \
+ main/objectpurge.c \
+ main/objectpurge.h \
main/pack.c \
main/pack.h \
main/pbo.c \
program/sampler.h \
program/string_to_uint_map.cpp \
program/symbol_table.c \
- program/symbol_table.h
+ program/symbol_table.h \
+ ../glsl/shader_enums.c \
+ ../glsl/shader_enums.h
PROGRAM_NIR_FILES = \
program/prog_to_nir.c \
/* Save the shader state from ctx->Shader (instead of ctx->_Shader) so
* that we don't have to worry about the current pipeline state.
*/
- for (i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
+ for (i = 0; i < MESA_SHADER_STAGES; i++) {
_mesa_reference_shader_program(ctx, &save->Shader[i],
ctx->Shader.CurrentProgram[i]);
}
GL_TESS_EVALUATION_SHADER,
GL_GEOMETRY_SHADER,
GL_FRAGMENT_SHADER,
+ GL_COMPUTE_SHADER,
};
+ STATIC_ASSERT(MESA_SHADER_STAGES == ARRAY_SIZE(targets));
bool any_shader;
}
any_shader = false;
- for (i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
+ for (i = 0; i < MESA_SHADER_STAGES; i++) {
/* It is safe to call _mesa_use_shader_program even if the extension
* necessary for that program state is not supported. In that case,
* the saved program object must be NULL and the currently bound
bool
_mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
struct gl_texture_image *src_tex_image,
+ struct gl_renderbuffer *src_renderbuffer,
int src_x, int src_y, int src_z,
struct gl_texture_image *dst_tex_image,
+ struct gl_renderbuffer *dst_renderbuffer,
int dst_x, int dst_y, int dst_z,
int src_width, int src_height);
" vec2 tex_coord = texCoords - s_0_offset;\n"
"\n"
" tex_coord *= scale;\n"
- " clamp(tex_coord.x, 0.0f, scale.x * src_width - 1.0f);\n"
- " clamp(tex_coord.y, 0.0f, scale.y * src_height - 1.0f);\n"
+ " tex_coord.x = clamp(tex_coord.x, 0.0f, scale.x * src_width - 1.0f);\n"
+ " tex_coord.y = clamp(tex_coord.y, 0.0f, scale.y * src_height - 1.0f);\n"
" interp = fract(tex_coord);\n"
" tex_coord = ivec2(tex_coord) * scale_inv;\n"
"\n"
#include "mtypes.h"
#include "meta.h"
+/**
+ * Create a texture image that wraps a renderbuffer.
+ *
+ * A temporary texture object (2D, or 2D multisample when the renderbuffer
+ * has more than one sample) is created through the driver and its level-0
+ * image is bound to the renderbuffer's storage.
+ *
+ * \param ctx  the GL context whose driver hooks are used
+ * \param rb   the renderbuffer to wrap
+ *
+ * \return the level-0 texture image of the temporary texture object, or
+ *         NULL on failure.  On success the caller owns the temporary
+ *         texture object and must release it with
+ *         ctx->Driver.DeleteTexture(ctx, texImage->TexObject).  On failure
+ *         the temporary texture object has already been released here.
+ */
+static struct gl_texture_image *
+wrap_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
+{
+   GLenum texTarget;
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+
+   if (rb->NumSamples > 1)
+      texTarget = GL_TEXTURE_2D_MULTISAMPLE;
+   else
+      texTarget = GL_TEXTURE_2D;
+
+   /* Texture ID is not significant since it never goes into the hash table */
+   texObj = ctx->Driver.NewTextureObject(ctx, 0, texTarget);
+   assert(texObj);
+   if (!texObj)
+      return NULL;
+
+   texImage = _mesa_get_tex_image(ctx, texObj, texTarget, 0);
+   assert(texImage);
+   if (!texImage) {
+      /* Nothing else references the temporary object; don't leak it. */
+      ctx->Driver.DeleteTexture(ctx, texObj);
+      return NULL;
+   }
+
+   if (!ctx->Driver.BindRenderbufferTexImage(ctx, rb, texImage)) {
+      _mesa_problem(ctx, "Failed to create texture from renderbuffer");
+      /* Release the temporary object (and its image) on failure. */
+      ctx->Driver.DeleteTexture(ctx, texObj);
+      return NULL;
+   }
+
+   /* Flag the renderbuffer and invoke the driver's FinishRenderTexture hook
+    * once, if the driver provides one and the flag is not already set.
+    */
+   if (ctx->Driver.FinishRenderTexture && !rb->NeedsFinishRenderTexture) {
+      rb->NeedsFinishRenderTexture = true;
+      ctx->Driver.FinishRenderTexture(ctx, rb);
+   }
+
+   return texImage;
+}
+
+
/* This function makes a texture view without bothering with all of the API
* checks. Most of them are the same for CopyTexSubImage so checking would
* be redundant. The one major difference is that we don't check for
bool
_mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
struct gl_texture_image *src_tex_image,
+ struct gl_renderbuffer *src_renderbuffer,
int src_x, int src_y, int src_z,
struct gl_texture_image *dst_tex_image,
+ struct gl_renderbuffer *dst_renderbuffer,
int dst_x, int dst_y, int dst_z,
int src_width, int src_height)
{
+ mesa_format src_format, dst_format;
+ GLint src_internal_format, dst_internal_format;
GLuint src_view_texture = 0;
struct gl_texture_image *src_view_tex_image;
GLuint fbos[2];
GLbitfield mask;
GLenum status, attachment;
- if (_mesa_is_format_compressed(dst_tex_image->TexFormat))
+ if (src_renderbuffer) {
+ src_format = src_renderbuffer->Format;
+ src_internal_format = src_renderbuffer->InternalFormat;
+ } else {
+ assert(src_tex_image);
+ src_format = src_tex_image->TexFormat;
+ src_internal_format = src_tex_image->InternalFormat;
+ }
+
+ if (dst_renderbuffer) {
+ dst_format = dst_renderbuffer->Format;
+ dst_internal_format = dst_renderbuffer->InternalFormat;
+ } else {
+ assert(dst_tex_image);
+ dst_format = dst_tex_image->TexFormat;
+ dst_internal_format = dst_tex_image->InternalFormat;
+ }
+
+ if (_mesa_is_format_compressed(src_format))
return false;
- if (_mesa_is_format_compressed(src_tex_image->TexFormat))
+ if (_mesa_is_format_compressed(dst_format))
return false;
- if (src_tex_image->InternalFormat == dst_tex_image->InternalFormat) {
+ if (src_internal_format == dst_internal_format) {
src_view_tex_image = src_tex_image;
} else {
+ if (src_renderbuffer) {
+ assert(src_tex_image == NULL);
+ src_tex_image = wrap_renderbuffer(ctx, src_renderbuffer);
+ }
if (!make_view(ctx, src_tex_image, &src_view_tex_image, &src_view_texture,
dst_tex_image->InternalFormat))
goto cleanup;
_mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]);
_mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]);
- switch (_mesa_get_format_base_format(src_tex_image->TexFormat)) {
+ switch (_mesa_get_format_base_format(src_format)) {
case GL_DEPTH_COMPONENT:
attachment = GL_DEPTH_ATTACHMENT;
mask = GL_DEPTH_BUFFER_BIT;
_mesa_ReadBuffer(GL_COLOR_ATTACHMENT0);
}
- _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, attachment,
- src_view_tex_image, src_z);
+ if (src_view_tex_image) {
+ /* Prefer the tex image because, even if we have a renderbuffer, we may
+ * have had to wrap it in a texture view.
+ */
+ _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, attachment,
+ src_view_tex_image, src_z);
+ } else {
+ _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER,
+ attachment,
+ GL_RENDERBUFFER,
+ src_renderbuffer->Name);
+ }
status = _mesa_CheckFramebufferStatus(GL_READ_FRAMEBUFFER);
if (status != GL_FRAMEBUFFER_COMPLETE)
goto meta_end;
- _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, attachment,
- dst_tex_image, dst_z);
+ if (dst_renderbuffer) {
+ _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER,
+ attachment,
+ GL_RENDERBUFFER,
+ dst_renderbuffer->Name);
+ } else {
+ _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, attachment,
+ dst_tex_image, dst_z);
+ }
status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER);
if (status != GL_FRAMEBUFFER_COMPLETE)
cleanup:
_mesa_DeleteTextures(1, &src_view_texture);
+ /* If we got a renderbuffer source, delete the temporary texture */
+ if (src_renderbuffer && src_tex_image)
+ ctx->Driver.DeleteTexture(ctx, src_tex_image->TexObject);
+
return success;
}
const GLuint maxLevel = texObj->MaxLevel;
const GLint maxLevelSave = texObj->MaxLevel;
const GLboolean genMipmapSave = texObj->GenerateMipmap;
- const GLuint currentTexUnitSave = ctx->Texture.CurrentUnit;
const GLboolean use_glsl_version = ctx->Extensions.ARB_vertex_shader &&
ctx->Extensions.ARB_fragment_shader;
GLenum faceTarget;
samplerSave = ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler ?
ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler->Name : 0;
- if (currentTexUnitSave != 0)
- _mesa_BindTexture(target, texObj->Name);
+ /* We may have been called from glGenerateTextureMipmap with CurrentUnit
+ * still set to 0, so we don't know when we can skip binding the texture.
+ * Assume that _mesa_BindTexture will be fast if we're rebinding the same
+ * texture.
+ */
+ _mesa_BindTexture(target, texObj->Name);
if (!mipmap->Sampler) {
_mesa_GenSamplers(1, &mipmap->Sampler);
#include "uniforms.h"
#include "varray.h"
+/**
+ * Tell whether a transfer between \p mesaFormat and client data described
+ * by \p format / \p type would need a signed <-> unsigned integer
+ * conversion.
+ *
+ * Returns true only when \p format is an integer format and either
+ *  - the Mesa format's datatype is GL_INT while \p type is one of
+ *    GL_UNSIGNED_INT / GL_UNSIGNED_SHORT / GL_UNSIGNED_BYTE, or
+ *  - the Mesa format's datatype is GL_UNSIGNED_INT while \p type is one of
+ *    GL_INT / GL_SHORT / GL_BYTE.
+ */
+static bool
+need_signed_unsigned_int_conversion(mesa_format mesaFormat,
+                                    GLenum format, GLenum type)
+{
+   /* Both queries are made up front, in this order. */
+   const GLenum datatype = _mesa_get_format_datatype(mesaFormat);
+   const bool integer_format = _mesa_is_enum_format_integer(format);
+
+   if (!integer_format)
+      return false;
+
+   switch (datatype) {
+   case GL_INT:
+      /* Signed storage, unsigned client type. */
+      return type == GL_UNSIGNED_INT ||
+             type == GL_UNSIGNED_SHORT ||
+             type == GL_UNSIGNED_BYTE;
+   case GL_UNSIGNED_INT:
+      /* Unsigned storage, signed client type. */
+      return type == GL_INT ||
+             type == GL_SHORT ||
+             type == GL_BYTE;
+   default:
+      return false;
+   }
+}
+
static struct gl_texture_image *
-create_texture_for_pbo(struct gl_context *ctx, bool create_pbo,
- GLenum pbo_target, int width, int height,
+create_texture_for_pbo(struct gl_context *ctx,
+ bool create_pbo, GLenum pbo_target,
+ int dims, int width, int height, int depth,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *packing,
GLuint *tmp_pbo, GLuint *tmp_tex)
return NULL;
/* Account for SKIP_PIXELS, SKIP_ROWS, ALIGNMENT, and SKIP_IMAGES */
- pixels = _mesa_image_address3d(packing, pixels,
- width, height, format, type, 0, 0, 0);
+ uint32_t first_pixel = _mesa_image_offset(dims, packing, width, height,
+ format, type,
+ 0, 0, 0);
+ uint32_t last_pixel = _mesa_image_offset(dims, packing, width, height,
+ format, type,
+ depth-1, height-1, width);
row_stride = _mesa_image_row_stride(packing, width, format, type);
if (_mesa_is_bufferobj(packing->BufferObj)) {
*tmp_pbo = 0;
buffer_obj = packing->BufferObj;
+ first_pixel += (intptr_t)pixels;
} else {
bool is_pixel_pack = pbo_target == GL_PIXEL_PACK_BUFFER;
* data to avoid unnecessary data copying in _mesa_BufferData().
*/
if (is_pixel_pack)
- _mesa_BufferData(pbo_target, row_stride * height, NULL,
+ _mesa_BufferData(pbo_target,
+ last_pixel - first_pixel,
+ NULL,
GL_STREAM_READ);
else
- _mesa_BufferData(pbo_target, row_stride * height, pixels,
+ _mesa_BufferData(pbo_target,
+ last_pixel - first_pixel,
+ (char *)pixels + first_pixel,
GL_STREAM_DRAW);
buffer_obj = packing->BufferObj;
- pixels = NULL;
+ first_pixel = 0;
_mesa_BindBuffer(pbo_target, 0);
}
internal_format = _mesa_get_format_base_format(pbo_format);
+ /* The texture is addressed as a single very-tall image, so we
+ * need to pack the multiple image depths together taking the
+ * inter-image padding into account.
+ */
+ int image_height = packing->ImageHeight == 0 ? height : packing->ImageHeight;
+ int full_height = image_height * (depth - 1) + height;
+
tex_image = _mesa_get_tex_image(ctx, tex_obj, tex_obj->Target, 0);
- _mesa_init_teximage_fields(ctx, tex_image, width, height, 1,
+ _mesa_init_teximage_fields(ctx, tex_image, width, full_height, 1,
0, internal_format, pbo_format);
read_only = pbo_target == GL_PIXEL_UNPACK_BUFFER;
if (!ctx->Driver.SetTextureStorageForBufferObject(ctx, tex_obj,
buffer_obj,
- (intptr_t)pixels,
+ first_pixel,
row_stride,
read_only)) {
_mesa_DeleteTextures(1, tmp_tex);
const struct gl_pixelstore_attrib *packing)
{
GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 };
- int full_height, image_height;
+ int image_height;
struct gl_texture_image *pbo_tex_image;
GLenum status;
bool success = false;
if (ctx->_ImageTransferState)
return false;
+ /* This function relies on BlitFramebuffer to fill in the pixel data for
+ * glTex[Sub]Image*D. But BlitFramebuffer doesn't support signed to
+ * unsigned or unsigned to signed integer conversions.
+ */
+ if (need_signed_unsigned_int_conversion(tex_image->TexFormat, format, type))
+ return false;
+
/* For arrays, use a tall (height * depth) 2D texture but taking into
* account the inter-image padding specified with the image height packing
* property.
*/
image_height = packing->ImageHeight == 0 ? height : packing->ImageHeight;
- full_height = image_height * (depth - 1) + height;
pbo_tex_image = create_texture_for_pbo(ctx, create_pbo,
GL_PIXEL_UNPACK_BUFFER,
- width, full_height,
+ dims, width, height, depth,
format, type, pixels, packing,
&pbo, &pbo_tex);
if (!pbo_tex_image)
return success;
}
-static bool
-need_signed_unsigned_int_conversion(mesa_format rbFormat,
- GLenum format, GLenum type)
-{
- const GLenum srcType = _mesa_get_format_datatype(rbFormat);
- const bool is_dst_format_integer = _mesa_is_enum_format_integer(format);
- return (srcType == GL_INT &&
- is_dst_format_integer &&
- (type == GL_UNSIGNED_INT ||
- type == GL_UNSIGNED_SHORT ||
- type == GL_UNSIGNED_BYTE)) ||
- (srcType == GL_UNSIGNED_INT &&
- is_dst_format_integer &&
- (type == GL_INT ||
- type == GL_SHORT ||
- type == GL_BYTE));
-}
-
bool
_mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *tex_image,
const struct gl_pixelstore_attrib *packing)
{
GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 };
- int full_height, image_height;
+ int image_height;
struct gl_texture_image *pbo_tex_image;
struct gl_renderbuffer *rb = NULL;
GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format);
* property.
*/
image_height = packing->ImageHeight == 0 ? height : packing->ImageHeight;
- full_height = image_height * (depth - 1) + height;
pbo_tex_image = create_texture_for_pbo(ctx, false, GL_PIXEL_PACK_BUFFER,
- width, full_height * depth,
+ dims, width, height, depth,
format, type, pixels, packing,
&pbo, &pbo_tex);
if (!pbo_tex_image)
-I$(top_srcdir)/src/gallium/auxiliary \
$(LIBDRM_CFLAGS) \
$(DEFINES) \
+ -DSYSCONFDIR=\"$(sysconfdir)\" \
$(VISIBILITY_CFLAGS)
noinst_LTLIBRARIES = \
uint64_t
-driParseDebugString( const char * debug,
- const struct dri_debug_control * control )
+driParseDebugString(const char *debug,
+ const struct dri_debug_control *control)
{
uint64_t flag = 0;
- if ( debug != NULL ) {
- while( control->string != NULL ) {
- if ( !strcmp( debug, "all" ) ||
- strstr( debug, control->string ) != NULL ) {
- flag |= control->flag;
- }
-
- control++;
+ if (debug != NULL) {
+ for (; control->string != NULL; control++) {
+ if (!strcmp(debug, "all")) {
+ flag |= control->flag;
+
+ } else {
+ const char *s = debug;
+ unsigned n;
+
+ for (; n = strcspn(s, ", "), *s; s += MAX2(1, n)) {
+ if (strlen(control->string) == n &&
+ !strncmp(control->string, s, n))
+ flag |= control->flag;
+ }
+ }
}
}
#undef BUF_SIZE
}
+#ifndef SYSCONFDIR
+#define SYSCONFDIR "/etc"
+#endif
+
void driParseConfigFiles (driOptionCache *cache, const driOptionCache *info,
int screenNum, const char *driverName) {
- char *filenames[2] = {"/etc/drirc", NULL};
+ char *filenames[2] = { SYSCONFDIR "/drirc", NULL};
char *home;
uint32_t i;
struct OptConfData userData;
$(MOS)
# Default target options.h
-options.h: LOCALEDIR := .
+LOCALEDIR := .
options.h: t_options.h $(MOS)
$(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/gen_xmlpool.py $(srcdir)/t_options.h $(LOCALEDIR) $(LANGS) > options.h
abort();
}
batch->total = 0;
+#else
+ (void) intel;
#endif
}
}
}
-/**
- * @param for_bo Indicates that the caller is
- * intel_miptree_create_for_bo(). If true, then do not create
- * \c stencil_mt.
- */
struct intel_mipmap_tree *
intel_miptree_create_layout(struct intel_context *intel,
GLenum target,
GLuint last_level,
GLuint width0,
GLuint height0,
- GLuint depth0,
- bool for_bo)
+ GLuint depth0)
{
struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
if (!mt)
mt = intel_miptree_create_layout(intel, target, format,
first_level, last_level, width0,
- height0, depth0,
- false);
+ height0, depth0);
/*
* pitch == 0 || height == 0 indicates the null texture
*/
mt = intel_miptree_create_layout(intel, GL_TEXTURE_2D, format,
0, 0,
- width, height, 1,
- true);
+ width, height, 1);
if (!mt) {
free(region);
return mt;
}
void
-intel_miptree_unmap_raw(struct intel_context *intel,
- struct intel_mipmap_tree *mt)
+intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
{
drm_intel_bo_unmap(mt->region->bo);
}
}
static void
-intel_miptree_unmap_gtt(struct intel_context *intel,
- struct intel_mipmap_tree *mt,
- struct intel_miptree_map *map,
- unsigned int level,
- unsigned int slice)
+intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
{
- intel_miptree_unmap_raw(intel, mt);
+ intel_miptree_unmap_raw(mt);
}
static void
{
struct gl_context *ctx = &intel->ctx;
- intel_miptree_unmap_raw(intel, map->mt);
+ intel_miptree_unmap_raw(map->mt);
if (map->mode & GL_MAP_WRITE_BIT) {
bool ok = intel_miptree_blit(intel,
if (map->mt) {
intel_miptree_unmap_blit(intel, mt, map, level, slice);
} else {
- intel_miptree_unmap_gtt(intel, mt, map, level, slice);
+ intel_miptree_unmap_gtt(mt);
}
intel_miptree_release_map(mt, level, slice);
GLuint last_level,
GLuint width0,
GLuint height0,
- GLuint depth0,
- bool for_bo);
+ GLuint depth0);
struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct intel_context *intel,
uint32_t level,
uint32_t layer)
{
+ (void) mt;
+ (void) level;
+ (void) layer;
+
assert(level >= mt->first_level);
assert(level <= mt->last_level);
assert(layer < mt->level[level].depth);
*/
void i915_miptree_layout(struct intel_mipmap_tree *mt);
void i945_miptree_layout(struct intel_mipmap_tree *mt);
-void brw_miptree_layout(struct intel_context *intel,
- struct intel_mipmap_tree *mt);
void *intel_miptree_map_raw(struct intel_context *intel,
struct intel_mipmap_tree *mt);
-void intel_miptree_unmap_raw(struct intel_context *intel,
- struct intel_mipmap_tree *mt);
+void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);
void
intel_miptree_map(struct intel_context *intel,
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
-#define HAVE_TRI_STRIP_1 0 /* has it, template can't use it yet */
#define HAVE_TRI_FANS 1
#define HAVE_POLYGONS 1
-#define HAVE_QUADS 0
-#define HAVE_QUAD_STRIPS 0
#define HAVE_ELTS 0
-static uint32_t hw_prim[GL_POLYGON + 1] = {
+static const uint32_t hw_prim[GL_POLYGON + 1] = {
0,
PRIM3D_LINELIST,
PRIM3D_LINESTRIP,
continue;
intel_render_tab_verts[prim & PRIM_MODE_MASK] (ctx, start,
- start + length, prim);
+ length, prim);
}
tnl->Driver.Render.Finish(ctx);
intel_image->mt = intel_miptree_create_layout(intel, target, image->TexFormat,
0, 0,
- width, height, 1,
- true);
+ width, height, 1);
if (intel_image->mt == NULL)
return;
intel_region_reference(&intel_image->mt->region, region);
_mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
}
- intel_miptree_unmap_raw(intel, temp_mt);
+ intel_miptree_unmap_raw(temp_mt);
bool ret;
brw_conditional_render.c \
brw_context.c \
brw_context.h \
- brw_cs.cpp \
+ brw_cs.c \
brw_cs.h \
brw_cubemap_normalize.cpp \
brw_curbe.c \
brw_fs_sel_peephole.cpp \
brw_fs_surface_builder.cpp \
brw_fs_surface_builder.h \
+ brw_fs_validate.cpp \
brw_fs_vector_splitting.cpp \
brw_fs_visitor.cpp \
brw_gs.c \
brw_urb.c \
brw_util.c \
brw_util.h \
+ brw_vec4_builder.h \
brw_vec4_copy_propagation.cpp \
brw_vec4.cpp \
brw_vec4_cse.cpp \
brw_vec4_nir.cpp \
brw_vec4_gs_nir.cpp \
brw_vec4_reg_allocate.cpp \
+ brw_vec4_surface_builder.cpp \
+ brw_vec4_surface_builder.h \
brw_vec4_visitor.cpp \
brw_vec4_vp.cpp \
brw_vec4_vs_visitor.cpp \
gen6_wm_state.c \
gen7_blorp.cpp \
gen7_blorp.h \
+ gen7_cs_state.c \
gen7_disable.c \
gen7_gs_state.c \
gen7_misc_state.c \
/**
* Enable hardware binding tables and set up the binding table pool.
*/
-void
+static void
gen7_enable_hw_binding_tables(struct brw_context *brw)
{
if (!brw->use_resource_streamer)
{
uint32_t mask_x, mask_y;
- intel_miptree_get_tile_masks(mt, &mask_x, &mask_y, map_stencil_as_y_tiled);
+ intel_get_tile_masks(mt->tiling, mt->tr_mode, mt->cpp,
+ map_stencil_as_y_tiled,
+ &mask_x, &mask_y);
*tile_x = x_offset & mask_x;
*tile_y = y_offset & mask_y;
struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
struct intel_texture_image *intel_image = intel_texture_image(dst_image);
+ /* No pixel transfer operations (zoom, bias, mapping), just a blit */
+ if (brw->ctx._ImageTransferState)
+ return false;
+
/* Sync up the state of window system buffers. We need to do this before
* we go looking at the src renderbuffer's miptree.
*/
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* Copyright 2009, 2012 Intel Corporation.
* All Rights Reserved.
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include "main/glheader.h"
#include "main/mtypes.h"
#include "brw_draw.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
#include "brw_defines.h"
static void
-brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
+brw_emit_gpgpu_walker(struct brw_context *brw)
{
const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
+ const GLuint *num_groups = brw->compute.num_work_groups;
+ uint32_t indirect_flag;
+
+ if (brw->compute.num_work_groups_bo == NULL) {
+ indirect_flag = 0;
+ } else {
+ GLintptr indirect_offset = brw->compute.num_work_groups_offset;
+ drm_intel_bo *bo = brw->compute.num_work_groups_bo;
+
+ indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE;
+
+ brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ indirect_offset + 0);
+ brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ indirect_offset + 4);
+ brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ indirect_offset + 8);
+ }
+
const unsigned simd_size = prog_data->simd_size;
unsigned group_size = prog_data->local_size[0] *
prog_data->local_size[1] * prog_data->local_size[2];
uint32_t dwords = brw->gen < 8 ? 11 : 15;
BEGIN_BATCH(dwords);
- OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2));
+ OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
OUT_BATCH(0);
if (brw->gen >= 8) {
OUT_BATCH(0); /* Indirect Data Length */
static void
-brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
+brw_dispatch_compute_common(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
int estimated_buffer_space_needed;
brw->no_batch_wrap = true;
brw_upload_compute_state(brw);
- brw_emit_gpgpu_walker(brw, num_groups);
+ brw_emit_gpgpu_walker(brw);
brw->no_batch_wrap = false;
*/
}
+/**
+ * Driver hook for a direct compute dispatch.
+ *
+ * Stores the caller-supplied work-group counts in the context, clears the
+ * indirect buffer object, flags BRW_NEW_CS_WORK_GROUPS and then runs the
+ * shared dispatch path.
+ *
+ * \param ctx         GL context being dispatched on.
+ * \param num_groups  work-group counts; the pointer itself is stored in
+ *                    brw->compute.num_work_groups.
+ */
+static void
+brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
+{
+   struct brw_context *const brw = brw_context(ctx);
+
+   /* A NULL buffer object is what the walker emission checks to tell a
+    * direct dispatch from an indirect one.
+    */
+   brw->compute.num_work_groups = num_groups;
+   brw->compute.num_work_groups_bo = NULL;
+   ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
+
+   brw_dispatch_compute_common(ctx);
+}
+
+/**
+ * Driver hook for an indirect compute dispatch.
+ *
+ * Looks up the buffer object backing ctx->DispatchIndirectBuffer, records it
+ * together with the offset in the context, flags BRW_NEW_CS_WORK_GROUPS and
+ * then runs the shared dispatch path.
+ *
+ * \param ctx       GL context being dispatched on.
+ * \param indirect  offset into the indirect buffer; a range of
+ *                  3 * sizeof(GLuint) bytes starting there is requested.
+ */
+static void
+brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
+{
+   /* CPU-side stand-in for num_work_groups: all zeros, since the counts of
+    * an indirect dispatch are loaded from the buffer object instead.
+    */
+   static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
+   struct brw_context *const brw = brw_context(ctx);
+   drm_intel_bo *const counts_bo =
+      intel_bufferobj_buffer(brw,
+                             intel_buffer_object(ctx->DispatchIndirectBuffer),
+                             indirect, 3 * sizeof(GLuint));
+
+   brw->compute.num_work_groups = indirect_group_counts;
+   brw->compute.num_work_groups_offset = indirect;
+   brw->compute.num_work_groups_bo = counts_bo;
+   ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
+
+   brw_dispatch_compute_common(ctx);
+}
void
brw_init_compute_functions(struct dd_function_table *functions)
{
functions->DispatchCompute = brw_dispatch_compute;
+ functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
}
ctx->Const.StripTextureBorder = true;
+ ctx->Const.MaxUniformBlockSize = 65536;
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct gl_program_constants *prog = &ctx->Const.Program[i];
+ prog->MaxUniformBlocks = 12;
+ prog->MaxCombinedUniformComponents =
+ prog->MaxUniformComponents +
+ ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
+ }
+
ctx->Const.MaxDualSourceDrawBuffers = 1;
ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
* However, unaligned accesses are slower, so enforce buffer alignment.
*/
ctx->Const.UniformBufferOffsetAlignment = 16;
+
+ /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
+ * that we can safely have the CPU and GPU writing the same SSBO on
+ * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
+ * writes, so there's no problem. For an SSBO, the GPU and the CPU can
+ * be updating disjoint regions of the buffer simultaneously and that will
+ * break if the regions overlap the same cacheline.
+ */
+ ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
ctx->Const.TextureBufferOffsetAlignment = 16;
ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
+ /* FIXME: Tessellation stages are not yet supported in i965, so
+ * MaxCombinedShaderStorageBlocks doesn't take them into account.
+ */
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = 12;
+ ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = 12;
+ ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0;
+ ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0;
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = 12;
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = 12;
+ ctx->Const.MaxCombinedShaderStorageBlocks = 12 * 3;
+ ctx->Const.MaxShaderStorageBufferBindings = 36;
+
+ if (_mesa_extension_override_enables.ARB_compute_shader)
+ ctx->Const.MaxShaderStorageBufferBindings += 12;
+
if (brw->gen >= 6) {
ctx->Const.MaxVarying = 32;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
BRW_STATE_GS_CONSTBUF,
BRW_STATE_PROGRAM_CACHE,
BRW_STATE_STATE_BASE_ADDRESS,
- BRW_STATE_VUE_MAP_VS,
BRW_STATE_VUE_MAP_GEOM_OUT,
BRW_STATE_TRANSFORM_FEEDBACK,
BRW_STATE_RASTERIZER_DISCARD,
BRW_STATE_SAMPLER_STATE_TABLE,
BRW_STATE_VS_ATTRIB_WORKAROUNDS,
BRW_STATE_COMPUTE_PROGRAM,
+ BRW_STATE_CS_WORK_GROUPS,
BRW_NUM_STATE_BITS
};
#define BRW_NEW_GS_CONSTBUF (1ull << BRW_STATE_GS_CONSTBUF)
#define BRW_NEW_PROGRAM_CACHE (1ull << BRW_STATE_PROGRAM_CACHE)
#define BRW_NEW_STATE_BASE_ADDRESS (1ull << BRW_STATE_STATE_BASE_ADDRESS)
-#define BRW_NEW_VUE_MAP_VS (1ull << BRW_STATE_VUE_MAP_VS)
#define BRW_NEW_VUE_MAP_GEOM_OUT (1ull << BRW_STATE_VUE_MAP_GEOM_OUT)
#define BRW_NEW_TRANSFORM_FEEDBACK (1ull << BRW_STATE_TRANSFORM_FEEDBACK)
#define BRW_NEW_RASTERIZER_DISCARD (1ull << BRW_STATE_RASTERIZER_DISCARD)
#define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
#define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
#define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM)
+#define BRW_NEW_CS_WORK_GROUPS (1ull << BRW_STATE_CS_WORK_GROUPS)
struct brw_state_flags {
/** State update flags signalled by mesa internals */
GLuint dispatch_grf_start_reg_16;
unsigned local_size[3];
unsigned simd_size;
+ bool uses_barrier;
+ bool uses_num_work_groups;
+
+ struct {
+ /** @{
+ * surface indices for the CS-specific surfaces
+ */
+ uint32_t work_groups_start;
+ /** @} */
+ } binding_table;
};
/**
GLbitfield64 slots_valid;
/**
+ * Is this VUE map for a separate shader pipeline?
+ *
+ * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
+ * without the linker having a chance to dead code eliminate unused varyings.
+ *
+ * This means that we have to use a fixed slot layout, based on the output's
+ * location field, rather than assigning slots in a compact contiguous block.
+ */
+ bool separate;
+
+ /**
* Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
* not stored in a slot (because they are not written, or because
* additional processing is applied before storing them in the VUE), the
void brw_compute_vue_map(const struct brw_device_info *devinfo,
struct brw_vue_map *vue_map,
- GLbitfield64 slots_valid);
+ GLbitfield64 slots_valid,
+ bool separate_shader);
/**
12 + /* ubo */ \
BRW_MAX_ABO + \
BRW_MAX_IMAGES + \
- 2 /* shader time, pull constants */)
+ 2 + /* shader time, pull constants */ \
+ 1 /* cs num work groups */)
#define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
bool include_primitive_id;
+ /**
+ * The number of vertices emitted, if constant - otherwise -1.
+ */
+ int static_vertex_count;
+
int invocations;
/**
} draw;
struct {
+ /**
+ * For gl_NumWorkGroups: If num_work_groups_bo is non-NULL, then it is
+ * an indirect call, and num_work_groups_offset is valid. Otherwise,
+ * num_work_groups is set based on glDispatchCompute.
+ */
+ drm_intel_bo *num_work_groups_bo;
+ GLintptr num_work_groups_offset;
+ const GLuint *num_work_groups;
+ } compute;
+
+ struct {
struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
} curbe;
/**
- * Layout of vertex data exiting the vertex shader.
- *
- * BRW_NEW_VUE_MAP_VS is flagged when this VUE map changes.
- */
- struct brw_vue_map vue_map_vs;
-
- /**
* Layout of vertex data exiting the geometry portion of the pipleine.
- * This comes from the geometry shader if one exists, otherwise from the
- * vertex shader.
+ * This comes from the last enabled shader stage (GS, DS, or VS).
*
* BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
*/
int num_atoms[BRW_NUM_PIPELINES];
const struct brw_tracked_state render_atoms[60];
- const struct brw_tracked_state compute_atoms[4];
+ const struct brw_tracked_state compute_atoms[7];
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
uint32_t size,
uint32_t *out_offset,
bool dword_pitch);
+void brw_create_buffer_surface(struct brw_context *brw,
+ drm_intel_bo *bo,
+ uint32_t offset,
+ uint32_t size,
+ uint32_t *out_offset,
+ bool dword_pitch);
void brw_update_buffer_texture_surface(struct gl_context *ctx,
unsigned unit,
uint32_t *surf_offset);
uint32_t get_hw_prim_for_gl_prim(int mode);
void
-brw_setup_vue_key_clip_info(struct brw_context *brw,
- struct brw_vue_prog_key *key,
- bool program_uses_clip_distance);
-
-void
gen6_upload_push_constants(struct brw_context *brw,
const struct gl_program *prog,
const struct brw_stage_prog_data *prog_data,
--- /dev/null
+/*
+ * Copyright (c) 2014 - 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/ralloc.h"
+#include "brw_context.h"
+#include "brw_cs.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "brw_shader.h"
+#include "intel_mipmap_tree.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+
+/**
+ * Equality test for two brw_cs_prog_data structures.
+ *
+ * The embedded brw_stage_prog_data is compared with
+ * brw_stage_prog_data_compare(); the bytes that follow it are compared
+ * with memcmp().
+ *
+ * \param in_a  first structure, passed as an untyped pointer.
+ * \param in_b  second structure, passed as an untyped pointer.
+ *
+ * \return true when both comparisons report equality, false otherwise.
+ */
+bool
+brw_cs_prog_data_compare(const void *in_a, const void *in_b)
+{
+   const struct brw_cs_prog_data *lhs = in_a;
+   const struct brw_cs_prog_data *rhs = in_b;
+
+   /* The base structure has its own comparison routine. */
+   if (!brw_stage_prog_data_compare(&lhs->base, &rhs->base))
+      return false;
+
+   /* Everything past sizeof(struct brw_stage_prog_data) is compared as raw
+    * bytes.
+    */
+   const size_t base_size = sizeof(struct brw_stage_prog_data);
+   const char *lhs_tail = (const char *) lhs + base_size;
+   const char *rhs_tail = (const char *) rhs + base_size;
+
+   return memcmp(lhs_tail, rhs_tail,
+                 sizeof(struct brw_cs_prog_data) - base_size) == 0;
+}
+
+/**
+ * Compile the compute shader stage of \p prog for \p key and upload the
+ * result to the program cache.
+ *
+ * \param brw   driver context; brw_upload_cache() writes the new program
+ *              offset and prog_data pointer into brw->cs.
+ * \param prog  linked shader program; it must contain a compute stage.
+ * \param cp    compute program whose gl_program is handed to brw_cs_emit().
+ * \param key   program key the cache entry is stored under.
+ *
+ * \return true on success, false if brw_cs_emit() produced no program.
+ */
+static bool
+brw_codegen_cs_prog(struct brw_context *brw,
+                    struct gl_shader_program *prog,
+                    struct brw_compute_program *cp,
+                    struct brw_cs_prog_key *key)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const GLuint *program;
+   void *mem_ctx = ralloc_context(NULL);
+   GLuint program_size;
+   struct brw_cs_prog_data prog_data;
+   bool start_busy = false;
+   double start_time = 0;
+
+   struct brw_shader *cs =
+      (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+   assert (cs);
+
+   memset(&prog_data, 0, sizeof(prog_data));
+
+   /* Allocate the references to the uniforms that will end up in the
+    * prog_data associated with the compiled program, and which will be freed
+    * by the state cache.
+    */
+   int param_count = cs->base.num_uniform_components +
+                     cs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+
+   /* The backend also sometimes adds params for texture size. */
+   param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
+   prog_data.base.param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.pull_param =
+      rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.image_param =
+      rzalloc_array(NULL, struct brw_image_param, cs->base.NumImages);
+   prog_data.base.nr_params = param_count;
+   prog_data.base.nr_image_params = cs->base.NumImages;
+
+   if (unlikely(brw->perf_debug)) {
+      start_busy = (brw->batch.last_bo &&
+                    drm_intel_bo_busy(brw->batch.last_bo));
+      start_time = get_time();
+   }
+
+   program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
+                         &cp->program, prog, &program_size);
+   if (program == NULL) {
+      /* The uniform arrays were allocated without a ralloc parent and only
+       * become the state cache's responsibility once the program has been
+       * uploaded.  Nothing else will free them on this path, so release
+       * them here instead of leaking them.
+       */
+      ralloc_free(prog_data.base.param);
+      ralloc_free(prog_data.base.pull_param);
+      ralloc_free(prog_data.base.image_param);
+      ralloc_free(mem_ctx);
+      return false;
+   }
+
+   /* cs has already been dereferenced above, so no NULL test is needed. */
+   if (unlikely(brw->perf_debug)) {
+      if (cs->compiled_once) {
+         _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles");
+      }
+      cs->compiled_once = true;
+
+      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+         perf_debug("CS compile took %.03f ms and stalled the GPU\n",
+                    (get_time() - start_time) * 1000);
+      }
+   }
+
+   if (prog_data.base.total_scratch) {
+      brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo,
+                         prog_data.base.total_scratch * brw->max_cs_threads);
+   }
+
+   if (unlikely(INTEL_DEBUG & DEBUG_CS))
+      fprintf(stderr, "\n");
+
+   /* prog_data is copied by value into the cache entry; the arrays it points
+    * to stay allocated and are owned by the cache from here on.
+    */
+   brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
+                    key, sizeof(*key),
+                    program, program_size,
+                    &prog_data, sizeof(prog_data),
+                    &brw->cs.base.prog_offset, &brw->cs.prog_data);
+   ralloc_free(mem_ctx);
+
+   return true;
+}
+
+
+/**
+ * Fill \p key for the compute program currently bound in \p brw.
+ *
+ * The key is zeroed, then given the program's ID and the sampler key data
+ * for brw->cs.base.sampler_count samplers.
+ */
+static void
+brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key)
+{
+   /* BRW_NEW_COMPUTE_PROGRAM */
+   const struct brw_compute_program *compute_prog =
+      (struct brw_compute_program *) brw->compute_program;
+
+   /* Zero the whole key first; the cache lookup receives it as a pointer
+    * plus sizeof(key).
+    */
+   memset(key, 0, sizeof(*key));
+
+   /* The unique compute program ID */
+   key->program_string_id = compute_prog->id;
+
+   /* _NEW_TEXTURE */
+   brw_populate_sampler_prog_key_data(&brw->ctx,
+                                      (struct gl_program *) compute_prog,
+                                      brw->cs.base.sampler_count,
+                                      &key->tex);
+}
+
+
+/**
+ * Make sure a compiled compute program matching the current state is
+ * selected in \p brw.
+ *
+ * Returns early when no compute program is bound or when neither
+ * _NEW_TEXTURE nor BRW_NEW_COMPUTE_PROGRAM is dirty.  Otherwise the program
+ * key is rebuilt and looked up in the cache, compiling on a miss.
+ */
+void
+brw_upload_cs_prog(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   struct brw_compute_program *cp =
+      (struct brw_compute_program *) brw->compute_program;
+   struct brw_cs_prog_key key;
+
+   if (cp == NULL ||
+       !brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM))
+      return;
+
+   brw->cs.base.sampler_count =
+      _mesa_fls(ctx->ComputeProgram._Current->Base.SamplersUsed);
+
+   brw_cs_populate_key(brw, &key);
+
+   const bool cached =
+      brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG,
+                       &key, sizeof(key),
+                       &brw->cs.base.prog_offset, &brw->cs.prog_data);
+   if (!cached) {
+      /* Cache miss: compile now.  Failure is only caught by the assert. */
+      bool success =
+         brw_codegen_cs_prog(brw,
+                             ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE],
+                             cp, &key);
+      (void) success;
+      assert(success);
+   }
+
+   brw->cs.base.prog_data = &brw->cs.prog_data->base;
+}
+
+
+/**
+ * Compile \p prog ahead of time with a key built from its ID and
+ * brw_setup_tex_for_precompile().
+ *
+ * brw->cs.base.prog_offset and brw->cs.prog_data are saved before the
+ * compile and restored afterwards, so the context's current selection is
+ * unchanged on return.
+ *
+ * \return the result of brw_codegen_cs_prog().
+ */
+bool
+brw_cs_precompile(struct gl_context *ctx,
+                  struct gl_shader_program *shader_prog,
+                  struct gl_program *prog)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_compute_program *bcp =
+      brw_compute_program((struct gl_compute_program *) prog);
+   struct brw_cs_prog_key key;
+
+   memset(&key, 0, sizeof(key));
+   key.program_string_id = bcp->id;
+   brw_setup_tex_for_precompile(brw, &key.tex, prog);
+
+   /* brw_codegen_cs_prog() overwrites these through brw_upload_cache(). */
+   const uint32_t saved_prog_offset = brw->cs.base.prog_offset;
+   struct brw_cs_prog_data *const saved_prog_data = brw->cs.prog_data;
+
+   const bool success = brw_codegen_cs_prog(brw, shader_prog, bcp, &key);
+
+   brw->cs.base.prog_offset = saved_prog_offset;
+   brw->cs.prog_data = saved_prog_data;
+
+   return success;
+}
struct gl_shader_program *prog,
unsigned *final_assembly_size);
+unsigned
+brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
+ unsigned dispatch_width);
+
#ifdef __cplusplus
}
#endif
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
-#define _3DPRIM_LINELIST_ADJ 0x09
-#define _3DPRIM_LINESTRIP_ADJ 0x0A
-#define _3DPRIM_TRILIST_ADJ 0x0B
-#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_LINELIST_ADJ 0x09 /* G45+ */
+#define _3DPRIM_LINESTRIP_ADJ 0x0A /* G45+ */
+#define _3DPRIM_TRILIST_ADJ 0x0B /* G45+ */
+#define _3DPRIM_TRISTRIP_ADJ 0x0C /* G45+ */
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
-#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x16
#endif
/* We use this offset to be able to pass native primitive types in struct
SHADER_OPCODE_TG4_LOGICAL,
SHADER_OPCODE_TG4_OFFSET,
SHADER_OPCODE_TG4_OFFSET_LOGICAL,
+ SHADER_OPCODE_SAMPLEINFO,
/**
* Combines multiple sources of size 1 into a larger virtual GRF.
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+ FS_OPCODE_GET_BUFFER_SIZE,
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
FS_OPCODE_SET_SAMPLE_ID,
VS_OPCODE_PULL_CONSTANT_LOAD,
VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+
+ VS_OPCODE_GET_BUFFER_SIZE,
+
VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
/**
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4 8
#define GEN5_SAMPLER_MESSAGE_LOD 9
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10
+#define GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO 11
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C 16
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO 17
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
#define BRW_URB_OPCODE_WRITE_HWORD 0
#define BRW_URB_OPCODE_WRITE_OWORD 1
-#define GEN8_URB_OPCODE_SIMD8_WRITE 7
+#define BRW_URB_OPCODE_READ_HWORD 2
+#define BRW_URB_OPCODE_READ_OWORD 3
+#define GEN7_URB_OPCODE_ATOMIC_MOV 4
+#define GEN7_URB_OPCODE_ATOMIC_INC 5
+#define GEN8_URB_OPCODE_ATOMIC_ADD 6
+#define GEN8_URB_OPCODE_SIMD8_WRITE 7
+#define GEN8_URB_OPCODE_SIMD8_READ 8
#define BRW_URB_SWIZZLE_NONE 0
#define BRW_URB_SWIZZLE_INTERLEAVE 1
/* DW3: PS */
#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_HS 0x782C /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_DS 0x782D /* GEN7+ */
#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GEN7+ */
#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GEN7+ */
#define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES (5*128)
#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_HS 0x7913 /* GEN7+ */
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_DS 0x7914 /* GEN7+ */
#define _3DSTATE_PUSH_CONSTANT_ALLOC_GS 0x7915 /* GEN7+ */
#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */
# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16)
# define GEN6_GS_ENABLE (1 << 15)
+/* Gen8+ DW8 */
+# define GEN8_GS_STATIC_OUTPUT (1 << 30)
+# define GEN8_GS_STATIC_VERTEX_COUNT_SHIFT 16
+# define GEN8_GS_STATIC_VERTEX_COUNT_MASK INTEL_MASK(26, 16)
+
/* Gen8+ DW9 */
# define GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
# define GEN8_GS_URB_OUTPUT_LENGTH_SHIFT 16
#define GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES (62*16)
#define _3DSTATE_HS 0x781B /* GEN7+ */
+/* DW1 */
+# define GEN7_HS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27)
+# define GEN7_HS_SAMPLER_COUNT_SHIFT 27
+# define GEN7_HS_BINDING_TABLE_ENTRY_COUNT_MASK INTEL_MASK(25, 18)
+# define GEN7_HS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN7_HS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN7_HS_FLOATING_POINT_MODE_ALT (1 << 16)
+# define GEN7_HS_MAX_THREADS_SHIFT 0
+/* DW2 */
+# define GEN7_HS_ENABLE (1 << 31)
+# define GEN7_HS_STATISTICS_ENABLE (1 << 29)
+# define GEN8_HS_MAX_THREADS_SHIFT 8
+# define GEN7_HS_INSTANCE_COUNT_MASK INTEL_MASK(3, 0)
+# define GEN7_HS_INSTANCE_COUNT_SHIFT 0
+/* DW5 */
+# define GEN7_HS_SINGLE_PROGRAM_FLOW (1 << 27)
+# define GEN7_HS_VECTOR_MASK_ENABLE (1 << 26)
+# define HSW_HS_ACCESSES_UAV (1 << 25)
+# define GEN7_HS_INCLUDE_VERTEX_HANDLES (1 << 24)
+# define GEN7_HS_DISPATCH_START_GRF_MASK INTEL_MASK(23, 19)
+# define GEN7_HS_DISPATCH_START_GRF_SHIFT 19
+# define GEN7_HS_URB_READ_LENGTH_MASK INTEL_MASK(16, 11)
+# define GEN7_HS_URB_READ_LENGTH_SHIFT 11
+# define GEN7_HS_URB_ENTRY_READ_OFFSET_MASK INTEL_MASK(9, 4)
+# define GEN7_HS_URB_ENTRY_READ_OFFSET_SHIFT 4
+
#define _3DSTATE_TE 0x781C /* GEN7+ */
+/* DW1 */
+# define GEN7_TE_PARTITIONING_SHIFT 12
+# define GEN7_TE_OUTPUT_TOPOLOGY_SHIFT 8
+# define GEN7_TE_DOMAIN_SHIFT 4
+//# define GEN7_TE_MODE_SW (1 << 1)
+# define GEN7_TE_ENABLE (1 << 0)
+
#define _3DSTATE_DS 0x781D /* GEN7+ */
+/* DW2 */
+# define GEN7_DS_SINGLE_DOMAIN_POINT_DISPATCH (1 << 31)
+# define GEN7_DS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN7_DS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27)
+# define GEN7_DS_SAMPLER_COUNT_SHIFT 27
+# define GEN7_DS_BINDING_TABLE_ENTRY_COUNT_MASK INTEL_MASK(25, 18)
+# define GEN7_DS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN7_DS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN7_DS_FLOATING_POINT_MODE_ALT (1 << 16)
+# define HSW_DS_ACCESSES_UAV (1 << 14)
+/* DW4 */
+# define GEN7_DS_DISPATCH_START_GRF_MASK INTEL_MASK(24, 20)
+# define GEN7_DS_DISPATCH_START_GRF_SHIFT 20
+# define GEN7_DS_URB_READ_LENGTH_MASK INTEL_MASK(17, 11)
+# define GEN7_DS_URB_READ_LENGTH_SHIFT 11
+# define GEN7_DS_URB_ENTRY_READ_OFFSET_MASK INTEL_MASK(9, 4)
+# define GEN7_DS_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW5 */
+# define GEN7_DS_MAX_THREADS_SHIFT 25
+# define HSW_DS_MAX_THREADS_SHIFT 21
+# define GEN7_DS_STATISTICS_ENABLE (1 << 10)
+# define GEN7_DS_SIMD8_DISPATCH_ENABLE (1 << 3)
+# define GEN7_DS_COMPUTE_W_COORDINATE_ENABLE (1 << 2)
+# define GEN7_DS_CACHE_DISABLE (1 << 1)
+# define GEN7_DS_ENABLE (1 << 0)
+/* Gen8+ DW8 */
+# define GEN8_DS_URB_ENTRY_OUTPUT_OFFSET_MASK INTEL_MASK(26, 21)
+# define GEN8_DS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
+# define GEN8_DS_URB_OUTPUT_LENGTH_MASK INTEL_MASK(20, 16)
+# define GEN8_DS_URB_OUTPUT_LENGTH_SHIFT 16
+# define GEN8_DS_USER_CLIP_DISTANCE_MASK INTEL_MASK(15, 8)
+# define GEN8_DS_USER_CLIP_DISTANCE_SHIFT 8
+# define GEN8_DS_USER_CULL_DISTANCE_MASK INTEL_MASK(7, 0)
+# define GEN8_DS_USER_CULL_DISTANCE_SHIFT 0
+
#define _3DSTATE_CLIP 0x7812 /* GEN6+ */
/* DW1 */
BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
};
+/**
+ * Input coverage mask modes for the pixel shader.
+ *
+ * NOTE(review): presumably the value programmed at
+ * GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT in 3DSTATE_PS_EXTRA - confirm
+ * against the state upload code.
+ */
+enum brw_pixel_shader_coverage_mask_mode {
+   /* PS does not use input coverage masks. */
+   BRW_PSICMS_OFF = 0,
+
+   /* Input Coverage masks based on outer conservatism and factors in
+    * SAMPLE_MASK.  If Pixel is conservatively covered, all samples are
+    * enabled.
+    */
+   BRW_PSICMS_NORMAL = 1,
+
+   /* Input Coverage masks based on inner conservatism and factors in
+    * SAMPLE_MASK.  If Pixel is conservatively *FULLY* covered, all samples
+    * are enabled.
+    */
+   BRW_PSICMS_INNER = 2,
+
+   /* NOTE(review): spelled "PCICMS" while the other enumerators use
+    * "PSICMS".  The identifier is kept unchanged so existing users of the
+    * name keep compiling.
+    */
+   BRW_PCICMS_DEPTH = 3,
+};
+
#define _3DSTATE_PS_EXTRA 0x784F /* GEN8+ */
/* DW1 */
# define GEN8_PSX_PIXEL_SHADER_VALID (1 << 31)
# define GEN9_PSX_SHADER_PULLS_BARY (1 << 3)
# define GEN8_PSX_SHADER_HAS_UAV (1 << 2)
# define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1)
+# define GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT 0
enum brw_wm_barycentric_interp_mode {
BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC = 0,
# define MEDIA_VFE_STATE_CURBE_ALLOC_SHIFT 0
# define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0)
+#define MEDIA_CURBE_LOAD 0x7001
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
+/* GEN7 DW4, GEN8+ DW5 */
+# define MEDIA_CURBE_READ_LENGTH_SHIFT 16
+# define MEDIA_CURBE_READ_LENGTH_MASK INTEL_MASK(31, 16)
+# define MEDIA_CURBE_READ_OFFSET_SHIFT 0
+# define MEDIA_CURBE_READ_OFFSET_MASK INTEL_MASK(15, 0)
/* GEN7 DW5, GEN8+ DW6 */
+# define MEDIA_BARRIER_ENABLE_SHIFT 21
+# define MEDIA_BARRIER_ENABLE_MASK INTEL_MASK(21, 21)
# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0)
# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
#define MEDIA_STATE_FLUSH 0x7004
#define GPGPU_WALKER 0x7105
+/* GEN7 DW0 */
+# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10)
/* GEN8+ DW2 */
# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0
# define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0)
.max_wm_threads = 64 * 6, \
.max_cs_threads = 56, \
.urb = { \
- .size = 192, \
+ .size = 384, \
.min_vs_entries = 64, \
.max_vs_entries = 1856, \
.max_hs_entries = 672, \
static const struct brw_device_info brw_device_info_skl_gt1 = {
GEN9_FEATURES, .gt = 1,
+ .urb.size = 192,
};
static const struct brw_device_info brw_device_info_skl_gt2 = {
[BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE] = "mmio read/write",
};
+/* Names of the data port read message types, indexed by message type value
+ * (0-3).  The disassembler uses this table when gen == 4 and the device is
+ * not a G4x.
+ */
+static const char *const gen4_dp_read_port_msg_type[4] = {
+   [0b00] = "OWord Block Read",
+   [0b01] = "OWord Dual Block Read",
+   [0b10] = "Media Block Read",
+   [0b11] = "DWord Scattered Read",
+};
+
+/* Names of the data port read message types used for every other device
+ * that reaches this disassembly path.  The even values carry the same names
+ * as the gen4 table with the value shifted left by one bit; 0b101 and 0b111
+ * have no entry and are therefore NULL.
+ */
+static const char *const g45_dp_read_port_msg_type[8] = {
+   [0b000] = "OWord Block Read",
+   [0b001] = "Render Target UNORM Read",
+   [0b010] = "OWord Dual Block Read",
+   [0b011] = "AVC Loop Filter Read",
+   [0b100] = "Media Block Read",
+   [0b110] = "DWord Scattered Read",
+};
+
static const char *const dp_write_port_msg_type[8] = {
[0b000] = "OWord block write",
[0b001] = "OWord dual block write",
};
static const char *const gen7_urb_opcode[] = {
- [0] = "write HWord",
- [1] = "write OWord",
- [2] = "read HWord",
- [3] = "read OWord",
- [4] = "atomic mov", /* Gen7+ */
- [5] = "atomic inc", /* Gen7+ */
- [6] = "atomic add", /* Gen8+ */
- [7] = "SIMD8 write", /* Gen8+ */
- [8] = "SIMD8 read", /* Gen8+ */
+ [BRW_URB_OPCODE_WRITE_HWORD] = "write HWord",
+ [BRW_URB_OPCODE_WRITE_OWORD] = "write OWord",
+ [BRW_URB_OPCODE_READ_HWORD] = "read HWord",
+ [BRW_URB_OPCODE_READ_OWORD] = "read OWord",
+ [GEN7_URB_OPCODE_ATOMIC_MOV] = "atomic mov", /* Gen7+ */
+ [GEN7_URB_OPCODE_ATOMIC_INC] = "atomic inc", /* Gen7+ */
+ [GEN8_URB_OPCODE_ATOMIC_ADD] = "atomic add", /* Gen8+ */
+ [GEN8_URB_OPCODE_SIMD8_WRITE] = "SIMD8 write", /* Gen8+ */
+ [GEN8_URB_OPCODE_SIMD8_READ] = "SIMD8 read", /* Gen8+ */
/* [9-15] - reserved */
};
[GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4] = "gather4",
[GEN5_SAMPLER_MESSAGE_LOD] = "lod",
[GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO] = "resinfo",
+ [GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO] = "sampleinfo",
[GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C] = "gather4_c",
[GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO] = "gather4_po",
[GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c",
brw_inst_dp_msg_type(devinfo, inst),
devinfo->gen >= 7 ? 0 : brw_inst_dp_write_commit(devinfo, inst));
} else {
- format(file, " (%ld, %ld, %ld)",
- brw_inst_binding_table_index(devinfo, inst),
- brw_inst_dp_read_msg_control(devinfo, inst),
- brw_inst_dp_read_msg_type(devinfo, inst));
+ bool is_965 = devinfo->gen == 4 && !devinfo->is_g4x;
+ err |= control(file, "DP read message type",
+ is_965 ? gen4_dp_read_port_msg_type :
+ g45_dp_read_port_msg_type,
+ brw_inst_dp_read_msg_type(devinfo, inst),
+ &space);
+
+ format(file, " MsgCtrl = 0x%lx",
+ brw_inst_dp_read_msg_control(devinfo, inst));
+
+ format(file, " Surface = %ld", brw_inst_binding_table_index(devinfo, inst));
}
break;
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include <sys/errno.h>
- /**************************************************************************
- *
+/*
* Copyright 2005 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#ifndef BRW_DRAW_H
#define BRW_DRAW_H
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include "main/glheader.h"
#include "main/bufferobj.h"
if (devinfo->gen >= 9 || devinfo->is_cherryview)
uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
- for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
+ for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
if (gen8_3src_control_index_table[i] == uncompacted) {
brw_compact_inst_set_3src_control_index(dst, i);
return true;
(brw_inst_bits(src, 104, 104) << 44); /* 1b */
}
- for (int i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
+ for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
if (gen8_3src_source_index_table[i] == uncompacted) {
brw_compact_inst_set_3src_source_index(dst, i);
return true;
enum brw_reg_type type, unsigned file)
{
if (file == BRW_IMMEDIATE_VALUE) {
- const static int imm_hw_types[] = {
+ static const int imm_hw_types[] = {
[BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
[BRW_REGISTER_TYPE_D] = BRW_HW_REG_TYPE_D,
[BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
return imm_hw_types[type];
} else {
/* Non-immediate registers */
- const static int hw_types[] = {
+ static const int hw_types[] = {
[BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
[BRW_REGISTER_TYPE_D] = BRW_HW_REG_TYPE_D,
[BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
{
const struct brw_device_info *devinfo = p->devinfo;
- if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
- dest.file != BRW_MESSAGE_REGISTER_FILE)
+ if (dest.file == BRW_MESSAGE_REGISTER_FILE)
+ assert((dest.nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+ else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
assert(dest.nr < 128);
gen7_convert_mrf_to_grf(p, &dest);
reg.file == BRW_ARF_NULL)
return;
+ /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
+ *
+ * "Swizzling is not allowed when an accumulator is used as an implicit
+ * source or an explicit source in an instruction."
+ */
+ if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ reg.nr == BRW_ARF_ACCUMULATOR)
+ assert(reg.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
+
assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
hstride = hstride_for_reg[reg.hstride];
{
const struct brw_device_info *devinfo = p->devinfo;
- if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+ if (reg.file == BRW_MESSAGE_REGISTER_FILE)
+ assert((reg.nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+ else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
gen7_convert_mrf_to_grf(p, ®);
if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
+ /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
+ *
+ * "Accumulator registers may be accessed explicitly as src0
+ * operands only."
+ */
+ assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+ reg.nr != BRW_ARF_ACCUMULATOR);
+
gen7_convert_mrf_to_grf(p, ®);
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
insn = next_insn(p, BRW_OPCODE_SEND);
- assert(msg_length < BRW_MAX_MRF);
+ assert(msg_length < BRW_MAX_MRF(devinfo->gen));
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
#include "brw_eu.h"
#include "brw_wm.h"
#include "brw_fs.h"
+#include "brw_cs.h"
#include "brw_cfg.h"
#include "brw_dead_control_flow.h"
#include "main/uniforms.h"
break;
case CS_OPCODE_CS_TERMINATE:
+ case SHADER_OPCODE_BARRIER:
return 1;
default:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_LOD:
+ case SHADER_OPCODE_SAMPLEINFO:
return 1;
case FS_OPCODE_FB_WRITE:
return 2;
+ case FS_OPCODE_GET_BUFFER_SIZE:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case SHADER_OPCODE_GEN4_SCRATCH_READ:
return 1;
}
}
}
+
+ /* This may be updated in assign_urb_setup or assign_vs_urb_setup. */
+ this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
}
void
*/
struct brw_vue_map prev_stage_vue_map;
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
- key->input_slots_valid);
+ key->input_slots_valid,
+ shader_prog->SeparateShader);
int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
assert(prev_stage_vue_map.num_slots <= first_slot + 32);
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
}
/* Each attribute is 4 setup channels, each of which is half a reg. */
- this->first_non_payload_grf =
- urb_start + prog_data->num_varying_inputs * 2;
+ this->first_non_payload_grf += prog_data->num_varying_inputs * 2;
}
void
count++;
/* Each attribute is 4 regs. */
- this->first_non_payload_grf =
- payload.num_regs + prog_data->curb_read_length + count * 4;
+ this->first_non_payload_grf += count * 4;
unsigned vue_entries =
MAX2(count, vs_prog_data->base.vue_map.num_slots);
inst->src[i].file = HW_REG;
inst->src[i].fixed_hw_reg =
- retype(brw_vec8_grf(grf, 0), inst->src[i].type);
+ stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+ inst->src[i].subreg_offset),
+ inst->exec_size * inst->src[i].stride,
+ inst->exec_size, inst->src[i].stride);
}
}
}
{
int write_len = inst->regs_written;
int first_write_grf = inst->dst.reg;
- bool needs_dep[BRW_MAX_MRF];
+ bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
assert(write_len < (int)sizeof(needs_dep) - 1);
memset(needs_dep, false, sizeof(needs_dep));
{
int write_len = inst->regs_written;
int first_write_grf = inst->dst.reg;
- bool needs_dep[BRW_MAX_MRF];
+ bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
assert(write_len < (int)sizeof(needs_dep) - 1);
memset(needs_dep, false, sizeof(needs_dep));
* schedule multi-component multiplications much better.
*/
- if (inst->conditional_mod && inst->dst.is_null()) {
+ fs_reg orig_dst = inst->dst;
+ if (orig_dst.is_null() || orig_dst.file == MRF) {
inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
}
ibld.ADD(dst, low, high);
- if (inst->conditional_mod) {
- fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
+ if (inst->conditional_mod || orig_dst.file == MRF) {
set_condmod(inst->conditional_mod,
- ibld.MOV(null, inst->dst));
+ ibld.MOV(orig_dst, inst->dst));
}
}
assert(devinfo->gen >= 7);
payload.num_regs = 1;
+
+ if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+ const unsigned local_id_dwords =
+ brw_cs_prog_local_id_payload_dwords(prog, dispatch_width);
+ assert((local_id_dwords & 0x7) == 0);
+ const unsigned local_id_regs = local_id_dwords / 8;
+ payload.local_invocation_id_reg = payload.num_regs;
+ payload.num_regs += local_id_regs;
+ }
}
void
-fs_visitor::assign_binding_table_offsets()
+fs_visitor::assign_fs_binding_table_offsets()
{
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
}
void
+fs_visitor::assign_cs_binding_table_offsets()
+{
+ assert(stage == MESA_SHADER_COMPUTE);
+ brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
+ uint32_t next_binding_table_offset = 0;
+
+ /* Reserve the first binding table entry (offset 0) for the
+ * gl_NumWorkGroups surface; the common entries are assigned starting
+ * at the next offset.
+ *
+ * May not be used if the gl_NumWorkGroups variable is not accessed.
+ */
+ prog_data->binding_table.work_groups_start = next_binding_table_offset;
+ next_binding_table_offset++;
+
+ assign_common_binding_table_offsets(next_binding_table_offset);
+}
+
+void
fs_visitor::calculate_register_pressure()
{
invalidate_live_intervals();
void
fs_visitor::optimize()
{
+ /* Start by validating the shader we currently have. */
+ validate();
+
/* bld is the common builder object pointing at the end of the program we
* used to translate it into i965 IR. For the optimization and lowering
* passes coming next, any code added after the end of the program without
assign_constant_locations();
demote_pull_constants();
+ validate();
+
split_virtual_grfs();
+ validate();
#define OPT(pass, args...) ({ \
pass_num++; \
backend_shader::dump_instructions(filename); \
} \
\
+ validate(); \
+ \
progress = progress || this_progress; \
this_progress; \
})
OPT(lower_integer_multiplication);
lower_uniform_pull_constant_loads();
+
+ validate();
}
/**
assert(stage == MESA_SHADER_FRAGMENT);
+ sanity_param_count = prog->Parameters->NumParameters;
+
if (prog_data->map_entries == NULL)
- assign_binding_table_offsets();
+ assign_fs_binding_table_offsets();
if (devinfo->gen >= 6)
setup_payload_gen6();
sanity_param_count = prog->Parameters->NumParameters;
- assign_common_binding_table_offsets(0);
+ assign_cs_binding_table_offsets();
setup_cs_payload();
struct gl_shader_program *prog,
unsigned *final_assembly_size)
{
- bool start_busy = false;
- double start_time = 0;
-
- if (unlikely(brw->perf_debug)) {
- start_busy = (brw->batch.last_bo &&
- drm_intel_bo_busy(brw->batch.last_bo));
- start_time = get_time();
- }
-
struct brw_shader *shader = NULL;
if (prog)
shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
if (simd16_cfg)
prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16);
- if (unlikely(brw->perf_debug) && shader) {
- if (shader->compiled_once)
- brw_wm_debug_recompile(brw, prog, key);
- shader->compiled_once = true;
-
- if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
- perf_debug("FS compile took %.03f ms and stalled the GPU\n",
- (get_time() - start_time) * 1000);
- }
- }
-
return g.get_assembly(final_assembly_size);
}
-extern "C" bool
-brw_fs_precompile(struct gl_context *ctx,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog)
+fs_reg *
+fs_visitor::emit_cs_local_invocation_id_setup()
{
- struct brw_context *brw = brw_context(ctx);
- struct brw_wm_prog_key key;
-
- struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
- struct brw_fragment_program *bfp = brw_fragment_program(fp);
- bool program_uses_dfdy = fp->UsesDFdy;
-
- memset(&key, 0, sizeof(key));
+ assert(stage == MESA_SHADER_COMPUTE);
- if (brw->gen < 6) {
- if (fp->UsesKill)
- key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+ fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
- if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
- key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+ struct brw_reg src =
+ brw_vec8_grf(payload.local_invocation_id_reg, 0);
+ src = retype(src, BRW_REGISTER_TYPE_UD);
+ bld.MOV(*reg, src);
+ src.nr += dispatch_width / 8;
+ bld.MOV(offset(*reg, bld, 1), src);
+ src.nr += dispatch_width / 8;
+ bld.MOV(offset(*reg, bld, 2), src);
- /* Just assume depth testing. */
- key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
- key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
- }
+ return reg;
+}
- if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
- BRW_FS_VARYING_INPUT_MASK) > 16)
- key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+fs_reg *
+fs_visitor::emit_cs_work_group_id_setup()
+{
+ assert(stage == MESA_SHADER_COMPUTE);
- brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
+ fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
- if (fp->Base.InputsRead & VARYING_BIT_POS) {
- key.drawable_height = ctx->DrawBuffer->Height;
- }
+ struct brw_reg r0_1(retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD));
+ struct brw_reg r0_6(retype(brw_vec1_grf(0, 6), BRW_REGISTER_TYPE_UD));
+ struct brw_reg r0_7(retype(brw_vec1_grf(0, 7), BRW_REGISTER_TYPE_UD));
- key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
- ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
- BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
+ bld.MOV(*reg, r0_1);
+ bld.MOV(offset(*reg, bld, 1), r0_6);
+ bld.MOV(offset(*reg, bld, 2), r0_7);
- if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
- key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
- key.nr_color_regions > 1;
- }
+ return reg;
+}
- key.program_string_id = bfp->id;
+const unsigned *
+brw_cs_emit(struct brw_context *brw,
+ void *mem_ctx,
+ const struct brw_cs_prog_key *key,
+ struct brw_cs_prog_data *prog_data,
+ struct gl_compute_program *cp,
+ struct gl_shader_program *prog,
+ unsigned *final_assembly_size)
+{
+ struct brw_shader *shader =
+ (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
- uint32_t old_prog_offset = brw->wm.base.prog_offset;
- struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+ if (unlikely(INTEL_DEBUG & DEBUG_CS))
+ brw_dump_ir("compute", prog, &shader->base, &cp->Base);
- bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
+ prog_data->local_size[0] = cp->LocalSize[0];
+ prog_data->local_size[1] = cp->LocalSize[1];
+ prog_data->local_size[2] = cp->LocalSize[2];
+ unsigned local_workgroup_size =
+ cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
- brw->wm.base.prog_offset = old_prog_offset;
- brw->wm.prog_data = old_prog_data;
+ cfg_t *cfg = NULL;
+ const char *fail_msg = NULL;
- return success;
-}
+ int st_index = -1;
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
-void
-brw_setup_tex_for_precompile(struct brw_context *brw,
- struct brw_sampler_prog_key_data *tex,
- struct gl_program *prog)
-{
- const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
- unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
- for (unsigned i = 0; i < sampler_count; i++) {
- if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
- /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
- tex->swizzles[i] =
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+ /* Now the main event: Visit the shader IR and generate our CS IR for it.
+ */
+ fs_visitor v8(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+ &cp->Base, 8, st_index);
+ if (!v8.run_cs()) {
+ fail_msg = v8.fail_msg;
+ } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
+ cfg = v8.cfg;
+ prog_data->simd_size = 8;
+ }
+
+ fs_visitor v16(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+ &cp->Base, 16, st_index);
+ if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
+ !fail_msg && !v8.simd16_unsupported &&
+ local_workgroup_size <= 16 * brw->max_cs_threads) {
+ /* Try a SIMD16 compile */
+ v16.import_uniforms(&v8);
+ if (!v16.run_cs()) {
+ perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
+ if (!cfg) {
+ fail_msg =
+ "Couldn't generate SIMD16 program and not "
+ "enough threads for SIMD8";
+ }
} else {
- /* Color sampler: assume no swizzling. */
- tex->swizzles[i] = SWIZZLE_XYZW;
+ cfg = v16.cfg;
+ prog_data->simd_size = 16;
}
}
+
+ if (unlikely(cfg == NULL)) {
+ assert(fail_msg);
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, fail_msg);
+ _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
+ fail_msg);
+ return NULL;
+ }
+
+ fs_generator g(brw->intelScreen->compiler, brw,
+ mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+ v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
+ if (INTEL_DEBUG & DEBUG_CS) {
+ char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
+ prog->Label ? prog->Label : "unnamed",
+ prog->Name);
+ g.enable_debug(name);
+ }
+
+ g.generate_code(cfg, prog_data->simd_size);
+
+ return g.get_assembly(final_assembly_size);
}
bool run_cs();
void optimize();
void allocate_registers();
- void assign_binding_table_offsets();
+ void assign_fs_binding_table_offsets();
+ void assign_cs_binding_table_offsets();
void setup_payload_gen4();
void setup_payload_gen6();
void setup_vs_payload();
void invalidate_live_intervals();
void calculate_live_intervals();
void calculate_register_pressure();
+ void validate();
bool opt_algebraic();
bool opt_redundant_discard_jumps();
bool opt_cse();
nir_ssa_undef_instr *instr);
void nir_emit_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
+ void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
+ int op, nir_intrinsic_instr *instr);
void nir_emit_texture(const brw::fs_builder &bld,
nir_tex_instr *instr);
void nir_emit_jump(const brw::fs_builder &bld,
void emit_fb_writes();
void emit_urb_writes();
void emit_cs_terminate();
+ fs_reg *emit_cs_local_invocation_id_setup();
+ fs_reg *emit_cs_work_group_id_setup();
void emit_barrier();
uint8_t sample_pos_reg;
uint8_t sample_mask_in_reg;
uint8_t barycentric_coord_reg[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
+ uint8_t local_invocation_id_reg;
/** The number of thread payload registers the hardware will supply. */
uint8_t num_regs;
struct brw_reg *src);
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
struct brw_reg sampler_index);
+ void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg surf_index);
void generate_math_gen6(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
bool brw_do_channel_expressions(struct exec_list *instructions);
bool brw_do_vector_splitting(struct exec_list *instructions);
-void brw_setup_tex_for_precompile(struct brw_context *brw,
- struct brw_sampler_prog_key_data *tex,
- struct gl_program *prog);
}
case ir_binop_ubo_load:
+ case ir_unop_get_buffer_size:
unreachable("not yet supported");
case ir_triop_fma:
case ir_triop_vector_insert:
case ir_quadop_bitfield_insert:
case ir_quadop_vector:
+ case ir_unop_ssbo_unsized_array_length:
unreachable("should have been lowered");
case ir_unop_unpack_half_2x16_split_x:
can_change_source_types(fs_inst *inst)
{
return !inst->src[0].abs && !inst->src[0].negate &&
+ inst->dst.type == inst->src[0].type &&
(inst->opcode == BRW_OPCODE_MOV ||
(inst->opcode == BRW_OPCODE_SEL &&
inst->predicate != BRW_PREDICATE_NONE &&
}
static struct brw_reg
-brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg)
+brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
{
struct brw_reg brw_reg;
switch (reg->file) {
- case GRF:
case MRF:
+ assert((reg->reg & ~(1 << 7)) < BRW_MAX_MRF(gen));
+ /* Fallthrough */
+ case GRF:
if (reg->stride == 0) {
brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, 0);
} else if (inst->exec_size < 8) {
brw_fb_WRITE(p,
16 /* dispatch_width */,
brw_message_reg(inst->base_mrf),
- brw_reg_from_fs_reg(inst, &inst->src[0]),
+ brw_reg_from_fs_reg(inst, &inst->src[0], devinfo->gen),
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
inst->target,
inst->mlen,
}
void
+fs_generator::generate_get_buffer_size(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg surf_index)
+{
+ assert(devinfo->gen >= 7);
+ assert(surf_index.file == BRW_IMMEDIATE_VALUE); /* only an immediate surface index is handled; its dw1.ud value is used below */
+
+ uint32_t simd_mode;
+ int rlen = 4; /* response length for SIMD8; raised to 8 for SIMD16 below */
+
+ switch (inst->exec_size) {
+ case 8:
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+ break;
+ case 16:
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ break;
+ default:
+ unreachable("Invalid width for texture instruction");
+ }
+
+ if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
+ rlen = 8;
+ dst = vec16(dst);
+ }
+
+ brw_SAMPLE(p,
+ retype(dst, BRW_REGISTER_TYPE_UW),
+ inst->base_mrf,
+ src,
+ surf_index.dw1.ud,
+ 0,
+ GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
+ rlen, /* response length: 4 registers in SIMD8 mode, 8 in SIMD16 mode */
+ inst->mlen,
+ inst->header_size > 0,
+ simd_mode,
+ BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+ brw_mark_surface_used(prog_data, surf_index.dw1.ud);
+}
+
+void
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
struct brw_reg sampler_index)
{
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
}
break;
+ case SHADER_OPCODE_SAMPLEINFO:
+ msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
+ break;
default:
unreachable("not reached");
}
annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
for (unsigned int i = 0; i < inst->sources; i++) {
- src[i] = brw_reg_from_fs_reg(inst, &inst->src[i]);
+ src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen);
/* The accumulator result appears to get used for the
* conditional modifier generation. When negating a UD
inst->src[i].type != BRW_REGISTER_TYPE_UD ||
!inst->src[i].negate);
}
- dst = brw_reg_from_fs_reg(inst, &inst->dst);
+ dst = brw_reg_from_fs_reg(inst, &inst->dst, devinfo->gen);
brw_set_default_predicate_control(p, inst->predicate);
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
brw_set_default_acc_write_control(p, inst->writes_accumulator);
brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
+ assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
+ assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
+
switch (inst->exec_size) {
case 1:
case 2:
src[0].subnr = 4 * type_sz(src[0].type);
brw_MOV(p, dst, stride(src[0], 8, 4, 1));
break;
+ case FS_OPCODE_GET_BUFFER_SIZE:
+ generate_get_buffer_size(inst, dst, src[0], src[1]);
+ break;
case SHADER_OPCODE_TEX:
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_LOD:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
+ case SHADER_OPCODE_SAMPLEINFO:
generate_tex(inst, dst, src[0], src[1]);
break;
case FS_OPCODE_DDX_COARSE:
#include "brw_fs.h"
#include "brw_fs_surface_builder.h"
#include "brw_nir.h"
+#include "brw_fs_surface_builder.h"
using namespace brw;
+using namespace brw::surface_access;
void
fs_visitor::emit_nir_code()
BRW_REGISTER_TYPE_D));
break;
+ case nir_intrinsic_load_local_invocation_id:
+ assert(v->stage == MESA_SHADER_COMPUTE);
+ reg = &v->nir_system_values[SYSTEM_VALUE_LOCAL_INVOCATION_ID];
+ if (reg->file == BAD_FILE)
+ *reg = *v->emit_cs_local_invocation_id_setup();
+ break;
+
+ case nir_intrinsic_load_work_group_id:
+ assert(v->stage == MESA_SHADER_COMPUTE);
+ reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];
+ if (reg->file == BAD_FILE)
+ *reg = *v->emit_cs_work_group_id_setup();
+ break;
+
default:
break;
}
break;
}
+ case nir_intrinsic_image_samples:
+ /* The driver does not support multi-sampled images. */
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
+ break;
+
case nir_intrinsic_load_front_face:
bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
*emit_frontfacing_interpolation());
case nir_intrinsic_load_vertex_id:
unreachable("should be lowered by lower_vertex_id()");
- case nir_intrinsic_load_vertex_id_zero_base: {
- fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
- assert(vertex_id.file != BAD_FILE);
- dest.type = vertex_id.type;
- bld.MOV(dest, vertex_id);
- break;
- }
-
- case nir_intrinsic_load_base_vertex: {
- fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
- assert(base_vertex.file != BAD_FILE);
- dest.type = base_vertex.type;
- bld.MOV(dest, base_vertex);
- break;
- }
-
- case nir_intrinsic_load_instance_id: {
- fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
- assert(instance_id.file != BAD_FILE);
- dest.type = instance_id.type;
- bld.MOV(dest, instance_id);
- break;
- }
-
- case nir_intrinsic_load_sample_mask_in: {
- fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
- assert(sample_mask_in.file != BAD_FILE);
- dest.type = sample_mask_in.type;
- bld.MOV(dest, sample_mask_in);
+ case nir_intrinsic_load_vertex_id_zero_base:
+ case nir_intrinsic_load_base_vertex:
+ case nir_intrinsic_load_instance_id:
+ case nir_intrinsic_load_sample_mask_in:
+ case nir_intrinsic_load_sample_id: {
+ gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+ fs_reg val = nir_system_values[sv];
+ assert(val.file != BAD_FILE);
+ dest.type = val.type;
+ bld.MOV(dest, val);
break;
}
break;
}
- case nir_intrinsic_load_sample_id: {
- fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
- assert(sample_id.file != BAD_FILE);
- dest.type = sample_id.type;
- bld.MOV(dest, sample_id);
- break;
- }
-
case nir_intrinsic_load_uniform_indirect:
has_indirect = true;
/* fallthrough */
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ubo_start +
- shader_prog->NumUniformBlocks - 1);
+ shader_prog->NumBufferInterfaceBlocks - 1);
}
if (has_indirect) {
break;
}
+ case nir_intrinsic_load_ssbo_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_load_ssbo: {
+ assert(devinfo->gen >= 7);
+
+ /* Block index: src[0] selects the buffer; it is added to
+ * binding_table.ubo_start to form the surface index.
+ */
+ nir_const_value *const_uniform_block =
+ nir_src_as_const_value(instr->src[0]);
+
+ fs_reg surf_index;
+ if (const_uniform_block) {
+ unsigned index = stage_prog_data->binding_table.ubo_start +
+ const_uniform_block->u[0];
+ surf_index = fs_reg(index);
+ brw_mark_surface_used(prog_data, index);
+ } else {
+ surf_index = vgrf(glsl_type::uint_type);
+ bld.ADD(surf_index, get_nir_src(instr->src[0]),
+ fs_reg(stage_prog_data->binding_table.ubo_start));
+ surf_index = bld.emit_uniformize(surf_index);
+
+ /* Assume this may touch any UBO. It would be nice to provide
+ * a tighter bound, but the array information is already lowered away.
+ */
+ brw_mark_surface_used(prog_data,
+ stage_prog_data->binding_table.ubo_start +
+ shader_prog->NumBufferInterfaceBlocks - 1);
+ }
+
+ /* Get the offset to read from: src[1] for the indirect variant,
+ * const_index[0] otherwise.
+ */
+ fs_reg offset_reg = vgrf(glsl_type::uint_type);
+ unsigned const_offset_bytes = 0;
+ if (has_indirect) {
+ bld.MOV(offset_reg, get_nir_src(instr->src[1]));
+ } else {
+ const_offset_bytes = instr->const_index[0];
+ bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+ }
+
+ /* Read the vector, one untyped read per component */
+ for (int i = 0; i < instr->num_components; i++) {
+ fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+ 1 /* dims */, 1 /* size */,
+ BRW_PREDICATE_NONE);
+ read_result.type = dest.type;
+ bld.MOV(dest, read_result);
+ dest = offset(dest, bld, 1);
+
+ /* Vector components are stored contiguous in memory. Only advance
+ * the offset when another component remains to be read; offset_reg
+ * is not used again after the last component.
+ */
+ if (i + 1 < instr->num_components) {
+ if (!has_indirect) {
+ const_offset_bytes += 4;
+ bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+ } else {
+ bld.ADD(offset_reg, offset_reg, brw_imm_ud(4));
+ }
+ }
+ }
+
+ break;
+ }
+
case nir_intrinsic_load_input_indirect:
has_indirect = true;
/* fallthrough */
break;
}
+ case nir_intrinsic_store_ssbo_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_store_ssbo: {
+ assert(devinfo->gen >= 7);
+
+ /* Block index: src[1] selects the buffer; it is added to
+ * binding_table.ubo_start to form the surface index.
+ */
+ fs_reg surf_index;
+ nir_const_value *const_uniform_block =
+ nir_src_as_const_value(instr->src[1]);
+ if (const_uniform_block) {
+ unsigned index = stage_prog_data->binding_table.ubo_start +
+ const_uniform_block->u[0];
+ surf_index = fs_reg(index);
+ brw_mark_surface_used(prog_data, index);
+ } else {
+ surf_index = vgrf(glsl_type::uint_type);
+ bld.ADD(surf_index, get_nir_src(instr->src[1]),
+ fs_reg(stage_prog_data->binding_table.ubo_start));
+ surf_index = bld.emit_uniformize(surf_index);
+
+ brw_mark_surface_used(prog_data,
+ stage_prog_data->binding_table.ubo_start +
+ shader_prog->NumBufferInterfaceBlocks - 1);
+ }
+
+ /* Offset: src[2] for the indirect variant, const_index[0] otherwise */
+ fs_reg offset_reg = vgrf(glsl_type::uint_type);
+ unsigned const_offset_bytes = 0;
+ if (has_indirect) {
+ bld.MOV(offset_reg, get_nir_src(instr->src[2]));
+ } else {
+ const_offset_bytes = instr->const_index[0];
+ bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+ }
+
+ /* Value to store, taken from src[0] */
+ fs_reg val_reg = get_nir_src(instr->src[0]);
+
+ /* Writemask, from const_index[1]; bit i enables component i */
+ unsigned writemask = instr->const_index[1];
+
+ /* Write each component present in the writemask.
+ *
+ * skipped_channels counts the components stepped over since offset_reg
+ * was last advanced (it is incremented on every iteration, including
+ * ones that write). Before each write the offset is moved forward by
+ * 4 bytes per counted component and the counter is reset.
+ */
+ unsigned skipped_channels = 0;
+ for (int i = 0; i < instr->num_components; i++) {
+ int component_mask = 1 << i;
+ if (writemask & component_mask) {
+ if (skipped_channels) {
+ if (!has_indirect) {
+ const_offset_bytes += 4 * skipped_channels;
+ bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+ } else {
+ bld.ADD(offset_reg, offset_reg,
+ brw_imm_ud(4 * skipped_channels));
+ }
+ skipped_channels = 0;
+ }
+
+ emit_untyped_write(bld, surf_index, offset_reg,
+ offset(val_reg, bld, i),
+ 1 /* dims */, 1 /* size */,
+ BRW_PREDICATE_NONE);
+ }
+
+ skipped_channels++;
+ }
+ break;
+ }
+
case nir_intrinsic_store_output_indirect:
has_indirect = true;
/* fallthrough */
case nir_intrinsic_barrier:
emit_barrier();
+ if (stage == MESA_SHADER_COMPUTE)
+ ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
+ break;
+
+ case nir_intrinsic_load_local_invocation_id:
+ case nir_intrinsic_load_work_group_id: {
+ gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+ fs_reg val = nir_system_values[sv];
+ assert(val.file != BAD_FILE);
+ dest.type = val.type;
+ for (unsigned i = 0; i < 3; i++)
+ bld.MOV(offset(dest, bld, i), offset(val, bld, i));
+ break;
+ }
+
+ case nir_intrinsic_ssbo_atomic_add:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_min:
+ if (dest.type == BRW_REGISTER_TYPE_D)
+ nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
+ else
+ nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_max:
+ if (dest.type == BRW_REGISTER_TYPE_D)
+ nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
+ else
+ nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_and:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_or:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_OR, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_xor:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_XOR, instr);
break;
+ case nir_intrinsic_ssbo_atomic_exchange:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_MOV, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
+ break;
+
+ case nir_intrinsic_get_buffer_size: {
+ /* src[0] is the block index; a non-constant index falls back to 0. */
+ nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+ unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+ int reg_width = dispatch_width / 8;
+
+ assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+ /* Set LOD = 0 */
+ fs_reg source = fs_reg(0);
+
+ /* One payload register per 8 channels, holding the LOD. */
+ int mlen = 1 * reg_width;
+ fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
+ BRW_REGISTER_TYPE_UD);
+ bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
+
+ /* bld.emit(opcode, ...) already inserts the instruction into the
+ * program and returns it, so only its fields need to be filled in
+ * afterwards. It must not be passed to bld.emit() a second time.
+ */
+ fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + ubo_index);
+ fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
+ src_payload, surf_index);
+ inst->header_size = 0;
+ inst->mlen = mlen;
+ break;
+ }
+
+ case nir_intrinsic_load_num_work_groups: {
+ assert(devinfo->gen >= 7);
+ assert(stage == MESA_SHADER_COMPUTE);
+
+ struct brw_cs_prog_data *cs_prog_data =
+ (struct brw_cs_prog_data *) prog_data;
+ const unsigned surface =
+ cs_prog_data->binding_table.work_groups_start;
+
+ cs_prog_data->uses_num_work_groups = true;
+
+ fs_reg surf_index = fs_reg(surface);
+ brw_mark_surface_used(prog_data, surface);
+
+ /* Read the 3 GLuint components of gl_NumWorkGroups, one untyped read
+ * per component at byte offsets 0, 4 and 8 (i << 2), storing each into
+ * consecutive components of dest.
+ */
+ for (unsigned i = 0; i < 3; i++) {
+ fs_reg read_result =
+ emit_untyped_read(bld, surf_index,
+ fs_reg(i << 2),
+ 1 /* dims */, 1 /* size */,
+ BRW_PREDICATE_NONE);
+ read_result.type = dest.type;
+ bld.MOV(dest, read_result);
+ dest = offset(dest, bld, 1);
+ }
+ break;
+ }
default:
unreachable("unknown intrinsic");
}
void
+fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
+ int op, nir_intrinsic_instr *instr)
+{
+ fs_reg dest;
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ dest = get_nir_dest(instr->dest);
+
+ fs_reg surface;
+ nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+ if (const_surface) {
+ unsigned surf_index = stage_prog_data->binding_table.ubo_start +
+ const_surface->u[0];
+ surface = fs_reg(surf_index);
+ brw_mark_surface_used(prog_data, surf_index);
+ } else {
+ surface = vgrf(glsl_type::uint_type);
+ bld.ADD(surface, get_nir_src(instr->src[0]),
+ fs_reg(stage_prog_data->binding_table.ubo_start));
+
+ /* Assume this may touch any UBO. This is the same as what we do for
+ * other UBO/SSBO accesses with a non-constant surface.
+ */
+ brw_mark_surface_used(prog_data,
+ stage_prog_data->binding_table.ubo_start +
+ shader_prog->NumBufferInterfaceBlocks - 1);
+ }
+
+ fs_reg offset = get_nir_src(instr->src[1]);
+ fs_reg data1 = get_nir_src(instr->src[2]);
+ fs_reg data2;
+ if (op == BRW_AOP_CMPWR)
+ data2 = get_nir_src(instr->src[3]);
+
+ /* Emit the actual atomic operation. data2 is only populated for
+ * BRW_AOP_CMPWR, which reads a second data operand from src[3]; for
+ * every other op it is passed default-constructed.
+ *
+ * NOTE(review): dest stays default-constructed when the intrinsic has
+ * no destination, yet the MOV below still writes to it -- confirm that
+ * this is intended.
+ */
+
+ fs_reg atomic_result =
+ surface_access::emit_untyped_atomic(bld, surface, offset,
+ data1, data2,
+ 1 /* dims */, 1 /* rsize */,
+ op,
+ BRW_PREDICATE_NONE);
+ dest.type = atomic_result.type;
+ bld.MOV(dest, atomic_result);
+}
+
+void
fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
{
uint32_t set = instr->sampler_set;
case nir_texop_txf_ms: op = ir_txf_ms; break;
case nir_texop_txl: op = ir_txl; break;
case nir_texop_txs: op = ir_txs; break;
+ case nir_texop_texture_samples: {
+ fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
+ fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, dst,
+ bld.vgrf(BRW_REGISTER_TYPE_D, 1),
+ sampler_reg);
+ inst->mlen = 1;
+ inst->header_size = 1;
+ inst->base_mrf = -1;
+ return;
+ }
default:
unreachable("unknown texture opcode");
}
#include "glsl/glsl_types.h"
#include "glsl/ir_optimization.h"
+#define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
+
using namespace brw;
static void
{
int reg_width = v->dispatch_width / 8;
- memset(mrf_used, 0, BRW_MAX_MRF * sizeof(bool));
+ memset(mrf_used, 0, BRW_MAX_MRF(v->devinfo->gen) * sizeof(bool));
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
if (inst->dst.file == MRF) {
setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g,
int first_mrf_node, int *first_used_mrf)
{
- bool mrf_used[BRW_MAX_MRF];
+ bool mrf_used[BRW_MAX_MRF(v->devinfo->gen)];
get_used_mrfs(v, mrf_used);
- *first_used_mrf = BRW_MAX_MRF;
- for (int i = 0; i < BRW_MAX_MRF; i++) {
+ *first_used_mrf = BRW_MAX_MRF(v->devinfo->gen);
+ for (int i = 0; i < BRW_MAX_MRF(v->devinfo->gen); i++) {
/* Mark each MRF reg node as being allocated to its physical register.
*
* The alternative would be to have per-physical-register classes, which
setup_payload_interference(g, payload_node_count, first_payload_node);
if (devinfo->gen >= 7) {
- int first_used_mrf = BRW_MAX_MRF;
+ int first_used_mrf = BRW_MAX_MRF(devinfo->gen);
setup_mrf_hack_interference(this, g, first_mrf_hack_node,
&first_used_mrf);
* register early enough in the register file that we don't
* conflict with any used MRF hack registers.
*/
- reg -= BRW_MAX_MRF - first_used_mrf;
+ reg -= BRW_MAX_MRF(devinfo->gen) - first_used_mrf;
ra_set_node_reg(g, inst->src[0].reg, reg);
break;
}
/* Debug of register spilling: Go spill everything. */
- if (unlikely(INTEL_DEBUG & DEBUG_SPILL)) {
+ if (unlikely(INTEL_DEBUG & DEBUG_SPILL_FS)) {
int reg = choose_spill_reg(g);
if (reg != -1) {
unspill_inst->regs_written = reg_size;
if (!gen7_read) {
- unspill_inst->base_mrf = 14;
+ unspill_inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
unspill_inst->mlen = 1; /* header contains offset */
}
uint32_t spill_offset, int count)
{
int reg_size = 1;
- int spill_base_mrf = 14;
+ int spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
if (dispatch_width == 16 && count % 2 == 0) {
- spill_base_mrf = 13;
+ spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen);
reg_size = 2;
}
int size = alloc.sizes[spill_reg];
unsigned int spill_offset = last_scratch;
assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
- int spill_base_mrf = dispatch_width > 8 ? 13 : 14;
+ int spill_base_mrf = dispatch_width > 8 ? FIRST_SPILL_MRF(devinfo->gen) :
+ FIRST_SPILL_MRF(devinfo->gen) + 1;
/* Spills may use MRFs 13-15 in the SIMD16 case. Our texturing is done
* using up to 11 MRFs starting from either m1 or m2, and fb writes can use
* SIMD16 mode, because we'd stomp the FB writes.
*/
if (!spilled_any_registers) {
- bool mrf_used[BRW_MAX_MRF];
+ bool mrf_used[BRW_MAX_MRF(devinfo->gen)];
get_used_mrfs(this, mrf_used);
- for (int i = spill_base_mrf; i < BRW_MAX_MRF; i++) {
+ for (int i = spill_base_mrf; i < BRW_MAX_MRF(devinfo->gen); i++) {
if (mrf_used[i]) {
fail("Register spilling not supported with m%d used", i);
return;
namespace image_validity {
/**
+ * Check whether the bound image is suitable for untyped access.
+ */
+ brw_predicate
+ emit_untyped_image_check(const fs_builder &bld, const fs_reg &image,
+ brw_predicate pred)
+ {
+ const brw_device_info *devinfo = bld.shader->devinfo;
+ const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET);
+
+ if (devinfo->gen == 7 && !devinfo->is_haswell) {
+ /* Check whether the first stride component (i.e. the Bpp value)
+ * is greater than four, which on Gen7 indicates that a surface of
+ * type RAW has been bound for untyped access. Reading or writing
+ * to a surface of type other than RAW using untyped surface
+ * messages causes a hang on IVB and VLV.
+ */
+ set_predicate(pred,
+ bld.CMP(bld.null_reg_ud(), stride, fs_reg(4),
+ BRW_CONDITIONAL_G));
+
+ return BRW_PREDICATE_NORMAL;
+ } else {
+ /* More recent generations handle the format mismatch
+ * gracefully.
+ */
+ return pred;
+ }
+ }
+
+ /**
* Check whether there is an image bound at the given index and write
* the comparison result to f0.0. Returns an appropriate predication
* mode to use on subsequent image operations.
*/
brw_predicate
- emit_surface_check(const fs_builder &bld, const fs_reg &image)
+ emit_typed_atomic_check(const fs_builder &bld, const fs_reg &image)
{
const brw_device_info *devinfo = bld.shader->devinfo;
const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
* surface read on the result,
*/
const brw_predicate pred =
- emit_bounds_check(bld, image, saddr, dims);
+ emit_untyped_image_check(bld, image,
+ emit_bounds_check(bld, image,
+ saddr, dims));
/* and they don't know about surface coordinates, we need to
* convert them to a raw memory offset.
tmp = emit_untyped_read(bld, image, laddr, 1, size, pred);
/* An out of bounds surface access should give zero as result. */
- for (unsigned c = 0; c < 4; ++c)
+ for (unsigned c = 0; c < size; ++c)
set_predicate(pred, bld.SEL(offset(tmp, bld, c),
offset(tmp, bld, c), fs_reg(0)));
}
* the surface write on the result,
*/
const brw_predicate pred =
- emit_bounds_check(bld, image, saddr, dims);
+ emit_untyped_image_check(bld, image,
+ emit_bounds_check(bld, image,
+ saddr, dims));
/* and, phew, they don't know about surface coordinates, we
* need to convert them to a raw memory offset.
using namespace image_coordinates;
using namespace surface_access;
/* Avoid performing an atomic operation on an unbound surface. */
- const brw_predicate pred = emit_surface_check(bld, image);
+ const brw_predicate pred = emit_typed_atomic_check(bld, image);
/* Transform the image coordinates into actual surface coordinates. */
const fs_reg saddr =
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_fs_validate.cpp
+ *
+ * Implements a pass that validates various invariants of the IR. The current
+ * pass only validates that GRF uses are sane. More can be added later.
+ */
+
+#include "brw_fs.h"
+#include "brw_cfg.h"
+
+#define fsv_assert(cond) \
+ if (!(cond)) { \
+ fprintf(stderr, "ASSERT: FS validation failed!\n"); \
+ dump_instruction(inst, stderr); \
+ fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, #cond); \
+ abort(); \
+ }
+
+void
+fs_visitor::validate()
+{
+ foreach_block_and_inst (block, fs_inst, inst, cfg) {
+ if (inst->dst.file == GRF) {
+ fsv_assert(inst->dst.reg_offset + inst->regs_written <=
+ alloc.sizes[inst->dst.reg]);
+ }
+
+ for (unsigned i = 0; i < inst->sources; i++) {
+ if (inst->src[i].file == GRF) {
+ fsv_assert(inst->src[i].reg_offset + inst->regs_read(i) <=
+ (int)alloc.sizes[inst->src[i].reg]);
+ }
+ }
+ }
+}
#include "glsl/ir_visitor.h"
#include "glsl/ir_rvalue_visitor.h"
#include "glsl/glsl_types.h"
+#include "util/hash_table.h"
static bool debug = false;
ir_vector_reference_visitor(void)
{
this->mem_ctx = ralloc_context(NULL);
- this->variable_list.make_empty();
+ this->ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
}
~ir_vector_reference_visitor(void)
variable_entry *get_variable_entry(ir_variable *var);
/* List of variable_entry */
- exec_list variable_list;
+ struct hash_table *ht;
void *mem_ctx;
};
switch (var->data.mode) {
case ir_var_uniform:
+ case ir_var_shader_storage:
case ir_var_shader_in:
case ir_var_shader_out:
case ir_var_system_value:
break;
}
- foreach_in_list(variable_entry, entry, &variable_list) {
- if (entry->var == var)
- return entry;
- }
+ struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+ if (hte)
+ return (struct variable_entry *) hte->data;
variable_entry *entry = new(mem_ctx) variable_entry(var);
- this->variable_list.push_tail(entry);
+ _mesa_hash_table_insert(ht, var, entry);
return entry;
}
class ir_vector_splitting_visitor : public ir_rvalue_visitor {
public:
- ir_vector_splitting_visitor(exec_list *vars)
+ ir_vector_splitting_visitor(struct hash_table *vars)
{
- this->variable_list = vars;
+ this->ht = vars;
}
virtual ir_visitor_status visit_leave(ir_assignment *);
void handle_rvalue(ir_rvalue **rvalue);
variable_entry *get_splitting_entry(ir_variable *var);
- exec_list *variable_list;
+ struct hash_table *ht;
};
variable_entry *
if (!var->type->is_vector())
return NULL;
- foreach_in_list(variable_entry, entry, variable_list) {
- if (entry->var == var) {
- return entry;
- }
- }
-
- return NULL;
+ struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+ return hte ? (struct variable_entry *) hte->data : NULL;
}
void
bool
brw_do_vector_splitting(exec_list *instructions)
{
+ struct hash_entry *hte;
+
ir_vector_reference_visitor refs;
visit_list_elements(&refs, instructions);
/* Trim out variables we can't split. */
- foreach_in_list_safe(variable_entry, entry, &refs.variable_list) {
+ hash_table_foreach(refs.ht, hte) {
+ struct variable_entry *entry = (struct variable_entry *) hte->data;
if (debug) {
fprintf(stderr, "vector %s@%p: whole_access %d\n",
entry->var->name, (void *) entry->var,
}
if (entry->whole_vector_access) {
- entry->remove();
+ _mesa_hash_table_remove(refs.ht, hte);
}
}
- if (refs.variable_list.is_empty())
+ if (refs.ht->entries == 0)
return false;
void *mem_ctx = ralloc_context(NULL);
/* Replace the decls of the vectors to be split with their split
* components.
*/
- foreach_in_list(variable_entry, entry, &refs.variable_list) {
+ hash_table_foreach(refs.ht, hte) {
+ struct variable_entry *entry = (struct variable_entry *) hte->data;
const struct glsl_type *type;
type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
entry->var->remove();
}
- ir_vector_splitting_visitor split(&refs.variable_list);
+ ir_vector_splitting_visitor split(refs.ht);
visit_list_elements(&split, instructions);
ralloc_free(mem_ctx);
void
fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
{
- const struct brw_vue_prog_key *key =
- (const struct brw_vue_prog_key *) this->key;
+ const struct brw_vs_prog_key *key =
+ (const struct brw_vs_prog_key *) this->key;
for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
this->userplane[i] = fs_reg(UNIFORM, uniforms);
{
struct brw_vue_prog_data *vue_prog_data =
(struct brw_vue_prog_data *) prog_data;
- const struct brw_vue_prog_key *key =
- (const struct brw_vue_prog_key *) this->key;
+ const struct brw_vs_prog_key *key =
+ (const struct brw_vs_prog_key *) this->key;
/* Bail unless some sort of legacy clipping is enabled */
- if (!key->userclip_active || prog->UsesClipDistanceOut)
+ if (key->nr_userclip_plane_consts == 0)
return;
/* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
const fs_builder abld = bld.annotate("user clip distances");
this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type);
+ this->output_components[VARYING_SLOT_CLIP_DIST0] = 4;
this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type);
+ this->output_components[VARYING_SLOT_CLIP_DIST1] = 4;
for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
fs_reg u = userplane[i];
unreachable("unexpected scalar vs output");
break;
- case BRW_VARYING_SLOT_PAD:
- break;
-
default:
/* gl_Position is always in the vue map, but isn't always written by
* the shader. Other varyings (clip distances) get added to the vue
* slot for writing we flush a mlen 5 urb write, otherwise we just
* advance the urb_offset.
*/
- if (this->outputs[varying].file == BAD_FILE) {
+ if (varying == BRW_VARYING_SLOT_PAD ||
+ this->outputs[varying].file == BAD_FILE) {
if (length > 0)
flush = true;
else
sources[length++] = reg;
}
} else {
- for (int i = 0; i < 4; i++)
+ for (unsigned i = 0; i < output_components[varying]; i++)
sources[length++] = offset(this->outputs[varying], bld, i);
+ for (unsigned i = output_components[varying]; i < 4; i++)
+ sources[length++] = fs_reg(0);
}
break;
}
fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ const fs_builder pbld = bld.exec_all().group(8, 0);
+
/* Clear the message payload */
- bld.exec_all().MOV(payload, fs_reg(0u));
+ pbld.MOV(payload, fs_reg(0u));
/* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */
fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD));
- bld.exec_all().AND(component(payload, 2), r0_2, fs_reg(0x0f000000u));
+ pbld.AND(component(payload, 2), r0_2, fs_reg(0x0f000000u));
/* Emit a gateway "barrier" message using the payload we set up, followed
* by a wait instruction.
key_tex = &((const brw_wm_prog_key *) key)->tex;
break;
case MESA_SHADER_VERTEX:
+ key_tex = &((const brw_vs_prog_key *) key)->tex;
+ break;
case MESA_SHADER_GEOMETRY:
- key_tex = &((const brw_vue_prog_key *) key)->tex;
+ key_tex = &((const brw_gs_prog_key *) key)->tex;
break;
case MESA_SHADER_COMPUTE:
key_tex = &((const brw_cs_prog_key*) key)->tex;
struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
int param_count = gs->num_uniform_components * 4;
- /* We also upload clip plane data as uniforms */
- param_count += MAX_CLIP_PLANES * 4;
param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
c.prog_data.base.base.param =
c.prog_data.base.base.nr_params = param_count;
c.prog_data.base.base.nr_image_params = gs->NumImages;
+ if (brw->gen >= 8) {
+ c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
+ nir_gs_count_vertices(gp->program.Base.nir);
+ }
+
if (brw->gen >= 7) {
if (gp->program.OutputType == GL_POINTS) {
/* When the output type is points, the geometry shader may output data
GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
- /* In order for legacy clipping to work, we need to populate the clip
- * distance varying slots whenever clipping is enabled, even if the vertex
- * shader doesn't write to gl_ClipDistance.
- */
- if (c.key.base.userclip_active) {
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
- }
-
brw_compute_vue_map(brw->intelScreen->devinfo,
- &c.prog_data.base.vue_map, outputs_written);
+ &c.prog_data.base.vue_map, outputs_written,
+ prog ? prog->SeparateShader : false);
/* Compute the output vertex size.
*
c.prog_data.output_topology =
get_hw_prim_for_gl_prim(gp->program.OutputType);
+ /* The GLSL linker will have already matched up GS inputs and the outputs
+ * of prior stages. The driver does extend VS outputs in some cases, but
+ * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
+ * geometry shader support. So we can safely ignore that.
+ *
+ * For SSO pipelines, we use a fixed VUE map layout based on variable
+ * locations, so we can rely on rendezvous-by-location making this work.
+ *
+ * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+ * written by previous stages and shows up via payload magic.
+ */
+ GLbitfield64 inputs_read =
+ gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID;
brw_compute_vue_map(brw->intelScreen->devinfo,
- &c.input_vue_map, c.key.input_varyings);
+ &c.input_vue_map, inputs_read,
+ prog->SeparateShader);
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
* need to program a URB read length of ceiling(num_slots / 2).
return brw_state_dirty(brw,
_NEW_TEXTURE,
BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_TRANSFORM_FEEDBACK |
- BRW_NEW_VUE_MAP_VS);
+ BRW_NEW_TRANSFORM_FEEDBACK);
}
static void
memset(key, 0, sizeof(*key));
- key->base.program_string_id = gp->id;
- brw_setup_vue_key_clip_info(brw, &key->base,
- gp->program.Base.UsesClipDistanceOut);
+ key->program_string_id = gp->id;
/* _NEW_TEXTURE */
brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
- &key->base.tex);
-
- /* BRW_NEW_VUE_MAP_VS */
- key->input_varyings = brw->vue_map_vs.slots_valid;
+ &key->tex);
}
void
if (gp == NULL) {
/* No geometry shader. Vertex data just passes straight through. */
- if (brw->ctx.NewDriverState & BRW_NEW_VUE_MAP_VS) {
- brw->vue_map_geom_out = brw->vue_map_vs;
- brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
- }
-
if (brw->gen == 6 &&
(brw->ctx.NewDriverState & BRW_NEW_TRANSFORM_FEEDBACK)) {
gen6_brw_upload_ff_gs_prog(brw);
(void)success;
}
brw->gs.base.prog_data = &brw->gs.prog_data->base.base;
-
- if (memcmp(&brw->gs.prog_data->base.vue_map, &brw->vue_map_geom_out,
- sizeof(brw->vue_map_geom_out)) != 0) {
- brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
- brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
- }
}
bool
memset(&key, 0, sizeof(key));
- brw_vue_setup_prog_key_for_precompile(ctx, &key.base, bgp->id, &gp->Base);
-
- /* Assume that the set of varyings coming in from the vertex shader exactly
- * matches what the geometry shader requires.
- */
- key.input_varyings = gp->Base.InputsRead;
+ brw_setup_tex_for_precompile(brw, &key.tex, prog);
+ key.program_string_id = bgp->id;
success = brw_codegen_gs_prog(brw, shader_prog, bgp, &key);
extern "C" {
#endif
+/** Maximum SEND message length */
+#define BRW_MAX_MSG_LENGTH 15
+
/* brw_context.h has a forward declaration of brw_inst, so name the struct. */
typedef struct brw_inst {
uint64_t data[2];
const src_reg &src1 = src_reg(),
const src_reg &src2 = src_reg());
- struct brw_reg get_dst(void);
+ struct brw_reg get_dst(unsigned gen);
struct brw_reg get_src(const struct brw_vue_prog_data *prog_data, int i);
dst_reg dst;
bool is_send_from_grf();
unsigned regs_read(unsigned arg) const;
- bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask);
+ bool can_reswizzle(const struct brw_device_info *devinfo, int dst_writemask,
+ int swizzle, int swizzle_mask);
void reswizzle(int dst_writemask, int swizzle);
bool can_do_source_mods(const struct brw_device_info *devinfo);
private:
void emit(ir_variable *, ir_rvalue *);
+ ir_variable *temp(void *ctx, const glsl_type *type, const char *name);
};
/**
base_ir->insert_before(assign(var, value));
}
+/**
+ * Emit a temporary variable declaration
+ */
+ir_variable *
+lower_texture_grad_visitor::temp(void *ctx, const glsl_type *type, const char *name)
+{
+ ir_variable *var = new(ctx) ir_variable(type, name, ir_var_temporary);
+ base_ir->insert_before(var);
+ return var;
+}
+
static const glsl_type *
txs_type(const glsl_type *type)
{
new(mem_ctx) ir_variable(grad_type, "dPdy", ir_var_temporary);
emit(dPdy, mul(size, ir->lod_info.grad.dPdy));
- /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
- ir_rvalue *rho;
- if (dPdx->type->is_scalar()) {
- rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
- expr(ir_unop_abs, dPdy));
- } else {
- rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
- expr(ir_unop_sqrt, dot(dPdy, dPdy)));
- }
-
- /* lambda_base = log2(rho). We're ignoring GL state biases for now.
- *
- * For cube maps the result of these formulas is giving us a value of rho
- * that is twice the value we should use, so divide it by 2 or,
- * alternatively, remove one unit from the result of the log2 computation.
- */
ir->op = ir_txl;
if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
- ir->lod_info.lod = expr(ir_binop_add,
- expr(ir_unop_log2, rho),
- new(mem_ctx) ir_constant(-1.0f));
+ /* Cubemap texture lookups first generate a texture coordinate normalized
+ * to [-1, 1] on the appropriate face. The appropriate face is determined
+ * by which component has largest magnitude and its sign. The texture
+ * coordinate is the quotient of the remaining texture coordinates against
+ * that absolute value of the component of largest magnitude. This
+ * division requires that the computing of the derivative of the texel
+ * coordinate must use the quotient rule. The high level GLSL code is as
+ * follows:
+ *
+ * Step 1: selection
+ *
+ * vec3 abs_p, Q, dQdx, dQdy;
+ * abs_p = abs(ir->coordinate);
+ * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
+ * Q = ir->coordinate.yzx;
+ * dQdx = ir->lod_info.grad.dPdx.yzx;
+ * dQdy = ir->lod_info.grad.dPdy.yzx;
+ * }
+ * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
+ * Q = ir->coordinate.xzy;
+ * dQdx = ir->lod_info.grad.dPdx.xzy;
+ * dQdy = ir->lod_info.grad.dPdy.xzy;
+ * }
+ * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
+ * Q = ir->coordinate;
+ * dQdx = ir->lod_info.grad.dPdx;
+ * dQdy = ir->lod_info.grad.dPdy;
+ * }
+ *
+ * Step 2: use quotient rule to compute derivative. The normalized to
+ * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
+ * only concerned with the magnitudes of the derivatives whose values are
+ * not affected by the sign. We drop the sign from the computation.
+ *
+ * vec2 dx, dy;
+ * float recip;
+ *
+ * recip = 1.0 / Q.z;
+ * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
+ * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
+ *
+ * Step 3: compute LOD. At this point we have the derivatives of the
+ * texture coordinates normalized to [-1,1]. We take the LOD to be
+ * result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
+ * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
+ * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
+ * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
+ * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
+ * where L is the dimension of the cubemap. The code is:
+ *
+ * float M, result;
+ * M = max(dot(dx, dx), dot(dy, dy));
+ * L = textureSize(sampler, 0).x;
+ * result = -1.0 + 0.5 * log2(L * L * M);
+ */
+
+/* Helpers to make code more human readable. */
+#define EMIT(instr) base_ir->insert_before(instr)
+#define THEN(irif, instr) irif->then_instructions.push_tail(instr)
+#define CLONE(x) x->clone(mem_ctx, NULL)
+
+ ir_variable *abs_p = temp(mem_ctx, glsl_type::vec3_type, "abs_p");
+
+ EMIT(assign(abs_p, swizzle_for_size(abs(CLONE(ir->coordinate)), 3)));
+
+ ir_variable *Q = temp(mem_ctx, glsl_type::vec3_type, "Q");
+ ir_variable *dQdx = temp(mem_ctx, glsl_type::vec3_type, "dQdx");
+ ir_variable *dQdy = temp(mem_ctx, glsl_type::vec3_type, "dQdy");
+
+ /* unmodified dPdx, dPdy values */
+ ir_rvalue *dPdx = ir->lod_info.grad.dPdx;
+ ir_rvalue *dPdy = ir->lod_info.grad.dPdy;
+
+ /* 1. compute selector */
+
+ /* if (abs_p.x >= max(abs_p.y, abs_p.z)) ... */
+ ir_if *branch_x =
+ new(mem_ctx) ir_if(gequal(swizzle_x(abs_p),
+ max2(swizzle_y(abs_p), swizzle_z(abs_p))));
+
+ /* Q = p.yzx;
+ * dQdx = dPdx.yzx;
+ * dQdy = dPdy.yzx;
+ */
+ int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0);
+ THEN(branch_x, assign(Q, swizzle(CLONE(ir->coordinate), yzx, 3)));
+ THEN(branch_x, assign(dQdx, swizzle(CLONE(dPdx), yzx, 3)));
+ THEN(branch_x, assign(dQdy, swizzle(CLONE(dPdy), yzx, 3)));
+ EMIT(branch_x);
+
+ /* if (abs_p.y >= max(abs_p.x, abs_p.z)) */
+ ir_if *branch_y =
+ new(mem_ctx) ir_if(gequal(swizzle_y(abs_p),
+ max2(swizzle_x(abs_p), swizzle_z(abs_p))));
+
+ /* Q = p.xzy;
+ * dQdx = dPdx.xzy;
+ * dQdy = dPdy.xzy;
+ */
+ int xzy = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Z, SWIZZLE_Y, 0);
+ THEN(branch_y, assign(Q, swizzle(CLONE(ir->coordinate), xzy, 3)));
+ THEN(branch_y, assign(dQdx, swizzle(CLONE(dPdx), xzy, 3)));
+ THEN(branch_y, assign(dQdy, swizzle(CLONE(dPdy), xzy, 3)));
+ EMIT(branch_y);
+
+ /* if (abs_p.z >= max(abs_p.x, abs_p.y)) */
+ ir_if *branch_z =
+ new(mem_ctx) ir_if(gequal(swizzle_z(abs_p),
+ max2(swizzle_x(abs_p), swizzle_y(abs_p))));
+
+ /* Q = p;
+ * dQdx = dPdx;
+ * dQdy = dPdy;
+ */
+ THEN(branch_z, assign(Q, swizzle_for_size(CLONE(ir->coordinate), 3)));
+ THEN(branch_z, assign(dQdx, CLONE(dPdx)));
+ THEN(branch_z, assign(dQdy, CLONE(dPdy)));
+ EMIT(branch_z);
+
+ /* 2. quotient rule */
+ ir_variable *recip = temp(mem_ctx, glsl_type::float_type, "recip");
+ EMIT(assign(recip, div(new(mem_ctx) ir_constant(1.0f), swizzle_z(Q))));
+
+ ir_variable *dx = temp(mem_ctx, glsl_type::vec2_type, "dx");
+ ir_variable *dy = temp(mem_ctx, glsl_type::vec2_type, "dy");
+
+ /* tmp = Q.xy * recip;
+ * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
+ * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
+ */
+ ir_variable *tmp = temp(mem_ctx, glsl_type::vec2_type, "tmp");
+ EMIT(assign(tmp, mul(swizzle_xy(Q), recip)));
+ EMIT(assign(dx, mul(recip, sub(swizzle_xy(dQdx),
+ mul(tmp, swizzle_z(dQdx))))));
+ EMIT(assign(dy, mul(recip, sub(swizzle_xy(dQdy),
+ mul(tmp, swizzle_z(dQdy))))));
+
+ /* M = max(dot(dx, dx), dot(dy, dy)); */
+ ir_variable *M = temp(mem_ctx, glsl_type::float_type, "M");
+ EMIT(assign(M, max2(dot(dx, dx), dot(dy, dy))));
+
+ /* size has textureSize() of LOD 0 */
+ ir_variable *L = temp(mem_ctx, glsl_type::float_type, "L");
+ EMIT(assign(L, swizzle_x(size)));
+
+ ir_variable *result = temp(mem_ctx, glsl_type::float_type, "result");
+
+ /* result = -1.0 + 0.5 * log2(L * L * M); */
+ EMIT(assign(result,
+ add(new(mem_ctx)ir_constant(-1.0f),
+ mul(new(mem_ctx)ir_constant(0.5f),
+ expr(ir_unop_log2, mul(mul(L, L), M))))));
+
+ /* 3. final assignment of parameters to textureLod call */
+ ir->lod_info.lod = new (mem_ctx) ir_dereference_variable(result);
+
+#undef THEN
+#undef EMIT
+
} else {
+ /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
+ ir_rvalue *rho;
+ if (dPdx->type->is_scalar()) {
+ rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
+ expr(ir_unop_abs, dPdy));
+ } else {
+ rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
+ expr(ir_unop_sqrt, dot(dPdy, dPdy)));
+ }
+
+ /* lambda_base = log2(rho). We're ignoring GL state biases for now. */
ir->lod_info.lod = expr(ir_unop_log2, rho);
}
}
static void
-get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
+get_fast_clear_rect(struct gl_framebuffer *fb,
struct intel_renderbuffer *irb, struct rect *rect)
{
unsigned int x_align, y_align;
* alignment size returned by intel_get_non_msrt_mcs_alignment(), but
* with X alignment multiplied by 16 and Y alignment multiplied by 32.
*/
- intel_get_non_msrt_mcs_alignment(brw, irb->mt, &x_align, &y_align);
+ intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align);
x_align *= 16;
y_align *= 32;
irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
irb->need_downsample = true;
fast_clear_buffers |= 1 << index;
- get_fast_clear_rect(brw, fb, irb, &fast_clear_rect);
+ get_fast_clear_rect(fb, irb, &fast_clear_rect);
break;
case REP_CLEAR:
* by 8 and 16 and 8 and 8 for SKL.
*/
- intel_get_non_msrt_mcs_alignment(brw, mt, &x_align, &y_align);
+ intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align);
if (brw->gen >= 9) {
x_scaledown = x_align * 8;
y_scaledown = y_align * 8;
/**
* Setup uniforms telling the destination width, height and the offset. These
- * are needed to unnoormalize the input coordinates and to correctly translate
+ * are needed to unnormalize the input coordinates and to correctly translate
* between destination and source that may have differing offsets.
*/
static void
uint32_t tile_mask_x = 0, tile_mask_y = 0;
if (depth_mt) {
- intel_miptree_get_tile_masks(depth_mt, &tile_mask_x, &tile_mask_y, false);
+ intel_get_tile_masks(depth_mt->tiling, depth_mt->tr_mode,
+ depth_mt->cpp, false,
+ &tile_mask_x, &tile_mask_y);
if (intel_miptree_level_has_hiz(depth_mt, depth_level)) {
uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
- intel_miptree_get_tile_masks(depth_mt->hiz_buf->mt,
- &hiz_tile_mask_x, &hiz_tile_mask_y,
- false);
+ intel_get_tile_masks(depth_mt->hiz_buf->mt->tiling,
+ depth_mt->hiz_buf->mt->tr_mode,
+ depth_mt->hiz_buf->mt->cpp,
+ false, &hiz_tile_mask_x,
+ &hiz_tile_mask_y);
/* Each HiZ row represents 2 rows of pixels */
hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
tile_mask_y |= 63;
} else {
uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
- intel_miptree_get_tile_masks(stencil_mt,
- &stencil_tile_mask_x,
- &stencil_tile_mask_y, false);
+ intel_get_tile_masks(stencil_mt->tiling,
+ stencil_mt->tr_mode,
+ stencil_mt->cpp,
+ false, &stencil_tile_mask_x,
+ &stencil_tile_mask_y);
tile_mask_x |= stencil_tile_mask_x;
tile_mask_y |= stencil_tile_mask_y;
nir_validate_shader(nir);
progress |= nir_opt_constant_folding(nir);
nir_validate_shader(nir);
+ progress |= nir_opt_dead_cf(nir);
+ nir_validate_shader(nir);
progress |= nir_opt_remove_phis(nir);
nir_validate_shader(nir);
progress |= nir_opt_undef(nir);
gl_shader_stage stage, bool is_scalar)
{
bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
+ static const nir_lower_tex_options tex_options = {
+ .lower_txp = ~0,
+ };
+
+ if (stage == MESA_SHADER_GEOMETRY) {
+ nir_lower_gs_intrinsics(nir);
+ nir_validate_shader(nir);
+ }
nir_lower_global_vars_to_local(nir);
nir_validate_shader(nir);
- nir_lower_tex_projector(nir);
+ nir_lower_tex(nir, &tex_options);
nir_validate_shader(nir);
nir_normalize_cubemap_coords(nir);
nir_print_shader(nir, stderr);
}
- nir_convert_from_ssa(nir, is_scalar);
+ nir_convert_from_ssa(nir, true);
nir_validate_shader(nir);
if (!is_scalar) {
+ nir_move_vec_src_uses_to_dest(nir);
+ nir_validate_shader(nir);
+
nir_lower_vec_to_movs(nir);
nir_validate_shader(nir);
}
{
assert(brw->gen >= 6 && brw->gen <= 9);
+ /* Starting on BDW, these pipe controls are unnecessary.
+ *
+ * WM HW will internally manage the draining pipe and flushing of the caches
+ * when this command is issued. The PIPE_CONTROL restrictions are removed.
+ */
+ if (brw->gen >= 8)
+ return;
+
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
_mesa_print_program(prog);
}
}
+
+void
+brw_setup_tex_for_precompile(struct brw_context *brw,
+ struct brw_sampler_prog_key_data *tex,
+ struct gl_program *prog)
+{
+ const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
+ unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
+ for (unsigned i = 0; i < sampler_count; i++) {
+ if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
+ /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
+ tex->swizzles[i] =
+ MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+ } else {
+ /* Color sampler: assume no swizzling. */
+ tex->swizzles[i] = SWIZZLE_XYZW;
+ }
+ }
+}
};
-struct brw_vue_prog_key {
- unsigned program_string_id;
-
- /**
- * True if at least one clip flag is enabled, regardless of whether the
- * shader uses clip planes or gl_ClipDistance.
- */
- bool userclip_active:1;
-
- /**
- * How many user clipping planes are being uploaded to the vertex shader as
- * push constants.
- */
- unsigned nr_userclip_plane_consts:4;
-
- struct brw_sampler_prog_key_data tex;
-};
-
/** The program key for Vertex Shaders. */
struct brw_vs_prog_key {
- struct brw_vue_prog_key base;
+ unsigned program_string_id;
/*
* Per-attribute workaround flags
bool clamp_vertex_color:1;
/**
+ * How many user clipping planes are being uploaded to the vertex shader as
+ * push constants.
+ *
+ * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
+ * clip distances.
+ */
+ unsigned nr_userclip_plane_consts:4;
+
+ /**
* For pre-Gen6 hardware, a bitfield indicating which texture coordinates
* are going to be replaced with point coordinates (as a consequence of a
* call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because
* the VUE, even if they aren't written by the vertex shader.
*/
uint8_t point_coord_replace;
+
+ struct brw_sampler_prog_key_data tex;
};
/** The program key for Geometry Shaders. */
struct brw_gs_prog_key
{
- struct brw_vue_prog_key base;
+ unsigned program_string_id;
- uint64_t input_varyings;
+ struct brw_sampler_prog_key_data tex;
};
/** The program key for Fragment/Pixel Shaders. */
extern "C" {
#endif
+void brw_setup_tex_for_precompile(struct brw_context *brw,
+ struct brw_sampler_prog_key_data *tex,
+ struct gl_program *prog);
+
void brw_populate_sampler_prog_key_data(struct gl_context *ctx,
const struct gl_program *prog,
unsigned sampler_count,
#define GEN7_MRF_HACK_START 112
/** Number of message register file registers */
-#define BRW_MAX_MRF 16
+/* 24 on gen6, 16 otherwise.  The argument is parenthesized so that any
+ * expression (e.g. a conditional) can be passed without mis-parsing.
+ */
+#define BRW_MAX_MRF(gen) ((gen) == 6 ? 24 : 16)
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
struct brw_reg reg;
if (file == BRW_GENERAL_REGISTER_FILE)
assert(nr < BRW_MAX_GRF);
- else if (file == BRW_MESSAGE_REGISTER_FILE)
- assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
assert(nr <= BRW_ARF_TIMESTAMP);
+ /* Asserting on the MRF register number requires knowing the hardware gen
+ * (gen6 has 24 MRF registers), which we don't know here, so we assert
+ * on it in the generators and in brw_eu_emit.c instead.
+ */
reg.type = type;
reg.file = file;
static inline struct brw_reg
brw_message_reg(unsigned nr)
{
- assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
}
unsigned min_lod,
unsigned max_lod,
int lod_bias,
- unsigned base_level,
unsigned shadow_function,
bool non_normalized_coordinates,
uint32_t border_color_offset)
{
ss[0] = BRW_SAMPLER_LOD_PRECLAMP_ENABLE |
- SET_FIELD(base_level, BRW_SAMPLER_BASE_MIPLEVEL) |
SET_FIELD(mip_filter, BRW_SAMPLER_MIP_FILTER) |
SET_FIELD(mag_filter, BRW_SAMPLER_MAG_FILTER) |
SET_FIELD(min_filter, BRW_SAMPLER_MIN_FILTER);
const unsigned max_lod = U_FIXED(CLAMP(sampler->MaxLod, 0, 13), lod_bits);
const int lod_bias =
S_FIXED(CLAMP(tex_unit_lod_bias + sampler->LodBias, -16, 15), lod_bits);
- const unsigned base_level = U_FIXED(0, 1);
/* Upload the border color if necessary. If not, just point it at
* offset 0 (the start of the batch) - the color should be ignored,
max_anisotropy,
address_rounding,
wrap_s, wrap_t, wrap_r,
- min_lod, max_lod, lod_bias, base_level,
+ min_lod, max_lod, lod_bias,
shadow_function,
non_normalized_coords,
border_color_offset);
* GRF registers.
*/
schedule_node *last_grf_write[grf_count * 16];
- schedule_node *last_mrf_write[BRW_MAX_MRF];
+ schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)];
schedule_node *last_conditional_mod[2] = { NULL, NULL };
schedule_node *last_accumulator_write = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
vec4_instruction_scheduler::calculate_deps()
{
schedule_node *last_grf_write[grf_count];
- schedule_node *last_mrf_write[BRW_MAX_MRF];
+ schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)];
schedule_node *last_conditional_mod = NULL;
schedule_node *last_accumulator_write = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
nir_options->lower_sub = true;
nir_options->lower_fdiv = true;
+ /* In the vec4 backend, our dpN instruction replicates its result to all
+ * the components of a vec4. We would like NIR to give us replicated fdot
+ * instructions because it can optimize better for us.
+ *
+ * For the FS backend, it should be lowered away by the scalarizing pass so
+ * we should never see fdot anyway.
+ */
+ nir_options->fdot_replicates = true;
+
/* We want the GLSL compiler to emit code that uses condition codes */
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
compiler->glsl_compiler_options[i].EmitNoNoise = true;
compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
- compiler->glsl_compiler_options[i].EmitNoIndirectOutput =
- (i == MESA_SHADER_FRAGMENT);
- compiler->glsl_compiler_options[i].EmitNoIndirectTemp =
- (i == MESA_SHADER_FRAGMENT);
compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
compiler->glsl_compiler_options[i].LowerClipDistance = true;
+ bool is_scalar;
+ switch (i) {
+ case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
+ is_scalar = true;
+ break;
+ case MESA_SHADER_VERTEX:
+ is_scalar = compiler->scalar_vs;
+ break;
+ default:
+ is_scalar = false;
+ break;
+ }
+
+ compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
+ compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
+ compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;
+
/* !ARB_gpu_shader5 */
if (devinfo->gen < 7)
compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
- }
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true;
- compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
-
- if (compiler->scalar_vs || brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
- if (compiler->scalar_vs) {
- /* If we're using the scalar backend for vertex shaders, we need to
- * configure these accordingly.
- */
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false;
- }
-
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options;
- }
-
- if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
- compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].NirOptions = nir_options;
+ if (is_scalar || brw_env_var_as_boolean("INTEL_USE_NIR", true))
+ compiler->glsl_compiler_options[i].NirOptions = nir_options;
}
- compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions = nir_options;
- compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions = nir_options;
-
return compiler;
}
{
switch (stage) {
case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
return true;
case MESA_SHADER_VERTEX:
return brw->intelScreen->compiler->scalar_vs;
options, ctx->Const.NativeIntegers) || progress;
} while (progress);
- if (options->NirOptions != NULL)
- lower_output_reads(stage, shader->ir);
-
validate_ir_tree(shader->ir);
/* Now that we've finished altering the linked IR, reparent any live IR back
return "tg4_offset";
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
return "tg4_offset_logical";
+ case SHADER_OPCODE_SAMPLEINFO:
+ return "sampleinfo";
case SHADER_OPCODE_SHADER_TIME_ADD:
return "shader_time_add";
case FS_OPCODE_PIXEL_Y:
return "pixel_y";
+ case FS_OPCODE_GET_BUFFER_SIZE:
+ return "fs_get_buffer_size";
+
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return "uniform_pull_const";
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
return "set_simd4x2_header_gen9";
+ case VS_OPCODE_GET_BUFFER_SIZE:
+ return "vs_get_buffer_size";
+
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
return "unpack_flags_simd4x2";
extern const struct brw_tracked_state brw_vertices;
extern const struct brw_tracked_state brw_index_buffer;
extern const struct brw_tracked_state brw_cs_state;
+extern const struct brw_tracked_state gen7_cs_push_constants;
extern const struct brw_tracked_state gen6_binding_table_pointers;
extern const struct brw_tracked_state gen6_blend_state;
extern const struct brw_tracked_state gen6_cc_state_pointers;
extern const struct brw_tracked_state gen8_vertices;
extern const struct brw_tracked_state gen8_vf_topology;
extern const struct brw_tracked_state gen8_vs_state;
+extern const struct brw_tracked_state brw_cs_work_groups_surface;
static inline bool
brw_state_dirty(struct brw_context *brw, GLuint mesa_flags, uint64_t brw_flags)
uint32_t
brw_depthbuffer_format(struct brw_context *brw);
-/* gen8_misc_state.c */
-void gen8_upload_state_base_address(struct brw_context *brw);
-
-
/***********************************************************************
* brw_state.c
*/
unsigned min_lod,
unsigned max_lod,
int lod_bias,
- unsigned base_level,
unsigned shadow_function,
bool non_normalized_coordinates,
uint32_t border_color_offset);
/* brw_vs_surface_state.c */
void
brw_upload_pull_constants(struct brw_context *brw,
- GLbitfield brw_new_constbuf,
+ GLbitfield64 brw_new_constbuf,
const struct gl_program *prog,
struct brw_stage_state *stage_state,
const struct brw_stage_prog_data *prog_data,
gl_shader_stage stage,
const uint32_t* binding_table,
int num_surfaces);
-void gen7_enable_hw_binding_tables(struct brw_context *brw);
void gen7_disable_hw_binding_tables(struct brw_context *brw);
void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
static void dump_sampler_state(struct brw_context *brw,
uint32_t offset, uint32_t size)
{
- int i;
+ unsigned i;
uint32_t *samp = brw->batch.bo->virtual + offset;
for (i = 0; i < size / 16; i++) {
char name[20];
- sprintf(name, "WM SAMP%d", i);
+ sprintf(name, "WM SAMP%u", i);
batch_out(brw, name, offset, 0, "filtering\n");
batch_out(brw, name, offset, 1, "wrapping, lod\n");
batch_out(brw, name, offset, 2, "default color pointer\n");
{
&brw_state_base_address,
&brw_cs_image_surfaces,
+ &gen7_cs_push_constants,
&brw_cs_abo_surfaces,
+ &brw_texture_surfaces,
+ &brw_cs_work_groups_surface,
&brw_cs_state,
};
{
&gen8_state_base_address,
&brw_cs_image_surfaces,
+ &gen7_cs_push_constants,
&brw_cs_abo_surfaces,
+ &brw_texture_surfaces,
+ &brw_cs_work_groups_surface,
&brw_cs_state,
};
ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
+ ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
- DEFINE_BIT(BRW_NEW_VUE_MAP_VS),
DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
+ DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
{0, 0, 0}
};
else
brw_upload_gs_prog(brw);
+ /* Update the VUE map for data exiting the GS stage of the pipeline.
+ * This comes from the last enabled shader stage.
+ */
+ GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
+ bool old_separate = brw->vue_map_geom_out.separate;
+ if (brw->geometry_program)
+ brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
+ else
+ brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
+
+ /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
+ if (old_slots != brw->vue_map_geom_out.slots_valid ||
+ old_separate != brw->vue_map_geom_out.separate)
+ brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
+
brw_upload_wm_prog(brw);
} else if (pipeline == BRW_COMPUTE_PIPELINE) {
brw_upload_cs_prog(brw);
enum brw_pipeline pipeline)
{
/* Save all dirty state into the other pipelines */
- for (int i = 0; i < BRW_NUM_PIPELINES; i++) {
+ for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
if (i != pipeline) {
brw->state.pipelines[i].mesa |= brw->NewGLState;
brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
this->swizzle = brw_swizzle_for_size(type->vector_elements);
else
this->swizzle = BRW_SWIZZLE_XYZW;
+ if (type)
+ this->type = brw_type_for_base_type(type);
}
/** Generic unset register constructor. */
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
+ case SHADER_OPCODE_SAMPLEINFO:
+ case VS_OPCODE_GET_BUFFER_SIZE:
return inst->header_size;
default:
unreachable("not reached");
}
bool
-vec4_instruction::can_reswizzle(int dst_writemask,
+vec4_instruction::can_reswizzle(const struct brw_device_info *devinfo,
+ int dst_writemask,
int swizzle,
int swizzle_mask)
{
+ /* Gen6 MATH instructions can not execute in align16 mode, so swizzles
+ * or writemasking are not allowed.
+ */
+ if (devinfo->gen == 6 && is_math() &&
+ (swizzle != BRW_SWIZZLE_XYZW || dst_writemask != WRITEMASK_XYZW))
+ return false;
+
/* If this instruction sets anything not referenced by swizzle, then we'd
* totally break it when we reswizzle.
*/
if (mlen > 0)
return false;
+ /* We can't use swizzles on the accumulator and that's really the only
+ * HW_REG we would care to reswizzle so just disallow them all.
+ */
+ for (int i = 0; i < 3; i++) {
+ if (src[i].file == HW_REG)
+ return false;
+ }
+
return true;
}
inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
continue;
+ /* Remove no-op MOVs */
+ if (inst->dst.file == inst->src[0].file &&
+ inst->dst.reg == inst->src[0].reg &&
+ inst->dst.reg_offset == inst->src[0].reg_offset) {
+ bool is_nop_mov = true;
+
+ for (unsigned c = 0; c < 4; c++) {
+ if ((inst->dst.writemask & (1 << c)) == 0)
+ continue;
+
+ if (BRW_GET_SWZ(inst->src[0].swizzle, c) != c) {
+ is_nop_mov = false;
+ break;
+ }
+ }
+
+ if (is_nop_mov) {
+ inst->remove(block);
+ continue;
+ }
+ }
+
bool to_mrf = (inst->dst.file == MRF);
/* Can't coalesce this GRF if someone else was going to
}
}
+ /* This doesn't handle saturation on the instruction we
+ * want to coalesce away if the register types do not match.
+ * But if scan_inst is a non type-converting 'mov', we can fix
+ * the types later.
+ */
+ if (inst->saturate &&
+ inst->dst.type != scan_inst->dst.type &&
+ !(scan_inst->opcode == BRW_OPCODE_MOV &&
+ scan_inst->dst.type == scan_inst->src[0].type))
+ break;
+
/* If we can't handle the swizzle, bail. */
- if (!scan_inst->can_reswizzle(inst->dst.writemask,
+ if (!scan_inst->can_reswizzle(devinfo, inst->dst.writemask,
inst->src[0].swizzle,
chans_needed)) {
break;
if (interfered)
break;
- /* If somebody else writes our destination here, we can't coalesce
- * before that.
+ /* If somebody else writes the same channels of our destination here,
+ * we can't coalesce before that.
*/
- if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written))
- break;
+ if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
+ (inst->dst.writemask & scan_inst->dst.writemask) != 0) {
+ break;
+ }
/* Check for reads of the register we're trying to coalesce into. We
* can't go rewriting instructions above that to put some other value
scan_inst->dst.file = inst->dst.file;
scan_inst->dst.reg = inst->dst.reg;
scan_inst->dst.reg_offset = inst->dst.reg_offset;
+ if (inst->saturate &&
+ inst->dst.type != scan_inst->dst.type) {
+ /* If we have reached this point, scan_inst is a non
+ * type-converting 'mov' and we can modify its register types
+ * to match the ones in inst. Otherwise, we could have an
+ * incorrect saturation result.
+ */
+ scan_inst->dst.type = inst->dst.type;
+ scan_inst->src[0].type = inst->src[0].type;
+ }
scan_inst->saturate |= inst->saturate;
}
scan_inst = (vec4_instruction *)scan_inst->next;
}
bool
-vec4_visitor::run(gl_clip_plane *clip_planes)
+vec4_visitor::run()
{
bool use_vec4_nir =
compiler->glsl_compiler_options[stage].NirOptions != NULL;
}
base_ir = NULL;
- if (key->userclip_active && !prog->UsesClipDistanceOut)
- setup_uniform_clipplane_values(clip_planes);
-
emit_thread_end();
calculate_cfg();
setup_payload();
- if (false) {
+ if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) {
/* Debug of register spilling: Go spill everything. */
const int grf_count = alloc.count;
float spill_costs[alloc.count];
struct gl_shader_program *prog,
unsigned *final_assembly_size)
{
- bool start_busy = false;
- double start_time = 0;
const unsigned *assembly = NULL;
- if (unlikely(brw->perf_debug)) {
- start_busy = (brw->batch.last_bo &&
- drm_intel_bo_busy(brw->batch.last_bo));
- start_time = get_time();
- }
-
struct brw_shader *shader = NULL;
if (prog)
shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
- vp, prog, mem_ctx, st_index,
+ vp, prog, brw_select_clip_planes(&brw->ctx),
+ mem_ctx, st_index,
!_mesa_is_gles3(&brw->ctx));
- if (!v.run(brw_select_clip_planes(&brw->ctx))) {
+ if (!v.run()) {
if (prog) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, v.fail_msg);
assembly = g.generate_assembly(v.cfg, final_assembly_size);
}
- if (unlikely(brw->perf_debug) && shader) {
- if (shader->compiled_once) {
- brw_vs_debug_recompile(brw, prog, key);
- }
- if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
- perf_debug("VS compile took %.03f ms and stalled the GPU\n",
- (get_time() - start_time) * 1000);
- }
- shader->compiled_once = true;
- }
-
return assembly;
}
-
-void
-brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
- struct brw_vue_prog_key *key,
- GLuint id, struct gl_program *prog)
-{
- struct brw_context *brw = brw_context(ctx);
- key->program_string_id = id;
-
- brw_setup_tex_for_precompile(brw, &key->tex, prog);
-}
-
} /* extern "C" */
extern "C" {
#endif
-void
-brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
- struct brw_vue_prog_key *key,
- GLuint id, struct gl_program *prog);
-
#ifdef __cplusplus
} /* extern "C" */
vec4_visitor(const struct brw_compiler *compiler,
void *log_data,
struct gl_program *prog,
- const struct brw_vue_prog_key *key,
+ const struct brw_sampler_prog_key_data *key,
struct brw_vue_prog_data *prog_data,
struct gl_shader_program *shader_prog,
gl_shader_stage stage,
return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
}
- const struct brw_vue_prog_key * const key;
+ const struct brw_sampler_prog_key_data * const key_tex;
struct brw_vue_prog_data * const prog_data;
unsigned int sanity_param_count;
struct hash_table *variable_ht;
- bool run(gl_clip_plane *clip_planes);
+ bool run();
void fail(const char *msg, ...);
- void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
virtual void setup_vec4_uniform_value(unsigned param_offset,
const gl_constant_value *values,
unsigned n);
void emit_ndc_computation();
void emit_psiz_and_flags(dst_reg reg);
- void emit_clip_distances(dst_reg reg, int offset);
vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
- void emit_urb_slot(dst_reg reg, int varying);
+ virtual void emit_urb_slot(dst_reg reg, int varying);
void emit_shader_time_begin();
void emit_shader_time_end();
virtual void nir_emit_alu(nir_alu_instr *instr);
virtual void nir_emit_jump(nir_jump_instr *instr);
virtual void nir_emit_texture(nir_tex_instr *instr);
+ virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
+ virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
struct brw_reg offset);
void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
struct brw_reg dst);
+
+ void generate_get_buffer_size(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg index);
+
void generate_unpack_flags(struct brw_reg dst);
const struct brw_compiler *compiler;
--- /dev/null
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_BUILDER_H
+#define BRW_VEC4_BUILDER_H
+
+#include "brw_ir_vec4.h"
+#include "brw_ir_allocator.h"
+#include "brw_context.h"
+
+namespace brw {
+ /**
+ * Toolbox to assemble a VEC4 IR program out of individual instructions.
+ *
+ * This object is meant to have an interface consistent with
+ * brw::fs_builder. They cannot be fully interchangeable because
+ * brw::fs_builder generates scalar code while brw::vec4_builder generates
+ * vector code.
+ */
+ class vec4_builder {
+ public:
+ /** Type used in this IR to represent a source of an instruction. */
+ typedef brw::src_reg src_reg;
+
+ /** Type used in this IR to represent the destination of an instruction. */
+ typedef brw::dst_reg dst_reg;
+
+ /** Type used in this IR to represent an instruction. */
+ typedef vec4_instruction instruction;
+
+ /**
+ * Construct a vec4_builder that inserts instructions into \p shader.
+ */
+ vec4_builder(backend_shader *shader) :
+ shader(shader), block(NULL), cursor(NULL),
+ force_writemask_all(false),
+ annotation()
+ {
+ /* No insertion point is set here: block and cursor start out NULL.
+ * at() and at_end() return copies that carry one.
+ */
+ }
+
+ /**
+ * Construct a vec4_builder that inserts instructions into \p shader
+ * before instruction \p inst in basic block \p block. The default
+ * execution controls and debug annotation are initialized from the
+ * instruction passed as argument.
+ */
+      vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
+         shader(shader), block(block), cursor(inst),
+         /* Inherit the execution controls of inst.  Without this initializer
+          * the member is indeterminate, and emit() copies it into every
+          * instruction this builder inserts.
+          */
+         force_writemask_all(inst->force_writemask_all)
+      {
+         /* The debug annotation is taken from inst as well. */
+         annotation.str = inst->annotation;
+         annotation.ir = inst->ir;
+      }
+
+ /**
+ * Construct a vec4_builder that inserts instructions before \p cursor
+ * in basic block \p block, inheriting other code generation parameters
+ * from this.
+ */
+      vec4_builder
+      at(bblock_t *block, exec_node *cursor) const
+      {
+         /* Clone this builder and retarget only its insertion point. */
+         vec4_builder moved(*this);
+         moved.cursor = cursor;
+         moved.block = block;
+         return moved;
+      }
+
+ /**
+ * Construct a vec4_builder appending instructions at the end of the
+ * instruction list of the shader, inheriting other code generation
+ * parameters from this.
+ */
+      vec4_builder
+      at_end() const
+      {
+         /* Insert before the tail of the shader's instruction list, with no
+          * basic block attached.
+          */
+         exec_node *const tail = (exec_node *)&shader->instructions.tail;
+         return at(NULL, tail);
+      }
+
+ /**
+ * Construct a builder with per-channel control flow execution masking
+ * disabled if \p b is true. If control flow execution masking is
+ * already disabled this has no effect.
+ */
+      vec4_builder
+      exec_all(bool b = true) const
+      {
+         /* Nothing to change: hand back an unmodified copy. */
+         if (!b)
+            return *this;
+
+         vec4_builder unmasked(*this);
+         unmasked.force_writemask_all = true;
+         return unmasked;
+      }
+
+ /**
+ * Construct a builder with the given debug annotation info.
+ */
+      vec4_builder
+      annotate(const char *str, const void *ir = NULL) const
+      {
+         /* Copy the builder and replace both annotation fields. */
+         vec4_builder annotated(*this);
+         annotated.annotation.ir = ir;
+         annotated.annotation.str = str;
+         return annotated;
+      }
+
+ /**
+ * Get the SIMD width in use.
+ */
+ unsigned
+ dispatch_width() const
+ {
+ /* Constant: this builder always reports a width of 8. */
+ return 8;
+ }
+
+ /**
+ * Allocate a virtual register of natural vector size (four for this IR)
+ * and SIMD width. \p n gives the amount of space to allocate in
+ * dispatch_width units (which is just enough space for four logical
+ * components in this IR).
+ */
+      dst_reg
+      vgrf(enum brw_reg_type type, unsigned n = 1) const
+      {
+         assert(dispatch_width() <= 32);
+
+         /* A request for zero units needs no storage: return a null register
+          * of the requested type.
+          */
+         if (n == 0)
+            return retype(null_reg_ud(), type);
+
+         /* Size passed to the allocator: n times the type size rounded up to
+          * a multiple of four bytes, expressed in four-byte units.
+          */
+         const unsigned size = n * DIV_ROUND_UP(type_sz(type), 4);
+         return retype(dst_reg(GRF, shader->alloc.allocate(size)), type);
+      }
+
+ /**
+ * Create a null register of floating type.
+ */
+      dst_reg
+      null_reg_f() const
+      {
+         /* Null vector of the builder's width, retyped as float. */
+         const struct brw_reg null_vec = brw_null_vec(dispatch_width());
+         return dst_reg(retype(null_vec, BRW_REGISTER_TYPE_F));
+      }
+
+ /**
+ * Create a null register of signed integer type.
+ */
+      dst_reg
+      null_reg_d() const
+      {
+         /* Null vector of the builder's width, retyped as signed integer. */
+         const struct brw_reg null_vec = brw_null_vec(dispatch_width());
+         return dst_reg(retype(null_vec, BRW_REGISTER_TYPE_D));
+      }
+
+ /**
+ * Create a null register of unsigned integer type.
+ */
+      dst_reg
+      null_reg_ud() const
+      {
+         /* Null vector of the builder's width, retyped as unsigned integer. */
+         const struct brw_reg null_vec = brw_null_vec(dispatch_width());
+         return dst_reg(retype(null_vec, BRW_REGISTER_TYPE_UD));
+      }
+
+ /**
+ * Insert an instruction into the program.
+ */
+      instruction *
+      emit(const instruction &inst) const
+      {
+         /* Copy the instruction into storage owned by the shader's memory
+          * context, then insert that copy.
+          */
+         instruction *const copy = new(shader->mem_ctx) instruction(inst);
+         return emit(copy);
+      }
+
+ /**
+ * Create and insert a nullary control instruction into the program.
+ */
+      instruction *
+      emit(enum opcode opcode) const
+      {
+         /* Build the instruction locally; emit() inserts a copy of it. */
+         const instruction proto = instruction(opcode);
+         return emit(proto);
+      }
+
+ /**
+ * Create and insert a nullary instruction into the program.
+ */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst) const
+      {
+         /* Build the instruction locally; emit() inserts a copy of it. */
+         const instruction proto = instruction(opcode, dst);
+         return emit(proto);
+      }
+
+ /**
+ * Create and insert a unary instruction into the program.
+ */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
+      {
+         switch (opcode) {
+         case SHADER_OPCODE_RCP:
+         case SHADER_OPCODE_RSQ:
+         case SHADER_OPCODE_SQRT:
+         case SHADER_OPCODE_EXP2:
+         case SHADER_OPCODE_LOG2:
+         case SHADER_OPCODE_SIN:
+         case SHADER_OPCODE_COS: {
+            /* Single-source math: fix up the operand, emit the instruction,
+             * then apply the math instruction workarounds, in that order.
+             */
+            const src_reg operand = fix_math_operand(src0);
+            instruction *const math = emit(instruction(opcode, dst, operand));
+            return fix_math_instruction(math);
+         }
+
+         default:
+            return emit(instruction(opcode, dst, src0));
+         }
+      }
+
+ /**
+ * Create and insert a binary instruction into the program.
+ */
+ instruction *
+ emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1) const
+ {
+ switch (opcode) {
+ case SHADER_OPCODE_POW:
+ case SHADER_OPCODE_INT_QUOTIENT:
+ case SHADER_OPCODE_INT_REMAINDER:
+ /* Two-source math opcodes: both operands go through
+ * fix_math_operand() and the emitted instruction through
+ * fix_math_instruction().  The two operand fix-ups are call
+ * arguments, so C++ leaves their relative evaluation order (and thus
+ * the order of any copies they emit) to the compiler.
+ */
+ return fix_math_instruction(
+ emit(instruction(opcode, dst,
+ fix_math_operand(src0),
+ fix_math_operand(src1))));
+
+ default:
+ /* Everything else is emitted with its sources untouched. */
+ return emit(instruction(opcode, dst, src0, src1));
+ }
+ }
+
+ /**
+ * Create and insert a ternary instruction into the program.
+ */
+ instruction *
+ emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1, const src_reg &src2) const
+ {
+ switch (opcode) {
+ case BRW_OPCODE_BFE:
+ case BRW_OPCODE_BFI2:
+ case BRW_OPCODE_MAD:
+ case BRW_OPCODE_LRP:
+ /* For these opcodes every source goes through fix_3src_operand()
+ * first.  The three fix-ups are call arguments, so their relative
+ * evaluation order is left to the compiler.
+ */
+ return emit(instruction(opcode, dst,
+ fix_3src_operand(src0),
+ fix_3src_operand(src1),
+ fix_3src_operand(src2)));
+
+ default:
+ /* Everything else is emitted with its sources untouched. */
+ return emit(instruction(opcode, dst, src0, src1, src2));
+ }
+ }
+
+ /**
+ * Insert a preallocated instruction into the program.
+ */
+      instruction *
+      emit(instruction *inst) const
+      {
+         /* Stamp the builder's annotation and execution control onto the
+          * instruction before it is inserted.
+          */
+         inst->ir = annotation.ir;
+         inst->annotation = annotation.str;
+         inst->force_writemask_all = force_writemask_all;
+
+         /* Without a basic block, fall back to plain list insertion. */
+         if (block == NULL)
+            cursor->insert_before(inst);
+         else
+            static_cast<instruction *>(cursor)->insert_before(block, inst);
+
+         return inst;
+      }
+
+ /**
+ * Select \p src0 if the comparison of both sources with the given
+ * conditional mod evaluates to true, otherwise select \p src1.
+ *
+ * Generally useful to get the minimum or maximum of two values.
+ */
+      void
+      emit_minmax(const dst_reg &dst, const src_reg &src0,
+                  const src_reg &src1, brw_conditional_mod mod) const
+      {
+         if (shader->devinfo->gen < 6) {
+            /* Before Gen6: emit a separate CMP, then a predicated SEL. */
+            CMP(null_reg_d(), src0, src1, mod);
+            set_predicate(BRW_PREDICATE_NORMAL, SEL(dst, src0, src1));
+            return;
+         }
+
+         /* Gen6+: a single SEL carrying the conditional modifier. */
+         set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
+                              fix_unsigned_negate(src1)));
+      }
+
+ /**
+ * Copy any live channel from \p src to the first channel of the result.
+ */
+      src_reg
+      emit_uniformize(const src_reg &src) const
+      {
+         /* Both instructions are emitted through an exec_all() builder. */
+         const vec4_builder all = exec_all();
+
+         /* Temporaries are allocated in this order: channel index first,
+          * then the result.
+          */
+         const dst_reg live_chan =
+            writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
+         const dst_reg result = vgrf(src.type);
+
+         all.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, live_chan);
+         all.emit(SHADER_OPCODE_BROADCAST, result, src, src_reg(live_chan));
+
+         return src_reg(result);
+      }
+
+ /**
+ * Assorted arithmetic ops.
+ * @{
+ */
+ /* ALU1(op): defines op(dst, src0), which emits BRW_OPCODE_<op>. */
+#define ALU1(op) \
+ instruction * \
+ op(const dst_reg &dst, const src_reg &src0) const \
+ { \
+ return emit(BRW_OPCODE_##op, dst, src0); \
+ }
+
+ /* ALU2(op): defines op(dst, src0, src1), which emits BRW_OPCODE_<op>. */
+#define ALU2(op) \
+ instruction * \
+ op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
+ { \
+ return emit(BRW_OPCODE_##op, dst, src0, src1); \
+ }
+
+ /* ALU2_ACC(op): like ALU2, but also sets writes_accumulator on the
+ * emitted instruction.
+ */
+#define ALU2_ACC(op) \
+ instruction * \
+ op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
+ { \
+ instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1); \
+ inst->writes_accumulator = true; \
+ return inst; \
+ }
+
+ /* ALU3(op): defines op(dst, src0, src1, src2), which emits
+ * BRW_OPCODE_<op>.
+ */
+#define ALU3(op) \
+ instruction * \
+ op(const dst_reg &dst, const src_reg &src0, const src_reg &src1, \
+ const src_reg &src2) const \
+ { \
+ return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \
+ }
+
+ /* One member function per opcode, generated by the macros above. */
+ ALU2(ADD)
+ ALU2_ACC(ADDC)
+ ALU2(AND)
+ ALU2(ASR)
+ ALU2(AVG)
+ ALU3(BFE)
+ ALU2(BFI1)
+ ALU3(BFI2)
+ ALU1(BFREV)
+ ALU1(CBIT)
+ ALU2(CMPN)
+ ALU3(CSEL)
+ ALU2(DP2)
+ ALU2(DP3)
+ ALU2(DP4)
+ ALU2(DPH)
+ ALU1(F16TO32)
+ ALU1(F32TO16)
+ ALU1(FBH)
+ ALU1(FBL)
+ ALU1(FRC)
+ ALU2(LINE)
+ ALU1(LZD)
+ ALU2(MAC)
+ ALU2_ACC(MACH)
+ ALU3(MAD)
+ ALU1(MOV)
+ ALU2(MUL)
+ ALU1(NOT)
+ ALU2(OR)
+ ALU2(PLN)
+ ALU1(RNDD)
+ ALU1(RNDE)
+ ALU1(RNDU)
+ ALU1(RNDZ)
+ ALU2(SAD2)
+ ALU2_ACC(SADA2)
+ ALU2(SEL)
+ ALU2(SHL)
+ ALU2(SHR)
+ ALU2_ACC(SUBB)
+ ALU2(XOR)
+
+ /* The helper macros are only needed for the list above. */
+#undef ALU3
+#undef ALU2_ACC
+#undef ALU2
+#undef ALU1
+ /** @} */
+
+ /**
+ * CMP: Sets the low bit of the destination channels with the result
+ * of the comparison, while the upper bits are undefined, and updates
+ * the flag register with the packed 16 bits of the result.
+ */
+      instruction *
+      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
+          brw_conditional_mod condition) const
+      {
+         /* Consider:
+          *
+          *    CMP null<d> src0<f> src1<f>
+          *
+          * Original gen4 converts to the destination type before comparing,
+          * which produces garbage for floating point comparisons.
+          *
+          * Newer generations ignore the destination type, so it is set to
+          * match src0, which also lets the instruction be compacted.
+          */
+         instruction *const cmp = emit(BRW_OPCODE_CMP, retype(dst, src0.type),
+                                       fix_unsigned_negate(src0),
+                                       fix_unsigned_negate(src1));
+         return set_condmod(condition, cmp);
+      }
+
+ /**
+ * Gen4 predicated IF.
+ */
+      instruction *
+      IF(brw_predicate predicate) const
+      {
+         /* Emit a bare IF and attach the predicate to it. */
+         instruction *const branch = emit(BRW_OPCODE_IF);
+         return set_predicate(predicate, branch);
+      }
+
+ /**
+ * Gen6 IF with embedded comparison.
+ */
+      instruction *
+      IF(const src_reg &src0, const src_reg &src1,
+         brw_conditional_mod condition) const
+      {
+         /* This form is only valid on Gen6. */
+         assert(shader->devinfo->gen == 6);
+
+         /* The IF compares the two sources itself and writes no result. */
+         instruction *const branch = emit(BRW_OPCODE_IF,
+                                          null_reg_d(),
+                                          fix_unsigned_negate(src0),
+                                          fix_unsigned_negate(src1));
+         return set_condmod(condition, branch);
+      }
+
+ /**
+ * Emit a linear interpolation instruction.
+ */
+      instruction *
+      LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
+          const src_reg &a) const
+      {
+         if (shader->devinfo->gen < 6) {
+            /* No LRP instruction to use here.  Emit x*(1-a) + y*a. */
+            const dst_reg y_times_a = vgrf(dst.type);
+            const dst_reg one_minus_a = vgrf(dst.type);
+            const dst_reg x_times_one_minus_a = vgrf(dst.type);
+
+            MUL(y_times_a, y, a);
+            ADD(one_minus_a, negate(a), src_reg(1.0f));
+            MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
+            return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
+         }
+
+         /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
+          * the operands are passed in reverse order.
+          */
+         return emit(BRW_OPCODE_LRP, dst, a, y, x);
+      }
+
+ backend_shader *shader;
+
+ protected:
+ /**
+ * Workaround for negation of UD registers. See comment in
+ * fs_generator::generate_code() for the details.
+ */
+      src_reg
+      fix_unsigned_negate(const src_reg &src) const
+      {
+         /* Only a negated UD source needs the workaround. */
+         if (src.type != BRW_REGISTER_TYPE_UD || !src.negate)
+            return src;
+
+         /* Copy the negated value into a UD temporary and read that instead. */
+         const dst_reg copy = vgrf(BRW_REGISTER_TYPE_UD);
+         MOV(copy, src);
+         return src_reg(copy);
+      }
+
+ /**
+ * Workaround for register access modes not supported by the ternary
+ * instruction encoding.
+ */
+      src_reg
+      fix_3src_operand(const src_reg &src) const
+      {
+         /* Using vec4 uniforms in SIMD4x2 programs is difficult.  Ideally a
+          * vertical stride of zero would replicate the vec4 uniform, like
+          *
+          *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
+          *
+          * but the vertical stride is always four in three-source
+          * instructions.  So the value is replicated into a temporary that
+          * the three-source instruction can consume.
+          */
+
+         /* The copy is needed for immediates, and for uniforms unless their
+          * swizzle selects a single value.
+          */
+         const bool needs_copy =
+            src.file == IMM ||
+            (src.file == UNIFORM &&
+             !brw_is_single_value_swizzle(src.swizzle));
+
+         if (!needs_copy)
+            return src;
+
+         const dst_reg expanded = vgrf(src.type);
+         emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
+         return src_reg(expanded);
+      }
+
+ /**
+ * Workaround for register access modes not supported by the math
+ * instruction.
+ */
+      src_reg
+      fix_math_operand(const src_reg &src) const
+      {
+         /* The gen6 math instruction ignores the source modifiers --
+          * swizzle, abs, negate, and at least some parts of the register
+          * region description.  Instead of enumerating those cases, gen6
+          * *always* gets the operand expanded to a temp GRF.
+          *
+          * Gen7 keeps the operand as-is unless it is an immediate, which
+          * gen7 still can't use.
+          */
+         const bool needs_temp =
+            shader->devinfo->gen == 6 ||
+            (shader->devinfo->gen == 7 && src.file == IMM);
+
+         if (!needs_temp)
+            return src;
+
+         const dst_reg tmp = vgrf(src.type);
+         MOV(tmp, src);
+         return src_reg(tmp);
+      }
+
+ /**
+ * Workaround other weirdness of the math instruction.
+ */
+ instruction *
+ fix_math_instruction(instruction *inst) const
+ {
+ /* Gen6 with a partial destination writemask: redirect the math
+ * instruction to a fresh temporary and emit a MOV from that temporary
+ * into the original destination.
+ */
+ if (shader->devinfo->gen == 6 &&
+ inst->dst.writemask != WRITEMASK_XYZW) {
+ const dst_reg tmp = vgrf(inst->dst.type);
+ /* The MOV must be built while inst->dst still names the original
+ * destination; only afterwards is inst retargeted to tmp.
+ */
+ MOV(inst->dst, src_reg(tmp));
+ inst->dst = tmp;
+
+ } else if (shader->devinfo->gen < 6) {
+ /* Before Gen6: set base_mrf to 1 and mlen to the number of sources
+ * (one if src[1] is BAD_FILE, otherwise two).
+ */
+ const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2);
+ inst->base_mrf = 1;
+ inst->mlen = sources;
+ }
+
+ /* inst is returned in every case, modified in place where needed. */
+ return inst;
+ }
+
+ bblock_t *block;
+ exec_node *cursor;
+
+ bool force_writemask_all;
+
+ /** Debug annotation info. */
+ struct {
+ const char *str;
+ const void *ir;
+ } annotation;
+ };
+}
+
+#endif
return true;
}
break;
+ case GS_OPCODE_SET_WRITE_OFFSET:
+ /* This is just a multiply by a constant with special strides.
+ * The generator will handle immediates in both arguments (generating
+ * a single MOV of the product). So feel free to propagate in src0.
+ */
+ inst->src[arg] = value;
+ return true;
case BRW_OPCODE_CMP:
if (arg == 1) {
}
+/**
+ * Returns true if \p inst is a MOV, or a predicated SEL, whose destination
+ * type matches the type of its source(s) and which applies no abs, negate
+ * or saturate.
+ *
+ * try_copy_propagate() uses this to allow propagating a value that carries
+ * source modifiers and has a different type: in that case it retypes the
+ * instruction's sources and destination to the type of the value.
+ */
static bool
+can_change_source_types(vec4_instruction *inst)
+{
+ return inst->dst.type == inst->src[0].type &&
+ !inst->src[0].abs && !inst->src[0].negate && !inst->saturate &&
+ (inst->opcode == BRW_OPCODE_MOV ||
+ (inst->opcode == BRW_OPCODE_SEL &&
+ inst->dst.type == inst->src[1].type &&
+ inst->predicate != BRW_PREDICATE_NONE &&
+ !inst->src[1].abs && !inst->src[1].negate));
+}
+
+static bool
try_copy_propagate(const struct brw_device_info *devinfo,
vec4_instruction *inst,
int arg, struct copy_entry *entry)
{
+ /* Build up the value we are propagating as if it were the source of a
+ * single MOV
+ */
/* For constant propagation, we only handle the same constant
* across all 4 channels. Some day, we should handle the 8-bit
* float vector format, which would let us constant propagate
for (int i = 0; i < 4; i++) {
s[i] = BRW_GET_SWZ(entry->value[i]->swizzle, i);
}
- value.swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
- BRW_SWIZZLE4(s[0], s[1], s[2], s[3]));
+ value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
+ /* Check that we can propagate that value */
if (value.file != UNIFORM &&
value.file != GRF &&
value.file != ATTR)
return false;
}
- if (inst->src[arg].abs) {
- value.negate = false;
- value.abs = true;
- }
- if (inst->src[arg].negate)
- value.negate = !value.negate;
-
bool has_source_modifiers = value.negate || value.abs;
/* gen6 math and gen7+ SENDs from GRFs ignore source modifiers on
value.swizzle != BRW_SWIZZLE_XYZW) && !inst->can_do_source_mods(devinfo))
return false;
- if (has_source_modifiers && value.type != inst->src[arg].type)
+ if (has_source_modifiers &&
+ value.type != inst->src[arg].type &&
+ !can_change_source_types(inst))
return false;
if (has_source_modifiers &&
inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE)
return false;
- if (inst->is_3src() && value.file == UNIFORM)
+ unsigned composed_swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
+ value.swizzle);
+ if (inst->is_3src() &&
+ value.file == UNIFORM &&
+ !brw_is_single_value_swizzle(composed_swizzle))
return false;
if (inst->is_send_from_grf())
}
}
- value.type = inst->src[arg].type;
+ /* Build the final value */
+ if (inst->src[arg].abs) {
+ value.negate = false;
+ value.abs = true;
+ }
+ if (inst->src[arg].negate)
+ value.negate = !value.negate;
+
+ value.swizzle = composed_swizzle;
+ if (has_source_modifiers &&
+ value.type != inst->src[arg].type) {
+ assert(can_change_source_types(inst));
+ for (int i = 0; i < 3; i++) {
+ inst->src[i].type = value.type;
+ }
+ inst->dst.type = value.type;
+ } else {
+ value.type = inst->src[arg].type;
+ }
+
inst->src[arg] = value;
return true;
}
namespace brw {
struct brw_reg
-vec4_instruction::get_dst(void)
+vec4_instruction::get_dst(unsigned gen)
{
struct brw_reg brw_reg;
break;
case MRF:
+ assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(gen));
brw_reg = brw_message_reg(dst.reg + dst.reg_offset);
brw_reg = retype(brw_reg, dst.type);
brw_reg.dw1.bits.writemask = dst.writemask;
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
}
break;
+ case SHADER_OPCODE_SAMPLEINFO:
+ msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
+ break;
default:
unreachable("should not get here: invalid vec4 texture opcode");
}
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, get_element_ud(inst->get_dst(), 0),
+ brw_MOV(p, get_element_ud(inst->get_dst(devinfo->gen), 0),
get_element_ud(inst->get_src(this->prog_data, 0), 0));
brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
inst->base_mrf, /* starting mrf reg nr */
src,
BRW_URB_WRITE_EOT | inst->urb_write_flags,
- devinfo->gen >= 8 ? 2 : 1,/* message len */
+ inst->mlen,
0, /* response len */
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_UD &&
src1.dw1.ud <= USHRT_MAX);
- brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
- retype(src1, BRW_REGISTER_TYPE_UW));
+ if (src0.file == IMM) {
+ brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3),
+ brw_imm_ud(src0.dw1.ud * src1.dw1.ud));
+ } else {
+ brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
+ retype(src1, BRW_REGISTER_TYPE_UW));
+ }
brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
}
+/**
+ * Emit the sampler message for VS_OPCODE_GET_BUFFER_SIZE.
+ *
+ * Sends a GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO message in SIMD4x2 mode with
+ * a SINT32 return format and a response length of 1. The payload location
+ * and length come from inst->base_mrf and inst->mlen, and a header is
+ * included when inst->header_size is non-zero. The surface is then marked
+ * as used.
+ *
+ * Asserts gen >= 7 and that \p surf_index is an immediate of type UD.
+ */
void
+vec4_generator::generate_get_buffer_size(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg surf_index)
+{
+ assert(devinfo->gen >= 7);
+ assert(surf_index.type == BRW_REGISTER_TYPE_UD &&
+ surf_index.file == BRW_IMMEDIATE_VALUE);
+
+ brw_SAMPLE(p,
+ dst,
+ inst->base_mrf,
+ src,
+ surf_index.dw1.ud,
+ 0,
+ GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
+ 1, /* response length */
+ inst->mlen,
+ inst->header_size > 0,
+ BRW_SAMPLER_SIMD_MODE_SIMD4X2,
+ BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+ brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
+}
+
+void
vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg surf_index,
for (unsigned int i = 0; i < 3; i++) {
src[i] = inst->get_src(this->prog_data, i);
}
- dst = inst->get_dst();
+ dst = inst->get_dst(devinfo->gen);
brw_set_default_predicate_control(p, inst->predicate);
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
brw_set_default_mask_control(p, inst->force_writemask_all);
brw_set_default_acc_write_control(p, inst->writes_accumulator);
+ assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
+ assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
+
unsigned pre_emit_nr_insn = p->nr_insn;
if (dst.width == BRW_WIDTH_4) {
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
+ case SHADER_OPCODE_SAMPLEINFO:
generate_tex(inst, dst, src[0], src[1]);
break;
generate_set_simd4x2_header_gen9(inst, dst);
break;
+
+ case VS_OPCODE_GET_BUFFER_SIZE:
+ generate_get_buffer_size(inst, dst, src[0], src[1]);
+ break;
+
case GS_OPCODE_URB_WRITE:
generate_gs_urb_write(inst);
break;
src_reg src;
switch (instr->intrinsic) {
- case nir_intrinsic_emit_vertex: {
+ case nir_intrinsic_emit_vertex_with_counter: {
+ this->vertex_count =
+ retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
int stream_id = instr->const_index[0];
gs_emit_vertex(stream_id);
break;
}
- case nir_intrinsic_end_primitive:
+ case nir_intrinsic_end_primitive_with_counter:
+ this->vertex_count =
+ retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
gs_end_primitive();
break;
+ case nir_intrinsic_set_vertex_count:
+ this->vertex_count =
+ retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
+ break;
+
case nir_intrinsic_load_invocation_id: {
src_reg invocation_id =
src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
bool no_spills,
int shader_time_index)
: vec4_visitor(compiler, log_data,
- &c->gp->program.Base, &c->key.base,
+ &c->gp->program.Base, &c->key.tex,
&c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx,
no_spills, shader_time_index),
c(c)
*/
int base_mrf = 1;
+ bool static_vertex_count = c->prog_data.static_vertex_count != -1;
+
+ /* If the previous instruction was a URB write, we don't need to issue
+ * a second one - we can just set the EOT bit on the previous write.
+ *
+ * Skip this on Gen8+ unless there's a static vertex count, as we also
+ * need to write the vertex count out, and combining the two may not be
+ * possible (or at least not straightforward).
+ */
+ vec4_instruction *last = (vec4_instruction *) instructions.get_tail();
+ if (last && last->opcode == GS_OPCODE_URB_WRITE &&
+ !(INTEL_DEBUG & DEBUG_SHADER_TIME) &&
+ devinfo->gen >= 8 && static_vertex_count) {
+ last->urb_write_flags = BRW_URB_WRITE_EOT | last->urb_write_flags;
+ return;
+ }
+
current_annotation = "thread end";
dst_reg mrf_reg(MRF, base_mrf);
src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
inst->force_writemask_all = true;
- emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
+ if (devinfo->gen < 8 || !static_vertex_count)
+ emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_end();
inst = emit(GS_OPCODE_THREAD_END);
inst->base_mrf = base_mrf;
- inst->mlen = 1;
+ inst->mlen = devinfo->gen >= 8 && !static_vertex_count ? 2 : 1;
}
/* We need to increment Global Offset by 1 to make room for Broadwell's
* extra "Vertex Count" payload at the beginning of the URB entry.
*/
- if (devinfo->gen >= 8)
+ if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
inst->offset++;
inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
* URB entry. Since this is an OWord message, Global Offset is counted
* in 128-bit units, so we must set it to 2.
*/
- if (devinfo->gen >= 8)
+ if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
inst->offset = 2;
inst->base_mrf = base_mrf;
inst->mlen = 2;
if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
return;
- /* To ensure that we don't output more vertices than the shader specified
- * using max_vertices, do the logic inside a conditional of the form "if
- * (vertex_count < MAX)"
+ /* If we're outputting 32 control data bits or less, then we can wait
+ * until the shader is over to output them all. Otherwise we need to
+ * output them as we go. Now is the time to do it, since we're about to
+ * output the vertex_count'th vertex, so it's guaranteed that the
+ * control data bits associated with the (vertex_count - 1)th vertex are
+ * correct.
*/
- unsigned num_output_vertices = c->gp->program.VerticesOut;
- emit(CMP(dst_null_d(), this->vertex_count,
- src_reg(num_output_vertices), BRW_CONDITIONAL_L));
- emit(IF(BRW_PREDICATE_NORMAL));
- {
- /* If we're outputting 32 control data bits or less, then we can wait
- * until the shader is over to output them all. Otherwise we need to
- * output them as we go. Now is the time to do it, since we're about to
- * output the vertex_count'th vertex, so it's guaranteed that the
- * control data bits associated with the (vertex_count - 1)th vertex are
- * correct.
+ if (c->control_data_header_size_bits > 32) {
+ this->current_annotation = "emit vertex: emit control data bits";
+ /* Only emit control data bits if we've finished accumulating a batch
+ * of 32 bits. This is the case when:
+ *
+ * (vertex_count * bits_per_vertex) % 32 == 0
+ *
+ * (in other words, when the last 5 bits of vertex_count *
+ * bits_per_vertex are 0). Assuming bits_per_vertex == 2^n for some
+ * integer n (which is always the case, since bits_per_vertex is
+ * always 1 or 2), this is equivalent to requiring that the last 5-n
+ * bits of vertex_count are 0:
+ *
+ * vertex_count & (2^(5-n) - 1) == 0
+ *
+ * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
+ * equivalent to:
+ *
+ * vertex_count & (32 / bits_per_vertex - 1) == 0
*/
- if (c->control_data_header_size_bits > 32) {
- this->current_annotation = "emit vertex: emit control data bits";
- /* Only emit control data bits if we've finished accumulating a batch
- * of 32 bits. This is the case when:
- *
- * (vertex_count * bits_per_vertex) % 32 == 0
- *
- * (in other words, when the last 5 bits of vertex_count *
- * bits_per_vertex are 0). Assuming bits_per_vertex == 2^n for some
- * integer n (which is always the case, since bits_per_vertex is
- * always 1 or 2), this is equivalent to requiring that the last 5-n
- * bits of vertex_count are 0:
- *
- * vertex_count & (2^(5-n) - 1) == 0
- *
- * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
- * equivalent to:
- *
- * vertex_count & (32 / bits_per_vertex - 1) == 0
+ vec4_instruction *inst =
+ emit(AND(dst_null_d(), this->vertex_count,
+ (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+ emit(IF(BRW_PREDICATE_NORMAL));
+ {
+ /* If vertex_count is 0, then no control data bits have been
+ * accumulated yet, so we skip emitting them.
*/
- vec4_instruction *inst =
- emit(AND(dst_null_d(), this->vertex_count,
- (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
-
+ emit(CMP(dst_null_d(), this->vertex_count, 0u,
+ BRW_CONDITIONAL_NEQ));
emit(IF(BRW_PREDICATE_NORMAL));
- {
- /* If vertex_count is 0, then no control data bits have been
- * accumulated yet, so we skip emitting them.
- */
- emit(CMP(dst_null_d(), this->vertex_count, 0u,
- BRW_CONDITIONAL_NEQ));
- emit(IF(BRW_PREDICATE_NORMAL));
- emit_control_data_bits();
- emit(BRW_OPCODE_ENDIF);
-
- /* Reset control_data_bits to 0 so we can start accumulating a new
- * batch.
- *
- * Note: in the case where vertex_count == 0, this neutralizes the
- * effect of any call to EndPrimitive() that the shader may have
- * made before outputting its first vertex.
- */
- inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
- inst->force_writemask_all = true;
- }
+ emit_control_data_bits();
emit(BRW_OPCODE_ENDIF);
- }
-
- this->current_annotation = "emit vertex: vertex data";
- emit_vertex();
- /* In stream mode we have to set control data bits for all vertices
- * unless we have disabled control data bits completely (which we do
- * do for GL_POINTS outputs that don't use streams).
- */
- if (c->control_data_header_size_bits > 0 &&
- c->prog_data.control_data_format ==
- GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
- this->current_annotation = "emit vertex: Stream control data bits";
- set_stream_control_data_bits(stream_id);
+ /* Reset control_data_bits to 0 so we can start accumulating a new
+ * batch.
+ *
+ * Note: in the case where vertex_count == 0, this neutralizes the
+ * effect of any call to EndPrimitive() that the shader may have
+ * made before outputting its first vertex.
+ */
+ inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+ inst->force_writemask_all = true;
}
+ emit(BRW_OPCODE_ENDIF);
+ }
- this->current_annotation = "emit vertex: increment vertex count";
- emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
- src_reg(1u)));
+ this->current_annotation = "emit vertex: vertex data";
+ emit_vertex();
+
+ /* In stream mode we have to set control data bits for all vertices
+ * unless we have disabled control data bits completely (which we do
+ * for GL_POINTS outputs that don't use streams).
+ */
+ if (c->control_data_header_size_bits > 0 &&
+ c->prog_data.control_data_format ==
+ GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
+ this->current_annotation = "emit vertex: Stream control data bits";
+ set_stream_control_data_bits(stream_id);
}
- emit(BRW_OPCODE_ENDIF);
this->current_annotation = NULL;
}
void
vec4_gs_visitor::visit(ir_emit_vertex *ir)
{
+ /* To ensure that we don't output more vertices than the shader specified
+ * using max_vertices, do the logic inside a conditional of the form "if
+ * (vertex_count < MAX)"
+ */
+ unsigned num_output_vertices = c->gp->program.VerticesOut;
+ emit(CMP(dst_null_d(), this->vertex_count,
+ src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+ emit(IF(BRW_PREDICATE_NORMAL));
+
gs_emit_vertex(ir->stream_id());
+
+ this->current_annotation = "emit vertex: increment vertex count";
+ emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+ src_reg(1u)));
+
+ emit(BRW_OPCODE_ENDIF);
}
void
vec4_gs_visitor v(brw->intelScreen->compiler, brw,
c, prog, mem_ctx, true /* no_spills */, st_index);
- if (v.run(NULL /* clip planes */)) {
+ if (v.run()) {
return generate_assembly(brw, prog, &c->gp->program.Base,
&c->prog_data.base, mem_ctx, v.cfg,
final_assembly_size);
c, prog, mem_ctx, false /* no_spills */,
st_index);
- if (!gs->run(NULL /* clip planes */)) {
+ if (!gs->run()) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, gs->fail_msg);
} else {
#include "brw_nir.h"
#include "brw_vec4.h"
+#include "brw_vec4_builder.h"
+#include "brw_vec4_surface_builder.h"
#include "glsl/ir_uniform.h"
+using namespace brw;
+using namespace brw::surface_access;
+
namespace brw {
void
unreachable("should be lowered by lower_vertex_id().");
case nir_intrinsic_load_vertex_id_zero_base:
- reg = &this->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
+ reg = &nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
if (reg->file == BAD_FILE)
- *reg =
- *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
- glsl_type::int_type);
+ *reg = *make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+ glsl_type::int_type);
break;
case nir_intrinsic_load_base_vertex:
- reg = &this->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
+ reg = &nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
if (reg->file == BAD_FILE)
- *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
- glsl_type::int_type);
+ *reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
+ glsl_type::int_type);
break;
case nir_intrinsic_load_instance_id:
- reg = &this->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
+ reg = &nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
if (reg->file == BAD_FILE)
- *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
- glsl_type::int_type);
+ *reg = *make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
+ glsl_type::int_type);
break;
default:
}
assert(uniforms < uniform_array_size);
- this->uniform_size[uniforms] = type_size_vec4(var->type);
+ uniform_size[uniforms] = type_size_vec4(var->type);
if (strncmp(var->name, "gl_", 3) == 0)
nir_setup_builtin_uniform(var);
strcmp(var->name, "parameters") == 0);
assert(uniforms < uniform_array_size);
- this->uniform_size[uniforms] = type_size_vec4(var->type);
+ uniform_size[uniforms] = type_size_vec4(var->type);
struct gl_program_parameter_list *plist = prog->Parameters;
for (unsigned p = 0; p < plist->NumParameters; p++) {
* ParameterValues directly, since unlike brw_fs.cpp, we never
* add new state references during compile.
*/
- int index = _mesa_add_state_reference(this->prog->Parameters,
+ int index = _mesa_add_state_reference(prog->Parameters,
(gl_state_index *)slots[i].tokens);
gl_constant_value *values =
- &this->prog->Parameters->ParameterValues[index][0];
+ &prog->Parameters->ParameterValues[index][0];
assert(uniforms < uniform_array_size);
stage_prog_data->param[uniforms * 4 + j] =
&values[GET_SWZ(slots[i].swizzle, j)];
- this->uniform_vector_size[uniforms] =
+ uniform_vector_size[uniforms] =
(var->type->is_scalar() || var->type->is_vector() ||
var->type->is_matrix() ? var->type->vector_elements : 4);
void
vec4_visitor::nir_emit_instr(nir_instr *instr)
{
- this->base_ir = instr;
+ base_ir = instr;
switch (instr->type) {
case nir_instr_type_load_const:
nir_emit_texture(nir_instr_as_tex(instr));
break;
+ case nir_instr_type_ssa_undef:
+ nir_emit_undef(nir_instr_as_ssa_undef(instr));
+ break;
+
default:
fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n");
break;
dst_reg
vec4_visitor::get_nir_dest(nir_dest dest)
{
- assert(!dest.is_ssa);
- return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
- dest.reg.indirect);
+ if (dest.is_ssa) {
+ dst_reg dst = dst_reg(GRF, alloc.allocate(1));
+ nir_ssa_values[dest.ssa.index] = dst;
+ return dst;
+ } else {
+ return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
+ dest.reg.indirect);
+ }
}
dst_reg
vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
{
dst_reg reg = dst_reg(GRF, alloc.allocate(1));
- reg.type = BRW_REGISTER_TYPE_F;
+ reg.type = BRW_REGISTER_TYPE_D;
unsigned remaining = brw_writemask_for_size(instr->def.num_components);
}
reg.writemask = writemask;
- emit(MOV(reg, src_reg(instr->value.f[i])));
+ emit(MOV(reg, src_reg(instr->value.i[i])));
remaining &= ~writemask;
}
break;
}
- case nir_intrinsic_load_vertex_id:
- unreachable("should be lowered by lower_vertex_id()");
+ case nir_intrinsic_get_buffer_size: {
+ nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+ unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+
+ assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+ src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+ ubo_index);
+ dst_reg result_dst = get_nir_dest(instr->dest);
+ vec4_instruction *inst = new(mem_ctx)
+ vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst);
+
+ inst->base_mrf = 2;
+ inst->mlen = 1; /* always at least one */
+ inst->src[1] = src_reg(surf_index);
+
+ /* MRF for the first parameter */
+ src_reg lod = src_reg(0);
+ int param_base = inst->base_mrf;
+ int writemask = WRITEMASK_X;
+ emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
+
+ emit(inst);
+ break;
+ }
+
+ case nir_intrinsic_store_ssbo_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_store_ssbo: {
+ assert(devinfo->gen >= 7);
+
+ /* Block index */
+ src_reg surf_index;
+ nir_const_value *const_uniform_block =
+ nir_src_as_const_value(instr->src[1]);
+ if (const_uniform_block) {
+ unsigned index = prog_data->base.binding_table.ubo_start +
+ const_uniform_block->u[0];
+ surf_index = src_reg(index);
+ brw_mark_surface_used(&prog_data->base, index);
+ } else {
+ surf_index = src_reg(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
+ src_reg(prog_data->base.binding_table.ubo_start)));
+ surf_index = emit_uniformize(surf_index);
+
+ brw_mark_surface_used(&prog_data->base,
+ prog_data->base.binding_table.ubo_start +
+ shader_prog->NumBufferInterfaceBlocks - 1);
+ }
+
+ /* Offset */
+ src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+ unsigned const_offset_bytes = 0;
+ if (has_indirect) {
+ emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1)));
+ } else {
+ const_offset_bytes = instr->const_index[0];
+ emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+ }
+
+ /* Value */
+ src_reg val_reg = get_nir_src(instr->src[0], 4);
+
+ /* Writemask */
+ unsigned write_mask = instr->const_index[1];
+
+ /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped
+ * writes will use SIMD8 mode. In order to hide this and keep symmetry across
+ * typed and untyped messages and across hardware platforms, the
+ * current implementation of the untyped messages will transparently convert
+ * the SIMD4x2 payload into an equivalent SIMD8 payload by transposing it
+ * and enabling only channel X on the SEND instruction.
+ *
+ * The above works well for full vector writes, but not for partial writes
+ * where we want to write some channels and not others, like when we have
+ * code such as v.xyw = vec3(1,2,4). Because the untyped write messages are
+ * quite restrictive with regard to the channel enables we can configure in
+ * the message descriptor (not all combinations are allowed) we cannot simply
+ * implement these scenarios with a single message while keeping the
+ * aforementioned symmetry in the implementation. For now we decided that
+ * it is better to keep the symmetry to reduce complexity, so in situations
+ * such as the one described we end up emitting two untyped write messages
+ * (one for xy and another for w).
+ *
+ * The code below packs consecutive channels into a single write message,
+ * detects gaps in the vector write and if needed, sends a second message
+ * with the remaining channels. If in the future we decide that we want to
+ * emit a single message at the expense of losing the symmetry in the
+ * implementation we can:
+ *
+ * 1) For IvyBridge: Only use the red channel of the untyped write SIMD8
+ * message payload. In this mode we can write up to 8 offsets and dwords
+ * to the red channel only (for the two vec4s in the SIMD4x2 execution)
+ * and select which of the 8 channels carry data to write by setting the
+ * appropriate writemask in the dst register of the SEND instruction.
+ * It would require to write a new generator opcode specifically for
+ * IvyBridge since we would need to prepare a SIMD8 payload that could
+ * use any channel, not just X.
+ *
+ * 2) For Haswell+: Simply send a single write message but set the writemask
+ * on the dst of the SEND instruction to select the channels we want to
+ * write. It would require to modify the current messages to receive
+ * and honor the writemask provided.
+ */
+ const vec4_builder bld = vec4_builder(this).at_end()
+ .annotate(current_annotation, base_ir);
+
+ int swizzle[4] = { 0, 0, 0, 0};
+ int num_channels = 0;
+ unsigned skipped_channels = 0;
+ int num_components = instr->num_components;
+ for (int i = 0; i < num_components; i++) {
+ /* Check if this channel needs to be written. If so, record the
+ * channel we need to take the data from in the swizzle array
+ */
+ int component_mask = 1 << i;
+ int write_test = write_mask & component_mask;
+ if (write_test)
+ swizzle[num_channels++] = i;
+
+ /* If we don't have to write this channel it means we have a gap in the
+ * vector, so write the channels we accumulated until now, if any. Do
+ * the same if this was the last component in the vector.
+ */
+ if (!write_test || i == num_components - 1) {
+ if (num_channels > 0) {
+ /* We have channels to write, so update the offset we need to
+ * write at to skip the channels we skipped, if any.
+ */
+ if (skipped_channels > 0) {
+ if (!has_indirect) {
+ const_offset_bytes += 4 * skipped_channels;
+ offset_reg = src_reg(const_offset_bytes);
+ } else {
+ emit(ADD(dst_reg(offset_reg), offset_reg,
+ brw_imm_ud(4 * skipped_channels)));
+ }
+ }
+
+ /* Swizzle the data register so we take the data from the channels
+ * we need to write and send the write message. This will write
+ * num_channels consecutive dwords starting at offset.
+ */
+ val_reg.swizzle =
+ BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+ emit_untyped_write(bld, surf_index, offset_reg, val_reg,
+ 1 /* dims */, num_channels /* size */,
+ BRW_PREDICATE_NONE);
+
+ /* If we have to do a second write we will have to update the
+ * offset so that we jump over the channels we have just written
+ * now.
+ */
+ skipped_channels = num_channels;
+
+ /* Restart the count for the next write message */
+ num_channels = 0;
+ }
+
+ /* We did not write the current channel, so increase skipped count */
+ skipped_channels++;
+ }
+ }
- case nir_intrinsic_load_vertex_id_zero_base: {
- src_reg vertex_id =
- src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
- assert(vertex_id.file != BAD_FILE);
- dest = get_nir_dest(instr->dest, vertex_id.type);
- emit(MOV(dest, vertex_id));
break;
}
- case nir_intrinsic_load_base_vertex: {
- src_reg base_vertex =
- src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]);
- assert(base_vertex.file != BAD_FILE);
- dest = get_nir_dest(instr->dest, base_vertex.type);
- emit(MOV(dest, base_vertex));
+ case nir_intrinsic_load_ssbo_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_load_ssbo: {
+ assert(devinfo->gen >= 7);
+
+ nir_const_value *const_uniform_block =
+ nir_src_as_const_value(instr->src[0]);
+
+ src_reg surf_index;
+ if (const_uniform_block) {
+ unsigned index = prog_data->base.binding_table.ubo_start +
+ const_uniform_block->u[0];
+ surf_index = src_reg(index);
+
+ brw_mark_surface_used(&prog_data->base, index);
+ } else {
+ surf_index = src_reg(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1),
+ src_reg(prog_data->base.binding_table.ubo_start)));
+ surf_index = emit_uniformize(surf_index);
+
+ /* Assume this may touch any UBO. It would be nice to provide
+ * a tighter bound, but the array information is already lowered away.
+ */
+ brw_mark_surface_used(&prog_data->base,
+ prog_data->base.binding_table.ubo_start +
+ shader_prog->NumBufferInterfaceBlocks - 1);
+ }
+
+ src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+ unsigned const_offset_bytes = 0;
+ if (has_indirect) {
+ emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1)));
+ } else {
+ const_offset_bytes = instr->const_index[0];
+ emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+ }
+
+ /* Read the vector */
+ const vec4_builder bld = vec4_builder(this).at_end()
+ .annotate(current_annotation, base_ir);
+
+ src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+ 1 /* dims */, 4 /* size*/,
+ BRW_PREDICATE_NONE);
+ dst_reg dest = get_nir_dest(instr->dest);
+ read_result.type = dest.type;
+ read_result.swizzle = brw_swizzle_for_size(instr->num_components);
+ emit(MOV(dest, read_result));
+
break;
}
+ case nir_intrinsic_ssbo_atomic_add:
+ nir_emit_ssbo_atomic(BRW_AOP_ADD, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_min:
+ if (dest.type == BRW_REGISTER_TYPE_D)
+ nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr);
+ else
+ nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_max:
+ if (dest.type == BRW_REGISTER_TYPE_D)
+ nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr);
+ else
+ nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_and:
+ nir_emit_ssbo_atomic(BRW_AOP_AND, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_or:
+ nir_emit_ssbo_atomic(BRW_AOP_OR, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_xor:
+ nir_emit_ssbo_atomic(BRW_AOP_XOR, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_exchange:
+ nir_emit_ssbo_atomic(BRW_AOP_MOV, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ nir_emit_ssbo_atomic(BRW_AOP_CMPWR, instr);
+ break;
+
+ case nir_intrinsic_load_vertex_id:
+ unreachable("should be lowered by lower_vertex_id()");
+
+ case nir_intrinsic_load_vertex_id_zero_base:
+ case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_instance_id: {
- src_reg instance_id =
- src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]);
- assert(instance_id.file != BAD_FILE);
- dest = get_nir_dest(instr->dest, instance_id.type);
- emit(MOV(dest, instance_id));
+ gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+ src_reg val = src_reg(nir_system_values[sv]);
+ assert(val.file != BAD_FILE);
+ dest = get_nir_dest(instr->dest, val.type);
+ emit(MOV(dest, val));
break;
}
*/
brw_mark_surface_used(&prog_data->base,
prog_data->base.binding_table.ubo_start +
- shader_prog->NumUniformBlocks - 1);
+ shader_prog->NumBufferInterfaceBlocks - 1);
}
unsigned const_offset = instr->const_index[1];
}
}
+/**
+ * Emit an untyped atomic operation on a shader storage buffer.
+ *
+ * \param op atomic opcode (BRW_AOP_*) forwarded to emit_untyped_atomic()
+ * \param instr NIR intrinsic: src[0] is the block index, src[1] the offset,
+ * src[2] the data operand and, only for BRW_AOP_CMPWR, src[3] a second
+ * data operand
+ *
+ * The surface index is binding_table.ubo_start plus the block index. With
+ * a constant block index only that surface is marked as used; otherwise
+ * the index is computed with an ADD and the last buffer interface block
+ * surface is marked as used. The atomic result is copied to the NIR
+ * destination with a MOV after retyping the destination to match it.
+ */
+void
+vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
+{
+ dst_reg dest;
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ dest = get_nir_dest(instr->dest);
+
+ src_reg surface;
+ nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+ if (const_surface) {
+ unsigned surf_index = prog_data->base.binding_table.ubo_start +
+ const_surface->u[0];
+ surface = src_reg(surf_index);
+ brw_mark_surface_used(&prog_data->base, surf_index);
+ } else {
+ surface = src_reg(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]),
+ src_reg(prog_data->base.binding_table.ubo_start)));
+
+ /* Assume this may touch any UBO. This is the same we do for other
+ * UBO/SSBO accesses with non-constant surface.
+ */
+ brw_mark_surface_used(&prog_data->base,
+ prog_data->base.binding_table.ubo_start +
+ shader_prog->NumBufferInterfaceBlocks - 1);
+ }
+
+ src_reg offset = get_nir_src(instr->src[1], 1);
+ src_reg data1 = get_nir_src(instr->src[2], 1);
+ src_reg data2;
+ if (op == BRW_AOP_CMPWR)
+ data2 = get_nir_src(instr->src[3], 1);
+
+ /* Emit the actual atomic operation */
+ const vec4_builder bld =
+ vec4_builder(this).at_end().annotate(current_annotation, base_ir);
+
+ src_reg atomic_result =
+ surface_access::emit_untyped_atomic(bld, surface, offset,
+ data1, data2,
+ 1 /* dims */, 1 /* rsize */,
+ op,
+ BRW_PREDICATE_NONE);
+ dest.type = atomic_result.type;
+ bld.MOV(dest, atomic_result);
+}
+
static unsigned
brw_swizzle_for_nir_swizzle(uint8_t swizzle[4])
{
inst->predicate = BRW_PREDICATE_NORMAL;
break;
- case nir_op_fdot2:
+ case nir_op_fdot_replicated2:
inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
- case nir_op_fdot3:
+ case nir_op_fdot_replicated3:
inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
- case nir_op_fdot4:
+ case nir_op_fdot_replicated4:
inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
+ case nir_op_fdph_replicated:
+ inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
case nir_op_bany2:
case nir_op_bany3:
case nir_op_bany4: {
switch (texop) {
case nir_texop_lod: op = ir_lod; break;
case nir_texop_query_levels: op = ir_query_levels; break;
+ case nir_texop_texture_samples: op = ir_texture_samples; break;
case nir_texop_tex: op = ir_tex; break;
case nir_texop_tg4: op = ir_tg4; break;
case nir_texop_txb: op = ir_txb; break;
* emitting anything other than setting up the constant result.
*/
if (instr->op == nir_texop_tg4) {
- int swiz = GET_SWZ(key->tex.swizzles[sampler], instr->component);
+ int swiz = GET_SWZ(key_tex->swizzles[sampler], instr->component);
if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
return;
sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
assert(coord_type != NULL);
if (devinfo->gen >= 7 &&
- key->tex.compressed_multisample_layout_mask & (1<<sampler)) {
+ key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
} else {
mcs = src_reg(0u);
mcs, is_cube_array, sampler, sampler_reg);
}
+/**
+ * Handle an SSA undef: allocate a fresh GRF with alloc.allocate(1) and
+ * record it in nir_ssa_values at the index of the instruction's SSA def.
+ * No instruction is emitted here to initialize the register.
+ */
+void
+vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
+{
+ nir_ssa_values[instr->def.index] = dst_reg(GRF, alloc.allocate(1));
+}
+
}
return true;
}
+/**
+ * When we decide to spill a register, instead of blindly spilling every use,
+ * save unspills when the spill register is used (read) in consecutive
+ * instructions. This can potentially save a bunch of unspills that would
+ * have very little impact in register allocation anyway.
+ *
+ * Notice that we need to account for this behavior when spilling a register
+ * and when evaluating spilling costs. This function is designed so it can
+ * be called from both places and avoid repeating the logic.
+ *
+ * - When we call this function from spill_reg(), we pass in scratch_reg the
+ * actual unspill/spill register that we want to reuse in the current
+ * instruction.
+ *
+ * - When we call this from evaluate_spill_costs(), we pass the register for
+ * which we are evaluating spilling costs.
+ *
+ * In either case, we check if the previous instructions read scratch_reg until
+ * we find one that writes to it with a compatible mask or does not read/write
+ * scratch_reg at all.
+ *
+ * \p inst is the instruction being examined, \p i the index of the GRF
+ * source in question (asserted to be in the GRF file) and \p scratch_reg
+ * the register number that is compared against the sources and destinations
+ * of the preceding instructions.  Returns true when scratch_reg can be
+ * reused for source \p i, i.e. no new unspill is needed for it.
+ */
+static bool
+can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
+ unsigned scratch_reg)
+{
+ assert(inst->src[i].file == GRF);
+ bool prev_inst_read_scratch_reg = false;
+
+ /* See if any previous source in the same instruction reads scratch_reg */
+ for (unsigned n = 0; n < i; n++) {
+ if (inst->src[n].file == GRF && inst->src[n].reg == scratch_reg)
+ prev_inst_read_scratch_reg = true;
+ }
+
+ /* Now check if previous instructions read/write scratch_reg.  The walk
+ * goes backwards through the instruction list until the head sentinel.
+ */
+ for (vec4_instruction *prev_inst = (vec4_instruction *) inst->prev;
+ !prev_inst->is_head_sentinel();
+ prev_inst = (vec4_instruction *) prev_inst->prev) {
+
+ /* If the previous instruction writes to scratch_reg then we can reuse
+ * it if the write is not conditional and the channels we write are
+ * compatible with our read mask.  A predicated SEL is accepted as well,
+ * and every channel read through the source swizzle must be covered by
+ * the writemask of that instruction.
+ */
+ if (prev_inst->dst.file == GRF && prev_inst->dst.reg == scratch_reg) {
+ return (!prev_inst->predicate || prev_inst->opcode == BRW_OPCODE_SEL) &&
+ (brw_mask_for_swizzle(inst->src[i].swizzle) &
+ ~prev_inst->dst.writemask) == 0;
+ }
+
+ /* Skip scratch read/writes so that instructions generated by spilling
+ * other registers (that won't read/write scratch_reg) do not stop us from
+ * reusing scratch_reg for this instruction.
+ */
+ if (prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE ||
+ prev_inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_READ)
+ continue;
+
+ /* If the previous instruction does not write to scratch_reg, then check
+ * if it reads it.  Only the first three sources are inspected; n == 3
+ * after the loop means none of them matched.
+ */
+ int n;
+ for (n = 0; n < 3; n++) {
+ if (prev_inst->src[n].file == GRF &&
+ prev_inst->src[n].reg == scratch_reg) {
+ prev_inst_read_scratch_reg = true;
+ break;
+ }
+ }
+ if (n == 3) {
+ /* The previous instruction does not read scratch_reg. At this point,
+ * if no previous instruction has read scratch_reg it means that we
+ * will need to unspill it here and we can't reuse it (so we return
+ * false). Otherwise, if we found at least one consecutive instruction
+ * that read scratch_reg, then we know that we got here from
+ * evaluate_spill_costs (since for the spill_reg path any block of
+ * consecutive instructions using scratch_reg must start with a write
+ * to that register, so we would've exited the loop in the check for
+ * the write that we have at the start of this loop), and in that case
+ * it means that we found the point at which the scratch_reg would be
+ * unspilled. Since we always unspill a full vec4, it means that we
+ * have all the channels available and we can just return true to
+ * signal that we can reuse the register in the current instruction
+ * too.
+ */
+ return prev_inst_read_scratch_reg;
+ }
+ }
+
+ /* Reached the head sentinel without finding a write or a non-reader. */
+ return prev_inst_read_scratch_reg;
+}
+
void
vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
{
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
- spill_costs[inst->src[i].reg] += loop_scale;
- if (inst->src[i].reladdr)
- no_spill[inst->src[i].reg] = true;
+ /* We will only unspill src[i] if it wasn't unspilled for the
+ * previous instruction, in which case we'll just reuse the scratch
+ * reg for this instruction.
+ */
+ if (!can_use_scratch_for_source(inst, i, inst->src[i].reg)) {
+ spill_costs[inst->src[i].reg] += loop_scale;
+ if (inst->src[i].reladdr)
+ no_spill[inst->src[i].reg] = true;
+ }
}
}
unsigned int spill_offset = last_scratch++;
/* Generate spill/unspill instructions for the objects being spilled. */
+ int scratch_reg = -1;
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
- src_reg spill_reg = inst->src[i];
- inst->src[i].reg = alloc.allocate(1);
- dst_reg temp = dst_reg(inst->src[i]);
-
- emit_scratch_read(block, inst, temp, spill_reg, spill_offset);
+ if (scratch_reg == -1 ||
+ !can_use_scratch_for_source(inst, i, scratch_reg)) {
+ /* We need to unspill anyway so make sure we read the full vec4
+ * in any case. This way, the cached register can be reused
+ * for consecutive instructions that read different channels of
+ * the same vec4.
+ */
+ scratch_reg = alloc.allocate(1);
+ src_reg temp = inst->src[i];
+ temp.reg = scratch_reg;
+ temp.swizzle = BRW_SWIZZLE_XYZW;
+ emit_scratch_read(block, inst,
+ dst_reg(temp), inst->src[i], spill_offset);
+ }
+ assert(scratch_reg != -1);
+ inst->src[i].reg = scratch_reg;
}
}
if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) {
emit_scratch_write(block, inst, spill_offset);
+ scratch_reg = inst->dst.reg;
}
}
--- /dev/null
+/*
+ * Copyright © 2013-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4_surface_builder.h"
+
+using namespace brw;
+
+namespace {
+ namespace array_utils {
+ /**
+ * Copy one every \p src_stride logical components of the argument into
+ * one every \p dst_stride logical components of the result.
+ *
+ * \p size is the number of components copied.  When both strides are 1
+ * the argument is returned as is and nothing is emitted.  Otherwise a
+ * temporary is allocated with bld.vgrf() using a size of
+ * DIV_ROUND_UP(size * dst_stride, 4) and one MOV is emitted per
+ * component; logical component k is addressed as offset k / 4,
+ * channel k % 4 on both the source and the destination side.
+ */
+ src_reg
+ emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
+ unsigned dst_stride, unsigned src_stride)
+ {
+ if (src_stride == 1 && dst_stride == 1) {
+ return src;
+ } else {
+ const dst_reg dst = bld.vgrf(src.type,
+ DIV_ROUND_UP(size * dst_stride, 4));
+
+ for (unsigned i = 0; i < size; ++i)
+ bld.MOV(writemask(offset(dst, i * dst_stride / 4),
+ 1 << (i * dst_stride % 4)),
+ swizzle(offset(src, i * src_stride / 4),
+ brw_swizzle_for_mask(1 << (i * src_stride % 4))));
+
+ return src_reg(dst);
+ }
+ }
+
+ /**
+ * Convert a VEC4 into an array of registers with the layout expected by
+ * the recipient shared unit. If \p has_simd4x2 is true the argument is
+ * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
+ * a SIMD8 vector.
+ *
+ * \p n is the number of components of \p src that are kept; the
+ * remaining components of the temporary are written with zero.  A
+ * default-constructed src_reg is returned when \p src is in the
+ * BAD_FILE file or \p n is zero.  The rearrangement itself is done by
+ * emit_stride() with a destination stride of 1 (SIMD4x2) or 4.
+ */
+ src_reg
+ emit_insert(const vec4_builder &bld, const src_reg &src,
+ unsigned n, bool has_simd4x2)
+ {
+ if (src.file == BAD_FILE || n == 0) {
+ return src_reg();
+
+ } else {
+ /* Pad unused components with zeroes. */
+ const unsigned mask = (1 << n) - 1;
+ const dst_reg tmp = bld.vgrf(src.type);
+
+ bld.MOV(writemask(tmp, mask), src);
+ if (n < 4)
+ bld.MOV(writemask(tmp, ~mask), 0);
+
+ return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
+ }
+ }
+
+ /**
+ * Convert an array of registers back into a VEC4 according to the
+ * layout expected from some shared unit. If \p has_simd4x2 is true the
+ * argument is left unmodified in SIMD4x2 form, otherwise it will be
+ * rearranged from SIMD8 form.
+ *
+ * \p n is the number of components to extract.  A default-constructed
+ * src_reg is returned when \p src is in the BAD_FILE file or \p n is
+ * zero.  This is the inverse of emit_insert(): emit_stride() is called
+ * with a destination stride of 1 and a source stride of 1 (SIMD4x2)
+ * or 4.
+ */
+ src_reg
+ emit_extract(const vec4_builder &bld, const src_reg src,
+ unsigned n, bool has_simd4x2)
+ {
+ if (src.file == BAD_FILE || n == 0) {
+ return src_reg();
+
+ } else {
+ return emit_stride(bld, src, n, 1, has_simd4x2 ? 1 : 4);
+ }
+ }
+ }
+}
+
+namespace brw {
+ namespace surface_access {
+ namespace {
+ using namespace array_utils;
+
+ /**
+ * Generate a send opcode for a surface message and return the
+ * result.
+ *
+ * The payload is assembled in this order: \p header (one register,
+ * skipped when its file is BAD_FILE), then \p addr_sz registers
+ * taken from \p addr, then \p src_sz registers taken from \p src,
+ * all retyped to UD.  \p surface is passed through
+ * emit_uniformize() before being used as an operand.  \p arg is
+ * handed unmodified to the instruction as its last operand, \p
+ * ret_sz is the size of the destination (also stored in
+ * regs_written) and \p pred is the predicate set on the
+ * instruction.
+ */
+ src_reg
+ emit_send(const vec4_builder &bld, enum opcode op,
+ const src_reg &header,
+ const src_reg &addr, unsigned addr_sz,
+ const src_reg &src, unsigned src_sz,
+ const src_reg &surface,
+ unsigned arg, unsigned ret_sz,
+ brw_predicate pred = BRW_PREDICATE_NONE)
+ {
+ /* Calculate the total number of components of the payload. */
+ const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
+ const unsigned sz = header_sz + addr_sz + src_sz;
+
+ /* Construct the payload. */
+ const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
+ unsigned n = 0;
+
+ if (header_sz)
+ bld.exec_all().MOV(offset(payload, n++),
+ retype(header, BRW_REGISTER_TYPE_UD));
+
+ for (unsigned i = 0; i < addr_sz; i++)
+ bld.MOV(offset(payload, n++),
+ offset(retype(addr, BRW_REGISTER_TYPE_UD), i));
+
+ for (unsigned i = 0; i < src_sz; i++)
+ bld.MOV(offset(payload, n++),
+ offset(retype(src, BRW_REGISTER_TYPE_UD), i));
+
+ /* Reduce the dynamically uniform surface index to a single
+ * scalar.
+ */
+ const src_reg usurface = bld.emit_uniformize(surface);
+
+ /* Emit the message send instruction. */
+ const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
+ vec4_instruction *inst =
+ bld.emit(op, dst, src_reg(payload), usurface, arg);
+ inst->mlen = sz;
+ inst->regs_written = ret_sz;
+ inst->header_size = header_sz;
+ inst->predicate = pred;
+
+ return src_reg(dst);
+ }
+ }
+
+ /**
+ * Emit an untyped surface read opcode. \p dims determines the number
+ * of components of the address and \p size the number of components of
+ * the returned value.
+ *
+ * No header and no data payload are sent.  Unlike the other entry
+ * points in this file the address is always inserted in SIMD4x2 form
+ * (has_simd4x2 is hard-coded to true) and occupies one register; \p
+ * size is forwarded as the message argument and one register is
+ * returned.
+ */
+ src_reg
+ emit_untyped_read(const vec4_builder &bld,
+ const src_reg &surface, const src_reg &addr,
+ unsigned dims, unsigned size,
+ brw_predicate pred)
+ {
+ return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
+ emit_insert(bld, addr, dims, true), 1,
+ src_reg(), 0,
+ surface, size, 1, pred);
+ }
+
+ /**
+ * Emit an untyped surface write opcode. \p dims determines the number
+ * of components of the address and \p size the number of components of
+ * the argument.
+ *
+ * SIMD4x2 form is used on gen >= 8 and on Haswell; in that case the
+ * address and the data take one register each, otherwise they take
+ * \p dims and \p size registers respectively.  No header is sent, \p
+ * size is forwarded as the message argument and nothing is returned
+ * (ret_sz is 0).
+ */
+ void
+ emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
+ const src_reg &addr, const src_reg &src,
+ unsigned dims, unsigned size,
+ brw_predicate pred)
+ {
+ const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+ bld.shader->devinfo->is_haswell);
+ emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
+ emit_insert(bld, addr, dims, has_simd4x2),
+ has_simd4x2 ? 1 : dims,
+ emit_insert(bld, src, size, has_simd4x2),
+ has_simd4x2 ? 1 : size,
+ surface, size, 0, pred);
+ }
+
+ /**
+ * Emit an untyped surface atomic opcode. \p dims determines the number
+ * of components of the address and \p rsize the number of components of
+ * the returned value (either zero or one).
+ *
+ * \p src0 and \p src1 are the data operands; an operand whose file is
+ * BAD_FILE counts as absent.  \p op is forwarded as the message
+ * argument.  SIMD4x2 form is used on gen >= 8 and on Haswell.
+ */
+ src_reg
+ emit_untyped_atomic(const vec4_builder &bld,
+ const src_reg &surface, const src_reg &addr,
+ const src_reg &src0, const src_reg &src1,
+ unsigned dims, unsigned rsize, unsigned op,
+ brw_predicate pred)
+ {
+ const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+ bld.shader->devinfo->is_haswell);
+
+ /* Zip the components of both sources, they are represented as the X
+ * and Y components of the same vector.
+ * NOTE(review): size only counts the operands present, so this
+ * presumably relies on src1 never being supplied without src0 --
+ * confirm against callers.
+ */
+ const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+ const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+ if (size >= 1)
+ bld.MOV(writemask(srcs, WRITEMASK_X), src0);
+ if (size >= 2)
+ bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
+
+ return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(),
+ emit_insert(bld, addr, dims, has_simd4x2),
+ has_simd4x2 ? 1 : dims,
+ emit_insert(bld, src_reg(srcs), size, has_simd4x2),
+ has_simd4x2 ? 1 : size,
+ surface, op, rsize, pred);
+ }
+
+ namespace {
+ /**
+ * Initialize the header present in typed surface messages.
+ *
+ * Returns a freshly allocated UD register that is zeroed with an
+ * exec_all MOV.  On gen 7 parts other than Haswell the W component
+ * is additionally set to 0x11 (see the comment below).
+ */
+ src_reg
+ emit_typed_message_header(const vec4_builder &bld)
+ {
+ const vec4_builder ubld = bld.exec_all();
+ const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+ ubld.MOV(dst, src_reg(0));
+
+ if (bld.shader->devinfo->gen == 7 &&
+ !bld.shader->devinfo->is_haswell) {
+ /* The sample mask is used on IVB for the SIMD8 messages that
+ * have no SIMD4x2 variant. We only use the two X channels
+ * in that case, mask everything else out.
+ */
+ ubld.MOV(writemask(dst, WRITEMASK_W), src_reg(0x11));
+ }
+
+ return src_reg(dst);
+ }
+ }
+
+ /**
+ * Emit a typed surface read opcode. \p dims determines the number of
+ * components of the address and \p size the number of components of the
+ * returned value.
+ *
+ * A typed message header is always sent and no data payload is
+ * included.  SIMD4x2 form is used on gen >= 8 and on Haswell, in which
+ * case one register is returned; otherwise \p size registers are
+ * returned and converted back into a single VEC4 by emit_extract().
+ * The message is sent with the default predicate of emit_send().
+ */
+ src_reg
+ emit_typed_read(const vec4_builder &bld, const src_reg &surface,
+ const src_reg &addr, unsigned dims, unsigned size)
+ {
+ const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+ bld.shader->devinfo->is_haswell);
+ const src_reg tmp =
+ emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ,
+ emit_typed_message_header(bld),
+ emit_insert(bld, addr, dims, has_simd4x2),
+ has_simd4x2 ? 1 : dims,
+ src_reg(), 0,
+ surface, size,
+ has_simd4x2 ? 1 : size);
+
+ return emit_extract(bld, tmp, size, has_simd4x2);
+ }
+
+ /**
+ * Emit a typed surface write opcode. \p dims determines the number of
+ * components of the address and \p size the number of components of the
+ * argument.
+ *
+ * A typed message header is always sent.  SIMD4x2 form is used on
+ * gen >= 8 and on Haswell; in that case the address and the data take
+ * one register each, otherwise \p dims and \p size registers.  \p size
+ * is forwarded as the message argument, nothing is returned (ret_sz is
+ * 0) and the default predicate of emit_send() applies.
+ */
+ void
+ emit_typed_write(const vec4_builder &bld, const src_reg &surface,
+ const src_reg &addr, const src_reg &src,
+ unsigned dims, unsigned size)
+ {
+ const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+ bld.shader->devinfo->is_haswell);
+ emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE,
+ emit_typed_message_header(bld),
+ emit_insert(bld, addr, dims, has_simd4x2),
+ has_simd4x2 ? 1 : dims,
+ emit_insert(bld, src, size, has_simd4x2),
+ has_simd4x2 ? 1 : size,
+ surface, size, 0);
+ }
+
+ /**
+ * Emit a typed surface atomic opcode. \p dims determines the number of
+ * components of the address and \p rsize the number of components of
+ * the returned value (either zero or one).
+ *
+ * Same operand handling as emit_untyped_atomic(): \p src0 and \p src1
+ * are the data operands (absent when their file is BAD_FILE) and \p op
+ * is forwarded as the message argument.  The difference is that a
+ * typed message header is sent and SHADER_OPCODE_TYPED_ATOMIC is used.
+ */
+ src_reg
+ emit_typed_atomic(const vec4_builder &bld,
+ const src_reg &surface, const src_reg &addr,
+ const src_reg &src0, const src_reg &src1,
+ unsigned dims, unsigned rsize, unsigned op,
+ brw_predicate pred)
+ {
+ const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
+ bld.shader->devinfo->is_haswell);
+
+ /* Zip the components of both sources, they are represented as the X
+ * and Y components of the same vector.
+ */
+ const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+ const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+ if (size >= 1)
+ bld.MOV(writemask(srcs, WRITEMASK_X), src0);
+ if (size >= 2)
+ bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
+
+ return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC,
+ emit_typed_message_header(bld),
+ emit_insert(bld, addr, dims, has_simd4x2),
+ has_simd4x2 ? 1 : dims,
+ emit_insert(bld, src_reg(srcs), size, has_simd4x2),
+ has_simd4x2 ? 1 : size,
+ surface, op, rsize, pred);
+ }
+ }
+}
--- /dev/null
+/* -*- c++ -*- */
+/*
+ * Copyright © 2013-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_SURFACE_BUILDER_H
+#define BRW_VEC4_SURFACE_BUILDER_H
+
+#include "brw_vec4_builder.h"
+
+namespace brw {
+ /* Entry points that emit untyped and typed surface messages through a
+ * vec4_builder.  In every function \p dims is the number of address
+ * components; where a \p pred parameter exists it defaults to
+ * BRW_PREDICATE_NONE.
+ */
+ namespace surface_access {
+ /* Untyped read; \p size is the number of components returned. */
+ src_reg
+ emit_untyped_read(const vec4_builder &bld,
+ const src_reg &surface, const src_reg &addr,
+ unsigned dims, unsigned size,
+ brw_predicate pred = BRW_PREDICATE_NONE);
+
+ /* Untyped write; \p size is the number of components of \p src. */
+ void
+ emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
+ const src_reg &addr, const src_reg &src,
+ unsigned dims, unsigned size,
+ brw_predicate pred = BRW_PREDICATE_NONE);
+
+ /* Untyped atomic \p op on up to two data operands; \p rsize is the
+ * number of components returned.
+ */
+ src_reg
+ emit_untyped_atomic(const vec4_builder &bld,
+ const src_reg &surface, const src_reg &addr,
+ const src_reg &src0, const src_reg &src1,
+ unsigned dims, unsigned rsize, unsigned op,
+ brw_predicate pred = BRW_PREDICATE_NONE);
+
+ /* Typed read; takes no predicate parameter. */
+ src_reg
+ emit_typed_read(const vec4_builder &bld, const src_reg &surface,
+ const src_reg &addr, unsigned dims, unsigned size);
+
+ /* Typed write; takes no predicate parameter. */
+ void
+ emit_typed_write(const vec4_builder &bld, const src_reg &surface,
+ const src_reg &addr, const src_reg &src,
+ unsigned dims, unsigned size);
+
+ /* Typed atomic \p op on up to two data operands; \p rsize is the
+ * number of components returned.
+ */
+ src_reg
+ emit_typed_atomic(const vec4_builder &bld, const src_reg &surface,
+ const src_reg &addr,
+ const src_reg &src0, const src_reg &src1,
+ unsigned dims, unsigned rsize, unsigned op,
+ brw_predicate pred = BRW_PREDICATE_NONE);
+ }
+}
+
+#endif
#include "glsl/ir_uniform.h"
#include "program/sampler.h"
+/* First MRF used by the scratch (spill/unspill) and pull-constant messages
+ * set up in this file: 21 on gen 6, 13 on every other generation.  The
+ * argument is parenthesized so that expressions containing operators of
+ * lower precedence than == (e.g. a & b) are compared as a whole.
+ */
+#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)
+
namespace brw {
vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
dst, index);
- inst->base_mrf = 14;
+ inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
inst->mlen = 2;
return inst;
inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
dst, src, index);
- inst->base_mrf = 13;
+ inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen);
inst->mlen = 3;
return inst;
}
}
-void
-vec4_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
-{
- for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
- assert(this->uniforms < uniform_array_size);
- this->uniform_vector_size[this->uniforms] = 4;
- this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
- this->userplane[i].type = BRW_REGISTER_TYPE_F;
- for (int j = 0; j < 4; ++j) {
- stage_prog_data->param[this->uniforms * 4 + j] =
- (gl_constant_value *) &clip_planes[i][j];
- }
- ++this->uniforms;
- }
-}
-
/* Our support for builtin uniforms is even scarier than non-builtin.
* It sits on top of the PROG_STATE_VAR parameters that are
* automatically updated from GL context state.
break;
case ir_var_uniform:
+ case ir_var_shader_storage:
reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
/* Thanks to the lower_ubo_reference pass, we will see only
- * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
- * variables, so no need for them to be in variable_ht.
+ * ir_binop_{ubo,ssbo}_load expressions and not ir_dereference_variable
+ * for UBO/SSBO variables, so no need for them to be in variable_ht.
*
* Some uniforms, such as samplers and atomic counters, have no actual
* storage, so we should ignore them.
dst,
surf_index,
offset_reg);
- pull->base_mrf = 14;
+ pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
pull->mlen = 1;
}
emit(MOV(result_dst, op[0]));
break;
+ case ir_unop_ssbo_unsized_array_length:
+ unreachable("not reached: should be handled by lower_ubo_reference");
+ break;
+
case ir_binop_add:
emit(ADD(result_dst, op[0], op[1]));
break;
emit(RNDE(result_dst, op[0]));
break;
+ case ir_unop_get_buffer_size:
+ unreachable("not reached: not implemented");
+ break;
+
case ir_binop_min:
emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
break;
*/
brw_mark_surface_used(&prog_data->base,
prog_data->base.binding_table.ubo_start +
- shader_prog->NumUniformBlocks - 1);
+ shader_prog->NumBufferInterfaceBlocks - 1);
}
if (const_offset_ir) {
case ir_tg4: opcode = offset_value.file != BAD_FILE
? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
+ case ir_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO; break;
case ir_txb:
unreachable("TXB is not valid for vertex shaders.");
case ir_lod:
* - Texel offsets
* - Gather channel selection
* - Sampler indices too large to fit in a 4-bit value.
+ * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
*/
inst->header_size =
(devinfo->gen < 5 || devinfo->gen >= 9 ||
inst->offset != 0 || op == ir_tg4 ||
+ op == ir_texture_samples ||
is_high_sampler(sampler_reg)) ? 1 : 0;
inst->base_mrf = 2;
- inst->mlen = inst->header_size + 1; /* always at least one */
+ inst->mlen = inst->header_size;
inst->dst.writemask = WRITEMASK_XYZW;
inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
if (op == ir_txs || op == ir_query_levels) {
int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
+ inst->mlen++;
+ } else if (op == ir_texture_samples) {
+ inst->dst.writemask = WRITEMASK_X;
} else {
/* Load the coordinate */
/* FINISHME: gl_clamp_mask and saturate */
emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
coordinate));
+ inst->mlen++;
if (zero_mask != 0) {
emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
mrf = param_base;
writemask = WRITEMASK_W;
}
- lod.swizzle = BRW_SWIZZLE_XXXX;
emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
} else if (op == ir_txf) {
emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
}
if (devinfo->gen == 6 && op == ir_tg4) {
- emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
+ emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
}
swizzle_result(op, dest,
*/
if (ir->op == ir_tg4) {
ir_constant *chan = ir->lod_info.component->as_constant();
- int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
+ int swiz = GET_SWZ(key_tex->swizzles[sampler], chan->value.i[0]);
if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
dst_reg result(this, ir->type);
this->result = src_reg(result);
ir->lod_info.sample_index->accept(this);
sample_index = this->result;
- if (devinfo->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
+ if (devinfo->gen >= 7 && key_tex->compressed_multisample_layout_mask & (1 << sampler))
mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg);
else
mcs = src_reg(0u);
case ir_txb:
case ir_lod:
case ir_tg4:
+ case ir_texture_samples:
break;
}
uint32_t
vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
{
- int swiz = GET_SWZ(key->tex.swizzles[sampler], gather_component);
+ int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
switch (swiz) {
case SWIZZLE_X: return 0;
case SWIZZLE_Y:
/* gather4 sampler is broken for green channel on RG32F --
* we must ask for blue instead.
*/
- if (key->tex.gather_channel_quirk_mask & (1<<sampler))
+ if (key_tex->gather_channel_quirk_mask & (1 << sampler))
return 2;
return 1;
case SWIZZLE_Z: return 2;
src_reg orig_val, uint32_t sampler,
const glsl_type *dest_type)
{
- int s = key->tex.swizzles[sampler];
+ int s = key_tex->swizzles[sampler];
dst_reg swizzled_result = dest;
{
if (devinfo->gen < 6 &&
((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
- key->userclip_active || devinfo->has_negative_rhw_bug)) {
+ output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
+ devinfo->has_negative_rhw_bug)) {
dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
dst_reg header1_w = header1;
header1_w.writemask = WRITEMASK_W;
emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
}
- if (key->userclip_active) {
+ if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
current_annotation = "Clipping flags";
dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
}
}
-void
-vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
-{
- /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
- *
- * "If a linked set of shaders forming the vertex stage contains no
- * static write to gl_ClipVertex or gl_ClipDistance, but the
- * application has requested clipping against user clip planes through
- * the API, then the coordinate written to gl_Position is used for
- * comparison against the user clip planes."
- *
- * This function is only called if the shader didn't write to
- * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
- * if the user wrote to it; otherwise we use gl_Position.
- */
- gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
- if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
- clip_vertex = VARYING_SLOT_POS;
- }
-
- for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
- ++i) {
- reg.writemask = 1 << i;
- emit(DP4(reg,
- src_reg(output_reg[clip_vertex]),
- src_reg(this->userplane[i + offset])));
- }
-}
-
vec4_instruction *
vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
{
case BRW_VARYING_SLOT_PAD:
/* No need to write to this slot */
break;
- case VARYING_SLOT_COL0:
- case VARYING_SLOT_COL1:
- case VARYING_SLOT_BFC0:
- case VARYING_SLOT_BFC1: {
- /* These built-in varyings are only supported in compatibility mode,
- * and we only support GS in core profile. So, this must be a vertex
- * shader.
- */
- assert(stage == MESA_SHADER_VERTEX);
- vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
- if (((struct brw_vs_prog_key *) key)->clamp_vertex_color)
- inst->saturate = true;
- break;
- }
-
default:
emit_generic_urb_slot(reg, varying);
break;
* may need to unspill a register or load from an array. Those
* reads would use MRFs 14-15.
*/
- int max_usable_mrf = 13;
+ int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
/* The following assertion verifies that max_usable_mrf causes an
* even-numbered amount of URB write data, which will meet gen6's
emit_ndc_computation();
}
- /* Lower legacy ff and ClipVertex clipping to clip distances */
- if (key->userclip_active && !prog->UsesClipDistanceOut) {
- current_annotation = "user clip distances";
-
- output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
- output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
-
- emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
- emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
- }
-
/* We may need to split this up into several URB writes, so do them in a
* loop.
*/
prog_data->vue_map.slot_to_varying[slot]);
/* If this was max_usable_mrf, we can't fit anything more into this
- * URB WRITE.
+ * URB WRITE. Same thing if we reached the maximum length available.
*/
- if (mrf > max_usable_mrf) {
+ if (mrf > max_usable_mrf ||
+ align_interleaved_urb_mlen(devinfo, mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
slot++;
break;
}
vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
void *log_data,
struct gl_program *prog,
- const struct brw_vue_prog_key *key,
+ const struct brw_sampler_prog_key_data *key_tex,
struct brw_vue_prog_data *prog_data,
struct gl_shader_program *shader_prog,
gl_shader_stage stage,
int shader_time_index)
: backend_shader(compiler, log_data, mem_ctx,
shader_prog, prog, &prog_data->base, stage),
- key(key),
+ key_tex(key_tex),
prog_data(prog_data),
sanity_param_count(0),
fail_msg(NULL),
+/**
+ * Vertex-shader override of emit_urb_slot().  \p reg and
+ * output_reg[varying] are forced to float type; the legacy color varyings
+ * (COL0/COL1/BFC0/BFC1) are emitted through emit_generic_urb_slot() and
+ * saturated when key->clamp_vertex_color is set.  Every other varying is
+ * delegated to vec4_visitor::emit_urb_slot().
+ */
 void
+vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying)
+{
+ reg.type = BRW_REGISTER_TYPE_F;
+ output_reg[varying].type = reg.type;
+
+ switch (varying) {
+ case VARYING_SLOT_COL0:
+ case VARYING_SLOT_COL1:
+ case VARYING_SLOT_BFC0:
+ case VARYING_SLOT_BFC1: {
+ /* These built-in varyings are only supported in compatibility mode,
+ * and we only support GS in core profile. So, this must be a vertex
+ * shader.
+ */
+ vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
+ if (key->clamp_vertex_color)
+ inst->saturate = true;
+ break;
+ }
+ default:
+ return vec4_visitor::emit_urb_slot(reg, varying);
+ }
+}
+
+
+/**
+ * Compute up to four user clip distances into \p reg.  Channel i of \p reg
+ * receives DP4(clip vertex, userplane[i + offset]) for every i below 4 with
+ * i + offset < key->nr_userclip_plane_consts; the remaining channels are
+ * not written.
+ */
+void
+vec4_vs_visitor::emit_clip_distances(dst_reg reg, int offset)
+{
+ /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
+ *
+ * "If a linked set of shaders forming the vertex stage contains no
+ * static write to gl_ClipVertex or gl_ClipDistance, but the
+ * application has requested clipping against user clip planes through
+ * the API, then the coordinate written to gl_Position is used for
+ * comparison against the user clip planes."
+ *
+ * This function is only called if the shader didn't write to
+ * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
+ * if the user wrote to it; otherwise we use gl_Position.
+ */
+ gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
+ if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
+ clip_vertex = VARYING_SLOT_POS;
+ }
+
+ for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
+ ++i) {
+ reg.writemask = 1 << i;
+ emit(DP4(reg,
+ src_reg(output_reg[clip_vertex]),
+ src_reg(this->userplane[i + offset])));
+ }
+}
+
+
+/**
+ * Allocate one vec4 uniform per user clip plane constant.  For each of the
+ * key->nr_userclip_plane_consts planes the next uniform slot is recorded in
+ * userplane[i] as a float UNIFORM register and its four param entries are
+ * pointed at the components of clip_planes[i].
+ */
+void
+vec4_vs_visitor::setup_uniform_clipplane_values()
+{
+ for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
+ assert(this->uniforms < uniform_array_size);
+ this->uniform_vector_size[this->uniforms] = 4;
+ this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
+ this->userplane[i].type = BRW_REGISTER_TYPE_F;
+ for (int j = 0; j < 4; ++j) {
+ stage_prog_data->param[this->uniforms * 4 + j] =
+ (gl_constant_value *) &clip_planes[i][j];
+ }
+ ++this->uniforms;
+ }
+}
+
+
+void
vec4_vs_visitor::emit_thread_end()
{
+ setup_uniform_clipplane_values();
+
+ /* Lower legacy ff and ClipVertex clipping to clip distances */
+ if (key->nr_userclip_plane_consts > 0) {
+ current_annotation = "user clip distances";
+
+ output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
+ output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
+
+ emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
+ emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
+ }
+
/* For VS, we always end the thread by emitting a single vertex.
* emit_urb_write_opcode() will take care of setting the eot flag on the
* SEND instruction.
struct brw_vs_prog_data *vs_prog_data,
struct gl_vertex_program *vp,
struct gl_shader_program *prog,
+ gl_clip_plane *clip_planes,
void *mem_ctx,
int shader_time_index,
bool use_legacy_snorm_formula)
: vec4_visitor(compiler, log_data,
- &vp->Base, &key->base, &vs_prog_data->base, prog,
+ &vp->Base, &key->tex, &vs_prog_data->base, prog,
MESA_SHADER_VERTEX,
mem_ctx, false /* no_spills */,
shader_time_index),
key(key),
vs_prog_data(vs_prog_data),
vp(vp),
+ clip_planes(clip_planes),
use_legacy_snorm_formula(use_legacy_snorm_formula)
{
}
struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
void *mem_ctx;
int i;
- struct gl_shader *vs = NULL;
+ struct brw_shader *vs = NULL;
+ bool start_busy = false;
+ double start_time = 0;
if (prog)
- vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
+ vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
memset(&prog_data, 0, sizeof(prog_data));
* case being a float value that gets blown up to a vec4, so be
* conservative here.
*/
- param_count = vs->num_uniform_components * 4 +
- vs->NumImages * BRW_IMAGE_PARAM_SIZE;
- stage_prog_data->nr_image_params = vs->NumImages;
+ param_count = vs->base.num_uniform_components * 4 +
+ vs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+ stage_prog_data->nr_image_params = vs->base.NumImages;
} else {
param_count = vp->program.Base.Parameters->NumParameters * 4;
}
/* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
* planes as uniforms.
*/
- param_count += key->base.nr_userclip_plane_consts * 4;
+ param_count += key->nr_userclip_plane_consts * 4;
stage_prog_data->param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
* distance varying slots whenever clipping is enabled, even if the vertex
* shader doesn't write to gl_ClipDistance.
*/
- if (key->base.userclip_active) {
+ if (key->nr_userclip_plane_consts > 0) {
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
}
brw_compute_vue_map(brw->intelScreen->devinfo,
- &prog_data.base.vue_map, outputs_written);
+ &prog_data.base.vue_map, outputs_written,
+ prog ? prog->SeparateShader : false);
if (0) {
_mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG,
true);
}
+ if (unlikely(brw->perf_debug)) {
+ start_busy = (brw->batch.last_bo &&
+ drm_intel_bo_busy(brw->batch.last_bo));
+ start_time = get_time();
+ }
+
/* Emit GEN4 code.
*/
program = brw_vs_emit(brw, mem_ctx, key, &prog_data,
return false;
}
+ if (unlikely(brw->perf_debug) && vs) {
+ if (vs->compiled_once) {
+ brw_vs_debug_recompile(brw, prog, key);
+ }
+ if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+ perf_debug("VS compile took %.03f ms and stalled the GPU\n",
+ (get_time() - start_time) * 1000);
+ }
+ vs->compiled_once = true;
+ }
+
/* Scratch space is used for register spilling */
if (prog_data.base.base.total_scratch) {
brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo,
if (c->cache_id == BRW_CACHE_VS_PROG) {
old_key = c->key;
- if (old_key->base.program_string_id == key->base.program_string_id)
+ if (old_key->program_string_id == key->program_string_id)
break;
}
}
key->gl_attrib_wa_flags[i]);
}
- found |= key_debug(brw, "user clip flags",
- old_key->base.userclip_active, key->base.userclip_active);
-
- found |= key_debug(brw, "user clipping planes as push constants",
- old_key->base.nr_userclip_plane_consts,
- key->base.nr_userclip_plane_consts);
+ found |= key_debug(brw, "legacy user clipping",
+ old_key->nr_userclip_plane_consts,
+ key->nr_userclip_plane_consts);
found |= key_debug(brw, "copy edgeflag",
old_key->copy_edgeflag, key->copy_edgeflag);
found |= key_debug(brw, "vertex color clamping",
old_key->clamp_vertex_color, key->clamp_vertex_color);
- found |= brw_debug_recompile_sampler_key(brw, &old_key->base.tex,
- &key->base.tex);
+ found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
if (!found) {
perf_debug(" Something else\n");
}
}
-
-void
-brw_setup_vue_key_clip_info(struct brw_context *brw,
- struct brw_vue_prog_key *key,
- bool program_uses_clip_distance)
-{
- struct gl_context *ctx = &brw->ctx;
-
- key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
- if (key->userclip_active && !program_uses_clip_distance) {
- key->nr_userclip_plane_consts
- = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
- }
-}
-
static bool
brw_vs_state_dirty(struct brw_context *brw)
{
/* Just upload the program verbatim for now. Always send it all
* the inputs it asks for, whether they are varying or not.
*/
- key->base.program_string_id = vp->id;
- brw_setup_vue_key_clip_info(brw, &key->base,
- vp->program.Base.UsesClipDistanceOut);
+ key->program_string_id = vp->id;
+
+ if (ctx->Transform.ClipPlanesEnabled != 0 &&
+ ctx->API == API_OPENGL_COMPAT &&
+ !vp->program.Base.UsesClipDistanceOut) {
+ key->nr_userclip_plane_consts =
+ _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
+ }
/* _NEW_POLYGON */
if (brw->gen < 6) {
/* _NEW_TEXTURE */
brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
- &key->base.tex);
+ &key->tex);
/* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
assert(success);
}
brw->vs.base.prog_data = &brw->vs.prog_data->base.base;
-
- if (memcmp(&brw->vs.prog_data->base.vue_map, &brw->vue_map_geom_out,
- sizeof(brw->vue_map_geom_out)) != 0) {
- brw->vue_map_vs = brw->vs.prog_data->base.vue_map;
- brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_VS;
- if (brw->gen < 6) {
- /* No geometry shader support, so the VS VUE map is the VUE map for
- * the output of the "geometry" portion of the pipeline.
- */
- brw->vue_map_geom_out = brw->vue_map_vs;
- brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
- }
- }
}
bool
memset(&key, 0, sizeof(key));
- brw_vue_setup_prog_key_for_precompile(ctx, &key.base, bvp->id, &vp->Base);
+ brw_setup_tex_for_precompile(brw, &key.tex, prog);
+ key.program_string_id = bvp->id;
key.clamp_vertex_color =
(prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
VARYING_BIT_BFC0 | VARYING_BIT_BFC1));
struct brw_vs_prog_data *vs_prog_data,
struct gl_vertex_program *vp,
struct gl_shader_program *prog,
+ gl_clip_plane *clip_planes,
void *mem_ctx,
int shader_time_index,
bool use_legacy_snorm_formula);
virtual void emit_program_code();
virtual void emit_thread_end();
virtual void emit_urb_write_header(int mrf);
+ virtual void emit_urb_slot(dst_reg reg, int varying);
virtual vec4_instruction *emit_urb_write_opcode(bool complete);
private:
int setup_attributes(int payload_reg);
void setup_vp_regs();
+ void setup_uniform_clipplane_values();
+ void emit_clip_distances(dst_reg reg, int offset);
dst_reg get_vp_dst_reg(const prog_dst_register &dst);
src_reg get_vp_src_reg(const prog_src_register &src);
src_reg *vp_temp_regs;
src_reg vp_addr_reg;
+ gl_clip_plane *clip_planes;
+
bool use_legacy_snorm_formula;
};
*/
void
brw_upload_pull_constants(struct brw_context *brw,
- GLbitfield brw_new_constbuf,
+ GLbitfield64 brw_new_constbuf,
const struct gl_program *prog,
struct brw_stage_state *stage_state,
const struct brw_stage_prog_data *prog_data,
#include "brw_context.h"
static inline void
-assign_vue_slot(struct brw_vue_map *vue_map, int varying)
+assign_vue_slot(struct brw_vue_map *vue_map, int varying, int slot)
{
/* Make sure this varying hasn't been assigned a slot already */
assert (vue_map->varying_to_slot[varying] == -1);
- vue_map->varying_to_slot[varying] = vue_map->num_slots;
- vue_map->slot_to_varying[vue_map->num_slots++] = varying;
+ vue_map->varying_to_slot[varying] = slot;
+ vue_map->slot_to_varying[slot] = varying;
}
/**
void
brw_compute_vue_map(const struct brw_device_info *devinfo,
struct brw_vue_map *vue_map,
- GLbitfield64 slots_valid)
+ GLbitfield64 slots_valid,
+ bool separate)
{
+ /* Keep using the packed/contiguous layout on old hardware - we only need
+ * the SSO layout when using geometry/tessellation shaders or 32 FS input
+ * varyings, which only exist on Gen >= 6. It's also a bit more efficient.
+ */
+ if (devinfo->gen < 6)
+ separate = false;
+
vue_map->slots_valid = slots_valid;
- int i;
+ vue_map->separate = separate;
/* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they
* are stored in the first VUE slot (VARYING_SLOT_PSIZ).
*/
STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
- vue_map->num_slots = 0;
- for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
+ for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
vue_map->varying_to_slot[i] = -1;
- vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT;
+ vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_PAD;
}
+ int slot = 0;
+
/* VUE header: format depends on chip generation and whether clipping is
* enabled.
*
* On Ironlake the VUE header is nominally 20 dwords, but the hardware
* will accept the same header layout as Gen4 [and should be a bit faster]
*/
- assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
- assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
- assign_vue_slot(vue_map, VARYING_SLOT_POS);
+ assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
+ assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC, slot++);
+ assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++);
} else {
/* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
* dword 0-3 of the header is indices, point width, clip flags.
* enabled.
* dword 8-11 or 16-19 is the first vertex element data we fill.
*/
- assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
- assign_vue_slot(vue_map, VARYING_SLOT_POS);
+ assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
+ assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++);
if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
- assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
+ assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0, slot++);
if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
- assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
+ assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1, slot++);
/* front and back colors need to be consecutive so that we can use
* ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
* two-sided color.
*/
if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0))
- assign_vue_slot(vue_map, VARYING_SLOT_COL0);
+ assign_vue_slot(vue_map, VARYING_SLOT_COL0, slot++);
if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0))
- assign_vue_slot(vue_map, VARYING_SLOT_BFC0);
+ assign_vue_slot(vue_map, VARYING_SLOT_BFC0, slot++);
if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1))
- assign_vue_slot(vue_map, VARYING_SLOT_COL1);
+ assign_vue_slot(vue_map, VARYING_SLOT_COL1, slot++);
if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1))
- assign_vue_slot(vue_map, VARYING_SLOT_BFC1);
+ assign_vue_slot(vue_map, VARYING_SLOT_BFC1, slot++);
}
- /* The hardware doesn't care about the rest of the vertex outputs, so just
- * assign them contiguously. Don't reassign outputs that already have a
- * slot.
+ /* The hardware doesn't care about the rest of the vertex outputs, so we
+ * can assign them however we like. For normal programs, we simply assign
+ * them contiguously.
+ *
+ * For separate shader pipelines, we first assign built-in varyings
+ * contiguous slots. This works because ARB_separate_shader_objects
+ * requires that all shaders have matching built-in varying interface
+ * blocks. Next, we assign generic varyings based on their location
+ * (either explicit or linker assigned). This guarantees a fixed layout.
*
* We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX,
* since it's encoded as the clip distances by emit_clip_distances().
* However, it may be output by transform feedback, and we'd rather not
* recompute state when TF changes, so we just always include it.
*/
- for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
- if ((slots_valid & BITFIELD64_BIT(i)) &&
- vue_map->varying_to_slot[i] == -1) {
- assign_vue_slot(vue_map, i);
+ GLbitfield64 builtins = slots_valid & BITFIELD64_MASK(VARYING_SLOT_VAR0);
+ while (builtins != 0) {
+ const int varying = ffsll(builtins) - 1;
+ if (vue_map->varying_to_slot[varying] == -1) {
+ assign_vue_slot(vue_map, varying, slot++);
}
+ builtins &= ~BITFIELD64_BIT(varying);
}
+
+ const int first_generic_slot = slot;
+ GLbitfield64 generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
+ while (generics != 0) {
+ const int varying = ffsll(generics) - 1;
+ if (separate) {
+ slot = first_generic_slot + varying - VARYING_SLOT_VAR0;
+ assign_vue_slot(vue_map, varying, slot);
+ } else {
+ assign_vue_slot(vue_map, varying, slot++);
+ }
+ generics &= ~BITFIELD64_BIT(varying);
+ }
+
+ vue_map->num_slots = separate ? slot + 1 : slot;
}
#include "brw_context.h"
#include "brw_wm.h"
#include "brw_state.h"
+#include "brw_shader.h"
#include "main/enums.h"
#include "main/formats.h"
#include "main/fbobject.h"
void *mem_ctx = ralloc_context(NULL);
struct brw_wm_prog_data prog_data;
const GLuint *program;
- struct gl_shader *fs = NULL;
+ struct brw_shader *fs = NULL;
GLuint program_size;
+ bool start_busy = false;
+ double start_time = 0;
if (prog)
- fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+ fs = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
memset(&prog_data, 0, sizeof(prog_data));
/* key->alpha_test_func means simulating alpha testing via discards,
fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
prog_data.computed_depth_mode = computed_depth_mode(&fp->program);
- prog_data.early_fragment_tests = fs && fs->EarlyFragmentTests;
+ prog_data.early_fragment_tests = fs && fs->base.EarlyFragmentTests;
/* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
if (!prog)
*/
int param_count;
if (fs) {
- param_count = fs->num_uniform_components +
- fs->NumImages * BRW_IMAGE_PARAM_SIZE;
- prog_data.base.nr_image_params = fs->NumImages;
+ param_count = fs->base.num_uniform_components +
+ fs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+ prog_data.base.nr_image_params = fs->base.NumImages;
} else {
param_count = fp->program.Base.Parameters->NumParameters * 4;
}
key->persample_shading,
&fp->program);
+ if (unlikely(brw->perf_debug)) {
+ start_busy = (brw->batch.last_bo &&
+ drm_intel_bo_busy(brw->batch.last_bo));
+ start_time = get_time();
+ }
+
program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data,
&fp->program, prog, &program_size);
if (program == NULL) {
return false;
}
+ if (unlikely(brw->perf_debug) && fs) {
+ if (fs->compiled_once)
+ brw_wm_debug_recompile(brw, prog, key);
+ fs->compiled_once = true;
+
+ if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+ perf_debug("FS compile took %.03f ms and stalled the GPU\n",
+ (get_time() - start_time) * 1000);
+ }
+ }
+
if (prog_data.base.total_scratch) {
brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo,
prog_data.base.total_scratch * brw->max_wm_threads);
}
brw->wm.base.prog_data = &brw->wm.prog_data->base;
}
+/**
+ * Compile a fragment program using a program key built from the program
+ * itself and the current context rather than from full draw-time state.
+ *
+ * The key is zeroed and then filled in from fp (UsesKill, UsesDFdy,
+ * InputsRead, OutputsWritten), from brw->gen and from ctx->DrawBuffer.
+ * brw->wm.base.prog_offset and brw->wm.prog_data are saved before the call
+ * to brw_codegen_wm_prog() and restored afterwards.
+ *
+ * \param ctx          GL context; also used to obtain the brw_context.
+ * \param shader_prog  passed through unchanged to brw_codegen_wm_prog().
+ * \param prog         program to compile; cast to gl_fragment_program here.
+ * \return the value returned by brw_codegen_wm_prog().
+ */
+bool
+brw_fs_precompile(struct gl_context *ctx,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_wm_prog_key key;
+
+ struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+ struct brw_fragment_program *bfp = brw_fragment_program(fp);
+ bool program_uses_dfdy = fp->UsesDFdy;
+
+ memset(&key, 0, sizeof(key));
+
+ if (brw->gen < 6) { /* iz_lookup is only filled in for gen < 6 */
+ if (fp->UsesKill)
+ key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+ if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+ key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+ /* Just assume depth testing. */
+ key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+ key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+ }
+
+ if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
+ BRW_FS_VARYING_INPUT_MASK) > 16)
+ key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+
+ brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
+
+ if (fp->Base.InputsRead & VARYING_BIT_POS) {
+ key.drawable_height = ctx->DrawBuffer->Height;
+ }
+
+ key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
+ ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
+ BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))); /* count every output except depth and sample mask */
+
+ if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
+ key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
+ key.nr_color_regions > 1;
+ }
+
+ key.program_string_id = bfp->id;
+ /* Remember the current WM program state so it can be put back below. */
+ uint32_t old_prog_offset = brw->wm.base.prog_offset;
+ struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+
+ bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
+ /* Put back the values saved before the compile call. */
+ brw->wm.base.prog_offset = old_prog_offset;
+ brw->wm.prog_data = old_prog_data;
+
+ return success;
+}
}
/**
+ * Create the buffer surface. Shader buffer variables will be
+ * read from / written to this buffer with Data Port Read/Write
+ * instructions/messages.
+ *
+ * The surface state is emitted through
+ * brw->vtbl.emit_buffer_surface_state() with BRW_SURFACEFORMAT_RAW.
+ *
+ * \param brw          driver context whose vtbl hook is called.
+ * \param bo           buffer object handed to the emit hook.
+ * \param offset       handed to the emit hook together with \p bo.
+ * \param size         handed to the emit hook as the surface size.
+ * \param out_offset   handed to the emit hook; presumably receives the
+ *                     offset of the new surface state (confirm in the hook).
+ * \param dword_pitch  not referenced by this function.
+ */
+void
+brw_create_buffer_surface(struct brw_context *brw,
+ drm_intel_bo *bo,
+ uint32_t offset,
+ uint32_t size,
+ uint32_t *out_offset,
+ bool dword_pitch)
+{
+ /* Use a raw surface so we can reuse existing untyped read/write/atomic
+ * messages. We need these specifically for the fragment shader since they
+ * include a pixel mask header that we need to ensure correct behavior
+ * with helper invocations, which cannot write to the buffer.
+ */
+ brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
+ BRW_SURFACEFORMAT_RAW,
+ size, 1, true);
+}
+
+/**
* Set up a binding table entry for use by stream output logic (transform
* feedback).
*
/* BRW_NEW_FRAGMENT_PROGRAM */
struct gl_program *fs = (struct gl_program *) brw->fragment_program;
+ /* BRW_NEW_COMPUTE_PROGRAM */
+ struct gl_program *cs = (struct gl_program *) brw->compute_program;
+
/* _NEW_TEXTURE */
update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
+ update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
/* emit alternate set of surface state for gather. this
* allows the surface format to be overriden for only the
update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
if (fs && fs->UsesGather)
update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
+ if (cs && cs->UsesGather)
+ update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
}
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
.dirty = {
.mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
+ BRW_NEW_COMPUTE_PROGRAM |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA |
BRW_NEW_GEOMETRY_PROGRAM |
uint32_t *surf_offsets =
&stage_state->surf_offset[prog_data->binding_table.ubo_start];
- for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
- struct gl_uniform_buffer_binding *binding;
+ for (int i = 0; i < shader->NumUniformBlocks; i++) {
struct intel_buffer_object *intel_bo;
- binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
- intel_bo = intel_buffer_object(binding->BufferObject);
- drm_intel_bo *bo =
- intel_bufferobj_buffer(brw, intel_bo,
- binding->Offset,
- binding->BufferObject->Size - binding->Offset);
-
/* Because behavior for referencing outside of the binding's size in the
* glBindBufferRange case is undefined, we can just bind the whole buffer
* glBindBufferBase wants and be a correct implementation.
*/
- brw_create_constant_surface(brw, bo, binding->Offset,
- bo->size - binding->Offset,
- &surf_offsets[i],
- dword_pitch);
+ if (!shader->UniformBlocks[i].IsShaderStorage) {
+ struct gl_uniform_buffer_binding *binding;
+ binding =
+ &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
+ if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+ brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]);
+ } else {
+ intel_bo = intel_buffer_object(binding->BufferObject);
+ drm_intel_bo *bo =
+ intel_bufferobj_buffer(brw, intel_bo,
+ binding->Offset,
+ binding->BufferObject->Size - binding->Offset);
+ brw_create_constant_surface(brw, bo, binding->Offset,
+ binding->BufferObject->Size - binding->Offset,
+ &surf_offsets[i],
+ dword_pitch);
+ }
+ } else {
+ struct gl_shader_storage_buffer_binding *binding;
+ binding =
+ &ctx->ShaderStorageBufferBindings[shader->UniformBlocks[i].Binding];
+ if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+ brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]);
+ } else {
+ intel_bo = intel_buffer_object(binding->BufferObject);
+ drm_intel_bo *bo =
+ intel_bufferobj_buffer(brw, intel_bo,
+ binding->Offset,
+ binding->BufferObject->Size - binding->Offset);
+ brw_create_buffer_surface(brw, bo, binding->Offset,
+ binding->BufferObject->Size - binding->Offset,
+ &surf_offsets[i],
+ dword_pitch);
+ }
+ }
}
if (shader->NumUniformBlocks)
brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
}
+/**
+ * Emit a RAW buffer surface, 3 * sizeof(GLuint) bytes long, for the compute
+ * stage's work-group counts.
+ *
+ * Nothing is emitted unless a compute program is current and
+ * brw->cs.prog_data->uses_num_work_groups is set. The surface state offset
+ * is written to the cs surf_offset entry selected by
+ * binding_table.work_groups_start, and BRW_NEW_SURFACES is flagged.
+ */
+static void
+brw_upload_cs_work_groups_surface(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ /* _NEW_PROGRAM */
+ struct gl_shader_program *prog =
+ ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+ if (prog && brw->cs.prog_data->uses_num_work_groups) {
+ const unsigned surf_idx =
+ brw->cs.prog_data->binding_table.work_groups_start;
+ uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
+ drm_intel_bo *bo;
+ uint32_t bo_offset;
+
+ if (brw->compute.num_work_groups_bo == NULL) { /* no buffer object recorded: upload the three GLuints */
+ bo = NULL;
+ intel_upload_data(brw,
+ (void *)brw->compute.num_work_groups,
+ 3 * sizeof(GLuint),
+ sizeof(GLuint),
+ &bo,
+ &bo_offset); /* presumably fills in bo and bo_offset -- confirm */
+ } else { /* reuse the buffer object and offset stored in brw->compute */
+ bo = brw->compute.num_work_groups_bo;
+ bo_offset = brw->compute.num_work_groups_offset;
+ }
+
+ brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
+ bo, bo_offset,
+ BRW_SURFACEFORMAT_RAW,
+ 3 * sizeof(GLuint), 1, true);
+ brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+ }
+}
+/* Tracked-state entry whose brw dirty bit is BRW_NEW_CS_WORK_GROUPS and
+ * whose emit hook is brw_upload_cs_work_groups_surface().
+ */
+const struct brw_tracked_state brw_cs_work_groups_surface = {
+ .dirty = {
+ .brw = BRW_NEW_CS_WORK_GROUPS
+ },
+ .emit = brw_upload_cs_work_groups_surface,
+};
0, /* min LOD */
max_lod,
0, /* LOD bias */
- 0, /* base miplevel */
0, /* shadow function */
non_normalized_coords,
0); /* border color offset - unused */
void
gen6_gs_visitor::visit(ir_emit_vertex *ir)
{
+ /* To ensure that we don't output more vertices than the shader specified
+ * using max_vertices, do the logic inside a conditional of the form "if
+ * (vertex_count < MAX)"
+ */
+ unsigned num_output_vertices = c->gp->program.VerticesOut;
+ emit(CMP(dst_null_d(), this->vertex_count,
+ src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+ emit(IF(BRW_PREDICATE_NORMAL));
+
gs_emit_vertex(ir->stream_id());
+
+ this->current_annotation = "emit vertex: increment vertex count";
+ emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+ src_reg(1u)));
+
+ emit(BRW_OPCODE_ENDIF);
}
+
void
gen6_gs_visitor::gs_emit_vertex(int stream_id)
{
this->current_annotation = "gen6 emit vertex";
- /* Honor max_vertex layout indication in geometry shader by ignoring any
- * vertices coming after c->gp->program.VerticesOut.
- */
- unsigned num_output_vertices = c->gp->program.VerticesOut;
- emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices),
- BRW_CONDITIONAL_L));
- emit(IF(BRW_PREDICATE_NORMAL));
- {
- /* Buffer all output slots for this vertex in vertex_output */
- for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
- int varying = prog_data->vue_map.slot_to_varying[slot];
- if (varying != VARYING_SLOT_PSIZ) {
- dst_reg dst(this->vertex_output);
- dst.reladdr = ralloc(mem_ctx, src_reg);
- memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
- emit_urb_slot(dst, varying);
- } else {
- /* The PSIZ slot can pack multiple varyings in different channels
- * and emit_urb_slot() will produce a MOV instruction for each of
- * them. Since we are writing to an array, that will translate to
- * possibly multiple MOV instructions with an array destination and
- * each will generate a scratch write with the same offset into
- * scratch space (thus, each one overwriting the previous). This is
- * not what we want. What we will do instead is emit PSIZ to a
- * a regular temporary register, then move that resgister into the
- * array. This way we only have one instruction with an array
- * destination and we only produce a single scratch write.
- */
- dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
- emit_urb_slot(tmp, varying);
- dst_reg dst(this->vertex_output);
- dst.reladdr = ralloc(mem_ctx, src_reg);
- memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
- vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
- inst->force_writemask_all = true;
- }
- emit(ADD(dst_reg(this->vertex_output_offset),
- this->vertex_output_offset, 1u));
- }
-
- /* Now buffer flags for this vertex */
- dst_reg dst(this->vertex_output);
- dst.reladdr = ralloc(mem_ctx, src_reg);
- memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
- if (c->gp->program.OutputType == GL_POINTS) {
- /* If we are outputting points, then every vertex has PrimStart and
- * PrimEnd set.
- */
- emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
- URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
- emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+ /* Buffer all output slots for this vertex in vertex_output */
+ for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
+ int varying = prog_data->vue_map.slot_to_varying[slot];
+ if (varying != VARYING_SLOT_PSIZ) {
+ dst_reg dst(this->vertex_output);
+ dst.reladdr = ralloc(mem_ctx, src_reg);
+ memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+ emit_urb_slot(dst, varying);
} else {
- /* Otherwise, we can only set the PrimStart flag, which we have stored
- * in the first_vertex register. We will have to wait until we execute
- * EndPrimitive() or we end the thread to set the PrimEnd flag on a
- * vertex.
+ /* The PSIZ slot can pack multiple varyings in different channels
+ * and emit_urb_slot() will produce a MOV instruction for each of
+ * them. Since we are writing to an array, that will translate to
+ * possibly multiple MOV instructions with an array destination and
+ * each will generate a scratch write with the same offset into
+ * scratch space (thus, each one overwriting the previous). This is
+ * not what we want. What we will do instead is emit PSIZ to a
+ * regular temporary register, then move that register into the
+ * array. This way we only have one instruction with an array
+ * destination and we only produce a single scratch write.
*/
- emit(OR(dst, this->first_vertex,
- (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
- emit(MOV(dst_reg(this->first_vertex), 0u));
+ dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
+ emit_urb_slot(tmp, varying);
+ dst_reg dst(this->vertex_output);
+ dst.reladdr = ralloc(mem_ctx, src_reg);
+ memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+ vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
+ inst->force_writemask_all = true;
}
+
emit(ADD(dst_reg(this->vertex_output_offset),
this->vertex_output_offset, 1u));
+ }
- /* Update vertex count */
- emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u));
+ /* Now buffer flags for this vertex */
+ dst_reg dst(this->vertex_output);
+ dst.reladdr = ralloc(mem_ctx, src_reg);
+ memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+ if (c->gp->program.OutputType == GL_POINTS) {
+ /* If we are outputting points, then every vertex has PrimStart and
+ * PrimEnd set.
+ */
+ emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
+ URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
+ emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+ } else {
+ /* Otherwise, we can only set the PrimStart flag, which we have stored
+ * in the first_vertex register. We will have to wait until we execute
+ * EndPrimitive() or we end the thread to set the PrimEnd flag on a
+ * vertex.
+ */
+ emit(OR(dst, this->first_vertex,
+ (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
+ emit(MOV(dst_reg(this->first_vertex), 0u));
}
- emit(BRW_OPCODE_ENDIF);
+ emit(ADD(dst_reg(this->vertex_output_offset),
+ this->vertex_output_offset, 1u));
}
void
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/ralloc.h"
+#include "brw_context.h"
+#include "brw_cs.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "brw_shader.h"
+#include "intel_mipmap_tree.h"
+#include "intel_batchbuffer.h"
+#include "brw_state.h"
+/**
+ * Return the work-group size (the product of the three local_size values)
+ * divided by simd_size, rounded up.
+ *
+ * NOTE(review): assumes cs_prog_data->simd_size is non-zero -- confirm that
+ * callers guarantee this.
+ */
+static unsigned
+get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data)
+{
+ const unsigned simd_size = cs_prog_data->simd_size;
+ unsigned group_size = cs_prog_data->local_size[0] *
+ cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
+
+ return (group_size + simd_size - 1) / simd_size; /* ceiling division */
+}
+
+
+static void
+brw_upload_cs_state(struct brw_context *brw)
+{
+ if (!brw->cs.prog_data)
+ return;
+
+ uint32_t offset;
+ uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+ 8 * 4, 64, &offset);
+ struct gl_program *prog = (struct gl_program *) brw->compute_program;
+ struct brw_stage_state *stage_state = &brw->cs.base;
+ struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
+ struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
+
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+ brw->vtbl.emit_buffer_surface_state(
+ brw, &stage_state->surf_offset[
+ prog_data->binding_table.shader_time_start],
+ brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
+ brw->shader_time.bo->size, 1, true);
+ }
+
+ uint32_t *bind = (uint32_t*) brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+ prog_data->binding_table.size_bytes,
+ 32, &stage_state->bind_bo_offset);
+
+ unsigned local_id_dwords = 0;
+
+ if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+ local_id_dwords =
+ brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
+ }
+
+ unsigned push_constant_data_size =
+ (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
+ unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
+ unsigned push_constant_regs = reg_aligned_constant_size / 32;
+ unsigned threads = get_cs_thread_count(cs_prog_data);
+
+ uint32_t dwords = brw->gen < 8 ? 8 : 9;
+ BEGIN_BATCH(dwords);
+ OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
+
+ if (prog_data->total_scratch) {
+ if (brw->gen >= 8)
+ OUT_RELOC64(stage_state->scratch_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(prog_data->total_scratch) - 11);
+ else
+ OUT_RELOC(stage_state->scratch_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(prog_data->total_scratch) - 11);
+ } else {
+ OUT_BATCH(0);
+ if (brw->gen >= 8)
+ OUT_BATCH(0);
+ }
+
+ const uint32_t vfe_num_urb_entries = brw->gen >= 8 ? 2 : 0;
+ const uint32_t vfe_gpgpu_mode =
+ brw->gen == 7 ? SET_FIELD(1, GEN7_MEDIA_VFE_STATE_GPGPU_MODE) : 0;
+ OUT_BATCH(SET_FIELD(brw->max_cs_threads - 1, MEDIA_VFE_STATE_MAX_THREADS) |
+ SET_FIELD(vfe_num_urb_entries, MEDIA_VFE_STATE_URB_ENTRIES) |
+ SET_FIELD(1, MEDIA_VFE_STATE_RESET_GTW_TIMER) |
+ SET_FIELD(1, MEDIA_VFE_STATE_BYPASS_GTW) |
+ vfe_gpgpu_mode);
+
+ OUT_BATCH(0);
+ const uint32_t vfe_urb_allocation = brw->gen >= 8 ? 2 : 0;
+
+ /* We are uploading duplicated copies of push constant uniforms for each
+ * thread. Although the local id data needs to vary per thread, it won't
+ * change for other uniform data. Unfortunately this duplication is
+ * required for gen7. As of Haswell, this duplication can be avoided, but
+ * this older mechanism with duplicated data continues to work.
+ *
+ * FINISHME: As of Haswell, we could make use of the
+ * INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length" field
+ * to only store one copy of uniform data.
+ *
+ * FINISHME: Broadwell adds a new alternative "Indirect Payload Storage"
+ * which is described in the GPGPU_WALKER command and in the Broadwell PRM
+ * Volume 7: 3D Media GPGPU, under Media GPGPU Pipeline => Mode of
+ * Operations => GPGPU Mode => Indirect Payload Storage.
+ *
+ * Note: The constant data is built in brw_upload_cs_push_constants below.
+ */
+ const uint32_t vfe_curbe_allocation = push_constant_regs * threads;
+ OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC) |
+ SET_FIELD(vfe_curbe_allocation, MEDIA_VFE_STATE_CURBE_ALLOC));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ if (reg_aligned_constant_size > 0) {
+ BEGIN_BATCH(4);
+ OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(reg_aligned_constant_size * threads);
+ OUT_BATCH(stage_state->push_const_offset);
+ ADVANCE_BATCH();
+ }
+
+ /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
+ memcpy(bind, stage_state->surf_offset,
+ prog_data->binding_table.size_bytes);
+
+ memset(desc, 0, 8 * 4);
+
+ int dw = 0;
+ desc[dw++] = brw->cs.base.prog_offset;
+ if (brw->gen >= 8)
+ desc[dw++] = 0; /* Kernel Start Pointer High */
+ desc[dw++] = 0;
+ desc[dw++] = stage_state->sampler_offset |
+ ((stage_state->sampler_count + 3) / 4);
+ desc[dw++] = stage_state->bind_bo_offset;
+ desc[dw++] = SET_FIELD(push_constant_regs, MEDIA_CURBE_READ_LENGTH);
+ const uint32_t media_threads =
+ brw->gen >= 8 ?
+ SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
+ SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
+ assert(threads <= brw->max_cs_threads);
+ desc[dw++] =
+ SET_FIELD(cs_prog_data->uses_barrier, MEDIA_BARRIER_ENABLE) |
+ media_threads;
+
+ BEGIN_BATCH(4);
+ OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(8 * 4);
+ OUT_BATCH(offset);
+ ADVANCE_BATCH();
+}
+
+/**
+ * Tracked-state entry for the compute shader stage.
+ *
+ * Its emit hook is brw_upload_cs_state; the masks under .dirty list the
+ * Mesa (_NEW_*) and driver (BRW_NEW_*) state flags associated with it.
+ * NOTE(review): presumably these are the flags that cause the hook to be
+ * re-run -- confirm against the state upload code.
+ */
+const struct brw_tracked_state brw_cs_state = {
+ .dirty = {
+ .mesa = _NEW_PROGRAM_CONSTANTS,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_CS_PROG_DATA |
+ BRW_NEW_PUSH_CONSTANT_ALLOCATION |
+ BRW_NEW_SURFACES,
+ },
+ .emit = brw_upload_cs_state
+};
+
+
+/**
+ * Returns the size, in dwords, of the push constant payload that carries
+ * the local invocation IDs for a single thread.
+ *
+ * The payload is built with the simplest possible method: the local IDs are
+ * pushed exactly as they should appear in the registers for the uvec3
+ * gl_LocalInvocationID variable.
+ *
+ * Therefore, SIMD8 uses 3 full registers and SIMD16 uses 6 registers worth
+ * of push constant space.
+ *
+ * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
+ * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup
+ * need to be coordinated.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ *    no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ *    easily use 16-bit words rather than 32-bit dwords in the push constant
+ *    data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ *    conveying the data, and thereby reduce push constant usage.
+ *
+ * \param prog            compute program; not consulted by the current layout
+ * \param dispatch_width  SIMD width of the compiled shader
+ * \return 3 * dispatch_width
+ */
+unsigned
+brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
+                                    unsigned dispatch_width)
+{
+   /* One dword per channel for each of the x, y and z components. */
+   const unsigned id_components = 3;
+
+   (void) prog;
+   return id_components * dispatch_width;
+}
+
+
+/**
+ * Writes one thread's local invocation ID payload into \p buffer.
+ *
+ * The buffer is filled as three consecutive rows of simd_size dwords: all x
+ * IDs, then all y IDs, then all z IDs.  \p x, \p y and \p z hold the ID of
+ * the next invocation on entry and are advanced once per channel, with x
+ * wrapping at local_size[0] and carrying into y, and y wrapping at
+ * local_size[1] and carrying into z, so that consecutive calls continue
+ * where the previous one stopped.
+ */
+static void
+fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
+                      void *buffer, unsigned *x, unsigned *y, unsigned *z)
+{
+   const unsigned width = cs_prog_data->simd_size;
+   uint32_t *x_row = (uint32_t *) buffer;
+   uint32_t *y_row = x_row + width;
+   uint32_t *z_row = y_row + width;
+
+   for (unsigned lane = 0; lane < width; lane++) {
+      x_row[lane] = *x;
+      y_row[lane] = *y;
+      z_row[lane] = *z;
+
+      /* Step to the next invocation; only carry when a counter wraps. */
+      if (++(*x) != cs_prog_data->local_size[0])
+         continue;
+      *x = 0;
+
+      if (++(*y) != cs_prog_data->local_size[1])
+         continue;
+      *y = 0;
+
+      if (++(*z) == cs_prog_data->local_size[2])
+         *z = 0;
+   }
+}
+
+
+/**
+ * Creates a region containing the push constants for the CS on gen7+.
+ *
+ * Push constants are constant values (such as GLSL uniforms) that are
+ * pre-loaded into a shader stage's register space at thread spawn time.
+ *
+ * The region holds one 32-byte-aligned copy of the data per thread.  Each
+ * copy starts with the local invocation ID payload (present only when the
+ * program reads SYSTEM_BIT_LOCAL_INVOCATION_ID) followed by the nr_params
+ * uniform values.
+ *
+ * For other stages, see brw_curbe.c:brw_upload_constant_buffer for the
+ * equivalent gen4/5 code and gen6_vs_state.c:gen6_upload_push_constants for
+ * gen6+.
+ *
+ * \param brw           context used for the state batch allocation
+ * \param prog          compute program whose parameters are uploaded
+ * \param cs_prog_data  compiled CS data (simd_size, local_size, params)
+ * \param stage_state   receives push_const_offset and push_const_size
+ * \param type          AUB trace type handed to brw_state_batch
+ */
+static void
+brw_upload_cs_push_constants(struct brw_context *brw,
+                             const struct gl_program *prog,
+                             const struct brw_cs_prog_data *cs_prog_data,
+                             struct brw_stage_state *stage_state,
+                             enum aub_state_struct_type type)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const struct brw_stage_prog_data *prog_data =
+      (struct brw_stage_prog_data*) cs_prog_data;
+   unsigned local_id_dwords = 0;
+
+   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+      local_id_dwords =
+         brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
+   }
+
+   /* Updates the ParameterValues[i] pointers for all parameters of the
+    * basic type of PROGRAM_STATE_VAR.
+    */
+   /* XXX: Should this happen somewhere before to get our state flag set? */
+   _mesa_load_state_parameters(ctx, prog->Parameters);
+
+   if (prog_data->nr_params == 0 && local_id_dwords == 0) {
+      stage_state->push_const_size = 0;
+   } else {
+      gl_constant_value *param;
+      unsigned i, t;
+
+      const unsigned push_constant_data_size =
+         (local_id_dwords + prog_data->nr_params) * sizeof(gl_constant_value);
+      const unsigned reg_aligned_constant_size =
+         ALIGN(push_constant_data_size, 32);
+      const unsigned param_aligned_count =
+         reg_aligned_constant_size / sizeof(*param);
+
+      unsigned threads = get_cs_thread_count(cs_prog_data);
+
+      param = (gl_constant_value*)
+         brw_state_batch(brw, type,
+                         reg_aligned_constant_size * threads,
+                         32, &stage_state->push_const_offset);
+      assert(param);
+
+      STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
+
+      /* _NEW_PROGRAM_CONSTANTS */
+      unsigned x = 0, y = 0, z = 0;
+      for (t = 0; t < threads; t++) {
+         /* Start of this thread's register-aligned copy. */
+         gl_constant_value *next_param = &param[t * param_aligned_count];
+         if (local_id_dwords > 0) {
+            /* x/y/z carry over between threads so IDs keep counting up. */
+            fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, &z);
+            next_param += local_id_dwords;
+         }
+         for (i = 0; i < prog_data->nr_params; i++) {
+            next_param[i] = *prog_data->param[i];
+         }
+      }
+
+      /* NOTE(review): this counts only nr_params, not local_id_dwords --
+       * confirm whether users of push_const_size expect the local ID
+       * payload to be included.
+       */
+      stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
+   }
+}
+
+
+/**
+ * State-atom hook that uploads the compute shader push constants.
+ *
+ * Does nothing when no compute program is bound; otherwise forwards the
+ * bound program, its compiled CS data and the CS stage state to
+ * brw_upload_cs_push_constants.
+ */
+static void
+gen7_upload_cs_push_constants(struct brw_context *brw)
+{
+   /* BRW_NEW_COMPUTE_PROGRAM */
+   const struct brw_compute_program *cp =
+      (struct brw_compute_program *) brw->compute_program;
+
+   if (!cp)
+      return;
+
+   /* CACHE_NEW_CS_PROG */
+   brw_upload_cs_push_constants(brw, &cp->program.Base, brw->cs.prog_data,
+                                &brw->cs.base, AUB_TRACE_WM_CONSTANTS);
+}
+
+/**
+ * Tracked-state entry for the compute shader push constants.
+ *
+ * Its emit hook is gen7_upload_cs_push_constants; the masks under .dirty
+ * list the Mesa (_NEW_*) and driver (BRW_NEW_*) state flags associated
+ * with it.
+ * NOTE(review): presumably these are the flags that cause the hook to be
+ * re-run -- confirm against the state upload code.
+ */
+const struct brw_tracked_state gen7_cs_push_constants = {
+ .dirty = {
+ .mesa = _NEW_PROGRAM_CONSTANTS,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_COMPUTE_PROGRAM |
+ BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+ },
+ .emit = gen7_upload_cs_push_constants,
+};
uint32_t dw8 = brw->gs.prog_data->control_data_format <<
HSW_GS_CONTROL_DATA_FORMAT_SHIFT;
+ if (brw->gs.prog_data->static_vertex_count != -1) {
+ dw8 |= GEN8_GS_STATIC_OUTPUT |
+ SET_FIELD(brw->gs.prog_data->static_vertex_count,
+ GEN8_GS_STATIC_VERTEX_COUNT);
+ }
+
if (brw->gen < 9)
dw7 |= (brw->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT;
else
/**
* Define the base addresses which some state is referenced from.
*/
-void gen8_upload_state_base_address(struct brw_context *brw)
+static void
+gen8_upload_state_base_address(struct brw_context *brw)
{
uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
int pkt_len = brw->gen >= 9 ? 19 : 16;
_mesa_get_min_invocations_per_fragment(ctx, fp, false) > 1)
dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
- if (fp->Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN)
- dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
+ if (fp->Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) {
+ if (brw->gen >= 9)
+ dw1 |= BRW_PSICMS_INNER << GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
+ else
+ dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
+ }
if (prog_data->uses_omask)
dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET;
}
if (aux_mt) {
+ uint32_t tile_w, tile_h;
+ assert(aux_mt->tiling == I915_TILING_Y);
+ intel_get_tile_dims(aux_mt->tiling, aux_mt->tr_mode,
+ aux_mt->cpp, &tile_w, &tile_h);
surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
- SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
+ SET_FIELD((aux_mt->pitch / tile_w) - 1,
+ GEN8_SURFACE_AUX_PITCH) |
aux_mode;
} else {
surf[6] = 0;
}
if (aux_mt) {
+ uint32_t tile_w, tile_h;
+ assert(aux_mt->tiling == I915_TILING_Y);
+ intel_get_tile_dims(aux_mt->tiling, aux_mt->tr_mode,
+ aux_mt->cpp, &tile_w, &tile_h);
surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
- SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
+ SET_FIELD((aux_mt->pitch / tile_w) - 1,
+ GEN8_SURFACE_AUX_PITCH) |
aux_mode;
} else {
surf[6] = 0;
-/**************************************************************************
- *
+/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
abort();
}
batch->total = 0;
+#else
+ (void) brw;
#endif
}
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
+ */
#include "main/mtypes.h"
#include "main/blit.h"
if (dst_flip)
dst_y = minify(dst_mt->physical_height0, dst_level - dst_mt->first_level) - dst_y - height;
- int src_pitch = src_mt->pitch;
- if (src_flip != dst_flip)
- src_pitch = -src_pitch;
-
uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
intel_miptree_get_image_offset(src_mt, src_level, src_slice,
&src_image_x, &src_image_y);
if (!intelEmitCopyBlit(brw,
src_mt->cpp,
- src_pitch,
+ src_flip == dst_flip ? src_mt->pitch : -src_mt->pitch,
src_mt->bo, src_mt->offset,
src_mt->tiling,
src_mt->tr_mode,
dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE)
return false;
+ /* The start pixel for Fast Copy blit should be on an OWord boundary. */
+ if ((dst_x * cpp | src_x * cpp) & 15)
+ return false;
+
/* For all surface types buffers must be cacheline-aligned. */
if ((dst_offset | src_offset) & 63)
return false;
(dst_tiling_none && dst_pitch % 16 != 0))
return false;
- /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
- * (X direction width of the Tile). This means the pitch value will
- * always be Cache Line aligned (64byte multiple).
- */
- if ((!dst_tiling_none && dst_pitch % 64 != 0) ||
- (!src_tiling_none && src_pitch % 64 != 0))
- return false;
-
return true;
}
bool dst_y_tiled = dst_tiling == I915_TILING_Y;
bool src_y_tiled = src_tiling == I915_TILING_Y;
bool use_fast_copy_blit = false;
+ uint32_t src_tile_w, src_tile_h;
+ uint32_t dst_tile_w, dst_tile_h;
if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
return false;
src_buffer, src_pitch, src_offset, src_x, src_y,
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
+ intel_get_tile_dims(src_tiling, src_tr_mode, cpp, &src_tile_w, &src_tile_h);
+ intel_get_tile_dims(dst_tiling, dst_tr_mode, cpp, &dst_tile_w, &dst_tile_h);
+
+ /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
+ * (X direction width of the Tile). This is ensured while allocating the
+ * buffer object.
+ */
+ assert(src_tiling == I915_TILING_NONE || (src_pitch % src_tile_w) == 0);
+ assert(dst_tiling == I915_TILING_NONE || (dst_pitch % dst_tile_w) == 0);
+
use_fast_copy_blit = can_fast_copy_blit(brw,
src_buffer,
src_x, src_y,
dst_tiling, dst_tr_mode,
cpp, use_fast_copy_blit);
- /* For tiled source and destination, pitch value should be specified
- * as a number of Dwords.
- */
- if (dst_tiling != I915_TILING_NONE)
- dst_pitch /= 4;
-
- if (src_tiling != I915_TILING_NONE)
- src_pitch /= 4;
-
} else {
- assert(!dst_y_tiled || (dst_pitch % 128) == 0);
- assert(!src_y_tiled || (src_pitch % 128) == 0);
-
/* For big formats (such as floating point), do the copy using 16 or
* 32bpp and multiply the coordinates.
*/
CMD = xy_blit_cmd(src_tiling, src_tr_mode,
dst_tiling, dst_tr_mode,
cpp, use_fast_copy_blit);
+ }
- if (dst_tiling != I915_TILING_NONE)
- dst_pitch /= 4;
+ /* For tiled source and destination, pitch value should be specified
+ * as a number of Dwords.
+ */
+ if (dst_tiling != I915_TILING_NONE)
+ dst_pitch /= 4;
- if (src_tiling != I915_TILING_NONE)
- src_pitch /= 4;
- }
+ if (src_tiling != I915_TILING_NONE)
+ src_pitch /= 4;
- if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
+ if (dst_y2 <= dst_y || dst_x2 <= dst_x)
return true;
- }
assert(dst_x < dst_x2);
assert(dst_y < dst_y2);
int16_t src_x, dst_x;
bool ok;
- /* The pitch given to the GPU must be DWORD aligned, and
- * we want width to match pitch. Max width is (1 << 15 - 1),
- * rounding that down to the nearest DWORD is 1 << 15 - 4
- */
- pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
- height = (pitch == 0) ? 1 : size / pitch;
- src_x = src_offset % 64;
- dst_x = dst_offset % 64;
- ok = intelEmitCopyBlit(brw, 1,
- pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
- INTEL_MIPTREE_TRMODE_NONE,
- pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
- INTEL_MIPTREE_TRMODE_NONE,
- src_x, 0, /* src x/y */
- dst_x, 0, /* dst x/y */
- pitch, height, /* w, h */
- GL_COPY);
- if (!ok)
- _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);
-
- src_offset += pitch * height;
- dst_offset += pitch * height;
- src_x = src_offset % 64;
- dst_x = dst_offset % 64;
- size -= pitch * height;
- assert (size < (1 << 15));
- pitch = ALIGN(size, 4);
-
- if (size != 0) {
+ do {
+ /* The pitch given to the GPU must be DWORD aligned, and
+ * we want width to match pitch. Max width is (1 << 15) - 1; we cap
+ * the pitch at (1 << 15) - 64 so that adding the sub-cacheline start
+ * offset (src_x or dst_x, at most 63) still stays below 1 << 15.
+ */
+ pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 64), 4);
+ height = (size < pitch || pitch == 0) ? 1 : size / pitch;
+
+ src_x = src_offset % 64;
+ dst_x = dst_offset % 64;
+ pitch = ALIGN(MIN2(size, (1 << 15) - 64), 4);
+ assert(src_x + pitch < 1 << 15);
+ assert(dst_x + pitch < 1 << 15);
+
ok = intelEmitCopyBlit(brw, 1,
- pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+ pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
INTEL_MIPTREE_TRMODE_NONE,
- pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+ pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
INTEL_MIPTREE_TRMODE_NONE,
- src_x, 0, /* src x/y */
- dst_x, 0, /* dst x/y */
- size, 1, /* w, h */
- GL_COPY);
- if (!ok)
- _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
- }
+ src_x, 0, /* src x/y */
+ dst_x, 0, /* dst x/y */
+ MIN2(size, pitch), height, /* w, h */
+ GL_COPY);
+ if (!ok) {
+ _mesa_problem(ctx, "Failed to linear blit %dx%d\n",
+ MIN2(size, pitch), height);
+ return;
+ }
+
+ pitch *= height;
+ if (size <= pitch)
+ return;
+
+ src_offset += pitch;
+ dst_offset += pitch;
+ size -= pitch;
+ } while (1);
}
/**
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#ifndef INTEL_BLIT_H
#define INTEL_BLIT_H
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
/**
* @file intel_buffer_objects.c
*/
if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
+ if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
+ brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
-/**************************************************************************
- *
+/*
* Copyright 2005 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#ifndef INTEL_BUFFEROBJ_H
#define INTEL_BUFFEROBJ_H
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include "brw_context.h"
#include "intel_buffers.h"
-
-/**************************************************************************
- *
+/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#ifndef INTEL_BUFFERS_H
#define INTEL_BUFFERS_H
* Jason Ekstrand <jason.ekstrand@intel.com>
*/
+#include "intel_fbo.h"
#include "intel_tex.h"
#include "intel_blit.h"
#include "intel_mipmap_tree.h"
#include "main/formats.h"
+#include "main/teximage.h"
#include "drivers/common/meta.h"
static bool
}
}
+
+/**
+ * Copies a rectangle between two images, each of which is given either as
+ * a texture image or, when the texture image pointer is NULL, as a
+ * renderbuffer.
+ *
+ * The meta path for uncompressed formats is tried first.  Otherwise the
+ * source and destination miptrees are looked up, multisampled miptrees are
+ * rejected, both miptrees are fully resolved, and the copy is attempted
+ * with the blitter before falling back to a mapped memcpy.
+ *
+ * For a renderbuffer, the image pointer is replaced by the renderbuffer's
+ * TexImage (which may itself be NULL, in which case level 0 is used).
+ */
static void
intel_copy_image_sub_data(struct gl_context *ctx,
 struct gl_texture_image *src_image,
+ struct gl_renderbuffer *src_renderbuffer,
 int src_x, int src_y, int src_z,
 struct gl_texture_image *dst_image,
+ struct gl_renderbuffer *dst_renderbuffer,
 int dst_x, int dst_y, int dst_z,
 int src_width, int src_height)
{
 struct brw_context *brw = brw_context(ctx);
- struct intel_texture_image *intel_src_image = intel_texture_image(src_image);
- struct intel_texture_image *intel_dst_image = intel_texture_image(dst_image);
+ struct intel_mipmap_tree *src_mt, *dst_mt;
+ unsigned src_level, dst_level;
 if (_mesa_meta_CopyImageSubData_uncompressed(ctx,
- src_image, src_x, src_y, src_z,
- dst_image, dst_x, dst_y, dst_z,
+ src_image, src_renderbuffer,
+ src_x, src_y, src_z,
+ dst_image, dst_renderbuffer,
+ dst_x, dst_y, dst_z,
 src_width, src_height)) {
 return;
 }
- if (intel_src_image->mt->num_samples > 0 ||
- intel_dst_image->mt->num_samples > 0) {
+ if (src_image) {
+ src_mt = intel_texture_image(src_image)->mt;
+ } else {
+ assert(src_renderbuffer);
+ src_mt = intel_renderbuffer(src_renderbuffer)->mt;
+ src_image = src_renderbuffer->TexImage;
+ }
+
+ if (dst_image) {
+ dst_mt = intel_texture_image(dst_image)->mt;
+ } else {
+ assert(dst_renderbuffer);
+ dst_mt = intel_renderbuffer(dst_renderbuffer)->mt;
+ dst_image = dst_renderbuffer->TexImage;
+ }
+
+ if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) {
 _mesa_problem(ctx, "Failed to copy multisampled texture with meta path\n");
 return;
 }
- /* Cube maps actually have different images per face */
- if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
- src_z = src_image->Face;
- if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
- dst_z = dst_image->Face;
+ if (src_image) {
+ src_level = src_image->Level + src_image->TexObject->MinLevel;
+
+ /* Cube maps actually have different images per face */
+ if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
+ src_z = src_image->Face;
+ } else {
+ src_level = 0;
+ }
+
+ if (dst_image) {
+ dst_level = dst_image->Level + dst_image->TexObject->MinLevel;
+
+ /* Cube maps actually have different images per face */
+ if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
+ dst_z = dst_image->Face;
+ } else {
+ dst_level = 0;
+ }
 /* We are now going to try and copy the texture using the blitter. If
 * that fails, we will fall back mapping the texture and using memcpy.
 * In either case, we need to do a full resolve.
 */
- intel_miptree_all_slices_resolve_hiz(brw, intel_src_image->mt);
- intel_miptree_all_slices_resolve_depth(brw, intel_src_image->mt);
- intel_miptree_resolve_color(brw, intel_src_image->mt);
+ intel_miptree_all_slices_resolve_hiz(brw, src_mt);
+ intel_miptree_all_slices_resolve_depth(brw, src_mt);
+ intel_miptree_resolve_color(brw, src_mt);
- intel_miptree_all_slices_resolve_hiz(brw, intel_dst_image->mt);
- intel_miptree_all_slices_resolve_depth(brw, intel_dst_image->mt);
- intel_miptree_resolve_color(brw, intel_dst_image->mt);
+ intel_miptree_all_slices_resolve_hiz(brw, dst_mt);
+ intel_miptree_all_slices_resolve_depth(brw, dst_mt);
+ intel_miptree_resolve_color(brw, dst_mt);
- unsigned src_level = src_image->Level + src_image->TexObject->MinLevel;
- unsigned dst_level = dst_image->Level + dst_image->TexObject->MinLevel;
- if (copy_image_with_blitter(brw, intel_src_image->mt, src_level,
+ if (copy_image_with_blitter(brw, src_mt, src_level,
 src_x, src_y, src_z,
- intel_dst_image->mt, dst_level,
+ dst_mt, dst_level,
 dst_x, dst_y, dst_z,
 src_width, src_height))
 return;
 /* This is a worst-case scenario software fallback that maps the two
 * textures and does a memcpy between them.
 */
- copy_image_with_memcpy(brw, intel_src_image->mt, src_level,
+ copy_image_with_memcpy(brw, src_mt, src_level,
 src_x, src_y, src_z,
- intel_dst_image->mt, dst_level,
+ dst_mt, dst_level,
 dst_x, dst_y, dst_z,
 src_width, src_height);
}
{ "ann", DEBUG_ANNOTATION },
{ "no8", DEBUG_NO8 },
{ "vec4", DEBUG_VEC4VS },
- { "spill", DEBUG_SPILL },
+ { "spill_fs", DEBUG_SPILL_FS },
+ { "spill_vec4", DEBUG_SPILL_VEC4 },
{ "cs", DEBUG_CS },
{ NULL, 0 }
};
#define DEBUG_ANNOTATION (1ull << 28)
#define DEBUG_NO8 (1ull << 29)
#define DEBUG_VEC4VS (1ull << 30)
-#define DEBUG_SPILL (1ull << 31)
-#define DEBUG_CS (1ull << 32)
+#define DEBUG_SPILL_FS (1ull << 31)
+#define DEBUG_SPILL_VEC4 (1ull << 32)
+#define DEBUG_CS (1ull << 33)
#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include "main/version.h"
ctx->Extensions.MESA_pack_invert = true;
ctx->Extensions.NV_conditional_render = true;
ctx->Extensions.NV_primitive_restart = true;
+ ctx->Extensions.NV_texture_barrier = true;
ctx->Extensions.NV_texture_env_combine4 = true;
ctx->Extensions.NV_texture_rectangle = true;
ctx->Extensions.TDFX_texture_compression_FXT1 = true;
ctx->Extensions.ARB_shader_atomic_counters = true;
ctx->Extensions.ARB_shader_image_load_store = true;
ctx->Extensions.ARB_shader_image_size = true;
+ ctx->Extensions.ARB_shader_texture_image_samples = true;
ctx->Extensions.ARB_texture_compression_bptc = true;
ctx->Extensions.ARB_texture_view = true;
+ ctx->Extensions.ARB_shader_storage_buffer_object = true;
if (can_do_pipelined_register_writes(brw)) {
ctx->Extensions.ARB_draw_indirect = true;
ctx->Extensions.ARB_stencil_texturing = true;
}
+ if (brw->gen >= 9) {
+ ctx->Extensions.KHR_texture_compression_astc_ldr = true;
+ ctx->Extensions.KHR_texture_compression_astc_hdr = true;
+ }
+
if (ctx->API == API_OPENGL_CORE)
ctx->Extensions.ARB_base_instance = true;
if (ctx->API != API_OPENGL_CORE)
-/**************************************************************************
- *
+/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
+ */
#include "main/enums.h"
#include "main/imports.h"
intel_alloc_window_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
GLenum internalFormat, GLuint width, GLuint height)
{
+ (void) ctx;
assert(rb->Name == 0);
rb->Width = width;
rb->Height = height;
intel_nop_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
GLenum internalFormat, GLuint width, GLuint height)
{
+ (void) rb;
+ (void) internalFormat;
+ (void) width;
+ (void) height;
_mesa_problem(ctx, "intel_nop_alloc_storage should never be called.");
return false;
}
GLint srcX1, GLint srcY1,
GLint dstX0, GLint dstY0,
GLint dstX1, GLint dstY1,
- GLbitfield mask, GLenum filter)
+ GLbitfield mask)
{
struct brw_context *brw = brw_context(ctx);
mask = intel_blit_framebuffer_with_blitter(ctx, readFb, drawFb,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
- mask, filter);
+ mask);
if (mask == 0x0)
return;
mask = intel_blit_framebuffer_with_blitter(ctx, readFb, drawFb,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
- mask, filter);
+ mask);
if (mask == 0x0)
return;
uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
MIPTREE_LAYOUT_TILING_ANY;
- intel_miptree_get_dimensions_for_image(rb->TexImage, &width, &height, &depth);
+ intel_get_image_dims(rb->TexImage, &width, &height, &depth);
new_mt = intel_miptree_create(brw, rb->TexImage->TexObject->Target,
intel_image->base.Base.TexFormat,
-/**************************************************************************
- *
+/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#ifndef INTEL_FBO_H
#define INTEL_FBO_H
-/**************************************************************************
- *
+/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#ifndef INTEL_IMAGE_H
#define INTEL_IMAGE_H
-/**************************************************************************
- *
+/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include <GL/gl.h>
#include <GL/internal/dri_interface.h>
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
+static void *intel_miptree_map_raw(struct brw_context *brw,
+ struct intel_mipmap_tree *mt);
+
+static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);
+
static bool
intel_miptree_alloc_mcs(struct brw_context *brw,
struct intel_mipmap_tree *mt,
* created, based on the chip generation and the surface type.
*/
static enum intel_msaa_layout
-compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target,
+compute_msaa_layout(struct brw_context *brw, mesa_format format,
bool disable_aux_buffers)
{
/* Prior to Gen7, all MSAA surfaces used IMS layout. */
* by half the block width, and Y coordinates by half the block height.
*/
void
-intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
- struct intel_mipmap_tree *mt,
+intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
unsigned *width_px, unsigned *height)
{
switch (mt->tiling) {
if (num_samples > 1) {
/* Adjust width/height/depth for MSAA */
mt->msaa_layout = compute_msaa_layout(brw, format,
- mt->target, mt->disable_aux_buffers);
+ mt->disable_aux_buffers);
if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
/* From the Ivybridge PRM, Volume 1, Part 1, page 108:
* "If the surface is multisampled and it is a depth or stencil
intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment,
unsigned long *pitch)
{
- const uint32_t bpp = mt->cpp * 8;
- const uint32_t aspect_ratio = (bpp == 16 || bpp == 64) ? 2 : 1;
uint32_t tile_width, tile_height;
unsigned long stride, size, aligned_y;
assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
-
- switch (bpp) {
- case 8:
- tile_height = 64;
- break;
- case 16:
- case 32:
- tile_height = 32;
- break;
- case 64:
- case 128:
- tile_height = 16;
- break;
- default:
- unreachable("not reached");
- }
-
- if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
- tile_height *= 4;
+ intel_get_tile_dims(mt->tiling, mt->tr_mode, mt->cpp,
+ &tile_width, &tile_height);
aligned_y = ALIGN(mt->total_height, tile_height);
stride = mt->total_width * mt->cpp;
- tile_width = tile_height * mt->cpp * aspect_ratio;
stride = ALIGN(stride, tile_width);
size = stride * aligned_y;
*mt = NULL;
}
+
void
-intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
- int *width, int *height, int *depth)
+intel_get_image_dims(struct gl_texture_image *image,
+ int *width, int *height, int *depth)
{
switch (image->TexObject->Target) {
case GL_TEXTURE_1D_ARRAY:
+ /* For a 1D Array texture the OpenGL API will treat the image height as
+ * the number of array slices. For Intel hardware, we treat the 1D array
+ * as a 2D Array with a height of 1. So, here we want to swap image
+ * height and depth.
+ */
*width = image->Width;
*height = 1;
*depth = image->Height;
if (image->TexFormat != mt_format)
return false;
- intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
+ intel_get_image_dims(image, &width, &height, &depth);
if (mt->target == GL_TEXTURE_CUBE_MAP)
depth = 6;
*y = mt->level[level].slice[slice].y_offset;
}
+
+/**
+ * This function computes the tile_w (in bytes) and tile_h (in rows) of
+ * different tiling patterns. If the BO is untiled, tile_w is set to cpp
+ * and tile_h is set to 1.
+ *
+ * \param tiling   I915_TILING_X, I915_TILING_Y or I915_TILING_NONE. Only
+ *                 consulted when tr_mode is INTEL_MIPTREE_TRMODE_NONE.
+ * \param tr_mode  INTEL_MIPTREE_TRMODE_* value. Any value other than
+ *                 INTEL_MIPTREE_TRMODE_NONE selects the cpp-based
+ *                 computation and ignores \p tiling.
+ * \param cpp      Bytes per pixel. Must be 1, 2, 4, 8 or 16 when tr_mode is
+ *                 not INTEL_MIPTREE_TRMODE_NONE.
+ * \param tile_w   Output: tile width in bytes.
+ * \param tile_h   Output: tile height in rows.
+ *
+ * With tr_mode == INTEL_MIPTREE_TRMODE_NONE the dimensions are fixed per
+ * tiling: X tiles are 512 bytes by 8 rows and Y tiles are 128 bytes by
+ * 32 rows.
+ *
+ * Otherwise the height is picked from cpp (64 rows for cpp 1, 32 rows for
+ * cpp 2 and 4, 16 rows for cpp 8 and 16) and multiplied by 4 for
+ * INTEL_MIPTREE_TRMODE_YS. The width is then tile_h * cpp, doubled when
+ * cpp is 2 or 8.
+ *
+ * An unhandled tiling or cpp value reaches unreachable(); the outputs are
+ * not written on that path.
+ */
+void
+intel_get_tile_dims(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
+ uint32_t *tile_w, uint32_t *tile_h)
+{
+ if (tr_mode == INTEL_MIPTREE_TRMODE_NONE) {
+ switch (tiling) {
+ case I915_TILING_X:
+ *tile_w = 512;
+ *tile_h = 8;
+ break;
+ case I915_TILING_Y:
+ *tile_w = 128;
+ *tile_h = 32;
+ break;
+ case I915_TILING_NONE:
+ *tile_w = cpp;
+ *tile_h = 1;
+ break;
+ default:
+ unreachable("not reached");
+ }
+ } else {
+ uint32_t aspect_ratio = 1;
+ assert(_mesa_is_pow_two(cpp));
+
+ switch (cpp) {
+ case 1:
+ *tile_h = 64;
+ break;
+ case 2:
+ case 4:
+ *tile_h = 32;
+ break;
+ case 8:
+ case 16:
+ *tile_h = 16;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ if (cpp == 2 || cpp == 8)
+ aspect_ratio = 2;
+
+ if (tr_mode == INTEL_MIPTREE_TRMODE_YS)
+ *tile_h *= 4; /* YS uses four times the height chosen above */
+
+ *tile_w = *tile_h * aspect_ratio * cpp; /* width is in bytes, not pixels */
+ }
+}
+
+
/**
* This function computes masks that may be used to select the bits of the X
* and Y coordinates that indicate the offset within a tile. If the BO is
* untiled, the masks are set to 0.
*/
void
-intel_miptree_get_tile_masks(const struct intel_mipmap_tree *mt,
- uint32_t *mask_x, uint32_t *mask_y,
- bool map_stencil_as_y_tiled)
+intel_get_tile_masks(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
+ bool map_stencil_as_y_tiled,
+ uint32_t *mask_x, uint32_t *mask_y)
{
- int cpp = mt->cpp;
- uint32_t tiling = mt->tiling;
-
+ uint32_t tile_w_bytes, tile_h;
if (map_stencil_as_y_tiled)
tiling = I915_TILING_Y;
- switch (tiling) {
- default:
- unreachable("not reached");
- case I915_TILING_NONE:
- *mask_x = *mask_y = 0;
- break;
- case I915_TILING_X:
- *mask_x = 512 / cpp - 1;
- *mask_y = 7;
- break;
- case I915_TILING_Y:
- *mask_x = 128 / cpp - 1;
- *mask_y = 31;
- break;
- }
+ intel_get_tile_dims(tiling, tr_mode, cpp, &tile_w_bytes, &tile_h);
+
+ *mask_x = tile_w_bytes / cpp - 1;
+ *mask_y = tile_h - 1;
}
/**
uint32_t x, y;
uint32_t mask_x, mask_y;
- intel_miptree_get_tile_masks(mt, &mask_x, &mask_y, false);
+ intel_get_tile_masks(mt->tiling, mt->tr_mode, mt->cpp, false, &mask_x, &mask_y);
intel_miptree_get_image_offset(mt, level, slice, &x, &y);
*tile_x = x & mask_x;
*/
void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
memset(data, 0xff, mt->mcs_mt->total_height * mt->mcs_mt->pitch);
- intel_miptree_unmap_raw(brw, mt->mcs_mt);
+ intel_miptree_unmap_raw(mt->mcs_mt);
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
return mt->mcs_mt;
const mesa_format format = MESA_FORMAT_R_UINT32;
unsigned block_width_px;
unsigned block_height;
- intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height);
+ intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height);
unsigned width_divisor = block_width_px * 4;
unsigned height_divisor = block_height * 8;
unsigned mcs_width =
/* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
* adjustments required for Z_Height and Z_Width based on multisampling.
*/
- if (brw->gen < 9) {
- switch (mt->num_samples) {
- case 0:
- case 1:
- break;
- case 2:
- case 4:
- z_width *= 2;
- z_height *= 2;
- break;
- case 8:
- z_width *= 4;
- z_height *= 2;
- break;
- default:
- unreachable("unsupported sample count");
- }
+ switch (mt->num_samples) {
+ case 0:
+ case 1:
+ break;
+ case 2:
+ case 4:
+ z_width *= 2;
+ z_height *= 2;
+ break;
+ case 8:
+ z_width *= 4;
+ z_height *= 2;
+ break;
+ default:
+ unreachable("unsupported sample count");
}
const unsigned vertical_align = 8; /* 'j' in the docs */
unsigned H_i = H0;
unsigned Z_i = Z0;
hz_height = 0;
- for (int level = mt->first_level; level <= mt->last_level; ++level) {
+ for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
unsigned h_i = ALIGN(H_i, vertical_align);
/* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
hz_height += h_i * Z_i;
/* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
* adjustments required for Z_Height and Z_Width based on multisampling.
*/
- switch (mt->num_samples) {
- case 0:
- case 1:
- break;
- case 2:
- case 4:
- z_width *= 2;
- z_height *= 2;
- break;
- case 8:
- z_width *= 4;
- z_height *= 2;
- break;
- default:
- unreachable("unsupported sample count");
+ if (brw->gen < 9) {
+ switch (mt->num_samples) {
+ case 0:
+ case 1:
+ break;
+ case 2:
+ case 4:
+ z_width *= 2;
+ z_height *= 2;
+ break;
+ case 8:
+ z_width *= 4;
+ z_height *= 2;
+ break;
+ default:
+ unreachable("unsupported sample count");
+ }
}
const unsigned vertical_align = 8; /* 'j' in the docs */
unsigned Z_i = Z0;
unsigned sum_h_i = 0;
unsigned hz_height_3d_sum = 0;
- for (int level = mt->first_level; level <= mt->last_level; ++level) {
+ for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
unsigned i = level - mt->first_level;
unsigned h_i = ALIGN(H_i, vertical_align);
/* sum(i=2 to m; h_i) */
return false;
/* Mark that all slices need a HiZ resolve. */
- for (int level = mt->first_level; level <= mt->last_level; ++level) {
+ for (unsigned level = mt->first_level; level <= mt->last_level; ++level) {
if (!intel_miptree_level_enable_hiz(brw, mt, level))
continue;
- for (int layer = 0; layer < mt->level[level].depth; ++layer) {
+ for (unsigned layer = 0; layer < mt->level[level].depth; ++layer) {
struct intel_resolve_map *m = malloc(sizeof(struct intel_resolve_map));
exec_node_init(&m->link);
m->level = level;
}
void
-intel_miptree_unmap_raw(struct brw_context *brw,
- struct intel_mipmap_tree *mt)
+intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
{
drm_intel_bo_unmap(mt->bo);
}
}
static void
-intel_miptree_unmap_gtt(struct brw_context *brw,
- struct intel_mipmap_tree *mt,
- struct intel_miptree_map *map,
- unsigned int level,
- unsigned int slice)
+intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
{
- intel_miptree_unmap_raw(brw, mt);
+ intel_miptree_unmap_raw(mt);
}
static void
{
struct gl_context *ctx = &brw->ctx;
- intel_miptree_unmap_raw(brw, map->mt);
+ intel_miptree_unmap_raw(map->mt);
if (map->mode & GL_MAP_WRITE_BIT) {
bool ok = intel_miptree_blit(brw,
_mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
}
- intel_miptree_unmap_raw(brw, mt);
+ intel_miptree_unmap_raw(mt);
}
static void
}
}
- intel_miptree_unmap_raw(brw, mt);
+ intel_miptree_unmap_raw(mt);
DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
map->x, map->y, map->w, map->h,
}
}
- intel_miptree_unmap_raw(brw, mt);
+ intel_miptree_unmap_raw(mt);
}
free(map->buffer);
map->ptr, map->stride,
map->w, map->h, mt->etc_format);
- intel_miptree_unmap_raw(brw, mt);
+ intel_miptree_unmap_raw(mt);
free(map->buffer);
}
}
}
- intel_miptree_unmap_raw(brw, s_mt);
- intel_miptree_unmap_raw(brw, z_mt);
+ intel_miptree_unmap_raw(s_mt);
+ intel_miptree_unmap_raw(z_mt);
DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
__func__,
}
}
- intel_miptree_unmap_raw(brw, s_mt);
- intel_miptree_unmap_raw(brw, z_mt);
+ intel_miptree_unmap_raw(s_mt);
+ intel_miptree_unmap_raw(z_mt);
DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
__func__,
intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
#endif
} else {
- intel_miptree_unmap_gtt(brw, mt, map, level, slice);
+ intel_miptree_unmap_gtt(mt);
}
intel_miptree_release_map(mt, level, slice);
-/**************************************************************************
- *
+/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
/** @file intel_mipmap_tree.h
*
};
void
-intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
- struct intel_mipmap_tree *mt,
+intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
unsigned *width_px, unsigned *height);
bool
intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling);
uint32_t level,
uint32_t layer)
{
+ (void) mt;
+ (void) level;
+ (void) layer;
+
assert(level >= mt->first_level);
assert(level <= mt->last_level);
assert(layer < mt->level[level].depth);
GLuint *x, GLuint *y);
void
-intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
- int *width, int *height, int *depth);
+intel_get_image_dims(struct gl_texture_image *image,
+ int *width, int *height, int *depth);
+
+void
+intel_get_tile_masks(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
+ bool map_stencil_as_y_tiled,
+ uint32_t *mask_x, uint32_t *mask_y);
void
-intel_miptree_get_tile_masks(const struct intel_mipmap_tree *mt,
- uint32_t *mask_x, uint32_t *mask_y,
- bool map_stencil_as_y_tiled);
+intel_get_tile_dims(uint32_t tiling, uint32_t tr_mode, uint32_t cpp,
+ uint32_t *tile_w, uint32_t *tile_h);
uint32_t
intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
struct intel_mipmap_tree *mt,
uint32_t layout_flags);
-void *intel_miptree_map_raw(struct brw_context *brw,
- struct intel_mipmap_tree *mt);
-
-void intel_miptree_unmap_raw(struct brw_context *brw,
- struct intel_mipmap_tree *mt);
-
void
intel_miptree_map(struct brw_context *brw,
struct intel_mipmap_tree *mt,
-/**************************************************************************
- *
+/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include "main/accum.h"
#include "main/enums.h"
-/**************************************************************************
- *
+/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#ifndef INTEL_PIXEL_H
#define INTEL_PIXEL_H
-/**************************************************************************
- *
+/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include "main/blend.h"
#include "main/glheader.h"
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include "main/glheader.h"
#include "main/image.h"
-/**************************************************************************
- *
+/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include "main/glheader.h"
#include "main/enums.h"
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include "main/glheader.h"
#include "main/enums.h"
pack->Invert)
return false;
+ /* Only a simple blit, no scale, bias or other mapping. */
+ if (ctx->_ImageTransferState)
+ return false;
+
/* This renderbuffer can come from a texture. In this case, we impose
* some of the same restrictions we have for textures and adjust for
* miplevels.
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#define CMD_MI (0x0 << 29)
#define CMD_2D (0x2 << 29)
#define GEN7_3DPRIM_START_INSTANCE 0x243C
#define GEN7_3DPRIM_BASE_VERTEX 0x2440
+/* Auto-Compute / Indirect Registers */
+#define GEN7_GPGPU_DISPATCHDIMX 0x2500
+#define GEN7_GPGPU_DISPATCHDIMY 0x2504
+#define GEN7_GPGPU_DISPATCHDIMZ 0x2508
+
#define GEN7_CACHE_MODE_1 0x7004
# define GEN8_HIZ_NP_PMA_FIX_ENABLE (1 << 11)
# define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13)
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include <errno.h>
#include <time.h>
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#ifndef _INTEL_INIT_H_
#define _INTEL_INIT_H_
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
+ */
#include "main/glheader.h"
#include "main/context.h"
return true;
}
+/**
+ * TextureBarrier driver hook: emits an MI flush on the brw_context that
+ * wraps \p ctx.
+ */
+static void
+intel_texture_barrier(struct gl_context *ctx)
+{
+   brw_emit_mi_flush(brw_context(ctx));
+}
+
void
intelInitTextureFuncs(struct dd_function_table *functions)
{
functions->TextureView = intel_texture_view;
functions->SetTextureStorageForBufferObject =
intel_set_texture_storage_for_buffer_object;
+ functions->TextureBarrier = intel_texture_barrier;
}
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#ifndef INTELTEX_INC
#define INTELTEX_INC
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include "main/mtypes.h"
#include "main/enums.h"
const GLenum internalFormat = intelImage->base.Base.InternalFormat;
bool ret;
+ /* No pixel transfer operations (zoom, bias, mapping), just a blit */
+ if (brw->ctx._ImageTransferState)
+ return false;
+
intel_prepare_render(brw);
/* glCopyTexSubImage() can be called on a multisampled renderbuffer (if
int width, height, depth;
GLuint i;
- intel_miptree_get_dimensions_for_image(&intelImage->base.Base,
- &width, &height, &depth);
+ intel_get_image_dims(&intelImage->base.Base, &width, &height, &depth);
DBG("%s\n", __func__);
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#ifndef _INTEL_TEX_OBJ_H
#define _INTEL_TEX_OBJ_H
-
-/**************************************************************************
- *
+/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
#include "main/bufferobj.h"
#include "main/image.h"
packing->Invert)
return false;
+ /* Only a simple blit, no scale, bias or other mapping. */
+ if (ctx->_ImageTransferState)
+ return false;
+
if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp,
INTEL_UPLOAD))
return false;
/* May need to create a new tree:
*/
if (!intelObj->mt) {
- intel_miptree_get_dimensions_for_image(&firstImage->base.Base,
- &width, &height, &depth);
+ intel_get_image_dims(&firstImage->base.Base, &width, &height, &depth);
perf_debug("Creating new %s %dx%dx%d %d-level miptree to handle "
"finalized texture miptree.\n",
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
-#define HAVE_TRI_STRIP_1 0
#define HAVE_TRI_FANS 1
#define HAVE_QUADS 0
#define HAVE_QUAD_STRIPS 0
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
-#define HAVE_TRI_STRIP_1 0
#define HAVE_TRI_FANS 1
#define HAVE_QUADS 1
#define HAVE_QUAD_STRIPS 1
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
-#define HAVE_TRI_STRIP_1 0
#define HAVE_TRI_FANS 1
-#define HAVE_QUADS 0
-#define HAVE_QUAD_STRIPS 0
#define HAVE_POLYGONS 0
/* \todo: is it possible to make "ELTS" work with t_vertex code ? */
#define HAVE_ELTS 0
start, start+length);
if (length)
- tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, prim );
+ tab[prim & PRIM_MODE_MASK](ctx, start, length, prim);
}
tnl->Driver.Render.Finish( ctx );
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
-#define HAVE_TRI_STRIP_1 0
#define HAVE_TRI_FANS 1
#define HAVE_QUADS 0
#define HAVE_QUAD_STRIPS 0
return GL_TRUE;
}
+
+/**
+ * Shared precondition check for the compute dispatch entry points.
+ *
+ * Records GL_INVALID_OPERATION and fails when the context does not have
+ * compute shaders, or when the current compute program is missing or has
+ * no linked compute stage.
+ *
+ * \param function  GL entry point name inserted into the error message
+ * \return true if dispatching is allowed, false if an error was recorded
+ */
+static bool
+check_valid_to_compute(struct gl_context *ctx, const char *function)
+{
+   struct gl_shader_program *cs_prog;
+
+   if (!_mesa_has_compute_shaders(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "unsupported function (%s) called",
+                  function);
+      return false;
+   }
+
+   cs_prog = ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE];
+   if (!cs_prog || !cs_prog->_LinkedShaders[MESA_SHADER_COMPUTE]) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(no active compute shader)",
+                  function);
+      return false;
+   }
+
+   return true;
+}
+
+/**
+ * Validate a glDispatchCompute call.
+ *
+ * \param num_groups  array of three work group counts (x, y, z)
+ * \return GL_TRUE if the dispatch may proceed; GL_FALSE after recording
+ *         GL_INVALID_OPERATION (compute not usable) or GL_INVALID_VALUE
+ *         (a count exceeds Const.MaxComputeWorkGroupCount for its axis)
+ */
+GLboolean
+_mesa_validate_DispatchCompute(struct gl_context *ctx,
+                               const GLuint *num_groups)
+{
+   int axis;
+
+   FLUSH_CURRENT(ctx, 0);
+
+   if (!check_valid_to_compute(ctx, "glDispatchCompute"))
+      return GL_FALSE;
+
+   for (axis = 0; axis < 3; axis++) {
+      if (num_groups[axis] <= ctx->Const.MaxComputeWorkGroupCount[axis])
+         continue;
+
+      /* 'x' + axis yields 'x', 'y' or 'z' for the offending dimension. */
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glDispatchCompute(num_groups_%c)", 'x' + axis);
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+/**
+ * Validate the state and arguments of an indirect compute dispatch.
+ *
+ * \param indirect  byte offset into the buffer bound to
+ *                  DISPATCH_INDIRECT_BUFFER
+ * \param size      number of bytes the command sources starting at
+ *                  \p indirect
+ * \param name      GL entry point name used in the error messages
+ * \return GL_TRUE if the dispatch may proceed; otherwise GL_FALSE with
+ *         GL_INVALID_OPERATION recorded on the context
+ */
+static GLboolean
+valid_dispatch_indirect(struct gl_context *ctx,
+                        GLintptr indirect,
+                        GLsizei size, const char *name)
+{
+   GLuint64 end;
+
+   if (!check_valid_to_compute(ctx, name))
+      return GL_FALSE;
+
+   /* From the ARB_compute_shader specification:
+    *
+    * "An INVALID_OPERATION error is generated [...] if <indirect> is less
+    *  than zero or not a multiple of the size, in basic machine units, of
+    *  uint."
+    */
+   if ((GLintptr)indirect & (sizeof(GLuint) - 1)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(indirect is not aligned)", name);
+      return GL_FALSE;
+   }
+
+   if ((GLintptr)indirect < 0) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(indirect is less than zero)", name);
+      return GL_FALSE;
+   }
+
+   if (!_mesa_is_bufferobj(ctx->DispatchIndirectBuffer)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s: no buffer bound to DISPATCH_INDIRECT_BUFFER", name);
+      return GL_FALSE;
+   }
+
+   if (_mesa_check_disallowed_mapping(ctx->DispatchIndirectBuffer)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(DISPATCH_INDIRECT_BUFFER is mapped)", name);
+      return GL_FALSE;
+   }
+
+   /* From the ARB_compute_shader specification:
+    *
+    * "An INVALID_OPERATION error is generated if this command sources data
+    *  beyond the end of the buffer object [...]"
+    *
+    * indirect is known to be non-negative here.  The end offset is computed
+    * in 64-bit unsigned arithmetic so that a large offset cannot overflow
+    * the signed GLintptr addition and slip past this bounds check.
+    */
+   end = (GLuint64) indirect + (GLuint64) size;
+   if ((GLuint64) ctx->DispatchIndirectBuffer->Size < end) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(DISPATCH_INDIRECT_BUFFER too small)", name);
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+/**
+ * Validate a glDispatchComputeIndirect call.
+ *
+ * \param indirect  byte offset of the dispatch command in the buffer bound
+ *                  to DISPATCH_INDIRECT_BUFFER
+ * \return the result of valid_dispatch_indirect() for a command of
+ *         3 * sizeof(GLuint) bytes
+ */
+GLboolean
+_mesa_validate_DispatchComputeIndirect(struct gl_context *ctx,
+                                       GLintptr indirect)
+{
+   /* Bytes sourced from the indirect buffer: three GLuint values. */
+   const GLsizei cmd_size = 3 * sizeof(GLuint);
+
+   FLUSH_CURRENT(ctx, 0);
+
+   return valid_dispatch_indirect(ctx, indirect, cmd_size,
+                                  "glDispatchComputeIndirect");
+}
GLsizei primcount,
GLsizei stride);
+extern GLboolean
+_mesa_validate_DispatchCompute(struct gl_context *ctx,
+ const GLuint *num_groups);
+
+extern GLboolean
+_mesa_validate_DispatchComputeIndirect(struct gl_context *ctx,
+ GLintptr indirect);
+
#endif
mtx_lock(&oldObj->Mutex);
assert(oldObj->RefCount > 0);
oldObj->RefCount--;
-#if 0
- printf("ArrayObj %p %d DECR to %d\n",
- (void *) oldObj, oldObj->Name, oldObj->RefCount);
-#endif
deleteFlag = (oldObj->RefCount == 0);
mtx_unlock(&oldObj->Mutex);
}
else {
vao->RefCount++;
-#if 0
- printf("ArrayObj %p %d INCR to %d\n",
- (void *) vao, vao->Name, vao->RefCount);
-#endif
*ptr = vao;
}
mtx_unlock(&vao->Mutex);
#include "enums.h"
#include "hash.h"
#include "imports.h"
-#include "image.h"
#include "context.h"
#include "bufferobj.h"
-#include "fbobject.h"
#include "mtypes.h"
-#include "texobj.h"
#include "teximage.h"
#include "glformats.h"
#include "texstore.h"
#include "transformfeedback.h"
-#include "dispatch.h"
/* Debug flags */
return &ctx->DrawIndirectBuffer;
}
break;
+ case GL_DISPATCH_INDIRECT_BUFFER:
+ if (_mesa_has_compute_shaders(ctx)) {
+ return &ctx->DispatchIndirectBuffer;
+ }
+ break;
case GL_TRANSFORM_FEEDBACK_BUFFER:
if (ctx->Extensions.EXT_transform_feedback) {
return &ctx->TransformFeedback.CurrentBuffer;
*/
static bool
buffer_object_subdata_range_good(struct gl_context *ctx,
- struct gl_buffer_object *bufObj,
+ const struct gl_buffer_object *bufObj,
GLintptr offset, GLsizeiptr size,
bool mappedRange, const char *caller)
{
/**
* Allocate and initialize a new buffer object.
- *
+ *
* Default callback for the \c dd_function_table::NewBufferObject() hook.
*/
static struct gl_buffer_object *
/**
* Delete a buffer object.
- *
+ *
* Default callback for the \c dd_function_table::DeleteBuffer() hook.
*/
static void
mtx_lock(&oldObj->Mutex);
assert(oldObj->RefCount > 0);
oldObj->RefCount--;
-#if 0
- printf("BufferObj %p %d DECR to %d\n",
- (void *) oldObj, oldObj->Name, oldObj->RefCount);
-#endif
deleteFlag = (oldObj->RefCount == 0);
mtx_unlock(&oldObj->Mutex);
if (deleteFlag) {
-
- /* some sanity checking: don't delete a buffer still in use */
-#if 0
- /* unfortunately, these tests are invalid during context tear-down */
- assert(ctx->Array.ArrayBufferObj != bufObj);
- assert(ctx->Array.VAO->IndexBufferObj != bufObj);
- assert(ctx->Array.VAO->Vertex.BufferObj != bufObj);
-#endif
-
assert(ctx->Driver.DeleteBuffer);
ctx->Driver.DeleteBuffer(ctx, oldObj);
}
}
else {
bufObj->RefCount++;
-#if 0
- printf("BufferObj %p %d INCR to %d\n",
- (void *) bufObj, bufObj->Name, bufObj->RefCount);
-#endif
*ptr = bufObj;
}
mtx_unlock(&bufObj->Mutex);
(const struct gl_buffer_object *) data;
GLuint *total = (GLuint *) userData;
+ (void) key;
*total = *total + bufObj->Size;
}
(void) offset;
(void) length;
(void) obj;
+ (void) index;
/* no-op */
}
_mesa_reference_buffer_object(ctx, &ctx->DrawIndirectBuffer,
ctx->Shared->NullBufferObj);
+ _mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer,
+ ctx->Shared->NullBufferObj);
+
for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
_mesa_reference_buffer_object(ctx,
&ctx->UniformBufferBindings[i].BufferObject,
_mesa_reference_buffer_object(ctx, &ctx->DrawIndirectBuffer, NULL);
+ _mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer, NULL);
+
for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
_mesa_reference_buffer_object(ctx,
&ctx->UniformBufferBindings[i].BufferObject,
bool
_mesa_handle_bind_buffer_gen(struct gl_context *ctx,
- GLenum target,
GLuint buffer,
struct gl_buffer_object **buf_handle,
const char *caller)
{
struct gl_buffer_object *buf = *buf_handle;
- if (!buf && ctx->API == API_OPENGL_CORE) {
+ if (!buf && (ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx))) {
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(non-gen name)", caller);
return false;
}
else {
/* non-default buffer object */
newBufObj = _mesa_lookup_bufferobj(ctx, buffer);
- if (!_mesa_handle_bind_buffer_gen(ctx, target, buffer,
+ if (!_mesa_handle_bind_buffer_gen(ctx, buffer,
&newBufObj, "glBindBuffer"))
return;
}
-
+
/* bind new buffer */
_mesa_reference_buffer_object(ctx, bindTarget, newBufObj);
}
/**
* Update the default buffer objects in the given context to reference those
- * specified in the shared state and release those referencing the old
+ * specified in the shared state and release those referencing the old
* shared state.
*/
void
/**
* Delete a set of buffer objects.
- *
+ *
* \param n Number of buffer objects to delete.
* \param ids Array of \c n buffer object IDs.
*/
_mesa_BindBuffer( GL_DRAW_INDIRECT_BUFFER, 0 );
}
+ /* unbind ARB_compute_shader binding point */
+ if (ctx->DispatchIndirectBuffer == bufObj) {
+ _mesa_BindBuffer(GL_DISPATCH_INDIRECT_BUFFER, 0);
+ }
+
/* unbind ARB_copy_buffer binding points */
if (ctx->CopyReadBuffer == bufObj) {
_mesa_BindBuffer( GL_COPY_READ_BUFFER, 0 );
/**
* Determine if ID is the name of a buffer object.
- *
+ *
* \param id ID of the potential buffer object.
- * \return \c GL_TRUE if \c id is the name of a buffer object,
+ * \return \c GL_TRUE if \c id is the name of a buffer object,
* \c GL_FALSE otherwise.
*/
GLboolean GLAPIENTRY
}
-static GLenum
-buffer_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
- struct gl_buffer_object *bufObj;
- GLenum retval;
-
- bufObj = _mesa_lookup_bufferobj(ctx, name);
- if (!bufObj) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glObjectPurgeable(name = 0x%x)", name);
- return 0;
- }
- if (!_mesa_is_bufferobj(bufObj)) {
- _mesa_error(ctx, GL_INVALID_OPERATION, "glObjectPurgeable(buffer 0)" );
- return 0;
- }
-
- if (bufObj->Purgeable) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glObjectPurgeable(name = 0x%x) is already purgeable", name);
- return GL_VOLATILE_APPLE;
- }
-
- bufObj->Purgeable = GL_TRUE;
-
- retval = GL_VOLATILE_APPLE;
- if (ctx->Driver.BufferObjectPurgeable)
- retval = ctx->Driver.BufferObjectPurgeable(ctx, bufObj, option);
-
- return retval;
-}
-
-
-static GLenum
-renderbuffer_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
- struct gl_renderbuffer *bufObj;
- GLenum retval;
-
- bufObj = _mesa_lookup_renderbuffer(ctx, name);
- if (!bufObj) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glObjectUnpurgeable(name = 0x%x)", name);
- return 0;
- }
-
- if (bufObj->Purgeable) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glObjectPurgeable(name = 0x%x) is already purgeable", name);
- return GL_VOLATILE_APPLE;
- }
-
- bufObj->Purgeable = GL_TRUE;
-
- retval = GL_VOLATILE_APPLE;
- if (ctx->Driver.RenderObjectPurgeable)
- retval = ctx->Driver.RenderObjectPurgeable(ctx, bufObj, option);
-
- return retval;
-}
-
-
-static GLenum
-texture_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
- struct gl_texture_object *bufObj;
- GLenum retval;
-
- bufObj = _mesa_lookup_texture(ctx, name);
- if (!bufObj) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glObjectPurgeable(name = 0x%x)", name);
- return 0;
- }
-
- if (bufObj->Purgeable) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glObjectPurgeable(name = 0x%x) is already purgeable", name);
- return GL_VOLATILE_APPLE;
- }
-
- bufObj->Purgeable = GL_TRUE;
-
- retval = GL_VOLATILE_APPLE;
- if (ctx->Driver.TextureObjectPurgeable)
- retval = ctx->Driver.TextureObjectPurgeable(ctx, bufObj, option);
-
- return retval;
-}
-
-
-GLenum GLAPIENTRY
-_mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option)
-{
- GLenum retval;
-
- GET_CURRENT_CONTEXT(ctx);
- ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
-
- if (name == 0) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glObjectPurgeable(name = 0x%x)", name);
- return 0;
- }
-
- switch (option) {
- case GL_VOLATILE_APPLE:
- case GL_RELEASED_APPLE:
- /* legal */
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM,
- "glObjectPurgeable(name = 0x%x) invalid option: %d",
- name, option);
- return 0;
- }
-
- switch (objectType) {
- case GL_TEXTURE:
- retval = texture_object_purgeable(ctx, name, option);
- break;
- case GL_RENDERBUFFER_EXT:
- retval = renderbuffer_purgeable(ctx, name, option);
- break;
- case GL_BUFFER_OBJECT_APPLE:
- retval = buffer_object_purgeable(ctx, name, option);
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM,
- "glObjectPurgeable(name = 0x%x) invalid type: %d",
- name, objectType);
- return 0;
- }
-
- /* In strict conformance to the spec, we must only return VOLATILE when
- * when passed the VOLATILE option. Madness.
- *
- * XXX First fix the spec, then fix me.
- */
- return option == GL_VOLATILE_APPLE ? GL_VOLATILE_APPLE : retval;
-}
-
-
-static GLenum
-buffer_object_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
- struct gl_buffer_object *bufObj;
- GLenum retval;
-
- bufObj = _mesa_lookup_bufferobj(ctx, name);
- if (!bufObj) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glObjectUnpurgeable(name = 0x%x)", name);
- return 0;
- }
-
- if (! bufObj->Purgeable) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glObjectUnpurgeable(name = 0x%x) object is "
- " already \"unpurged\"", name);
- return 0;
- }
-
- bufObj->Purgeable = GL_FALSE;
-
- retval = option;
- if (ctx->Driver.BufferObjectUnpurgeable)
- retval = ctx->Driver.BufferObjectUnpurgeable(ctx, bufObj, option);
-
- return retval;
-}
-
-
-static GLenum
-renderbuffer_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
- struct gl_renderbuffer *bufObj;
- GLenum retval;
-
- bufObj = _mesa_lookup_renderbuffer(ctx, name);
- if (!bufObj) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glObjectUnpurgeable(name = 0x%x)", name);
- return 0;
- }
-
- if (! bufObj->Purgeable) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glObjectUnpurgeable(name = 0x%x) object is "
- " already \"unpurged\"", name);
- return 0;
- }
-
- bufObj->Purgeable = GL_FALSE;
-
- retval = option;
- if (ctx->Driver.RenderObjectUnpurgeable)
- retval = ctx->Driver.RenderObjectUnpurgeable(ctx, bufObj, option);
-
- return retval;
-}
-
-
-static GLenum
-texture_object_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
-{
- struct gl_texture_object *bufObj;
- GLenum retval;
-
- bufObj = _mesa_lookup_texture(ctx, name);
- if (!bufObj) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glObjectUnpurgeable(name = 0x%x)", name);
- return 0;
- }
-
- if (! bufObj->Purgeable) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glObjectUnpurgeable(name = 0x%x) object is"
- " already \"unpurged\"", name);
- return 0;
- }
-
- bufObj->Purgeable = GL_FALSE;
-
- retval = option;
- if (ctx->Driver.TextureObjectUnpurgeable)
- retval = ctx->Driver.TextureObjectUnpurgeable(ctx, bufObj, option);
-
- return retval;
-}
-
-
-GLenum GLAPIENTRY
-_mesa_ObjectUnpurgeableAPPLE(GLenum objectType, GLuint name, GLenum option)
-{
- GET_CURRENT_CONTEXT(ctx);
- ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
-
- if (name == 0) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glObjectUnpurgeable(name = 0x%x)", name);
- return 0;
- }
-
- switch (option) {
- case GL_RETAINED_APPLE:
- case GL_UNDEFINED_APPLE:
- /* legal */
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM,
- "glObjectUnpurgeable(name = 0x%x) invalid option: %d",
- name, option);
- return 0;
- }
-
- switch (objectType) {
- case GL_BUFFER_OBJECT_APPLE:
- return buffer_object_unpurgeable(ctx, name, option);
- case GL_TEXTURE:
- return texture_object_unpurgeable(ctx, name, option);
- case GL_RENDERBUFFER_EXT:
- return renderbuffer_unpurgeable(ctx, name, option);
- default:
- _mesa_error(ctx, GL_INVALID_ENUM,
- "glObjectUnpurgeable(name = 0x%x) invalid type: %d",
- name, objectType);
- return 0;
- }
-}
-
-
-static void
-get_buffer_object_parameteriv(struct gl_context *ctx, GLuint name,
- GLenum pname, GLint *params)
-{
- struct gl_buffer_object *bufObj = _mesa_lookup_bufferobj(ctx, name);
- if (!bufObj) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glGetObjectParameteriv(name = 0x%x) invalid object", name);
- return;
- }
-
- switch (pname) {
- case GL_PURGEABLE_APPLE:
- *params = bufObj->Purgeable;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM,
- "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
- name, pname);
- break;
- }
-}
-
-
-static void
-get_renderbuffer_parameteriv(struct gl_context *ctx, GLuint name,
- GLenum pname, GLint *params)
-{
- struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name);
- if (!rb) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glObjectUnpurgeable(name = 0x%x)", name);
- return;
- }
-
- switch (pname) {
- case GL_PURGEABLE_APPLE:
- *params = rb->Purgeable;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM,
- "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
- name, pname);
- break;
- }
-}
-
-
-static void
-get_texture_object_parameteriv(struct gl_context *ctx, GLuint name,
- GLenum pname, GLint *params)
-{
- struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
- if (!texObj) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glObjectUnpurgeable(name = 0x%x)", name);
- return;
- }
-
- switch (pname) {
- case GL_PURGEABLE_APPLE:
- *params = texObj->Purgeable;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM,
- "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
- name, pname);
- break;
- }
-}
-
-
-void GLAPIENTRY
-_mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name, GLenum pname,
- GLint *params)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (name == 0) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glGetObjectParameteriv(name = 0x%x)", name);
- return;
- }
-
- switch (objectType) {
- case GL_TEXTURE:
- get_texture_object_parameteriv(ctx, name, pname, params);
- break;
- case GL_BUFFER_OBJECT_APPLE:
- get_buffer_object_parameteriv(ctx, name, pname, params);
- break;
- case GL_RENDERBUFFER_EXT:
- get_renderbuffer_parameteriv(ctx, name, pname, params);
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM,
- "glGetObjectParameteriv(name = 0x%x) invalid type: %d",
- name, objectType);
- }
-}
-
/**
* Binds a buffer object to a uniform buffer binding point.
*
} else {
bufObj = _mesa_lookup_bufferobj(ctx, buffer);
}
- if (!_mesa_handle_bind_buffer_gen(ctx, target, buffer,
+ if (!_mesa_handle_bind_buffer_gen(ctx, buffer,
&bufObj, "glBindBufferRange"))
return;
} else {
bufObj = _mesa_lookup_bufferobj(ctx, buffer);
}
- if (!_mesa_handle_bind_buffer_gen(ctx, target, buffer,
+ if (!_mesa_handle_bind_buffer_gen(ctx, buffer,
&bufObj, "glBindBufferBase"))
return;
extern bool
_mesa_handle_bind_buffer_gen(struct gl_context *ctx,
- GLenum target,
GLuint buffer,
struct gl_buffer_object **buf_handle,
const char *caller);
_mesa_FlushMappedNamedBufferRange(GLuint buffer, GLintptr offset,
GLsizeiptr length);
-GLenum GLAPIENTRY
-_mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option);
-
-GLenum GLAPIENTRY
-_mesa_ObjectUnpurgeableAPPLE(GLenum objectType, GLuint name, GLenum option);
-
-void GLAPIENTRY
-_mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name,
- GLenum pname, GLint* params);
-
void GLAPIENTRY
_mesa_BindBufferRange(GLenum target, GLuint index,
GLuint buffer, GLintptr offset, GLsizeiptr size);
#include "glheader.h"
#include "compute.h"
#include "context.h"
+#include "api_validate.h"
void GLAPIENTRY
_mesa_DispatchCompute(GLuint num_groups_x,
GLuint num_groups_z)
{
GET_CURRENT_CONTEXT(ctx);
- int i;
- struct gl_shader_program *prog;
const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z };
- if (ctx->Extensions.ARB_compute_shader) {
- for (i = 0; i < 3; i++) {
- if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glDispatchCompute(num_groups_%c)", 'x' + i);
- return;
- }
- }
- if (!_mesa_valid_to_render(ctx, "glDispatchCompute"))
- return;
- prog = ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE];
- if (prog == NULL || prog->_LinkedShaders[MESA_SHADER_COMPUTE] == NULL) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glDispatchCompute(no active compute shader)");
- return;
- }
- ctx->Driver.DispatchCompute(ctx, num_groups);
- } else {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "unsupported function (glDispatchCompute) called");
- }
+   /* The group counts are GLuint, so log them with an unsigned conversion. */
+   if (MESA_VERBOSE & VERBOSE_API)
+      _mesa_debug(ctx, "glDispatchCompute(%u, %u, %u)\n",
+                  num_groups_x, num_groups_y, num_groups_z);
+
+   /* Validation records any GL error itself; just bail out on failure. */
+   if (!_mesa_validate_DispatchCompute(ctx, num_groups))
+      return;
+
+   ctx->Driver.DispatchCompute(ctx, num_groups);
}
extern void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
- if (ctx->Extensions.ARB_compute_shader) {
- assert(!"TODO");
- } else {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "unsupported function (glDispatchComputeIndirect) called");
- }
+   /* indirect is handed on as a GLintptr, which is not an int; cast it so
+    * the variadic argument matches the conversion specifier.
+    */
+   if (MESA_VERBOSE & VERBOSE_API)
+      _mesa_debug(ctx, "glDispatchComputeIndirect(%ld)\n", (long) indirect);
+
+   /* Validation records any GL error itself; just bail out on failure. */
+   if (!_mesa_validate_DispatchComputeIndirect(ctx, indirect))
+      return;
+
+   ctx->Driver.DispatchComputeIndirect(ctx, indirect);
}
#define MAX_PROGRAM_LOCAL_PARAMS 4096
#define MAX_UNIFORMS 4096
#define MAX_UNIFORM_BUFFERS 15 /* + 1 default uniform buffer */
-#define MAX_SHADER_STORAGE_BUFFERS 7 /* + 1 default shader storage buffer */
+#define MAX_SHADER_STORAGE_BUFFERS 16
/* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
#define MAX_COMBINED_UNIFORM_BUFFERS (MAX_UNIFORM_BUFFERS * 6)
#define MAX_COMBINED_SHADER_STORAGE_BUFFERS (MAX_SHADER_STORAGE_BUFFERS * 6)
};
/**
- * Prepare the source or destination resource, including:
- * - Error checking
- * - Creating texture wrappers for renderbuffers
+ * Prepare the source or destination resource. This involves error
+ * checking and returning the relevant gl_texture_image or gl_renderbuffer.
+ * Note that one of the resulting tex_image or renderbuffer pointers will be
+ * NULL and the other will be non-null.
+ *
* \param name the texture or renderbuffer name
- * \param target GL_TEXTURE target or GL_RENDERBUFFER. For the later, will
- * be changed to a compatible GL_TEXTURE target.
+ * \param target One of the GL_TEXTURE_x targets or GL_RENDERBUFFER
* \param level mipmap level
- * \param tex_obj returns a pointer to a texture object
+ * \param z src or dest Z
+ * \param depth number of slices/faces/layers to copy
* \param tex_image returns a pointer to a texture image
- * \param tmp_tex returns temporary texture object name
+ * \param renderbuffer returns a pointer to a renderbuffer
* \return true if success, false if error
*/
static bool
-prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
- struct gl_texture_object **tex_obj,
- struct gl_texture_image **tex_image, GLuint *tmp_tex,
+prepare_target(struct gl_context *ctx, GLuint name, GLenum target,
+ int level, int z, int depth,
+ struct gl_texture_image **tex_image,
+ struct gl_renderbuffer **renderbuffer,
+ mesa_format *format,
+ GLenum *internalFormat,
const char *dbg_prefix)
{
if (name == 0) {
* - is TEXTURE_BUFFER, or
* - is one of the cubemap face selectors described in table 3.17,
*/
- switch (*target) {
+ switch (target) {
case GL_RENDERBUFFER:
/* Not a texture target, but valid */
case GL_TEXTURE_1D:
default:
_mesa_error(ctx, GL_INVALID_ENUM,
"glCopyImageSubData(%sTarget = %s)", dbg_prefix,
- _mesa_enum_to_string(*target));
+ _mesa_enum_to_string(target));
return false;
}
- if (*target == GL_RENDERBUFFER) {
+ if (target == GL_RENDERBUFFER) {
struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name);
+
if (!rb) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glCopyImageSubData(%sName = %u)", dbg_prefix, name);
return false;
}
- if (rb->NumSamples > 1)
- *target = GL_TEXTURE_2D_MULTISAMPLE;
- else
- *target = GL_TEXTURE_2D;
-
- *tmp_tex = 0;
- _mesa_GenTextures(1, tmp_tex);
- if (*tmp_tex == 0)
- return false; /* Error already set by GenTextures */
-
- _mesa_BindTexture(*target, *tmp_tex);
- *tex_obj = _mesa_lookup_texture(ctx, *tmp_tex);
- *tex_image = _mesa_get_tex_image(ctx, *tex_obj, *target, 0);
-
- if (!ctx->Driver.BindRenderbufferTexImage(ctx, rb, *tex_image)) {
- _mesa_problem(ctx, "Failed to create texture from renderbuffer");
- return false;
- }
-
- if (ctx->Driver.FinishRenderTexture && !rb->NeedsFinishRenderTexture) {
- rb->NeedsFinishRenderTexture = true;
- ctx->Driver.FinishRenderTexture(ctx, rb);
- }
+ *renderbuffer = rb;
+ *format = rb->Format;
+ *internalFormat = rb->InternalFormat;
+ *tex_image = NULL;
} else {
- *tex_obj = _mesa_lookup_texture(ctx, name);
- if (!*tex_obj) {
+ struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
+
+ if (!texObj) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glCopyImageSubData(%sName = %u)", dbg_prefix, name);
return false;
}
- _mesa_test_texobj_completeness(ctx, *tex_obj);
- if (!(*tex_obj)->_BaseComplete ||
- (level != 0 && !(*tex_obj)->_MipmapComplete)) {
+ _mesa_test_texobj_completeness(ctx, texObj);
+ if (!texObj->_BaseComplete ||
+ (level != 0 && !texObj->_MipmapComplete)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glCopyImageSubData(%sName incomplete)", dbg_prefix);
return false;
}
- if ((*tex_obj)->Target != *target) {
- _mesa_error(ctx, GL_INVALID_ENUM,
+ /* Note that target will not be a cube face name */
+ if (texObj->Target != target) {
+ /*
+ * From GL_ARB_copy_image specification:
+ * "INVALID_VALUE is generated if either <srcName> or <dstName> does
+ * not correspond to a valid renderbuffer or texture object according
+ * to the corresponding target parameter."
+ */
+ _mesa_error(ctx, GL_INVALID_VALUE,
"glCopyImageSubData(%sTarget = %s)", dbg_prefix,
- _mesa_enum_to_string(*target));
+ _mesa_enum_to_string(target));
return false;
}
return false;
}
- *tex_image = _mesa_select_tex_image(*tex_obj, *target, level);
+ if (target == GL_TEXTURE_CUBE_MAP) {
+ int i;
+
+ assert(z < MAX_FACES); /* should have been caught earlier */
+
+ /* make sure all the cube faces are present */
+ for (i = 0; i < depth; i++) {
+ if (!texObj->Image[z+i][level]) {
+ /* missing cube face */
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glCopyImageSubData(missing cube face)");
+ return false;
+ }
+ }
+
+ *tex_image = texObj->Image[z][level];
+ }
+ else {
+ *tex_image = _mesa_select_tex_image(texObj, target, level);
+ }
+
if (!*tex_image) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glCopyImageSubData(%sLevel = %u)", dbg_prefix, level);
return false;
}
+
+ *renderbuffer = NULL;
+ *format = (*tex_image)->TexFormat;
+ *internalFormat = (*tex_image)->InternalFormat;
}
return true;
*/
static bool
check_region_bounds(struct gl_context *ctx,
+ GLenum target,
const struct gl_texture_image *tex_image,
+ const struct gl_renderbuffer *renderbuffer,
int x, int y, int z, int width, int height, int depth,
const char *dbg_prefix)
{
+ int surfWidth, surfHeight, surfDepth;
+
if (width < 0 || height < 0 || depth < 0) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glCopyImageSubData(%sWidth, %sHeight, or %sDepth is negative)",
}
/* Check X direction */
- if (x + width > tex_image->Width) {
+ if (target == GL_RENDERBUFFER) {
+ surfWidth = renderbuffer->Width;
+ }
+ else {
+ surfWidth = tex_image->Width;
+ }
+
+ if (x + width > surfWidth) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glCopyImageSubData(%sX or %sWidth exceeds image bounds)",
dbg_prefix, dbg_prefix);
}
/* Check Y direction */
- switch (tex_image->TexObject->Target) {
+ switch (target) {
+ case GL_RENDERBUFFER:
+ surfHeight = renderbuffer->Height;
+ break;
case GL_TEXTURE_1D:
case GL_TEXTURE_1D_ARRAY:
- if (y != 0 || height != 1) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glCopyImageSubData(%sY or %sHeight exceeds image bounds)",
- dbg_prefix, dbg_prefix);
- return false;
- }
+ surfHeight = 1;
break;
default:
- if (y + height > tex_image->Height) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glCopyImageSubData(%sY or %sHeight exceeds image bounds)",
- dbg_prefix, dbg_prefix);
- return false;
- }
- break;
+ surfHeight = tex_image->Height;
+ }
+
+ if (y + height > surfHeight) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "glCopyImageSubData(%sY or %sHeight exceeds image bounds)",
+ dbg_prefix, dbg_prefix);
+ return false;
}
/* Check Z direction */
- switch (tex_image->TexObject->Target) {
+ switch (target) {
+ case GL_RENDERBUFFER:
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_RECTANGLE:
- if (z != 0 || depth != 1) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
- dbg_prefix, dbg_prefix);
- return false;
- }
+ surfDepth = 1;
break;
case GL_TEXTURE_CUBE_MAP:
- if (z < 0 || z + depth > 6) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
- dbg_prefix, dbg_prefix);
- return false;
- }
+ surfDepth = 6;
break;
case GL_TEXTURE_1D_ARRAY:
- if (z < 0 || z + depth > tex_image->Height) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
- dbg_prefix, dbg_prefix);
- return false;
- }
- break;
- case GL_TEXTURE_CUBE_MAP_ARRAY:
- case GL_TEXTURE_2D_ARRAY:
- case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
- case GL_TEXTURE_3D:
- if (z < 0 || z + depth > tex_image->Depth) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
- dbg_prefix, dbg_prefix);
- return false;
- }
+ surfDepth = tex_image->Height;
break;
+ default:
+ surfDepth = tex_image->Depth;
+ }
+
+ if (z < 0 || z + depth > surfDepth) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "glCopyImageSubData(%sZ or %sDepth exceeds image bounds)",
+ dbg_prefix, dbg_prefix);
+ return false;
}
return true;
GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth)
{
GET_CURRENT_CONTEXT(ctx);
- GLuint tmpTexNames[2] = { 0, 0 };
- struct gl_texture_object *srcTexObj, *dstTexObj;
struct gl_texture_image *srcTexImage, *dstTexImage;
+ struct gl_renderbuffer *srcRenderbuffer, *dstRenderbuffer;
+ mesa_format srcFormat, dstFormat;
+ GLenum srcIntFormat, dstIntFormat;
GLuint src_bw, src_bh, dst_bw, dst_bh;
+ int dstWidth, dstHeight, dstDepth;
int i;
if (MESA_VERBOSE & VERBOSE_API)
srcX, srcY, srcZ,
dstName, _mesa_enum_to_string(dstTarget), dstLevel,
dstX, dstY, dstZ,
- srcWidth, srcHeight, srcWidth);
+ srcWidth, srcHeight, srcDepth);
if (!ctx->Extensions.ARB_copy_image) {
_mesa_error(ctx, GL_INVALID_OPERATION,
return;
}
- if (!prepare_target(ctx, srcName, &srcTarget, srcLevel,
- &srcTexObj, &srcTexImage, &tmpTexNames[0], "src"))
- goto cleanup;
+ if (!prepare_target(ctx, srcName, srcTarget, srcLevel, srcZ, srcDepth,
+ &srcTexImage, &srcRenderbuffer, &srcFormat,
+ &srcIntFormat, "src"))
+ return;
- if (!prepare_target(ctx, dstName, &dstTarget, dstLevel,
- &dstTexObj, &dstTexImage, &tmpTexNames[1], "dst"))
- goto cleanup;
+ if (!prepare_target(ctx, dstName, dstTarget, dstLevel, dstZ, srcDepth,
+ &dstTexImage, &dstRenderbuffer, &dstFormat,
+ &dstIntFormat, "dst"))
+ return;
- _mesa_get_format_block_size(srcTexImage->TexFormat, &src_bw, &src_bh);
+ _mesa_get_format_block_size(srcFormat, &src_bw, &src_bh);
if ((srcX % src_bw != 0) || (srcY % src_bh != 0) ||
(srcWidth % src_bw != 0) || (srcHeight % src_bh != 0)) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glCopyImageSubData(unaligned src rectangle)");
- goto cleanup;
+ return;
}
- _mesa_get_format_block_size(dstTexImage->TexFormat, &dst_bw, &dst_bh);
+ _mesa_get_format_block_size(dstFormat, &dst_bw, &dst_bh);
if ((dstX % dst_bw != 0) || (dstY % dst_bh != 0)) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glCopyImageSubData(unaligned dst rectangle)");
- goto cleanup;
+ return;
}
- if (!check_region_bounds(ctx, srcTexImage, srcX, srcY, srcZ,
- srcWidth, srcHeight, srcDepth, "src"))
- goto cleanup;
+ /* From the GL_ARB_copy_image spec:
+ *
+ * "The dimensions are always specified in texels, even for compressed
+ * texture formats. But it should be noted that if only one of the
+ * source and destination textures is compressed then the number of
+ * texels touched in the compressed image will be a factor of the
+ * block size larger than in the uncompressed image."
+ *
+ * So, if copying from compressed to uncompressed, the dest region is
+ * shrunk by the src block size factor. If copying from uncompressed
+ * to compressed, the dest region is grown by the dest block size factor.
+ * Note that we're passed the _source_ width, height, depth and those
+ * dimensions are never changed.
+ */
+ dstWidth = srcWidth * dst_bw / src_bw;
+ dstHeight = srcHeight * dst_bh / src_bh;
+ dstDepth = srcDepth;
+
+ if (!check_region_bounds(ctx, srcTarget, srcTexImage, srcRenderbuffer,
+ srcX, srcY, srcZ, srcWidth, srcHeight, srcDepth,
+ "src"))
+ return;
- if (!check_region_bounds(ctx, dstTexImage, dstX, dstY, dstZ,
- (srcWidth / src_bw) * dst_bw,
- (srcHeight / src_bh) * dst_bh, srcDepth, "dst"))
- goto cleanup;
+ if (!check_region_bounds(ctx, dstTarget, dstTexImage, dstRenderbuffer,
+ dstX, dstY, dstZ, dstWidth, dstHeight, dstDepth,
+ "dst"))
+ return;
- if (!copy_format_compatible(ctx, srcTexImage->InternalFormat,
- dstTexImage->InternalFormat)) {
+ if (!copy_format_compatible(ctx, srcIntFormat, dstIntFormat)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glCopyImageSubData(internalFormat mismatch)");
- goto cleanup;
+ return;
}
+ /* loop over 2D slices/faces/layers */
for (i = 0; i < srcDepth; ++i) {
- int srcNewZ, dstNewZ;
-
- if (srcTexObj->Target == GL_TEXTURE_CUBE_MAP) {
- srcTexImage = srcTexObj->Image[i + srcZ][srcLevel];
- srcNewZ = 0;
- } else {
- srcNewZ = srcZ + i;
+ int newSrcZ = srcZ + i;
+ int newDstZ = dstZ + i;
+
+ if (srcTexImage &&
+ srcTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) {
+ /* need to update srcTexImage pointer for the cube face */
+ assert(srcZ + i < MAX_FACES);
+ srcTexImage = srcTexImage->TexObject->Image[srcZ + i][srcLevel];
+ assert(srcTexImage);
+ newSrcZ = 0;
}
- if (dstTexObj->Target == GL_TEXTURE_CUBE_MAP) {
- dstTexImage = dstTexObj->Image[i + dstZ][dstLevel];
- dstNewZ = 0;
- } else {
- dstNewZ = dstZ + i;
+ if (dstTexImage &&
+ dstTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) {
+ /* need to update dstTexImage pointer for the cube face */
+ assert(dstZ + i < MAX_FACES);
+ dstTexImage = dstTexImage->TexObject->Image[dstZ + i][dstLevel];
+ assert(dstTexImage);
+ newDstZ = 0;
}
- ctx->Driver.CopyImageSubData(ctx, srcTexImage, srcX, srcY, srcNewZ,
- dstTexImage, dstX, dstY, dstNewZ,
+ ctx->Driver.CopyImageSubData(ctx,
+ srcTexImage, srcRenderbuffer,
+ srcX, srcY, newSrcZ,
+ dstTexImage, dstRenderbuffer,
+ dstX, dstY, newDstZ,
srcWidth, srcHeight);
}
-
-cleanup:
- _mesa_DeleteTextures(2, tmpTexNames);
}
struct gl_renderbuffer *rb,
GLint x, GLint y,
GLsizei width, GLsizei height);
-
/**
* Called by glCopyImageSubData().
*
- * This function should copy one 2-D slice from srcTexImage to
- * dstTexImage. If one of the textures is 3-D or is a 1-D or 2-D array
+ * This function should copy one 2-D slice from src_teximage or
+ * src_renderbuffer to dst_teximage or dst_renderbuffer. For each of the
+ * source and destination, exactly one of the teximage and renderbuffer
+ * pointers will be non-null, indicating which is the real src/dst.
+ *
+ * If one of the textures is 3-D or is a 1-D or 2-D array
* texture, this function will be called multiple times: once for each
* slice. If one of the textures is a cube map, this function will be
* called once for each face to be copied.
*/
void (*CopyImageSubData)(struct gl_context *ctx,
- struct gl_texture_image *src_image,
+ struct gl_texture_image *src_teximage,
+ struct gl_renderbuffer *src_renderbuffer,
int src_x, int src_y, int src_z,
- struct gl_texture_image *dstTexImage,
+ struct gl_texture_image *dst_teximage,
+ struct gl_renderbuffer *dst_renderbuffer,
int dst_x, int dst_y, int dst_z,
int src_width, int src_height);
*/
/*@{*/
void (*DispatchCompute)(struct gl_context *ctx, const GLuint *num_groups);
+ void (*DispatchComputeIndirect)(struct gl_context *ctx, GLintptr indirect);
/*@}*/
};
{ "GL_ARB_shader_stencil_export", o(ARB_shader_stencil_export), GL, 2009 },
{ "GL_ARB_shader_storage_buffer_object", o(ARB_shader_storage_buffer_object), GL, 2012 },
{ "GL_ARB_shader_subroutine", o(ARB_shader_subroutine), GLC, 2010 },
+ { "GL_ARB_shader_texture_image_samples", o(ARB_shader_texture_image_samples), GL, 2014 },
{ "GL_ARB_shader_texture_lod", o(ARB_shader_texture_lod), GL, 2009 },
{ "GL_ARB_shading_language_100", o(dummy_true), GLL, 2003 },
{ "GL_ARB_shading_language_packing", o(ARB_shading_language_packing), GL, 2011 },
{ "GL_EXT_texture_compression_dxt1", o(ANGLE_texture_compression_dxt), GL | ES1 | ES2, 2004 },
{ "GL_ANGLE_texture_compression_dxt3", o(ANGLE_texture_compression_dxt), GL | ES1 | ES2, 2011 },
{ "GL_ANGLE_texture_compression_dxt5", o(ANGLE_texture_compression_dxt), GL | ES1 | ES2, 2011 },
- { "GL_EXT_texture_compression_latc", o(EXT_texture_compression_latc), GL, 2006 },
+ { "GL_EXT_texture_compression_latc", o(EXT_texture_compression_latc), GLL, 2006 },
{ "GL_EXT_texture_compression_rgtc", o(ARB_texture_compression_rgtc), GL, 2004 },
{ "GL_EXT_texture_compression_s3tc", o(EXT_texture_compression_s3tc), GL, 2000 },
{ "GL_EXT_texture_cube_map", o(ARB_texture_cube_map), GLL, 2001 },
{ "GL_ATI_draw_buffers", o(dummy_true), GLL, 2002 },
{ "GL_ATI_fragment_shader", o(ATI_fragment_shader), GLL, 2001 },
{ "GL_ATI_separate_stencil", o(ATI_separate_stencil), GLL, 2006 },
- { "GL_ATI_texture_compression_3dc", o(ATI_texture_compression_3dc), GL, 2004 },
+ { "GL_ATI_texture_compression_3dc", o(ATI_texture_compression_3dc), GLL, 2004 },
{ "GL_ATI_texture_env_combine3", o(ATI_texture_env_combine3), GLL, 2002 },
{ "GL_ATI_texture_float", o(ARB_texture_float), GL, 2002 },
{ "GL_ATI_texture_mirror_once", o(ATI_texture_mirror_once), GL, 2006 },
fb->DefaultGeometry.Height = param;
break;
case GL_FRAMEBUFFER_DEFAULT_LAYERS:
+ /*
+ * According to the OpenGL ES 3.1 specification section 9.2.1, the
+ * GL_FRAMEBUFFER_DEFAULT_LAYERS parameter name is not supported.
+ */
+ if (_mesa_is_gles31(ctx)) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname);
+ break;
+ }
if (param < 0 || param > ctx->Const.MaxFramebufferLayers)
- _mesa_error(ctx, GL_INVALID_VALUE, "%s", func);
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s", func);
else
fb->DefaultGeometry.Layers = param;
break;
*params = fb->DefaultGeometry.Height;
break;
case GL_FRAMEBUFFER_DEFAULT_LAYERS:
+ /*
+ * According to the OpenGL ES 3.1 specification section 9.2.3, the
+ * GL_FRAMEBUFFER_DEFAULT_LAYERS parameter name is not supported.
+ */
+ if (_mesa_is_gles31(ctx)) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname);
+ break;
+ }
*params = fb->DefaultGeometry.Layers;
break;
case GL_FRAMEBUFFER_DEFAULT_SAMPLES:
switch (pname) {
case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_EXT:
- *params = _mesa_is_winsys_fbo(buffer)
+ /* From the OpenGL spec, 9.2. Binding and Managing Framebuffer Objects:
+ *
+ * "If the value of FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is NONE, then
+ * either no framebuffer is bound to target; or the default framebuffer
+ * is bound, attachment is DEPTH or STENCIL, and the number of depth or
+ * stencil bits, respectively, is zero."
+ */
+ *params = (_mesa_is_winsys_fbo(buffer) &&
+ ((attachment != GL_DEPTH && attachment != GL_STENCIL) ||
+ (att->Type != GL_NONE)))
? GL_FRAMEBUFFER_DEFAULT : att->Type;
return;
case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_EXT:
return 1 if fmat.has_channel('a') else 0
else:
return 0
- elif fmat.layout == 'rgtc':
+ elif fmat.layout in ('rgtc', 'latc'):
return 8 if fmat.has_channel(chan_name) else 0
elif fmat.layout in ('etc1', 'etc2'):
if fmat.name.endswith('_ALPHA1') and chan_name == 'a':
/**
* Return the layout type of the given format.
- * The return value will be one of:
- * MESA_FORMAT_LAYOUT_ARRAY
- * MESA_FORMAT_LAYOUT_PACKED
- * MESA_FORMAT_LAYOUT_S3TC
- * MESA_FORMAT_LAYOUT_RGTC
- * MESA_FORMAT_LAYOUT_FXT1
- * MESA_FORMAT_LAYOUT_ETC1
- * MESA_FORMAT_LAYOUT_ETC2
- * MESA_FORMAT_LAYOUT_BPTC
- * MESA_FORMAT_LAYOUT_ASTC
- * MESA_FORMAT_LAYOUT_OTHER
*/
extern enum mesa_format_layout
_mesa_get_format_layout(mesa_format format)
case MESA_FORMAT_R8G8B8X8_UNORM:
case MESA_FORMAT_B8G8R8X8_UNORM:
case MESA_FORMAT_X8R8G8B8_UNORM:
+ case MESA_FORMAT_A8B8G8R8_UINT:
+ case MESA_FORMAT_R8G8B8A8_UINT:
+ case MESA_FORMAT_B8G8R8A8_UINT:
+ case MESA_FORMAT_A8R8G8B8_UINT:
*datatype = GL_UNSIGNED_BYTE;
*comps = 4;
return;
return;
case MESA_FORMAT_B5G6R5_UNORM:
case MESA_FORMAT_R5G6B5_UNORM:
+ case MESA_FORMAT_B5G6R5_UINT:
+ case MESA_FORMAT_R5G6B5_UINT:
*datatype = GL_UNSIGNED_SHORT_5_6_5;
*comps = 3;
return;
case MESA_FORMAT_B4G4R4A4_UNORM:
case MESA_FORMAT_A4R4G4B4_UNORM:
case MESA_FORMAT_B4G4R4X4_UNORM:
+ case MESA_FORMAT_B4G4R4A4_UINT:
+ case MESA_FORMAT_A4R4G4B4_UINT:
*datatype = GL_UNSIGNED_SHORT_4_4_4_4;
*comps = 4;
return;
case MESA_FORMAT_B5G5R5A1_UNORM:
case MESA_FORMAT_A1R5G5B5_UNORM:
case MESA_FORMAT_B5G5R5X1_UNORM:
+ case MESA_FORMAT_B5G5R5A1_UINT:
+ case MESA_FORMAT_A1R5G5B5_UINT:
*datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
*comps = 4;
return;
return;
case MESA_FORMAT_A1B5G5R5_UNORM:
+ case MESA_FORMAT_A1B5G5R5_UINT:
*datatype = GL_UNSIGNED_SHORT_5_5_5_1;
*comps = 4;
return;
return;
case MESA_FORMAT_R3G3B2_UNORM:
+ case MESA_FORMAT_R3G3B2_UINT:
*datatype = GL_UNSIGNED_BYTE_2_3_3_REV;
*comps = 3;
return;
case MESA_FORMAT_A4B4G4R4_UNORM:
+ case MESA_FORMAT_A4B4G4R4_UINT:
*datatype = GL_UNSIGNED_SHORT_4_4_4_4;
*comps = 4;
return;
case MESA_FORMAT_R4G4B4A4_UNORM:
+ case MESA_FORMAT_R4G4B4A4_UINT:
*datatype = GL_UNSIGNED_SHORT_4_4_4_4;
*comps = 4;
return;
case MESA_FORMAT_R5G5B5A1_UNORM:
+ case MESA_FORMAT_R5G5B5A1_UINT:
*datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
*comps = 4;
return;
return;
case MESA_FORMAT_B2G3R3_UNORM:
+ case MESA_FORMAT_B2G3R3_UINT:
*datatype = GL_UNSIGNED_BYTE_3_3_2;
*comps = 3;
return;
type == GL_UNSIGNED_INT_2_10_10_10_REV &&
!swapBytes);
+ case MESA_FORMAT_B5G6R5_UINT:
+ return format == GL_RGB_INTEGER && type == GL_UNSIGNED_SHORT_5_6_5;
+
+ case MESA_FORMAT_R5G6B5_UINT:
+ return format == GL_RGB_INTEGER && type == GL_UNSIGNED_SHORT_5_6_5_REV;
+
+ case MESA_FORMAT_B2G3R3_UINT:
+ return format == GL_RGB_INTEGER && type == GL_UNSIGNED_BYTE_3_3_2;
+
+ case MESA_FORMAT_R3G3B2_UINT:
+ return format == GL_RGB_INTEGER && type == GL_UNSIGNED_BYTE_2_3_3_REV;
+
+ case MESA_FORMAT_A4B4G4R4_UINT:
+ if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4 && !swapBytes)
+ return GL_TRUE;
+
+ if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && swapBytes)
+ return GL_TRUE;
+ return GL_FALSE;
+
+ case MESA_FORMAT_R4G4B4A4_UINT:
+ if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && !swapBytes)
+ return GL_TRUE;
+
+ if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4 && swapBytes)
+ return GL_TRUE;
+
+ return GL_FALSE;
+
+ case MESA_FORMAT_B4G4R4A4_UINT:
+ return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV &&
+ !swapBytes;
+
+ case MESA_FORMAT_A4R4G4B4_UINT:
+ return GL_FALSE;
+
+ case MESA_FORMAT_A1B5G5R5_UINT:
+ return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+ !swapBytes;
+
+ case MESA_FORMAT_B5G5R5A1_UINT:
+ return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV &&
+ !swapBytes;
+
+ case MESA_FORMAT_A1R5G5B5_UINT:
+ return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+ !swapBytes;
+
+ case MESA_FORMAT_R5G5B5A1_UINT:
+ return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV;
+
+ case MESA_FORMAT_A8B8G8R8_UINT:
+ if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && !swapBytes)
+ return GL_TRUE;
+
+ if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV && swapBytes)
+ return GL_TRUE;
+ return GL_FALSE;
+
+ case MESA_FORMAT_A8R8G8B8_UINT:
+ if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 &&
+ !swapBytes)
+ return GL_TRUE;
+
+ if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+ swapBytes)
+ return GL_TRUE;
+
+ return GL_FALSE;
+
+ case MESA_FORMAT_R8G8B8A8_UINT:
+ if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+ !swapBytes)
+ return GL_TRUE;
+
+ if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
+ return GL_TRUE;
+
+ return GL_FALSE;
+
+ case MESA_FORMAT_B8G8R8A8_UINT:
+ if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+ !swapBytes)
+ return GL_TRUE;
+
+ if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
+ return GL_TRUE;
+
+ return GL_FALSE;
+
case MESA_FORMAT_R9G9B9E5_FLOAT:
return format == GL_RGB && type == GL_UNSIGNED_INT_5_9_9_9_REV &&
!swapBytes;
MESA_FORMAT_Z_FLOAT32 , array , 1, 1, f32 , , , , x___, zs
# Packed signed/unsigned non-normalized integer formats
+MESA_FORMAT_A8B8G8R8_UINT , packed, 1, 1, u8 , u8 , u8 , u8 , wzyx, rgb
+MESA_FORMAT_A8R8G8B8_UINT , packed, 1, 1, u8 , u8 , u8 , u8 , yzwx, rgb
+MESA_FORMAT_R8G8B8A8_UINT , packed, 1, 1, u8 , u8 , u8 , u8 , xyzw, rgb
+MESA_FORMAT_B8G8R8A8_UINT , packed, 1, 1, u8 , u8 , u8 , u8 , zyxw, rgb
MESA_FORMAT_B10G10R10A2_UINT , packed, 1, 1, u10 , u10 , u10 , u2 , zyxw, rgb
MESA_FORMAT_R10G10B10A2_UINT , packed, 1, 1, u10 , u10 , u10 , u2 , xyzw, rgb
MESA_FORMAT_A2B10G10R10_UINT , packed, 1, 1, u2 , u10 , u10 , u10 , wzyx, rgb
MESA_FORMAT_A2R10G10B10_UINT , packed, 1, 1, u2 , u10 , u10 , u10 , yzwx, rgb
+MESA_FORMAT_B5G6R5_UINT , packed, 1, 1, u5 , u6 , u5 , , zyx1, rgb
+MESA_FORMAT_R5G6B5_UINT , packed, 1, 1, u5 , u6 , u5 , , xyz1, rgb
+MESA_FORMAT_B2G3R3_UINT , packed, 1, 1, u2 , u3 , u3 , , zyx1, rgb
+MESA_FORMAT_R3G3B2_UINT , packed, 1, 1, u3 , u3 , u2 , , xyz1, rgb
+MESA_FORMAT_A4B4G4R4_UINT , packed, 1, 1, u4 , u4 , u4 , u4 , wzyx, rgb
+MESA_FORMAT_R4G4B4A4_UINT , packed, 1, 1, u4 , u4 , u4 , u4 , xyzw, rgb
+MESA_FORMAT_B4G4R4A4_UINT , packed, 1, 1, u4 , u4 , u4 , u4 , zyxw, rgb
+MESA_FORMAT_A4R4G4B4_UINT , packed, 1, 1, u4 , u4 , u4 , u4 , yzwx, rgb
+MESA_FORMAT_A1B5G5R5_UINT , packed, 1, 1, u1 , u5 , u5 , u5 , wzyx, rgb
+MESA_FORMAT_B5G5R5A1_UINT , packed, 1, 1, u5 , u5 , u5 , u1 , zyxw, rgb
+MESA_FORMAT_A1R5G5B5_UINT , packed, 1, 1, u1 , u5 , u5 , u5 , yzwx, rgb
+MESA_FORMAT_R5G5B5A1_UINT , packed, 1, 1, u5 , u5 , u5 , u1 , xyzw, rgb
# Array signed/unsigned non-normalized integer formats
MESA_FORMAT_A_UINT8 , array , 1, 1, u8 , , , , 000x, rgb
MESA_FORMAT_RG_RGTC2_SNORM , rgtc , 4, 4, x128, , , , xy01, rgb
# LATC1/2 compressed formats
-MESA_FORMAT_L_LATC1_UNORM , rgtc , 4, 4, x64 , , , , xxx1, rgb
-MESA_FORMAT_L_LATC1_SNORM , rgtc , 4, 4, x64 , , , , xxx1, rgb
-MESA_FORMAT_LA_LATC2_UNORM , rgtc , 4, 4, x128, , , , xxxy, rgb
-MESA_FORMAT_LA_LATC2_SNORM , rgtc , 4, 4, x128, , , , xxxy, rgb
+MESA_FORMAT_L_LATC1_UNORM , latc , 4, 4, x64 , , , , xxx1, rgb
+MESA_FORMAT_L_LATC1_SNORM , latc , 4, 4, x64 , , , , xxx1, rgb
+MESA_FORMAT_LA_LATC2_UNORM , latc , 4, 4, x128, , , , xxxy, rgb
+MESA_FORMAT_LA_LATC2_SNORM , latc , 4, 4, x128, , , , xxxy, rgb
# ETC1/2 compressed formats
MESA_FORMAT_ETC1_RGB8 , etc1 , 4, 4, x64 , , , , xyz1, rgb
MESA_FORMAT_LAYOUT_PACKED,
MESA_FORMAT_LAYOUT_S3TC,
MESA_FORMAT_LAYOUT_RGTC,
+ MESA_FORMAT_LAYOUT_LATC,
MESA_FORMAT_LAYOUT_FXT1,
MESA_FORMAT_LAYOUT_ETC1,
MESA_FORMAT_LAYOUT_ETC2,
MESA_FORMAT_Z_FLOAT32,
/* Packed signed/unsigned non-normalized integer formats */
+
+ MESA_FORMAT_A8B8G8R8_UINT, /* RRRR RRRR GGGG GGGG BBBB BBBB AAAA AAAA */
+ MESA_FORMAT_A8R8G8B8_UINT, /* BBBB BBBB GGGG GGGG RRRR RRRR AAAA AAAA */
+ MESA_FORMAT_R8G8B8A8_UINT, /* AAAA AAAA BBBB BBBB GGGG GGGG RRRR RRRR */
+ MESA_FORMAT_B8G8R8A8_UINT, /* AAAA AAAA RRRR RRRR GGGG GGGG BBBB BBBB */
MESA_FORMAT_B10G10R10A2_UINT, /* AARR RRRR RRRR GGGG GGGG GGBB BBBB BBBB */
MESA_FORMAT_R10G10B10A2_UINT, /* AABB BBBB BBBB GGGG GGGG GGRR RRRR RRRR */
MESA_FORMAT_A2B10G10R10_UINT, /* RRRR RRRR RRGG GGGG GGGG BBBB BBBB BBAA */
MESA_FORMAT_A2R10G10B10_UINT, /* BBBB BBBB BBGG GGGG GGGG RRRR RRRR RRAA */
+ MESA_FORMAT_B5G6R5_UINT, /* RRRR RGGG GGGB BBBB */
+ MESA_FORMAT_R5G6B5_UINT, /* BBBB BGGG GGGR RRRR */
+ MESA_FORMAT_B2G3R3_UINT, /* RRRG GGBB */
+ MESA_FORMAT_R3G3B2_UINT, /* BBGG GRRR */
+ MESA_FORMAT_A4B4G4R4_UINT, /* RRRR GGGG BBBB AAAA */
+ MESA_FORMAT_R4G4B4A4_UINT, /* AAAA BBBB GGGG RRRR */
+ MESA_FORMAT_B4G4R4A4_UINT, /* AAAA RRRR GGGG BBBB */
+ MESA_FORMAT_A4R4G4B4_UINT, /* BBBB GGGG RRRR AAAA */
+ MESA_FORMAT_A1B5G5R5_UINT, /* RRRR RGGG GGBB BBBA */
+ MESA_FORMAT_B5G5R5A1_UINT, /* ARRR RRGG GGGB BBBB */
+ MESA_FORMAT_A1R5G5B5_UINT, /* BBBB BGGG GGRR RRRA */
+ MESA_FORMAT_R5G5B5A1_UINT, /* ABBB BBGG GGGR RRRR */
/* Array signed/unsigned non-normalized integer formats */
MESA_FORMAT_A_UINT8,
EXTRA_END
};
+static const int extra_ARB_shader_storage_buffer_object_es31[] = {
+ EXT(ARB_shader_storage_buffer_object),
+ EXTRA_API_ES31,
+ EXTRA_END
+};
+
EXTRA_EXT(ARB_texture_cube_map);
EXTRA_EXT(EXT_texture_array);
EXTRA_EXT(NV_fog_distance);
EXTRA_EXT(ARB_framebuffer_no_attachments);
EXTRA_EXT(ARB_tessellation_shader);
EXTRA_EXT(ARB_shader_subroutine);
+EXTRA_EXT(ARB_shader_storage_buffer_object);
static const int
extra_ARB_color_buffer_float_or_glcore[] = {
case GL_UNIFORM_BUFFER_BINDING:
v->value_int = ctx->UniformBuffer->Name;
break;
+ /* GL_ARB_shader_storage_buffer_object */
+ case GL_SHADER_STORAGE_BUFFER_BINDING:
+ v->value_int = ctx->ShaderStorageBuffer->Name;
+ break;
/* GL_ARB_timer_query */
case GL_TIMESTAMP:
if (ctx->Driver.GetTimestamp) {
v->value_int = 0;
}
break;
+ /* GL_ARB_compute_shader */
+ case GL_DISPATCH_INDIRECT_BUFFER_BINDING:
+ v->value_int = ctx->DispatchIndirectBuffer->Name;
+ break;
}
}
goto invalid_value;
if (!ctx->Extensions.ARB_uniform_buffer_object)
goto invalid_enum;
- v->value_int = ctx->UniformBufferBindings[index].Offset;
+ v->value_int = ctx->UniformBufferBindings[index].Offset < 0 ? 0 :
+ ctx->UniformBufferBindings[index].Offset;
return TYPE_INT;
case GL_UNIFORM_BUFFER_SIZE:
goto invalid_value;
if (!ctx->Extensions.ARB_uniform_buffer_object)
goto invalid_enum;
- v->value_int = ctx->UniformBufferBindings[index].Size;
+ v->value_int = ctx->UniformBufferBindings[index].Size < 0 ? 0 :
+ ctx->UniformBufferBindings[index].Size;
+ return TYPE_INT;
+
+ /* ARB_shader_storage_buffer_object */
+ case GL_SHADER_STORAGE_BUFFER_BINDING:
+ if (!ctx->Extensions.ARB_shader_storage_buffer_object)
+ goto invalid_enum;
+ if (index >= ctx->Const.MaxShaderStorageBufferBindings)
+ goto invalid_value;
+ v->value_int = ctx->ShaderStorageBufferBindings[index].BufferObject->Name;
+ return TYPE_INT;
+
+ case GL_SHADER_STORAGE_BUFFER_START:
+ if (!ctx->Extensions.ARB_shader_storage_buffer_object)
+ goto invalid_enum;
+ if (index >= ctx->Const.MaxShaderStorageBufferBindings)
+ goto invalid_value;
+ v->value_int = ctx->ShaderStorageBufferBindings[index].Offset < 0 ? 0 :
+ ctx->ShaderStorageBufferBindings[index].Offset;
+ return TYPE_INT;
+
+ case GL_SHADER_STORAGE_BUFFER_SIZE:
+ if (!ctx->Extensions.ARB_shader_storage_buffer_object)
+ goto invalid_enum;
+ if (index >= ctx->Const.MaxShaderStorageBufferBindings)
+ goto invalid_value;
+ v->value_int = ctx->ShaderStorageBufferBindings[index].Size < 0 ? 0 :
+ ctx->ShaderStorageBufferBindings[index].Size;
return TYPE_INT;
/* ARB_texture_multisample / GL3.2 */
return TYPE_INT64;
case GL_VERTEX_BINDING_DIVISOR:
- if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_instanced_arrays)
+ if ((!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_instanced_arrays) &&
+ !_mesa_is_gles31(ctx))
goto invalid_enum;
if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
goto invalid_value;
return TYPE_INT;
case GL_VERTEX_BINDING_OFFSET:
- if (!_mesa_is_desktop_gl(ctx))
+ if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles31(ctx))
goto invalid_enum;
if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
goto invalid_value;
return TYPE_INT;
case GL_VERTEX_BINDING_STRIDE:
- if (!_mesa_is_desktop_gl(ctx))
+ if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles31(ctx))
goto invalid_enum;
if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
goto invalid_value;
v->value_int = ctx->Array.VAO->VertexBinding[VERT_ATTRIB_GENERIC(index)].Stride;
return TYPE_INT;
+ case GL_VERTEX_BINDING_BUFFER:
+ if (ctx->API == API_OPENGLES2 && ctx->Version < 31)
+ goto invalid_enum;
+ if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
+ goto invalid_value;
+ v->value_int = ctx->Array.VAO->VertexBinding[VERT_ATTRIB_GENERIC(index)].BufferObj->Name;
+ return TYPE_INT;
+
/* ARB_shader_image_load_store */
case GL_IMAGE_BINDING_NAME: {
struct gl_texture_object *t;
{ "apis": ["GLES"], "params": [
# OES_point_size_array
- [ "POINT_SIZE_ARRAY_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled, TYPE_BOOLEAN)" ],
- [ "POINT_SIZE_ARRAY_TYPE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Type, TYPE_ENUM)" ],
- [ "POINT_SIZE_ARRAY_STRIDE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Stride, TYPE_INT)" ],
- [ "POINT_SIZE_ARRAY_BUFFER_BINDING_OES", "LOC_CUSTOM, TYPE_INT, 0" ],
+ [ "POINT_SIZE_ARRAY_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled, TYPE_BOOLEAN), NO_EXTRA" ],
+ [ "POINT_SIZE_ARRAY_TYPE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Type, TYPE_ENUM), NO_EXTRA" ],
+ [ "POINT_SIZE_ARRAY_STRIDE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Stride, TYPE_INT), NO_EXTRA" ],
+ [ "POINT_SIZE_ARRAY_BUFFER_BINDING_OES", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
]},
{ "apis": ["GL", "GL_CORE", "GLES2"], "params": [
[ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader_es31" ],
[ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ],
[ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ],
+ [ "DISPATCH_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_compute_shader_es31" ],
+
+# GL_ARB_framebuffer_no_attachments / GLES 3.1
+ ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"],
+ ["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"],
+ ["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"],
# GL_ARB_explicit_uniform_location / GLES 3.1
[ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ],
+
+# GL_ARB_separate_shader_objects / GLES 3.1
+ [ "PROGRAM_PIPELINE_BINDING", "LOC_CUSTOM, TYPE_INT, GL_PROGRAM_PIPELINE_BINDING, NO_EXTRA" ],
+
+# GL_ARB_vertex_attrib_binding / GLES 3.1
+ [ "MAX_VERTEX_ATTRIB_RELATIVE_OFFSET", "CONTEXT_ENUM(Const.MaxVertexAttribRelativeOffset), NO_EXTRA" ],
+ [ "MAX_VERTEX_ATTRIB_BINDINGS", "CONTEXT_ENUM(Const.MaxVertexAttribBindings), NO_EXTRA" ],
+
+# GL 4.4 / GLES 3.1
+ [ "MAX_VERTEX_ATTRIB_STRIDE", "CONTEXT_ENUM(Const.MaxVertexAttribStride), NO_EXTRA" ],
+
+ # GL_ARB_shader_storage_buffer_object / GLES 3.1
+ [ "MAX_VERTEX_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_es31" ],
+ [ "MAX_FRAGMENT_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_es31" ],
+ [ "MAX_COMPUTE_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_es31" ],
+ [ "MAX_COMBINED_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.MaxCombinedShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_es31" ],
+ [ "MAX_SHADER_STORAGE_BLOCK_SIZE", "CONTEXT_INT(Const.MaxShaderStorageBlockSize), extra_ARB_shader_storage_buffer_object_es31" ],
+ [ "MAX_SHADER_STORAGE_BUFFER_BINDINGS", "CONTEXT_INT(Const.MaxShaderStorageBufferBindings), extra_ARB_shader_storage_buffer_object_es31" ],
+ [ "MAX_COMBINED_SHADER_OUTPUT_RESOURCES", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_storage_buffer_object_es31" ],
+ [ "SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT", "CONTEXT_INT(Const.ShaderStorageBufferOffsetAlignment), extra_ARB_shader_storage_buffer_object_es31" ],
+ [ "SHADER_STORAGE_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_shader_storage_buffer_object_es31" ],
]},
# Enums in OpenGL Core profile and ES 3.1
[ "MAX_GEOMETRY_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents), extra_version_32" ],
[ "MAX_GEOMETRY_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents), extra_version_32" ],
-# GL 4.4
- [ "MAX_VERTEX_ATTRIB_STRIDE", "CONTEXT_ENUM(Const.MaxVertexAttribStride), NO_EXTRA" ],
-
# GL_ARB_robustness
[ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ],
# GL_ARB_texture_gather
[ "MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB", "CONTEXT_INT(Const.MaxProgramTextureGatherComponents), extra_ARB_texture_gather"],
-# GL_ARB_separate_shader_objects
- [ "PROGRAM_PIPELINE_BINDING", "LOC_CUSTOM, TYPE_INT, GL_PROGRAM_PIPELINE_BINDING, NO_EXTRA" ],
-
# GL_ARB_shader_atomic_counters
[ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
[ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
-# GL_ARB_vertex_attrib_binding
- [ "MAX_VERTEX_ATTRIB_RELATIVE_OFFSET", "CONTEXT_ENUM(Const.MaxVertexAttribRelativeOffset), NO_EXTRA" ],
- [ "MAX_VERTEX_ATTRIB_BINDINGS", "CONTEXT_ENUM(Const.MaxVertexAttribBindings), NO_EXTRA" ],
-
# GL_ARB_shader_image_load_store
[ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_image_load_store" ],
[ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ],
[ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader"],
# GL_ARB_framebuffer_no_attachments
- ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"],
- ["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"],
["MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments"],
- ["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"],
# GL_EXT_polygon_offset_clamp
[ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
+
+# GL_ARB_shader_storage_buffer_object
+# Each per-stage limit reads the Const.Program[] entry of its own shader stage.
+ [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
+ [ "MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
+ [ "MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
]},
# Enums restricted to OpenGL Core profile
#include "context.h"
#include "glformats.h"
#include "formats.h"
+#include "texcompress.h"
#include "enums.h"
enum {
else
return -1;
case GL_UNSIGNED_INT_24_8_EXT:
- if (format == GL_DEPTH_STENCIL_EXT)
+ if (format == GL_DEPTH_COMPONENT ||
+ format == GL_DEPTH_STENCIL_EXT)
return sizeof(GLuint);
else
return -1;
case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+ case GL_COMPRESSED_RGBA_ASTC_4x4_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_5x4_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_5x5_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_6x5_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_6x6_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_8x5_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_8x6_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_8x8_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_10x5_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_10x6_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_10x8_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_10x10_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_12x10_KHR:
+ case GL_COMPRESSED_RGBA_ASTC_12x12_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR:
/* generic integer formats */
case GL_RED_INTEGER_EXT:
case GL_GREEN_INTEGER_EXT:
GLboolean
_mesa_is_compressed_format(const struct gl_context *ctx, GLenum format)
{
+ mesa_format m_format = _mesa_glenum_to_compressed_format(format);
+
+ /* Some formats in this switch have an equivalent mesa_format_layout
+ * to the compressed formats in the layout switch below and thus
+ * must be handled first.
+ */
switch (format) {
- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
- /* Assume that the ANGLE flag will always be set if the EXT flag is set.
- */
- return ctx->Extensions.ANGLE_texture_compression_dxt;
case GL_RGB_S3TC:
case GL_RGB4_S3TC:
case GL_RGBA_S3TC:
case GL_RGBA4_S3TC:
return _mesa_is_desktop_gl(ctx) &&
ctx->Extensions.ANGLE_texture_compression_dxt;
- case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
- case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
- case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
- case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
- return _mesa_is_desktop_gl(ctx)
- && ctx->Extensions.EXT_texture_sRGB
- && ctx->Extensions.EXT_texture_compression_s3tc;
- case GL_COMPRESSED_RGB_FXT1_3DFX:
- case GL_COMPRESSED_RGBA_FXT1_3DFX:
- return _mesa_is_desktop_gl(ctx)
- && ctx->Extensions.TDFX_texture_compression_FXT1;
- case GL_COMPRESSED_RED_RGTC1:
- case GL_COMPRESSED_SIGNED_RED_RGTC1:
- case GL_COMPRESSED_RG_RGTC2:
- case GL_COMPRESSED_SIGNED_RG_RGTC2:
- return _mesa_is_desktop_gl(ctx)
- && ctx->Extensions.ARB_texture_compression_rgtc;
- case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
- case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
- case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
- case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
- return ctx->API == API_OPENGL_COMPAT
- && ctx->Extensions.EXT_texture_compression_latc;
case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
return ctx->API == API_OPENGL_COMPAT
&& ctx->Extensions.ATI_texture_compression_3dc;
- case GL_ETC1_RGB8_OES:
- return _mesa_is_gles(ctx)
- && ctx->Extensions.OES_compressed_ETC1_RGB8_texture;
- case GL_COMPRESSED_RGB8_ETC2:
- case GL_COMPRESSED_SRGB8_ETC2:
- case GL_COMPRESSED_RGBA8_ETC2_EAC:
- case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
- case GL_COMPRESSED_R11_EAC:
- case GL_COMPRESSED_RG11_EAC:
- case GL_COMPRESSED_SIGNED_R11_EAC:
- case GL_COMPRESSED_SIGNED_RG11_EAC:
- case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
- case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
- return _mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility;
- case GL_COMPRESSED_RGBA_BPTC_UNORM:
- case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
- case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
- case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
- return _mesa_is_desktop_gl(ctx) &&
- ctx->Extensions.ARB_texture_compression_bptc;
- case GL_COMPRESSED_RGBA_ASTC_4x4_KHR:
- case GL_COMPRESSED_RGBA_ASTC_5x4_KHR:
- case GL_COMPRESSED_RGBA_ASTC_5x5_KHR:
- case GL_COMPRESSED_RGBA_ASTC_6x5_KHR:
- case GL_COMPRESSED_RGBA_ASTC_6x6_KHR:
- case GL_COMPRESSED_RGBA_ASTC_8x5_KHR:
- case GL_COMPRESSED_RGBA_ASTC_8x6_KHR:
- case GL_COMPRESSED_RGBA_ASTC_8x8_KHR:
- case GL_COMPRESSED_RGBA_ASTC_10x5_KHR:
- case GL_COMPRESSED_RGBA_ASTC_10x6_KHR:
- case GL_COMPRESSED_RGBA_ASTC_10x8_KHR:
- case GL_COMPRESSED_RGBA_ASTC_10x10_KHR:
- case GL_COMPRESSED_RGBA_ASTC_12x10_KHR:
- case GL_COMPRESSED_RGBA_ASTC_12x12_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR:
- case GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR:
- return ctx->Extensions.KHR_texture_compression_astc_ldr;
case GL_PALETTE4_RGB8_OES:
case GL_PALETTE4_RGBA8_OES:
case GL_PALETTE4_R5_G6_B5_OES:
case GL_PALETTE8_RGBA4_OES:
case GL_PALETTE8_RGB5_A1_OES:
return ctx->API == API_OPENGLES;
+ }
+
+ switch (_mesa_get_format_layout(m_format)) {
+ case MESA_FORMAT_LAYOUT_S3TC:
+ if (_mesa_get_format_color_encoding(m_format) == GL_LINEAR) {
+ /* Assume that the ANGLE flag will always be set if the
+ * EXT flag is set.
+ */
+ return ctx->Extensions.ANGLE_texture_compression_dxt;
+ } else {
+ return _mesa_is_desktop_gl(ctx)
+ && ctx->Extensions.EXT_texture_sRGB
+ && ctx->Extensions.EXT_texture_compression_s3tc;
+ }
+ case MESA_FORMAT_LAYOUT_FXT1:
+ return _mesa_is_desktop_gl(ctx)
+ && ctx->Extensions.TDFX_texture_compression_FXT1;
+ case MESA_FORMAT_LAYOUT_RGTC:
+ return _mesa_is_desktop_gl(ctx)
+ && ctx->Extensions.ARB_texture_compression_rgtc;
+ case MESA_FORMAT_LAYOUT_LATC:
+ return ctx->API == API_OPENGL_COMPAT
+ && ctx->Extensions.EXT_texture_compression_latc;
+ case MESA_FORMAT_LAYOUT_ETC1:
+ return _mesa_is_gles(ctx)
+ && ctx->Extensions.OES_compressed_ETC1_RGB8_texture;
+ case MESA_FORMAT_LAYOUT_ETC2:
+ return _mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility;
+ case MESA_FORMAT_LAYOUT_BPTC:
+ return _mesa_is_desktop_gl(ctx) &&
+ ctx->Extensions.ARB_texture_compression_bptc;
+ case MESA_FORMAT_LAYOUT_ASTC:
+ return ctx->Extensions.KHR_texture_compression_astc_ldr;
default:
return GL_FALSE;
}
return GL_INVALID_OPERATION;
case GL_UNSIGNED_INT_24_8:
+ /* Depth buffer OK to read in OpenGL ES (NV_read_depth). */
+ if (ctx->API == API_OPENGLES2 && format == GL_DEPTH_COMPONENT)
+ return GL_NO_ERROR;
+
if (format != GL_DEPTH_STENCIL) {
return GL_INVALID_OPERATION;
}
return type_valid ? GL_NO_ERROR : GL_INVALID_OPERATION;
}
+/**
+ * Return the simple base format for a given internal texture format.
+ * For example, given GL_LUMINANCE12_ALPHA4, return GL_LUMINANCE_ALPHA.
+ *
+ * \param ctx GL context.
+ * \param internalFormat the internal texture format token or 1, 2, 3, or 4.
+ *
+ * \return the corresponding base internal format (GL_ALPHA, GL_LUMINANCE,
+ * GL_LUMINANCE_ALPHA, GL_INTENSITY, GL_RGB, or GL_RGBA), or -1 if invalid enum.
+ *
+ * This is the format which is used during texture application (i.e. the
+ * texture format and env mode determine the arithmetic used).
+ */
+GLint
+_mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
+{
+ switch (internalFormat) {
+ case GL_ALPHA:
+ case GL_ALPHA4:
+ case GL_ALPHA8:
+ case GL_ALPHA12:
+ case GL_ALPHA16:
+ return (ctx->API != API_OPENGL_CORE) ? GL_ALPHA : -1;
+ case 1:
+ case GL_LUMINANCE:
+ case GL_LUMINANCE4:
+ case GL_LUMINANCE8:
+ case GL_LUMINANCE12:
+ case GL_LUMINANCE16:
+ return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE : -1;
+ case 2:
+ case GL_LUMINANCE_ALPHA:
+ case GL_LUMINANCE4_ALPHA4:
+ case GL_LUMINANCE6_ALPHA2:
+ case GL_LUMINANCE8_ALPHA8:
+ case GL_LUMINANCE12_ALPHA4:
+ case GL_LUMINANCE12_ALPHA12:
+ case GL_LUMINANCE16_ALPHA16:
+ return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE_ALPHA : -1;
+ case GL_INTENSITY:
+ case GL_INTENSITY4:
+ case GL_INTENSITY8:
+ case GL_INTENSITY12:
+ case GL_INTENSITY16:
+ return (ctx->API != API_OPENGL_CORE) ? GL_INTENSITY : -1;
+ case 3:
+ return (ctx->API != API_OPENGL_CORE) ? GL_RGB : -1;
+ case GL_RGB:
+ case GL_R3_G3_B2:
+ case GL_RGB4:
+ case GL_RGB5:
+ case GL_RGB8:
+ case GL_RGB10:
+ case GL_RGB12:
+ case GL_RGB16:
+ return GL_RGB;
+ case 4:
+ return (ctx->API != API_OPENGL_CORE) ? GL_RGBA : -1;
+ case GL_RGBA:
+ case GL_RGBA2:
+ case GL_RGBA4:
+ case GL_RGB5_A1:
+ case GL_RGBA8:
+ case GL_RGB10_A2:
+ case GL_RGBA12:
+ case GL_RGBA16:
+ return GL_RGBA;
+ default:
+ ; /* fallthrough */
+ }
+
+ /* GL_BGRA can be an internal format *only* in OpenGL ES (1.x or 2.0).
+ */
+ if (_mesa_is_gles(ctx)) {
+ switch (internalFormat) {
+ case GL_BGRA:
+ return GL_RGBA;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.ARB_ES2_compatibility) {
+ switch (internalFormat) {
+ case GL_RGB565:
+ return GL_RGB;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.ARB_depth_texture) {
+ switch (internalFormat) {
+ case GL_DEPTH_COMPONENT:
+ case GL_DEPTH_COMPONENT16:
+ case GL_DEPTH_COMPONENT24:
+ case GL_DEPTH_COMPONENT32:
+ return GL_DEPTH_COMPONENT;
+ case GL_DEPTH_STENCIL:
+ case GL_DEPTH24_STENCIL8:
+ return GL_DEPTH_STENCIL;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.ARB_texture_stencil8) {
+ switch (internalFormat) {
+ case GL_STENCIL_INDEX:
+ case GL_STENCIL_INDEX1:
+ case GL_STENCIL_INDEX4:
+ case GL_STENCIL_INDEX8:
+ case GL_STENCIL_INDEX16:
+ return GL_STENCIL_INDEX;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ switch (internalFormat) {
+ case GL_COMPRESSED_ALPHA:
+ return GL_ALPHA;
+ case GL_COMPRESSED_LUMINANCE:
+ return GL_LUMINANCE;
+ case GL_COMPRESSED_LUMINANCE_ALPHA:
+ return GL_LUMINANCE_ALPHA;
+ case GL_COMPRESSED_INTENSITY:
+ return GL_INTENSITY;
+ case GL_COMPRESSED_RGB:
+ return GL_RGB;
+ case GL_COMPRESSED_RGBA:
+ return GL_RGBA;
+ default:
+ ; /* fallthrough */
+ }
+
+ if (ctx->Extensions.TDFX_texture_compression_FXT1) {
+ switch (internalFormat) {
+ case GL_COMPRESSED_RGB_FXT1_3DFX:
+ return GL_RGB;
+ case GL_COMPRESSED_RGBA_FXT1_3DFX:
+ return GL_RGBA;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ /* Assume that the ANGLE flag will always be set if the EXT flag is set.
+ */
+ if (ctx->Extensions.ANGLE_texture_compression_dxt) {
+ switch (internalFormat) {
+ case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+ return GL_RGB;
+ case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+ case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+ case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+ return GL_RGBA;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (_mesa_is_desktop_gl(ctx)
+ && ctx->Extensions.ANGLE_texture_compression_dxt) {
+ switch (internalFormat) {
+ case GL_RGB_S3TC:
+ case GL_RGB4_S3TC:
+ return GL_RGB;
+ case GL_RGBA_S3TC:
+ case GL_RGBA4_S3TC:
+ return GL_RGBA;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.MESA_ycbcr_texture) {
+ if (internalFormat == GL_YCBCR_MESA)
+ return GL_YCBCR_MESA;
+ }
+
+ if (ctx->Extensions.ARB_texture_float) {
+ switch (internalFormat) {
+ case GL_ALPHA16F_ARB:
+ case GL_ALPHA32F_ARB:
+ return GL_ALPHA;
+ case GL_RGBA16F_ARB:
+ case GL_RGBA32F_ARB:
+ return GL_RGBA;
+ case GL_RGB16F_ARB:
+ case GL_RGB32F_ARB:
+ return GL_RGB;
+ case GL_INTENSITY16F_ARB:
+ case GL_INTENSITY32F_ARB:
+ return GL_INTENSITY;
+ case GL_LUMINANCE16F_ARB:
+ case GL_LUMINANCE32F_ARB:
+ return GL_LUMINANCE;
+ case GL_LUMINANCE_ALPHA16F_ARB:
+ case GL_LUMINANCE_ALPHA32F_ARB:
+ return GL_LUMINANCE_ALPHA;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.EXT_texture_snorm) {
+ switch (internalFormat) {
+ case GL_RED_SNORM:
+ case GL_R8_SNORM:
+ case GL_R16_SNORM:
+ return GL_RED;
+ case GL_RG_SNORM:
+ case GL_RG8_SNORM:
+ case GL_RG16_SNORM:
+ return GL_RG;
+ case GL_RGB_SNORM:
+ case GL_RGB8_SNORM:
+ case GL_RGB16_SNORM:
+ return GL_RGB;
+ case GL_RGBA_SNORM:
+ case GL_RGBA8_SNORM:
+ case GL_RGBA16_SNORM:
+ return GL_RGBA;
+ case GL_ALPHA_SNORM:
+ case GL_ALPHA8_SNORM:
+ case GL_ALPHA16_SNORM:
+ return GL_ALPHA;
+ case GL_LUMINANCE_SNORM:
+ case GL_LUMINANCE8_SNORM:
+ case GL_LUMINANCE16_SNORM:
+ return GL_LUMINANCE;
+ case GL_LUMINANCE_ALPHA_SNORM:
+ case GL_LUMINANCE8_ALPHA8_SNORM:
+ case GL_LUMINANCE16_ALPHA16_SNORM:
+ return GL_LUMINANCE_ALPHA;
+ case GL_INTENSITY_SNORM:
+ case GL_INTENSITY8_SNORM:
+ case GL_INTENSITY16_SNORM:
+ return GL_INTENSITY;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.EXT_texture_sRGB) {
+ switch (internalFormat) {
+ case GL_SRGB_EXT:
+ case GL_SRGB8_EXT:
+ case GL_COMPRESSED_SRGB_EXT:
+ return GL_RGB;
+ case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+ return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGB : -1;
+ case GL_SRGB_ALPHA_EXT:
+ case GL_SRGB8_ALPHA8_EXT:
+ case GL_COMPRESSED_SRGB_ALPHA_EXT:
+ return GL_RGBA;
+ case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+ case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+ case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+ return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGBA : -1;
+ case GL_SLUMINANCE_ALPHA_EXT:
+ case GL_SLUMINANCE8_ALPHA8_EXT:
+ case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
+ return GL_LUMINANCE_ALPHA;
+ case GL_SLUMINANCE_EXT:
+ case GL_SLUMINANCE8_EXT:
+ case GL_COMPRESSED_SLUMINANCE_EXT:
+ return GL_LUMINANCE;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Version >= 30 ||
+ ctx->Extensions.EXT_texture_integer) {
+ switch (internalFormat) {
+ case GL_RGBA8UI_EXT:
+ case GL_RGBA16UI_EXT:
+ case GL_RGBA32UI_EXT:
+ case GL_RGBA8I_EXT:
+ case GL_RGBA16I_EXT:
+ case GL_RGBA32I_EXT:
+ case GL_RGB10_A2UI:
+ return GL_RGBA;
+ case GL_RGB8UI_EXT:
+ case GL_RGB16UI_EXT:
+ case GL_RGB32UI_EXT:
+ case GL_RGB8I_EXT:
+ case GL_RGB16I_EXT:
+ case GL_RGB32I_EXT:
+ return GL_RGB;
+ }
+ }
+
+ if (ctx->Extensions.EXT_texture_integer) {
+ switch (internalFormat) {
+ case GL_ALPHA8UI_EXT:
+ case GL_ALPHA16UI_EXT:
+ case GL_ALPHA32UI_EXT:
+ case GL_ALPHA8I_EXT:
+ case GL_ALPHA16I_EXT:
+ case GL_ALPHA32I_EXT:
+ return GL_ALPHA;
+ case GL_INTENSITY8UI_EXT:
+ case GL_INTENSITY16UI_EXT:
+ case GL_INTENSITY32UI_EXT:
+ case GL_INTENSITY8I_EXT:
+ case GL_INTENSITY16I_EXT:
+ case GL_INTENSITY32I_EXT:
+ return GL_INTENSITY;
+ case GL_LUMINANCE8UI_EXT:
+ case GL_LUMINANCE16UI_EXT:
+ case GL_LUMINANCE32UI_EXT:
+ case GL_LUMINANCE8I_EXT:
+ case GL_LUMINANCE16I_EXT:
+ case GL_LUMINANCE32I_EXT:
+ return GL_LUMINANCE;
+ case GL_LUMINANCE_ALPHA8UI_EXT:
+ case GL_LUMINANCE_ALPHA16UI_EXT:
+ case GL_LUMINANCE_ALPHA32UI_EXT:
+ case GL_LUMINANCE_ALPHA8I_EXT:
+ case GL_LUMINANCE_ALPHA16I_EXT:
+ case GL_LUMINANCE_ALPHA32I_EXT:
+ return GL_LUMINANCE_ALPHA;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.ARB_texture_rg) {
+ switch (internalFormat) {
+ case GL_R16F:
+ case GL_R32F:
+ if (!ctx->Extensions.ARB_texture_float)
+ break;
+ return GL_RED;
+ case GL_R8I:
+ case GL_R8UI:
+ case GL_R16I:
+ case GL_R16UI:
+ case GL_R32I:
+ case GL_R32UI:
+ if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
+ break;
+ /* FALLTHROUGH */
+ case GL_R8:
+ case GL_R16:
+ case GL_RED:
+ case GL_COMPRESSED_RED:
+ return GL_RED;
+
+ case GL_RG16F:
+ case GL_RG32F:
+ if (!ctx->Extensions.ARB_texture_float)
+ break;
+ return GL_RG;
+ case GL_RG8I:
+ case GL_RG8UI:
+ case GL_RG16I:
+ case GL_RG16UI:
+ case GL_RG32I:
+ case GL_RG32UI:
+ if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
+ break;
+ /* FALLTHROUGH */
+ case GL_RG:
+ case GL_RG8:
+ case GL_RG16:
+ case GL_COMPRESSED_RG:
+ return GL_RG;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.EXT_texture_shared_exponent) {
+ switch (internalFormat) {
+ case GL_RGB9_E5_EXT:
+ return GL_RGB;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.EXT_packed_float) {
+ switch (internalFormat) {
+ case GL_R11F_G11F_B10F_EXT:
+ return GL_RGB;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.ARB_depth_buffer_float) {
+ switch (internalFormat) {
+ case GL_DEPTH_COMPONENT32F:
+ return GL_DEPTH_COMPONENT;
+ case GL_DEPTH32F_STENCIL8:
+ return GL_DEPTH_STENCIL;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.ARB_texture_compression_rgtc) {
+ switch (internalFormat) {
+ case GL_COMPRESSED_RED_RGTC1:
+ case GL_COMPRESSED_SIGNED_RED_RGTC1:
+ return GL_RED;
+ case GL_COMPRESSED_RG_RGTC2:
+ case GL_COMPRESSED_SIGNED_RG_RGTC2:
+ return GL_RG;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.EXT_texture_compression_latc) {
+ switch (internalFormat) {
+ case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
+ case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
+ return GL_LUMINANCE;
+ case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
+ case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
+ return GL_LUMINANCE_ALPHA;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.ATI_texture_compression_3dc) {
+ switch (internalFormat) {
+ case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
+ return GL_LUMINANCE_ALPHA;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
+ switch (internalFormat) {
+ case GL_ETC1_RGB8_OES:
+ return GL_RGB;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
+ switch (internalFormat) {
+ case GL_COMPRESSED_RGB8_ETC2:
+ case GL_COMPRESSED_SRGB8_ETC2:
+ return GL_RGB;
+ case GL_COMPRESSED_RGBA8_ETC2_EAC:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
+ case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+ case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+ return GL_RGBA;
+ case GL_COMPRESSED_R11_EAC:
+ case GL_COMPRESSED_SIGNED_R11_EAC:
+ return GL_RED;
+ case GL_COMPRESSED_RG11_EAC:
+ case GL_COMPRESSED_SIGNED_RG11_EAC:
+ return GL_RG;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (_mesa_is_desktop_gl(ctx) &&
+ ctx->Extensions.ARB_texture_compression_bptc) {
+ switch (internalFormat) {
+ case GL_COMPRESSED_RGBA_BPTC_UNORM:
+ case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
+ return GL_RGBA;
+ case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
+ case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+ return GL_RGB;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ if (ctx->API == API_OPENGLES) {
+ switch (internalFormat) {
+ case GL_PALETTE4_RGB8_OES:
+ case GL_PALETTE4_R5_G6_B5_OES:
+ case GL_PALETTE8_RGB8_OES:
+ case GL_PALETTE8_R5_G6_B5_OES:
+ return GL_RGB;
+ case GL_PALETTE4_RGBA8_OES:
+ case GL_PALETTE8_RGB5_A1_OES:
+ case GL_PALETTE4_RGBA4_OES:
+ case GL_PALETTE4_RGB5_A1_OES:
+ case GL_PALETTE8_RGBA8_OES:
+ case GL_PALETTE8_RGBA4_OES:
+ return GL_RGBA;
+ default:
+ ; /* fallthrough */
+ }
+ }
+
+ return -1; /* error */
+}
+
+/**
+ * Returns the effective internal format from a texture format and type.
+ * This is used by texture image operations internally for validation, when
+ * the specified internal format is a base (unsized) format.
+ *
+ * This method will only return a valid effective internal format if the
+ * combination of format, type and internal format in base form, is acceptable.
+ *
+ * If a single sized internal format is defined in the spec (OpenGL-ES 3.0.4) or
+ * in extensions, to unambiguously correspond to the given base format, then
+ * that internal format is returned as the effective. Otherwise, if the
+ * combination is accepted but a single effective format is not defined, the
+ * passed base format will be returned instead.
+ *
+ * \param format the texture format
+ * \param type the texture type
+ *
+ * \return a sized internal format, the unchanged \p format when the
+ * combination is accepted but has no single sized equivalent, or GL_NONE
+ * when the format/type combination is not handled by this function.
+ */
+static GLenum
+_mesa_es3_effective_internal_format_for_format_and_type(GLenum format,
+ GLenum type)
+{
+ switch (type) {
+ case GL_UNSIGNED_BYTE:
+ switch (format) {
+ case GL_RGBA:
+ return GL_RGBA8;
+ case GL_RGB:
+ return GL_RGB8;
+ /* Although LUMINANCE_ALPHA, LUMINANCE and ALPHA appear in table 3.12,
+ * (section 3.8 Texturing, page 128 of the OpenGL-ES 3.0.4) as effective
+ * internal formats, they do not correspond to GL constants, so the base
+ * format is returned instead.
+ */
+ case GL_LUMINANCE_ALPHA:
+ case GL_LUMINANCE:
+ case GL_ALPHA:
+ return format;
+ }
+ break;
+
+ case GL_UNSIGNED_SHORT_4_4_4_4:
+ if (format == GL_RGBA)
+ return GL_RGBA4;
+ break;
+
+ case GL_UNSIGNED_SHORT_5_5_5_1:
+ if (format == GL_RGBA)
+ return GL_RGB5_A1;
+ break;
+
+ case GL_UNSIGNED_SHORT_5_6_5:
+ if (format == GL_RGB)
+ return GL_RGB565;
+ break;
+
+ /* OES_packed_depth_stencil */
+ case GL_UNSIGNED_INT_24_8:
+ if (format == GL_DEPTH_STENCIL)
+ return GL_DEPTH24_STENCIL8;
+ break;
+
+ case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
+ if (format == GL_DEPTH_STENCIL)
+ return GL_DEPTH32F_STENCIL8;
+ break;
+
+ case GL_UNSIGNED_SHORT:
+ if (format == GL_DEPTH_COMPONENT)
+ return GL_DEPTH_COMPONENT16;
+ break;
+
+ case GL_UNSIGNED_INT:
+ /* It can be DEPTH_COMPONENT16 or DEPTH_COMPONENT24, so just return
+ * the format.
+ */
+ if (format == GL_DEPTH_COMPONENT)
+ return format;
+ break;
+
+ /* OES_texture_float and OES_texture_half_float */
+ case GL_FLOAT:
+ if (format == GL_DEPTH_COMPONENT)
+ return GL_DEPTH_COMPONENT32F;
+ /* fall through */
+ case GL_HALF_FLOAT_OES:
+ /* Shared by GL_FLOAT (non-depth formats) and GL_HALF_FLOAT_OES: the
+ * accepted base formats are returned unchanged.
+ */
+ switch (format) {
+ case GL_RGBA:
+ case GL_RGB:
+ case GL_LUMINANCE_ALPHA:
+ case GL_LUMINANCE:
+ case GL_ALPHA:
+ case GL_RED:
+ case GL_RG:
+ return format;
+ }
+ break;
+ case GL_HALF_FLOAT:
+ switch (format) {
+ case GL_RG:
+ case GL_RED:
+ return format;
+ }
+ break;
+
+ /* GL_EXT_texture_type_2_10_10_10_REV */
+ case GL_UNSIGNED_INT_2_10_10_10_REV:
+ switch (format) {
+ case GL_RGBA:
+ case GL_RGB:
+ return format;
+ }
+ break;
+
+ default:
+ /* fall through and return NONE */
+ break;
+ }
+
+ return GL_NONE;
+}
/**
* Do error checking of format/type combinations for OpenGL ES 3
GLenum format, GLenum type,
GLenum internalFormat)
{
+ /* If internalFormat is an unsized format, then the effective internal
+ * format derived from format and type should be used instead. Page 127,
+ * section "3.8 Texturing" of the GLES 3.0.4 spec states:
+ *
+ * "if internalformat is a base internal format, the effective
+ * internal format is a sized internal format that is derived
+ * from the format and type for internal use by the GL.
+ * Table 3.12 specifies the mapping of format and type to effective
+ * internal formats. The effective internal format is used by the GL
+ * for purposes such as texture completeness or type checks for
+ * CopyTex* commands. In these cases, the GL is required to operate
+ * as if the effective internal format was used as the internalformat
+ * when specifying the texture data."
+ */
+ if (_mesa_is_enum_format_unsized(internalFormat)) {
+ GLenum effectiveInternalFormat =
+ _mesa_es3_effective_internal_format_for_format_and_type(format, type);
+
+ if (effectiveInternalFormat == GL_NONE)
+ return GL_INVALID_OPERATION;
+
+ GLenum baseInternalFormat =
+ _mesa_base_tex_format(ctx, effectiveInternalFormat);
+
+ if (internalFormat != baseInternalFormat)
+ return GL_INVALID_OPERATION;
+
+ internalFormat = effectiveInternalFormat;
+ }
+
switch (format) {
case GL_RGBA:
switch (type) {
return MESA_FORMAT_B5G6R5_UNORM;
else if (format == GL_BGR)
return MESA_FORMAT_R5G6B5_UNORM;
+ else if (format == GL_RGB_INTEGER)
+ return MESA_FORMAT_B5G6R5_UINT;
break;
case GL_UNSIGNED_SHORT_5_6_5_REV:
if (format == GL_RGB)
return MESA_FORMAT_R5G6B5_UNORM;
else if (format == GL_BGR)
return MESA_FORMAT_B5G6R5_UNORM;
+ else if (format == GL_RGB_INTEGER)
+ return MESA_FORMAT_R5G6B5_UINT;
break;
case GL_UNSIGNED_SHORT_4_4_4_4:
if (format == GL_RGBA)
return MESA_FORMAT_A4R4G4B4_UNORM;
else if (format == GL_ABGR_EXT)
return MESA_FORMAT_R4G4B4A4_UNORM;
+ else if (format == GL_RGBA_INTEGER)
+ return MESA_FORMAT_A4B4G4R4_UINT;
+ else if (format == GL_BGRA_INTEGER)
+ return MESA_FORMAT_A4R4G4B4_UINT;
break;
case GL_UNSIGNED_SHORT_4_4_4_4_REV:
if (format == GL_RGBA)
return MESA_FORMAT_B4G4R4A4_UNORM;
else if (format == GL_ABGR_EXT)
return MESA_FORMAT_A4B4G4R4_UNORM;
+ else if (format == GL_RGBA_INTEGER)
+ return MESA_FORMAT_R4G4B4A4_UINT;
+ else if (format == GL_BGRA_INTEGER)
+ return MESA_FORMAT_B4G4R4A4_UINT;
break;
case GL_UNSIGNED_SHORT_5_5_5_1:
if (format == GL_RGBA)
return MESA_FORMAT_A1B5G5R5_UNORM;
else if (format == GL_BGRA)
return MESA_FORMAT_A1R5G5B5_UNORM;
+ else if (format == GL_RGBA_INTEGER)
+ return MESA_FORMAT_A1B5G5R5_UINT;
+ else if (format == GL_BGRA_INTEGER)
+ return MESA_FORMAT_A1R5G5B5_UINT;
break;
case GL_UNSIGNED_SHORT_1_5_5_5_REV:
if (format == GL_RGBA)
return MESA_FORMAT_R5G5B5A1_UNORM;
else if (format == GL_BGRA)
return MESA_FORMAT_B5G5R5A1_UNORM;
+ else if (format == GL_RGBA_INTEGER)
+ return MESA_FORMAT_R5G5B5A1_UINT;
+ else if (format == GL_BGRA_INTEGER)
+ return MESA_FORMAT_B5G5R5A1_UINT;
break;
case GL_UNSIGNED_BYTE_3_3_2:
if (format == GL_RGB)
return MESA_FORMAT_B2G3R3_UNORM;
+ else if (format == GL_RGB_INTEGER)
+ return MESA_FORMAT_B2G3R3_UINT;
break;
case GL_UNSIGNED_BYTE_2_3_3_REV:
if (format == GL_RGB)
return MESA_FORMAT_R3G3B2_UNORM;
+ else if (format == GL_RGB_INTEGER)
+ return MESA_FORMAT_R3G3B2_UINT;
break;
case GL_UNSIGNED_INT_5_9_9_9_REV:
if (format == GL_RGB)
return MESA_FORMAT_A8R8G8B8_UNORM;
else if (format == GL_ABGR_EXT)
return MESA_FORMAT_R8G8B8A8_UNORM;
+ else if (format == GL_RGBA_INTEGER)
+ return MESA_FORMAT_A8B8G8R8_UINT;
+ else if (format == GL_BGRA_INTEGER)
+ return MESA_FORMAT_A8R8G8B8_UINT;
break;
case GL_UNSIGNED_INT_8_8_8_8_REV:
if (format == GL_RGBA)
return MESA_FORMAT_B8G8R8A8_UNORM;
else if (format == GL_ABGR_EXT)
return MESA_FORMAT_A8B8G8R8_UNORM;
+ else if (format == GL_RGBA_INTEGER)
+ return MESA_FORMAT_R8G8B8A8_UINT;
+ else if (format == GL_BGRA_INTEGER)
+ return MESA_FORMAT_B8G8R8A8_UINT;
break;
case GL_UNSIGNED_SHORT_8_8_MESA:
if (format == GL_YCBCR_MESA)
_mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
GLenum format, GLenum type,
GLenum internalFormat);
+extern GLint
+_mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat );
extern uint32_t
_mesa_format_from_format_and_type(GLenum format, GLenum type);
* \param src the array with the source data we want to byte-swap.
* \param n number of words.
*/
-void
-_mesa_swap2_copy( GLushort *dst, GLushort *src, GLuint n )
+static void
+swap2_copy( GLushort *dst, GLushort *src, GLuint n )
{
GLuint i;
for (i = 0; i < n; i++) {
}
}
-
+/**
+ * In-place variant of swap2_copy(): the array \p p is passed as both the
+ * destination and the source.
+ *
+ * \param p the array of GLushort elements to byte-swap in place.
+ * \param n number of elements in \p p.
+ */
+void
+_mesa_swap2(GLushort *p, GLuint n)
+{
+ swap2_copy(p, p, n);
+}
/*
* Flip the order of the 4 bytes in each word in the given array (src) and
* \param src the array with the source data we want to byte-swap.
* \param n number of words.
*/
-void
-_mesa_swap4_copy( GLuint *dst, GLuint *src, GLuint n )
+static void
+swap4_copy( GLuint *dst, GLuint *src, GLuint n )
{
GLuint i, a, b;
for (i = 0; i < n; i++) {
}
}
+/**
+ * In-place variant of swap4_copy(): the array \p p is passed as both the
+ * destination and the source.
+ *
+ * \param p the array of GLuint elements to byte-swap in place.
+ * \param n number of elements in \p p.
+ */
+void
+_mesa_swap4(GLuint *p, GLuint n)
+{
+ swap4_copy(p, p, n);
+}
/**
* Return the byte offset of a specific pixel in an image (1D, 2D or 3D).
return GL_TRUE;
}
+
+/**
+ * Swap the bytes in a 2D image while copying it from \p src to \p dst.
+ *
+ * The size of the elements to swap is taken from the packed size of
+ * \p type. Only 2-byte and 4-byte elements are swapped; for any other
+ * size the function does nothing. Rows are stepped using the row stride
+ * computed from \p packing, so packing parameters like Alignment and
+ * RowLength are taken into account. The same stride is applied to both
+ * the source and the destination.
+ *
+ * \param format  format of the image data.
+ * \param type    data type of the image data.
+ * \param packing pixel-store state used to compute the row stride;
+ *                SwapBytes is asserted to be set.
+ * \param width   image width in pixels.
+ * \param height  number of rows to process.
+ * \param dst     destination buffer.
+ * \param src     source buffer.
+ */
+void
+_mesa_swap_bytes_2d_image(GLenum format, GLenum type,
+ const struct gl_pixelstore_attrib *packing,
+ GLsizei width, GLsizei height,
+ GLvoid *dst, const GLvoid *src)
+{
+ GLint swapSize = _mesa_sizeof_packed_type(type);
+
+ assert(packing->SwapBytes);
+
+ if (swapSize == 2 || swapSize == 4) {
+ /* Number of swapSize-byte elements that make up one pixel. */
+ int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
+ int stride = _mesa_image_row_stride(packing, width, format, type);
+ int row;
+ uint8_t *dstrow;
+ const uint8_t *srcrow;
+ assert(swapsPerPixel > 0);
+ /* A pixel must consist of a whole number of swappable elements. */
+ assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
+ dstrow = dst;
+ srcrow = src;
+ for (row = 0; row < height; row++) {
+ if (swapSize == 2)
+ swap2_copy((GLushort *)dstrow, (GLushort *)srcrow, width * swapsPerPixel);
+ else if (swapSize == 4)
+ swap4_copy((GLuint *)dstrow, (GLuint *)srcrow, width * swapsPerPixel);
+ dstrow += stride;
+ srcrow += stride;
+ }
+ }
+}
struct gl_framebuffer;
extern void
-_mesa_swap2_copy(GLushort *dst, GLushort *src, GLuint n);
+_mesa_swap2(GLushort *p, GLuint n);
extern void
-_mesa_swap4_copy(GLuint *dst, GLuint *src, GLuint n);
+_mesa_swap4(GLuint *p, GLuint n);
-static inline void
-_mesa_swap2(GLushort *p, GLuint n)
-{
- _mesa_swap2_copy(p, p, n);
-}
-
-static inline void
-_mesa_swap4(GLuint *p, GLuint n)
-{
- _mesa_swap4_copy(p, p, n);
-}
extern GLintptr
_mesa_image_offset( GLuint dimensions,
GLint *srcX0, GLint *srcY0, GLint *srcX1, GLint *srcY1,
GLint *dstX0, GLint *dstY0, GLint *dstX1, GLint *dstY1);
+void
+_mesa_swap_bytes_2d_image(GLenum format, GLenum type,
+ const struct gl_pixelstore_attrib *packing,
+ GLsizei width, GLsizei height,
+ GLvoid *dst, const GLvoid *src);
#endif
}
/* get dest gl_texture_image */
- dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1);
- if (!dstImage) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
- return;
- }
+ dstImage = _mesa_select_tex_image(texObj, target, level + 1);
+ assert(dstImage);
if (target == GL_TEXTURE_1D_ARRAY) {
srcDepth = srcHeight;
srcWidth, srcHeight, srcDepth,
&dstWidth, &dstHeight, &dstDepth);
if (!nextLevel)
- break;
+ goto end;
+
+ if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1,
+ dstWidth, dstHeight, dstDepth,
+ border, srcImage->InternalFormat,
+ srcImage->TexFormat)) {
+ /* all done */
+ goto end;
+ }
+
+ /* get dest gl_texture_image */
+ dstImage = _mesa_select_tex_image(texObj, target, level + 1);
+ assert(dstImage);
/* Compute dst image strides and alloc memory on first iteration */
temp_dst_row_stride = _mesa_format_row_stride(temp_format, dstWidth);
}
}
- /* get dest gl_texture_image */
- dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1);
- if (!dstImage) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
- goto end;
- }
-
/* for 2D arrays, setup array[depth] of slice pointers */
for (i = 0; i < srcDepth; i++) {
temp_src_slices[i] = temp_src + temp_src_img_stride * i;
dstWidth, dstHeight, dstDepth,
temp_dst_slices, temp_dst_row_stride);
- if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1,
- dstWidth, dstHeight, dstDepth,
- border, srcImage->InternalFormat,
- srcImage->TexFormat)) {
- /* all done */
- goto end;
- }
-
/* The image space was allocated above so use glTexSubImage now */
ctx->Driver.TexSubImage(ctx, 2, dstImage,
0, 0, 0, dstWidth, dstHeight, dstDepth,
*/
struct gl_sampler_object
{
+ mtx_t Mutex;
GLuint Name;
GLint RefCount;
GLchar *Label; /**< GL_KHR_debug */
*/
struct gl_program
{
+ mtx_t Mutex;
GLuint Id;
GLint RefCount;
GLubyte *String; /**< Null-terminated program text */
struct gl_uniform_block *UniformBlocks;
struct exec_list *ir;
+ struct exec_list *packed_varyings;
struct glsl_symbol_table *symbols;
bool uses_builtin_functions;
{
ubo_packing_std140,
ubo_packing_shared,
- ubo_packing_packed
+ ubo_packing_packed,
+ ubo_packing_std430
};
*/
unsigned LastClipDistanceArraySize;
- unsigned NumUniformBlocks;
+ unsigned NumBufferInterfaceBlocks;
struct gl_uniform_block *UniformBlocks;
/**
GLboolean ARB_shader_stencil_export;
GLboolean ARB_shader_storage_buffer_object;
GLboolean ARB_shader_subroutine;
+ GLboolean ARB_shader_texture_image_samples;
GLboolean ARB_shader_texture_lod;
GLboolean ARB_shading_language_packing;
GLboolean ARB_shading_language_420pack;
struct gl_perf_monitor_state PerfMonitor;
struct gl_buffer_object *DrawIndirectBuffer; /** < GL_ARB_draw_indirect */
+ struct gl_buffer_object *DispatchIndirectBuffer; /** < GL_ARB_compute_shader */
struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */
struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */
--- /dev/null
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Code related to the GL_APPLE_object_purgeable extension.
+ */
+
+
+#include "glheader.h"
+#include "enums.h"
+#include "hash.h"
+#include "imports.h"
+#include "context.h"
+#include "bufferobj.h"
+#include "fbobject.h"
+#include "mtypes.h"
+#include "objectpurge.h"
+#include "texobj.h"
+#include "teximage.h"
+
+
+/**
+ * Mark the buffer object called \p name as purgeable.
+ *
+ * Records GL_INVALID_VALUE when the lookup finds nothing, and
+ * GL_INVALID_OPERATION when the looked-up object is rejected by
+ * _mesa_is_bufferobj() or is already purgeable.
+ *
+ * \param ctx     GL context.
+ * \param name    buffer object name.
+ * \param option  passed through unchanged to the driver hook.
+ *
+ * \return 0 when the lookup or the _mesa_is_bufferobj() check fails,
+ *         GL_VOLATILE_APPLE when the object was already purgeable or the
+ *         driver provides no BufferObjectPurgeable hook, otherwise the
+ *         value returned by that hook.
+ */
+static GLenum
+buffer_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_buffer_object *obj = _mesa_lookup_bufferobj(ctx, name);
+
+   if (!obj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectPurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   if (!_mesa_is_bufferobj(obj)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glObjectPurgeable(buffer 0)");
+      return 0;
+   }
+
+   if (obj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
+      return GL_VOLATILE_APPLE;
+   }
+
+   obj->Purgeable = GL_TRUE;
+
+   /* Without a driver hook the answer defaults to GL_VOLATILE_APPLE. */
+   if (!ctx->Driver.BufferObjectPurgeable)
+      return GL_VOLATILE_APPLE;
+
+   return ctx->Driver.BufferObjectPurgeable(ctx, obj, option);
+}
+
+
+/**
+ * Mark the renderbuffer called \p name as purgeable.
+ *
+ * Records GL_INVALID_VALUE when the lookup finds nothing and
+ * GL_INVALID_OPERATION when the renderbuffer is already purgeable.
+ *
+ * \param ctx     GL context.
+ * \param name    renderbuffer name.
+ * \param option  passed through unchanged to the driver hook.
+ *
+ * \return 0 when the lookup fails, GL_VOLATILE_APPLE when the
+ *         renderbuffer was already purgeable or the driver provides no
+ *         RenderObjectPurgeable hook, otherwise the value returned by
+ *         that hook.
+ */
+static GLenum
+renderbuffer_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_renderbuffer *bufObj;
+   GLenum retval;
+
+   bufObj = _mesa_lookup_renderbuffer(ctx, name);
+   if (!bufObj) {
+      /* Report the entry point that was actually called (this is the
+       * purgeable path, not glObjectUnpurgeable).
+       */
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectPurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   if (bufObj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
+      return GL_VOLATILE_APPLE;
+   }
+
+   bufObj->Purgeable = GL_TRUE;
+
+   /* Default result when the driver does not implement the hook. */
+   retval = GL_VOLATILE_APPLE;
+   if (ctx->Driver.RenderObjectPurgeable)
+      retval = ctx->Driver.RenderObjectPurgeable(ctx, bufObj, option);
+
+   return retval;
+}
+
+
+/**
+ * Mark the texture object called \p name as purgeable.
+ *
+ * Records GL_INVALID_VALUE when the lookup finds nothing and
+ * GL_INVALID_OPERATION when the texture is already purgeable.
+ *
+ * \param ctx     GL context.
+ * \param name    texture object name.
+ * \param option  passed through unchanged to the driver hook.
+ *
+ * \return 0 when the lookup fails, GL_VOLATILE_APPLE when the texture was
+ *         already purgeable or the driver provides no
+ *         TextureObjectPurgeable hook, otherwise the value returned by
+ *         that hook.
+ */
+static GLenum
+texture_object_purgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
+
+   if (!texObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectPurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   if (texObj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectPurgeable(name = 0x%x) is already purgeable", name);
+      return GL_VOLATILE_APPLE;
+   }
+
+   texObj->Purgeable = GL_TRUE;
+
+   /* Without a driver hook the answer defaults to GL_VOLATILE_APPLE. */
+   if (!ctx->Driver.TextureObjectPurgeable)
+      return GL_VOLATILE_APPLE;
+
+   return ctx->Driver.TextureObjectPurgeable(ctx, texObj, option);
+}
+
+
+/**
+ * Validate the arguments and dispatch to the per-object-type helper that
+ * marks the object purgeable.
+ *
+ * Records GL_INVALID_VALUE for name 0 and GL_INVALID_ENUM for an option
+ * other than GL_VOLATILE_APPLE / GL_RELEASED_APPLE or an object type other
+ * than GL_TEXTURE / GL_RENDERBUFFER_EXT / GL_BUFFER_OBJECT_APPLE.
+ *
+ * \return 0 on any of the argument errors above; otherwise
+ *         GL_VOLATILE_APPLE if \p option is GL_VOLATILE_APPLE, else the
+ *         value produced by the helper.
+ */
+GLenum GLAPIENTRY
+_mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option)
+{
+   GLenum result;
+
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
+
+   if (name == 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectPurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   /* Only these two options are legal. */
+   if (option != GL_VOLATILE_APPLE && option != GL_RELEASED_APPLE) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glObjectPurgeable(name = 0x%x) invalid option: %d",
+                  name, option);
+      return 0;
+   }
+
+   if (objectType == GL_TEXTURE) {
+      result = texture_object_purgeable(ctx, name, option);
+   }
+   else if (objectType == GL_RENDERBUFFER_EXT) {
+      result = renderbuffer_purgeable(ctx, name, option);
+   }
+   else if (objectType == GL_BUFFER_OBJECT_APPLE) {
+      result = buffer_object_purgeable(ctx, name, option);
+   }
+   else {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glObjectPurgeable(name = 0x%x) invalid type: %d",
+                  name, objectType);
+      return 0;
+   }
+
+   /* In strict conformance to the spec, we must only return VOLATILE
+    * when passed the VOLATILE option. Madness.
+    *
+    * XXX First fix the spec, then fix me.
+    */
+   if (option == GL_VOLATILE_APPLE)
+      return GL_VOLATILE_APPLE;
+
+   return result;
+}
+
+
+/**
+ * Clear the purgeable flag of the buffer object called \p name.
+ *
+ * Records GL_INVALID_VALUE when the lookup finds nothing and
+ * GL_INVALID_OPERATION when the object is not currently purgeable.
+ *
+ * \param ctx     GL context.
+ * \param name    buffer object name.
+ * \param option  passed to the driver hook; also the return value when
+ *                the driver has no BufferObjectUnpurgeable hook.
+ *
+ * \return 0 on either error above, otherwise the hook's return value or
+ *         \p option when there is no hook.
+ */
+static GLenum
+buffer_object_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_buffer_object *obj = _mesa_lookup_bufferobj(ctx, name);
+
+   if (!obj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   if (!obj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectUnpurgeable(name = 0x%x) object is "
+                  " already \"unpurged\"", name);
+      return 0;
+   }
+
+   obj->Purgeable = GL_FALSE;
+
+   /* Without a driver hook, echo the caller's option back. */
+   if (!ctx->Driver.BufferObjectUnpurgeable)
+      return option;
+
+   return ctx->Driver.BufferObjectUnpurgeable(ctx, obj, option);
+}
+
+
+/**
+ * Clear the purgeable flag of the renderbuffer called \p name.
+ *
+ * Records GL_INVALID_VALUE when the lookup finds nothing and
+ * GL_INVALID_OPERATION when the renderbuffer is not currently purgeable.
+ *
+ * \param ctx     GL context.
+ * \param name    renderbuffer name.
+ * \param option  passed to the driver hook; also the return value when
+ *                the driver has no RenderObjectUnpurgeable hook.
+ *
+ * \return 0 on either error above, otherwise the hook's return value or
+ *         \p option when there is no hook.
+ */
+static GLenum
+renderbuffer_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name);
+
+   if (!rb) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   if (!rb->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectUnpurgeable(name = 0x%x) object is "
+                  " already \"unpurged\"", name);
+      return 0;
+   }
+
+   rb->Purgeable = GL_FALSE;
+
+   /* Without a driver hook, echo the caller's option back. */
+   if (!ctx->Driver.RenderObjectUnpurgeable)
+      return option;
+
+   return ctx->Driver.RenderObjectUnpurgeable(ctx, rb, option);
+}
+
+
+/**
+ * Clear the purgeable flag of the texture object called \p name.
+ *
+ * Records GL_INVALID_VALUE when the lookup finds nothing and
+ * GL_INVALID_OPERATION when the texture is not currently purgeable.
+ *
+ * \param ctx     GL context.
+ * \param name    texture object name.
+ * \param option  passed to the driver hook; also the return value when
+ *                the driver has no TextureObjectUnpurgeable hook.
+ *
+ * \return 0 on either error above, otherwise the hook's return value or
+ *         \p option when there is no hook.
+ */
+static GLenum
+texture_object_unpurgeable(struct gl_context *ctx, GLuint name, GLenum option)
+{
+   struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
+
+   if (!texObj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   if (!texObj->Purgeable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glObjectUnpurgeable(name = 0x%x) object is"
+                  " already \"unpurged\"", name);
+      return 0;
+   }
+
+   texObj->Purgeable = GL_FALSE;
+
+   /* Without a driver hook, echo the caller's option back. */
+   if (!ctx->Driver.TextureObjectUnpurgeable)
+      return option;
+
+   return ctx->Driver.TextureObjectUnpurgeable(ctx, texObj, option);
+}
+
+
+/**
+ * Validate the arguments and dispatch to the per-object-type helper that
+ * clears the purgeable flag.
+ *
+ * Records GL_INVALID_VALUE for name 0 and GL_INVALID_ENUM for an option
+ * other than GL_RETAINED_APPLE / GL_UNDEFINED_APPLE or an object type
+ * other than GL_BUFFER_OBJECT_APPLE / GL_TEXTURE / GL_RENDERBUFFER_EXT.
+ *
+ * \return 0 on any of the argument errors above, otherwise whatever the
+ *         selected helper returns.
+ */
+GLenum GLAPIENTRY
+_mesa_ObjectUnpurgeableAPPLE(GLenum objectType, GLuint name, GLenum option)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
+
+   if (name == 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glObjectUnpurgeable(name = 0x%x)", name);
+      return 0;
+   }
+
+   /* Only these two options are legal. */
+   if (option != GL_RETAINED_APPLE && option != GL_UNDEFINED_APPLE) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glObjectUnpurgeable(name = 0x%x) invalid option: %d",
+                  name, option);
+      return 0;
+   }
+
+   if (objectType == GL_BUFFER_OBJECT_APPLE)
+      return buffer_object_unpurgeable(ctx, name, option);
+
+   if (objectType == GL_TEXTURE)
+      return texture_object_unpurgeable(ctx, name, option);
+
+   if (objectType == GL_RENDERBUFFER_EXT)
+      return renderbuffer_unpurgeable(ctx, name, option);
+
+   _mesa_error(ctx, GL_INVALID_ENUM,
+               "glObjectUnpurgeable(name = 0x%x) invalid type: %d",
+               name, objectType);
+   return 0;
+}
+
+
+/**
+ * Query a parameter of the buffer object called \p name.
+ *
+ * GL_PURGEABLE_APPLE is the only \p pname handled; it stores the object's
+ * Purgeable flag in \p params.  Records GL_INVALID_VALUE when the lookup
+ * finds nothing and GL_INVALID_ENUM for any other \p pname; \p params is
+ * not written in either error case.
+ */
+static void
+get_buffer_object_parameteriv(struct gl_context *ctx, GLuint name,
+                              GLenum pname, GLint *params)
+{
+   struct gl_buffer_object *obj = _mesa_lookup_bufferobj(ctx, name);
+
+   if (!obj) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetObjectParameteriv(name = 0x%x) invalid object", name);
+      return;
+   }
+
+   if (pname == GL_PURGEABLE_APPLE) {
+      *params = obj->Purgeable;
+      return;
+   }
+
+   _mesa_error(ctx, GL_INVALID_ENUM,
+               "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
+               name, pname);
+}
+
+
+/**
+ * Query a parameter of the renderbuffer called \p name.
+ *
+ * GL_PURGEABLE_APPLE is the only \p pname handled; it stores the
+ * renderbuffer's Purgeable flag in \p params.  Records GL_INVALID_VALUE
+ * when the lookup finds nothing and GL_INVALID_ENUM for any other
+ * \p pname; \p params is not written in either error case.
+ */
+static void
+get_renderbuffer_parameteriv(struct gl_context *ctx, GLuint name,
+                             GLenum pname, GLint *params)
+{
+   struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name);
+   if (!rb) {
+      /* Name the query entry point in the message, using the same wording
+       * as get_buffer_object_parameteriv().
+       */
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetObjectParameteriv(name = 0x%x) invalid object", name);
+      return;
+   }
+
+   switch (pname) {
+   case GL_PURGEABLE_APPLE:
+      *params = rb->Purgeable;
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
+                  name, pname);
+      break;
+   }
+}
+
+
+/**
+ * Query a parameter of the texture object called \p name.
+ *
+ * GL_PURGEABLE_APPLE is the only \p pname handled; it stores the
+ * texture's Purgeable flag in \p params.  Records GL_INVALID_VALUE when
+ * the lookup finds nothing and GL_INVALID_ENUM for any other \p pname;
+ * \p params is not written in either error case.
+ */
+static void
+get_texture_object_parameteriv(struct gl_context *ctx, GLuint name,
+                               GLenum pname, GLint *params)
+{
+   struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
+   if (!texObj) {
+      /* Name the query entry point in the message, using the same wording
+       * as get_buffer_object_parameteriv().
+       */
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetObjectParameteriv(name = 0x%x) invalid object", name);
+      return;
+   }
+
+   switch (pname) {
+   case GL_PURGEABLE_APPLE:
+      *params = texObj->Purgeable;
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetObjectParameteriv(name = 0x%x) invalid enum: %d",
+                  name, pname);
+      break;
+   }
+}
+
+
+/**
+ * Validate \p name and dispatch the query to the helper for
+ * \p objectType.
+ *
+ * Records GL_INVALID_VALUE for name 0 and GL_INVALID_ENUM for an object
+ * type other than GL_TEXTURE / GL_BUFFER_OBJECT_APPLE /
+ * GL_RENDERBUFFER_EXT.  \p pname and \p params are forwarded to the
+ * helper untouched.
+ */
+void GLAPIENTRY
+_mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name, GLenum pname,
+                                GLint *params)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (name == 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetObjectParameteriv(name = 0x%x)", name);
+      return;
+   }
+
+   if (objectType == GL_TEXTURE) {
+      get_texture_object_parameteriv(ctx, name, pname, params);
+   }
+   else if (objectType == GL_BUFFER_OBJECT_APPLE) {
+      get_buffer_object_parameteriv(ctx, name, pname, params);
+   }
+   else if (objectType == GL_RENDERBUFFER_EXT) {
+      get_renderbuffer_parameteriv(ctx, name, pname, params);
+   }
+   else {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetObjectParameteriv(name = 0x%x) invalid type: %d",
+                  name, objectType);
+   }
+}
--- /dev/null
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef OBJECTPURGE_H
+#define OBJECTPURGE_H
+
+
+/* NOTE(review): this header includes nothing itself, so GLenum, GLuint,
+ * GLint and GLAPIENTRY must already be declared by the including file.
+ */
+
+/* Mark the named object purgeable; returns a GLenum status (0 on error). */
+GLenum GLAPIENTRY
+_mesa_ObjectPurgeableAPPLE(GLenum objectType, GLuint name, GLenum option);
+
+/* Clear the purgeable flag of the named object; returns a GLenum status
+ * (0 on error).
+ */
+GLenum GLAPIENTRY
+_mesa_ObjectUnpurgeableAPPLE(GLenum objectType, GLuint name, GLenum option);
+
+/* Query a parameter (GL_PURGEABLE_APPLE) of the named object into params. */
+void GLAPIENTRY
+_mesa_GetObjectParameterivAPPLE(GLenum objectType, GLuint name,
+ GLenum pname, GLint* params);
+
+
+#endif /* OBJECTPURGE_H */
/* no buffer! */
return GL_FALSE;
+ /* If the size of the image is zero then no pixels are accessed so we
+ * don't need to check anything else.
+ */
+ if (width == 0 || height == 0 || depth == 0)
+ return GL_TRUE;
+
/* get the offset to the first pixel we'll read/write */
start = _mesa_image_offset(dimensions, pack, width, height,
format, type, 0, 0, 0);
*params = pipe->InfoLog ? strlen(pipe->InfoLog) + 1 : 0;
return;
case GL_VALIDATE_STATUS:
- *params = pipe->Validated;
+ /* If pipeline is not bound, return initial value 0. */
+ *params = (ctx->_Shader->Name != pipe->Name) ? 0 : pipe->Validated;
return;
case GL_VERTEX_SHADER:
*params = pipe->CurrentProgram[MESA_SHADER_VERTEX]
case GL_PROGRAM_OUTPUT:
case GL_TRANSFORM_FEEDBACK_VARYING:
case GL_ATOMIC_COUNTER_BUFFER:
+ case GL_BUFFER_VARIABLE:
+ case GL_SHADER_STORAGE_BLOCK:
return true;
case GL_VERTEX_SUBROUTINE:
case GL_FRAGMENT_SUBROUTINE:
case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
return _mesa_has_tessellation(ctx) && _mesa_has_shader_subroutine(ctx);
- case GL_BUFFER_VARIABLE:
- case GL_SHADER_STORAGE_BLOCK:
default:
return false;
}
for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
if (shProg->ProgramResourceList[i].Type != programInterface)
continue;
- const char *name =
- _mesa_program_resource_name(&shProg->ProgramResourceList[i]);
- unsigned array_size =
- _mesa_program_resource_array_size(&shProg->ProgramResourceList[i]);
- *params = MAX2(*params, strlen(name) + (array_size ? 3 : 0) + 1);
+ unsigned len =
+ _mesa_program_resource_name_len(&shProg->ProgramResourceList[i]);
+ *params = MAX2(*params, len + 1);
}
break;
case GL_MAX_NUM_ACTIVE_VARIABLES:
switch (programInterface) {
case GL_UNIFORM_BLOCK:
+ case GL_SHADER_STORAGE_BLOCK:
for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
if (shProg->ProgramResourceList[i].Type == programInterface) {
struct gl_uniform_block *block =
case GL_PROGRAM_INPUT:
case GL_PROGRAM_OUTPUT:
case GL_UNIFORM:
+ case GL_BUFFER_VARIABLE:
case GL_TRANSFORM_FEEDBACK_VARYING:
case GL_UNIFORM_BLOCK:
+ case GL_SHADER_STORAGE_BLOCK:
res = _mesa_program_resource_find_name(shProg, programInterface, name,
&array_index);
if (!res || array_index > 0)
* convert to, then we can convert directly into the dst buffer and avoid
* the final conversion/copy from the rgba buffer to the dst buffer.
*/
- if (dst_format == rgba_format) {
+ if (dst_format == rgba_format &&
+ dst_stride == rgba_stride) {
need_convert = false;
rgba = dst;
} else {
done_swap:
/* Handle byte swapping if required */
if (packing->SwapBytes) {
- GLint swapSize = _mesa_sizeof_packed_type(type);
- if (swapSize == 2 || swapSize == 4) {
- int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
- assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
- if (swapSize == 2)
- _mesa_swap2((GLushort *) dst, width * height * swapsPerPixel);
- else if (swapSize == 4)
- _mesa_swap4((GLuint *) dst, width * height * swapsPerPixel);
- }
+ _mesa_swap_bytes_2d_image(format, type, packing,
+ width, height, dst, dst);
}
done_unmap:
return GL_NO_ERROR;
break;
case GL_UNSIGNED_SHORT:
+ case GL_UNSIGNED_INT:
case GL_UNSIGNED_INT_24_8:
if (!is_float_depth)
return GL_NO_ERROR;
mtx_lock(&oldRb->Mutex);
assert(oldRb->RefCount > 0);
oldRb->RefCount--;
- /*printf("RB DECR %p (%d) to %d\n", (void*) oldRb, oldRb->Name, oldRb->RefCount);*/
deleteFlag = (oldRb->RefCount == 0);
mtx_unlock(&oldRb->Mutex);
/* reference new renderbuffer */
mtx_lock(&rb->Mutex);
rb->RefCount++;
- /*printf("RB INCR %p (%d) to %d\n", (void*) rb, rb->Name, rb->RefCount);*/
mtx_unlock(&rb->Mutex);
*ptr = rb;
}
GLboolean deleteFlag = GL_FALSE;
struct gl_sampler_object *oldSamp = *ptr;
- /*mtx_lock(&oldSamp->Mutex);*/
+ mtx_lock(&oldSamp->Mutex);
assert(oldSamp->RefCount > 0);
oldSamp->RefCount--;
-#if 0
- printf("SamplerObj %p %d DECR to %d\n",
- (void *) oldSamp, oldSamp->Name, oldSamp->RefCount);
-#endif
deleteFlag = (oldSamp->RefCount == 0);
- /*mtx_unlock(&oldSamp->Mutex);*/
+ mtx_unlock(&oldSamp->Mutex);
if (deleteFlag) {
assert(ctx->Driver.DeleteSamplerObject);
if (samp) {
/* reference new sampler */
- /*mtx_lock(&samp->Mutex);*/
+ mtx_lock(&samp->Mutex);
if (samp->RefCount == 0) {
/* this sampler's being deleted (look just above) */
/* Not sure this can every really happen. Warn if it does. */
}
else {
samp->RefCount++;
-#if 0
- printf("SamplerObj %p %d INCR to %d\n",
- (void *) samp, samp->Name, samp->RefCount);
-#endif
*ptr = samp;
}
- /*mtx_unlock(&samp->Mutex);*/
+ mtx_unlock(&samp->Mutex);
}
}
static void
_mesa_init_sampler_object(struct gl_sampler_object *sampObj, GLuint name)
{
+ mtx_init(&sampObj->Mutex, mtx_plain);
sampObj->Name = name;
sampObj->RefCount = 1;
sampObj->WrapS = GL_REPEAT;
_mesa_delete_sampler_object(struct gl_context *ctx,
struct gl_sampler_object *sampObj)
{
+ mtx_destroy(&sampObj->Mutex);
free(sampObj->Label);
free(sampObj);
}
#include "main/context.h"
#include "main/core.h"
-#include "glsl_symbol_table.h"
-#include "ir.h"
-#include "shaderobj.h"
-#include "program/hash_table.h"
-#include "../glsl/program.h"
-#include "uniforms.h"
#include "main/enums.h"
+#include "main/shaderapi.h"
+#include "main/shaderobj.h"
+#include "main/uniforms.h"
+#include "glsl/glsl_symbol_table.h"
+#include "glsl/ir.h"
+#include "glsl/program.h"
+#include "program/hash_table.h"
+#include "util/strndup.h"
-extern "C" {
-#include "shaderapi.h"
-}
static GLint
program_resource_location(struct gl_shader_program *shProg,
const ir_variable *var;
switch (res->Type) {
case GL_UNIFORM_BLOCK:
+ case GL_SHADER_STORAGE_BLOCK:
return RESOURCE_UBO(res)->Name;
case GL_TRANSFORM_FEEDBACK_VARYING:
return RESOURCE_XFB(res)->Name;
case GL_PROGRAM_OUTPUT:
return RESOURCE_VAR(res)->name;
case GL_UNIFORM:
+ case GL_BUFFER_VARIABLE:
return RESOURCE_UNI(res)->name;
case GL_VERTEX_SUBROUTINE_UNIFORM:
case GL_GEOMETRY_SUBROUTINE_UNIFORM:
RESOURCE_XFB(res)->Size : 0;
case GL_PROGRAM_INPUT:
case GL_PROGRAM_OUTPUT:
- return RESOURCE_VAR(res)->data.max_array_access;
+ return RESOURCE_VAR(res)->type->length;
case GL_UNIFORM:
case GL_VERTEX_SUBROUTINE_UNIFORM:
case GL_GEOMETRY_SUBROUTINE_UNIFORM:
case GL_COMPUTE_SUBROUTINE_UNIFORM:
case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+ case GL_BUFFER_VARIABLE:
return RESOURCE_UNI(res)->array_elements;
case GL_VERTEX_SUBROUTINE:
case GL_GEOMETRY_SUBROUTINE:
case GL_TESS_EVALUATION_SUBROUTINE:
case GL_ATOMIC_COUNTER_BUFFER:
case GL_UNIFORM_BLOCK:
+ case GL_SHADER_STORAGE_BLOCK:
return 0;
default:
assert(!"support for resource type not implemented");
if (strncmp(rname, name, baselen) == 0) {
switch (programInterface) {
case GL_UNIFORM_BLOCK:
+ case GL_SHADER_STORAGE_BLOCK:
/* Basename match, check if array or struct. */
if (name[baselen] == '\0' ||
name[baselen] == '[' ||
}
break;
case GL_TRANSFORM_FEEDBACK_VARYING:
+ case GL_BUFFER_VARIABLE:
case GL_UNIFORM:
case GL_VERTEX_SUBROUTINE_UNIFORM:
case GL_GEOMETRY_SUBROUTINE_UNIFORM:
switch (res->Type) {
case GL_UNIFORM_BLOCK:
+ case GL_SHADER_STORAGE_BLOCK:
return RESOURCE_UBO(res)- shProg->UniformBlocks;
case GL_ATOMIC_COUNTER_BUFFER:
return RESOURCE_ATC(res) - shProg->AtomicBuffers;
switch (res->Type) {
case GL_UNIFORM_BLOCK:
case GL_ATOMIC_COUNTER_BUFFER:
+ case GL_SHADER_STORAGE_BLOCK:
if (_mesa_program_resource_index(shProg, res) == index)
return res;
break;
case GL_COMPUTE_SUBROUTINE:
case GL_TESS_CONTROL_SUBROUTINE:
case GL_TESS_EVALUATION_SUBROUTINE:
+ case GL_BUFFER_VARIABLE:
if (++idx == (int) index)
return res;
break;
return NULL;
}
+/* Decide whether "[0]" is expected to be appended to the name of an
+ * array resource.
+ *
+ * Page 61 (page 73 of the PDF) in section 2.11 of the OpenGL ES 3.0
+ * spec says:
+ *
+ *     "If the active uniform is an array, the uniform name returned in
+ *     name will always be the name of the uniform array appended with
+ *     "[0]"."
+ *
+ * The same text also appears in the OpenGL 4.2 spec.  It does not,
+ * however, appear in any previous spec.  Previous specifications are
+ * ambiguous in this regard.  However, either name can later be passed
+ * to glGetUniformLocation (and related APIs), so there shouldn't be any
+ * harm in always appending "[0]" to uniform array names.
+ *
+ * Two kinds of resource are excluded here:
+ *
+ *  - program inputs referenced by the geometry shader stage, where being
+ *    an array is the 'normal' condition of the naming convention;
+ *  - transform feedback varyings, whose names already carry the array
+ *    index.
+ *
+ * Note that TCS outputs and TES inputs should not have an index appended
+ * either; this function does not test for them.
+ *
+ * Returns true when the index should be appended.
+ */
+static bool
+add_index_to_name(struct gl_program_resource *res)
+{
+   if (res->Type == GL_TRANSFORM_FEEDBACK_VARYING)
+      return false;
+
+   if (res->Type == GL_PROGRAM_INPUT &&
+       (res->StageReferences & (1 << MESA_SHADER_GEOMETRY)))
+      return false;
+
+   return true;
+}
+
+/* Length of a program resource's name, excluding the terminating NUL:
+ * the length of the base name, plus 3 for "[0]" when the resource has a
+ * non-zero array size and add_index_to_name() asks for the suffix.
+ */
+extern unsigned
+_mesa_program_resource_name_len(struct gl_program_resource *res)
+{
+   const char *base_name = _mesa_program_resource_name(res);
+   unsigned base_len = strlen(base_name);
+
+   if (!_mesa_program_resource_array_size(res))
+      return base_len;
+
+   return add_index_to_name(res) ? base_len + 3 : base_len;
+}
+
/* Get full name of a program resource.
*/
bool
_mesa_copy_string(name, bufSize, length, _mesa_program_resource_name(res));
- /* Page 61 (page 73 of the PDF) in section 2.11 of the OpenGL ES 3.0
- * spec says:
- *
- * "If the active uniform is an array, the uniform name returned in
- * name will always be the name of the uniform array appended with
- * "[0]"."
- *
- * The same text also appears in the OpenGL 4.2 spec. It does not,
- * however, appear in any previous spec. Previous specifications are
- * ambiguous in this regard. However, either name can later be passed
- * to glGetUniformLocation (and related APIs), so there shouldn't be any
- * harm in always appending "[0]" to uniform array names.
- *
- * Geometry shader stage has different naming convention where the 'normal'
- * condition is an array, therefore for variables referenced in geometry
- * stage we do not add '[0]'.
- *
- * Note, that TCS outputs and TES inputs should not have index appended
- * either.
- */
- bool add_index = !(((programInterface == GL_PROGRAM_INPUT) &&
- res->StageReferences & (1 << MESA_SHADER_GEOMETRY)));
-
- /* Transform feedback varyings have array index already appended
- * in their names.
- */
- if (programInterface == GL_TRANSFORM_FEEDBACK_VARYING)
- add_index = false;
-
- if (add_index && _mesa_program_resource_array_size(res)) {
+ if (_mesa_program_resource_array_size(res) && add_index_to_name(res)) {
int i;
/* The comparison is strange because *length does *NOT* include the
}
}
+/* Return a newly allocated copy of the leading part of \c name, up to
+ * (but not including) the first '.' or '[', whichever comes first.  When
+ * \c name contains neither character the whole string is copied.
+ *
+ * The result comes from strndup(); the callers in this file release it
+ * with free().
+ */
+static char*
+get_top_level_name(const char *name)
+{
+   const char *dot = strchr(name, '.');
+   const char *bracket = strchr(name, '[');
+   const char *end;
+
+   if (dot && bracket)
+      end = (dot < bracket) ? dot : bracket;
+   else if (dot)
+      end = dot;
+   else if (bracket)
+      end = bracket;
+   else
+      end = name + strlen(name);   /* already a top-level name */
+
+   return strndup(name, end - name);
+}
+
+/* Return a newly allocated copy of everything that follows the first '.'
+ * in \c name, or a copy of the whole string when it contains no '.'.
+ * The callers in this file release the result with free().
+ */
+static char*
+get_var_name(const char *name)
+{
+   const char *dot = strchr(name, '.');
+
+   /* Skip past the separator when there is one. */
+   return strdup(dot ? dot + 1 : name);
+}
+
+/* Compute the TOP_LEVEL_ARRAY_SIZE value for the variable called \c name.
+ *
+ * The top-level member name is extracted from \c name and searched for
+ * among the fields of the shader-storage interface whose name matches the
+ * top-level name of the variable's block.
+ *
+ * Returns 0 for an unsized array member, the array length for a sized
+ * array member, 1 for a non-array member, and -1 when no matching member
+ * is found.
+ *
+ * NOTE(review): res is accessed through RESOURCE_UNI(), so it is presumably
+ * always a buffer variable resource - confirm against the caller.
+ */
+static GLint
+program_resource_top_level_array_size(struct gl_shader_program *shProg,
+ struct gl_program_resource *res,
+ const char *name)
+{
+ int block_index = RESOURCE_UNI(res)->block_index;
+ int array_size = -1;
+ char *var_name = get_top_level_name(name);
+ char *interface_name =
+ get_top_level_name(shProg->UniformBlocks[block_index].Name);
+
+ /* When the first component of name is the block name itself, the member
+ * name is the top-level part of what follows the first '.'.
+ */
+ if (strcmp(var_name, interface_name) == 0) {
+ /* Deal with instanced array of SSBOs */
+ char *temp_name = get_var_name(name);
+ free(var_name);
+ var_name = get_top_level_name(temp_name);
+ free(temp_name);
+ }
+
+ /* Scan the IR of every attached shader for the matching interface. */
+ for (unsigned i = 0; i < shProg->NumShaders; i++) {
+ if (shProg->Shaders[i] == NULL)
+ continue;
+
+ const gl_shader *stage = shProg->Shaders[i];
+ foreach_in_list(ir_instruction, node, stage->ir) {
+ ir_variable *var = node->as_variable();
+ if (!var || !var->get_interface_type() ||
+ var->data.mode != ir_var_shader_storage)
+ continue;
+
+ const glsl_type *interface = var->get_interface_type();
+
+ if (strcmp(interface_name, interface->name) != 0)
+ continue;
+
+ /* Note: this index shadows the outer loop's i. */
+ for (unsigned i = 0; i < interface->length; i++) {
+ const glsl_struct_field *field = &interface->fields.structure[i];
+ if (strcmp(field->name, var_name) != 0)
+ continue;
+ /* From GL_ARB_program_interface_query spec:
+ *
+ * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer
+ * identifying the number of active array elements of the top-level
+ * shader storage block member containing to the active variable is
+ * written to <params>. If the top-level block member is not
+ * declared as an array, the value one is written to <params>. If
+ * the top-level block member is an array with no declared size,
+ * the value zero is written to <params>.
+ */
+ if (field->type->is_unsized_array())
+ array_size = 0;
+ else if (field->type->is_array())
+ array_size = field->type->length;
+ else
+ array_size = 1;
+ /* First match wins: leave all three loops. */
+ goto found_top_level_array_size;
+ }
+ }
+ }
+/* Common exit: also reached when nothing matched (array_size stays -1). */
+found_top_level_array_size:
+ free(interface_name);
+ free(var_name);
+ return array_size;
+}
+
+/* Compute the TOP_LEVEL_ARRAY_STRIDE value for the variable called \c name.
+ *
+ * The member lookup mirrors program_resource_top_level_array_size(): the
+ * top-level member name is extracted from \c name and searched for among
+ * the fields of the matching shader-storage interface.
+ *
+ * Returns 0 for a non-array member, the computed element stride for an
+ * array member, and -1 when no matching member is found.
+ *
+ * NOTE(review): res is accessed through RESOURCE_UNI(), so it is presumably
+ * always a buffer variable resource - confirm against the caller.
+ */
+static GLint
+program_resource_top_level_array_stride(struct gl_shader_program *shProg,
+ struct gl_program_resource *res,
+ const char *name)
+{
+ int block_index = RESOURCE_UNI(res)->block_index;
+ int array_stride = -1;
+ char *var_name = get_top_level_name(name);
+ char *interface_name =
+ get_top_level_name(shProg->UniformBlocks[block_index].Name);
+
+ /* When the first component of name is the block name itself, the member
+ * name is the top-level part of what follows the first '.'.
+ */
+ if (strcmp(var_name, interface_name) == 0) {
+ /* Deal with instanced array of SSBOs */
+ char *temp_name = get_var_name(name);
+ free(var_name);
+ var_name = get_top_level_name(temp_name);
+ free(temp_name);
+ }
+
+ /* Scan the IR of every attached shader for the matching interface. */
+ for (unsigned i = 0; i < shProg->NumShaders; i++) {
+ if (shProg->Shaders[i] == NULL)
+ continue;
+
+ const gl_shader *stage = shProg->Shaders[i];
+ foreach_in_list(ir_instruction, node, stage->ir) {
+ ir_variable *var = node->as_variable();
+ if (!var || !var->get_interface_type() ||
+ var->data.mode != ir_var_shader_storage)
+ continue;
+
+ const glsl_type *interface = var->get_interface_type();
+
+ if (strcmp(interface_name, interface->name) != 0) {
+ continue;
+ }
+
+ /* Note: this index shadows the outer loop's i. */
+ for (unsigned i = 0; i < interface->length; i++) {
+ const glsl_struct_field *field = &interface->fields.structure[i];
+ if (strcmp(field->name, var_name) != 0)
+ continue;
+ /* From GL_ARB_program_interface_query:
+ *
+ * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer
+ * identifying the stride between array elements of the top-level
+ * shader storage block member containing the active variable is
+ * written to <params>. For top-level block members declared as
+ * arrays, the value written is the difference, in basic machine
+ * units, between the offsets of the active variable for
+ * consecutive elements in the top-level array. For top-level
+ * block members not declared as an array, zero is written to
+ * <params>."
+ */
+ if (field->type->is_array()) {
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(field->matrix_layout);
+ bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+ const glsl_type *array_type = field->type->fields.array;
+
+ /* Every packing other than std430 takes the std140 path below. */
+ if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) {
+ if (array_type->is_record()) {
+ /* Record elements: std140 size rounded up to 16. */
+ array_stride = array_type->std140_size(row_major);
+ array_stride = glsl_align(array_stride, 16);
+ } else {
+ /* Other elements: base alignment, but at least 16. */
+ unsigned element_base_align = 0;
+ element_base_align = array_type->std140_base_alignment(row_major);
+ array_stride = MAX2(element_base_align, 16);
+ }
+ } else {
+ array_stride = array_type->std430_array_stride(row_major);
+ }
+ } else {
+ array_stride = 0;
+ }
+ /* First match wins: leave all three loops. */
+ goto found_top_level_array_size;
+ }
+ }
+ }
+/* Common exit (the label name is shared with the array-size helper); also
+ * reached when nothing matched, in which case array_stride stays -1.
+ */
+found_top_level_array_size:
+ free(interface_name);
+ free(var_name);
+ return array_stride;
+}
+
/**
* Function implements following location queries:
* glGetUniformLocation
if (res->Type == GL_ATOMIC_COUNTER_BUFFER)
return RESOURCE_ATC(res)->StageReferences[stage];
- if (res->Type == GL_UNIFORM_BLOCK)
+ if (res->Type == GL_UNIFORM_BLOCK || res->Type == GL_SHADER_STORAGE_BLOCK)
return shProg->UniformBlockStageIndex[stage][index] != -1;
return res->StageReferences & (1 << stage);
{
GET_CURRENT_CONTEXT(ctx);
if (res->Type != GL_UNIFORM_BLOCK &&
- res->Type != GL_ATOMIC_COUNTER_BUFFER)
+ res->Type != GL_ATOMIC_COUNTER_BUFFER &&
+ res->Type != GL_SHADER_STORAGE_BLOCK)
goto invalid_operation;
if (res->Type == GL_UNIFORM_BLOCK) {
}
return RESOURCE_UBO(res)->NumUniforms;
}
+ } else if (res->Type == GL_SHADER_STORAGE_BLOCK) {
+ /* Shader storage blocks are read through the same RESOURCE_UBO()
+ * record as uniform blocks; their members are looked up by IndexName
+ * as GL_BUFFER_VARIABLE resources.
+ */
+ switch (prop) {
+ case GL_BUFFER_BINDING:
+ *val = RESOURCE_UBO(res)->Binding;
+ return 1;
+ case GL_BUFFER_DATA_SIZE:
+ *val = RESOURCE_UBO(res)->UniformBufferSize;
+ return 1;
+ case GL_NUM_ACTIVE_VARIABLES:
+ /* Count only the members that resolve to a GL_BUFFER_VARIABLE
+ * resource; members without one are not reported.
+ */
+ *val = 0;
+ for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
+ const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
+ struct gl_program_resource *uni =
+ _mesa_program_resource_find_name(shProg, GL_BUFFER_VARIABLE,
+ iname, NULL);
+ if (!uni)
+ continue;
+ (*val)++;
+ }
+ return 1;
+ case GL_ACTIVE_VARIABLES: {
+ /* Number of indices actually stored through val.  Members with no
+ * matching GL_BUFFER_VARIABLE resource are skipped, so this can be
+ * smaller than NumUniforms; returning NumUniforms here would report
+ * values that were never written and would disagree with the count
+ * produced for GL_NUM_ACTIVE_VARIABLES.
+ */
+ unsigned num_values = 0;
+
+ for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
+ const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
+ struct gl_program_resource *uni =
+ _mesa_program_resource_find_name(shProg, GL_BUFFER_VARIABLE,
+ iname, NULL);
+ if (!uni)
+ continue;
+ *val++ =
+ _mesa_program_resource_index(shProg, uni);
+ num_values++;
+ }
+ return num_values;
+ }
+ }
} else if (res->Type == GL_ATOMIC_COUNTER_BUFFER) {
switch (prop) {
case GL_BUFFER_BINDING:
if (res->Type != type)\
goto invalid_operation;
+#define VALIDATE_TYPE_2(type1, type2)\
+ if (res->Type != type1 && res->Type != type2)\
+ goto invalid_operation;
+
switch(prop) {
case GL_NAME_LENGTH:
switch (res->Type) {
case GL_ATOMIC_COUNTER_BUFFER:
goto invalid_operation;
- case GL_TRANSFORM_FEEDBACK_VARYING:
- *val = strlen(_mesa_program_resource_name(res)) + 1;
- break;
default:
- /* Base name +3 if array '[0]' + terminator. */
- *val = strlen(_mesa_program_resource_name(res)) +
- (_mesa_program_resource_array_size(res) > 0 ? 3 : 0) + 1;
+ /* Resource name length + terminator. */
+ *val = _mesa_program_resource_name_len(res) + 1;
}
return 1;
case GL_TYPE:
switch (res->Type) {
case GL_UNIFORM:
+ case GL_BUFFER_VARIABLE:
*val = RESOURCE_UNI(res)->type->gl_type;
return 1;
case GL_PROGRAM_INPUT:
case GL_ARRAY_SIZE:
switch (res->Type) {
case GL_UNIFORM:
+ case GL_BUFFER_VARIABLE:
*val = MAX2(RESOURCE_UNI(res)->array_elements, 1);
return 1;
case GL_PROGRAM_INPUT:
case GL_PROGRAM_OUTPUT:
- *val = MAX2(RESOURCE_VAR(res)->type->length, 1);
+ *val = MAX2(_mesa_program_resource_array_size(res), 1);
return 1;
case GL_TRANSFORM_FEEDBACK_VARYING:
*val = MAX2(RESOURCE_XFB(res)->Size, 1);
goto invalid_operation;
}
case GL_OFFSET:
- VALIDATE_TYPE(GL_UNIFORM);
+ VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
*val = RESOURCE_UNI(res)->offset;
return 1;
case GL_BLOCK_INDEX:
- VALIDATE_TYPE(GL_UNIFORM);
+ VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
*val = RESOURCE_UNI(res)->block_index;
return 1;
case GL_ARRAY_STRIDE:
- VALIDATE_TYPE(GL_UNIFORM);
+ VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
*val = RESOURCE_UNI(res)->array_stride;
return 1;
case GL_MATRIX_STRIDE:
- VALIDATE_TYPE(GL_UNIFORM);
+ VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
*val = RESOURCE_UNI(res)->matrix_stride;
return 1;
case GL_IS_ROW_MAJOR:
- VALIDATE_TYPE(GL_UNIFORM);
+ VALIDATE_TYPE_2(GL_UNIFORM, GL_BUFFER_VARIABLE);
*val = RESOURCE_UNI(res)->row_major;
return 1;
case GL_ATOMIC_COUNTER_BUFFER_INDEX:
case GL_PROGRAM_INPUT:
case GL_PROGRAM_OUTPUT:
case GL_UNIFORM_BLOCK:
+ case GL_BUFFER_VARIABLE:
+ case GL_SHADER_STORAGE_BLOCK:
case GL_ATOMIC_COUNTER_BUFFER:
*val = is_resource_referenced(shProg, res, index,
stage_from_enum(prop));
}
return count;
}
+
+ case GL_TOP_LEVEL_ARRAY_SIZE:
+ VALIDATE_TYPE(GL_BUFFER_VARIABLE);
+ *val = program_resource_top_level_array_size(shProg, res,
+ _mesa_program_resource_name(res));
+ return 1;
+
+ case GL_TOP_LEVEL_ARRAY_STRIDE:
+ VALIDATE_TYPE(GL_BUFFER_VARIABLE);
+ *val = program_resource_top_level_array_stride(shProg, res,
+ _mesa_program_resource_name(res));
+ return 1;
+
/* GL_ARB_tessellation_shader */
case GL_IS_PER_PATCH:
switch (res->Type) {
}
#undef VALIDATE_TYPE
+#undef VALIDATE_TYPE_2
invalid_enum:
_mesa_error(ctx, GL_INVALID_ENUM, "%s(%s prop %s)", caller,
*/
+#include <stdbool.h>
#include "main/glheader.h"
#include "main/context.h"
#include "main/dispatch.h"
#include "main/shaderobj.h"
#include "main/transformfeedback.h"
#include "main/uniforms.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/ir.h"
+#include "glsl/ir_uniform.h"
+#include "glsl/program.h"
#include "program/program.h"
#include "program/prog_print.h"
#include "program/prog_parameter.h"
#include "util/ralloc.h"
#include "util/hash_table.h"
-#include <stdbool.h>
-#include "../glsl/glsl_parser_extras.h"
-#include "../glsl/ir.h"
-#include "../glsl/ir_uniform.h"
-#include "../glsl/program.h"
-
-/** Define this to enable shader substitution (see below) */
-#define SHADER_SUBST 0
+#include "util/mesa-sha1.h"
/**
if (!has_ubo)
break;
- for (i = 0; i < shProg->NumUniformBlocks; i++) {
+ for (i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
/* Add one for the terminating NUL character.
*/
const GLint len = strlen(shProg->UniformBlocks[i].Name) + 1;
if (!has_ubo)
break;
- *params = shProg->NumUniformBlocks;
+ *params = 0;
+ for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
+ if (!shProg->UniformBlocks[i].IsShaderStorage)
+ (*params)++;
+ }
return;
case GL_PROGRAM_BINARY_RETRIEVABLE_HINT:
/* This enum isn't part of the OES extension for OpenGL ES 2.0. It is
return;
}
case GL_PROGRAM_SEPARABLE:
- *params = shProg->SeparateShader;
+ /* If the program has not been linked, return initial value 0. */
+ *params = (shProg->LinkStatus == GL_FALSE) ? 0 : shProg->SeparateShader;
return;
/* ARB_tessellation_shader */
* glShaderSource[ARB].
*/
static void
-shader_source(struct gl_context *ctx, GLuint shader, const GLchar *source)
+shader_source(struct gl_shader *sh, const GLchar *source)
{
- struct gl_shader *sh;
-
- sh = _mesa_lookup_shader_err(ctx, shader, "glShaderSource");
- if (!sh)
- return;
+ assert(sh);
/* free old shader source string and install new one */
free((void *)sh->Source);
link_program(ctx, programObj);
}
+#if defined(HAVE_SHA1)
+/**
+ * Generate a SHA-1 hash value string for given source string.
+ *
+ * The digest is computed over the strlen(source) bytes of \p source (the
+ * terminating NUL is not hashed) and then formatted into \p sha_str by
+ * _mesa_sha1_format().
+ *
+ * \param source   NUL-terminated string to hash.
+ * \param sha_str  Output buffer; the parameter is declared with 64 elements.
+ *                 NOTE(review): presumably receives a NUL-terminated
+ *                 hexadecimal string - confirm against _mesa_sha1_format().
+ */
+static void
+generate_sha1(const char *source, char sha_str[64])
+{
+ /* Raw digest buffer filled by _mesa_sha1_compute(). */
+ unsigned char sha[20];
+ _mesa_sha1_compute(source, strlen(source), sha);
+ _mesa_sha1_format(sha_str, sha);
+}
+
+/**
+ * Construct a full path for shader replacement functionality using
+ * the following format:
+ *
+ * <path>/<stage prefix>_<CHECKSUM>.glsl
+ *
+ * <CHECKSUM> is the string generate_sha1() produces for \p source.
+ *
+ * \param stage   Shader stage; used directly as an index into the six-entry
+ *                prefix table below, with no range check in this function.
+ * \param source  Shader source text that gets hashed.
+ * \param path    Directory part of the resulting file name.
+ * \param name    Output buffer receiving the formatted path.
+ * \param length  Size limit handed to _mesa_snprintf() for \p name.
+ */
+static void
+construct_name(const gl_shader_stage stage, const char *source,
+ const char *path, char *name, unsigned length)
+{
+ char sha[64];
+ /* File name prefix for each stage, indexed by the stage value. */
+ static const char *types[] = {
+ "VS", "TC", "TE", "GS", "FS", "CS",
+ };
+
+ generate_sha1(source, sha);
+ _mesa_snprintf(name, length, "%s/%s_%s.glsl", path, types[stage],
+ sha);
+}
+
+/**
+ * Write given shader source to a file in MESA_SHADER_DUMP_PATH.
+ *
+ * Does nothing when the MESA_SHADER_DUMP_PATH environment variable is not
+ * set.  The file name is built by construct_name() from the stage and the
+ * source text.  If the file cannot be opened for writing, a warning that
+ * includes strerror(errno) is issued through _mesa_warning() and nothing is
+ * written.
+ *
+ * \param stage   Shader stage, forwarded to construct_name().
+ * \param source  NUL-terminated shader source; written with fputs().
+ */
+static void
+dump_shader(const gl_shader_stage stage, const char *source)
+{
+ char name[PATH_MAX];
+ /* Despite the name, this only remembers that the environment variable
+ * was found unset on an earlier call, so later calls return before
+ * calling getenv() again.  The directory itself is never checked here.
+ */
+ static bool path_exists = true;
+ char *dump_path;
+ FILE *f;
+
+ if (!path_exists)
+ return;
+
+ dump_path = getenv("MESA_SHADER_DUMP_PATH");
+ if (!dump_path) {
+ path_exists = false;
+ return;
+ }
+ construct_name(stage, source, dump_path, name, PATH_MAX);
+
+ /* Mode "w": an existing file with the same name is overwritten. */
+ f = fopen(name, "w");
+ if (f) {
+ fputs(source, f);
+ fclose(f);
+ } else {
+ GET_CURRENT_CONTEXT(ctx);
+ _mesa_warning(ctx, "could not open %s for dumping shader (%s)", name,
+ strerror(errno));
+ }
+}
/**
* Read shader source code from a file.
* Useful for debugging to override an app's shader.
*/
static GLcharARB *
-read_shader(const char *fname)
+read_shader(const gl_shader_stage stage, const char *source)
{
- int shader_size = 0;
- FILE *f = fopen(fname, "r");
- GLcharARB *buffer, *shader;
- int len;
+ char name[PATH_MAX];
+ char *read_path;
+ static bool path_exists = true;
+ int len, shader_size = 0;
+ GLcharARB *buffer;
+ FILE *f;
+
+ if (!path_exists)
+ return NULL;
- if (!f) {
+ read_path = getenv("MESA_SHADER_READ_PATH");
+ if (!read_path) {
+ path_exists = false;
return NULL;
}
+ construct_name(stage, source, read_path, name, PATH_MAX);
+
+ f = fopen(name, "r");
+ if (!f)
+ return NULL;
+
/* allocate enough room for the entire shader */
fseek(f, 0, SEEK_END);
shader_size = ftell(f);
fclose(f);
- shader = strdup(buffer);
- free(buffer);
-
- return shader;
+ return buffer;
}
-
+#endif /* HAVE_SHA1 */
/**
* Called via glShaderSource() and glShaderSourceARB() API functions.
GLint *offsets;
GLsizei i, totalLength;
GLcharARB *source;
- GLuint checksum;
+ struct gl_shader *sh;
+
+#if defined(HAVE_SHA1)
+ GLcharARB *replacement;
+#endif /* HAVE_SHA1 */
+
+ sh = _mesa_lookup_shader_err(ctx, shaderObj, "glShaderSourceARB");
+ if (!sh)
+ return;
- if (!shaderObj || string == NULL) {
+ if (string == NULL) {
_mesa_error(ctx, GL_INVALID_VALUE, "glShaderSourceARB");
return;
}
source[totalLength - 1] = '\0';
source[totalLength - 2] = '\0';
- if (SHADER_SUBST) {
- /* Compute the shader's source code checksum then try to open a file
- * named newshader_<CHECKSUM>. If it exists, use it in place of the
- * original shader source code. For debugging.
- */
- char filename[100];
- GLcharARB *newSource;
-
- checksum = _mesa_str_checksum(source);
-
- _mesa_snprintf(filename, sizeof(filename), "newshader_%d", checksum);
+#if defined(HAVE_SHA1)
+ /* Dump original shader source to MESA_SHADER_DUMP_PATH and replace
+ * if corresponding entry found from MESA_SHADER_READ_PATH.
+ */
+ dump_shader(sh->Stage, source);
- newSource = read_shader(filename);
- if (newSource) {
- fprintf(stderr, "Mesa: Replacing shader %u chksum=%d with %s\n",
- shaderObj, checksum, filename);
- free(source);
- source = newSource;
- }
+ replacement = read_shader(sh->Stage, source);
+ if (replacement) {
+ free(source);
+ source = replacement;
}
+#endif /* HAVE_SHA1 */
- shader_source(ctx, shaderObj, source);
-
- if (SHADER_SUBST) {
- struct gl_shader *sh = _mesa_lookup_shader(ctx, shaderObj);
- if (sh)
- sh->SourceChecksum = checksum; /* save original checksum */
- }
+ shader_source(sh, source);
free(offsets);
}
GLsizei bufSize, GLsizei *length,
GLchar *name, const char *caller);
+extern unsigned
+_mesa_program_resource_name_len(struct gl_program_resource *res);
+
extern GLint
_mesa_program_resource_location(struct gl_shader_program *shProg,
GLenum programInterface, const char *name);
assert(old->RefCount > 0);
old->RefCount--;
- /*printf("SHADER DECR %p (%d) to %d\n",
- (void*) old, old->Name, old->RefCount);*/
deleteFlag = (old->RefCount == 0);
if (deleteFlag) {
if (sh) {
/* reference new */
sh->RefCount++;
- /*printf("SHADER INCR %p (%d) to %d\n",
- (void*) sh, sh->Name, sh->RefCount);*/
*ptr = sh;
}
}
assert(old->RefCount > 0);
old->RefCount--;
-#if 0
- printf("ShaderProgram %p ID=%u RefCount-- to %d\n",
- (void *) old, old->Name, old->RefCount);
-#endif
deleteFlag = (old->RefCount == 0);
if (deleteFlag) {
if (shProg) {
shProg->RefCount++;
-#if 0
- printf("ShaderProgram %p ID=%u RefCount++ to %d\n",
- (void *) shProg, shProg->Name, shProg->RefCount);
-#endif
*ptr = shProg;
}
}
ralloc_free(shProg->UniformBlocks);
shProg->UniformBlocks = NULL;
- shProg->NumUniformBlocks = 0;
+ shProg->NumBufferInterfaceBlocks = 0;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
ralloc_free(shProg->UniformBlockStageIndex[i]);
shProg->UniformBlockStageIndex[i] = NULL;
AM_CFLAGS = \
- $(X11_CFLAGS) \
$(PTHREAD_CFLAGS)
AM_CPPFLAGS = \
-I$(top_srcdir)/src/gtest/include \
{ "glGetProgramResourceiv", 43, -1 },
{ "glGetProgramResourceLocation", 43, -1 },
{ "glGetProgramResourceLocationIndex", 43, -1 },
-// { "glShaderStorageBlockBinding", 43, -1 }, // XXX: Add to xml
+ { "glShaderStorageBlockBinding", 43, -1 },
// { "glTextureBufferRangeEXT", 43, -1 }, // XXX: Add to xml
{ "glTexStorage2DMultisample", 43, -1 },
{ "glTexStorage3DMultisample", 43, -1 },
{ "glDrawArraysIndirect", 31, -1 },
{ "glDrawElementsIndirect", 31, -1 },
- // FINISHME: These two functions have not been implemented yet. They come
- // FINISHME: from the ARB_framebuffer_no_attachments extension.
- // { "glFramebufferParameteri", 31, -1 },
- // { "glGetFramebufferParameteriv", 31, -1 },
+ { "glFramebufferParameteri", 31, -1 },
+ { "glGetFramebufferParameteriv", 31, -1 },
{ "glGetProgramInterfaceiv", 31, -1 },
{ "glGetProgramResourceIndex", 31, -1 },
{ 0x8E5F, "GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET" },
{ 0x8F36, "GL_COPY_READ_BUFFER" },
{ 0x8F37, "GL_COPY_WRITE_BUFFER" },
+ { 0x8F39, "GL_MAX_COMBINED_SHADER_OUTPUT_RESOURCES" },
{ 0x8F90, "GL_RED_SNORM" },
{ 0x8F91, "GL_RG_SNORM" },
{ 0x8F92, "GL_RGB_SNORM" },
{ 0x8F9E, "GL_PRIMITIVE_RESTART_INDEX" },
{ 0x8F9F, "GL_MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB" },
{ 0x906F, "GL_RGB10_A2UI" },
+ { 0x90D2, "GL_SHADER_STORAGE_BUFFER" },
+ { 0x90D3, "GL_SHADER_STORAGE_BUFFER_BINDING" },
+ { 0x90D4, "GL_SHADER_STORAGE_BUFFER_START" },
+ { 0x90D5, "GL_SHADER_STORAGE_BUFFER_SIZE" },
+ { 0x90D6, "GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS" },
+ { 0x90D7, "GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS" },
+ { 0x90D8, "GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS" },
+ { 0x90D9, "GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS" },
+ { 0x90DA, "GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS" },
+ { 0x90DB, "GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS" },
+ { 0x90DC, "GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS" },
+ { 0x90DD, "GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS" },
+ { 0x90DE, "GL_MAX_SHADER_STORAGE_BLOCK_SIZE" },
+ { 0x90DF, "GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT" },
{ 0x9100, "GL_TEXTURE_2D_MULTISAMPLE" },
{ 0x9101, "GL_PROXY_TEXTURE_2D_MULTISAMPLE" },
{ 0x9102, "GL_TEXTURE_2D_MULTISAMPLE_ARRAY" },
case GL_ETC1_RGB8_OES:
case GL_COMPRESSED_RGB8_ETC2:
case GL_COMPRESSED_SRGB8_ETC2:
+ case GL_RGB_S3TC:
+ case GL_RGB4_S3TC:
+ case GL_PALETTE4_RGB8_OES:
+ case GL_PALETTE4_R5_G6_B5_OES:
+ case GL_PALETTE8_RGB8_OES:
+ case GL_PALETTE8_R5_G6_B5_OES:
return GL_RGB;
case GL_COMPRESSED_RGBA:
case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+ case GL_RGBA_S3TC:
+ case GL_RGBA4_S3TC:
+ case GL_PALETTE4_RGBA8_OES:
+ case GL_PALETTE8_RGB5_A1_OES:
+ case GL_PALETTE4_RGBA4_OES:
+ case GL_PALETTE4_RGB5_A1_OES:
+ case GL_PALETTE8_RGBA8_OES:
+ case GL_PALETTE8_RGBA4_OES:
return GL_RGBA;
case GL_COMPRESSED_ALPHA:
case MESA_FORMAT_LAYOUT_FXT1:
return _mesa_get_fxt_fetch_func(format);
case MESA_FORMAT_LAYOUT_RGTC:
+ case MESA_FORMAT_LAYOUT_LATC:
return _mesa_get_compressed_rgtc_func(format);
case MESA_FORMAT_LAYOUT_ETC1:
return _mesa_get_etc_fetch_func(format);
tempImageSlices[0] = (GLubyte *) tempImage;
_mesa_texstore(ctx, dims,
baseInternalFormat,
- MESA_FORMAT_R8G8B8A8_UNORM,
+ _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+ : MESA_FORMAT_A8B8G8R8_UNORM,
rgbaRowStride, tempImageSlices,
srcWidth, srcHeight, srcDepth,
srcFormat, srcType, srcAddr,
if (srcFormat != GL_RGB ||
srcType != GL_UNSIGNED_BYTE ||
ctx->_ImageTransferState ||
- srcPacking->RowLength != srcWidth ||
+ ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
srcPacking->SwapBytes) {
/* convert image to RGB/GLubyte */
GLubyte *tempImageSlices[1];
tempImageSlices[0] = (GLubyte *) tempImage;
_mesa_texstore(ctx, dims,
baseInternalFormat,
- MESA_FORMAT_R8G8B8A8_UNORM,
+ _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+ : MESA_FORMAT_A8B8G8R8_UNORM,
rgbaRowStride, tempImageSlices,
srcWidth, srcHeight, srcDepth,
srcFormat, srcType, srcAddr,
dstFormat == MESA_FORMAT_LA_LATC2_UNORM);
if (baseInternalFormat == GL_RG)
- tempFormat = MESA_FORMAT_R8G8_UNORM;
+ tempFormat = _mesa_little_endian() ? MESA_FORMAT_R8G8_UNORM
+ : MESA_FORMAT_G8R8_UNORM;
else
- tempFormat = MESA_FORMAT_L8A8_UNORM;
+ tempFormat = _mesa_little_endian() ? MESA_FORMAT_L8A8_UNORM
+ : MESA_FORMAT_A8L8_UNORM;
rgRowStride = 2 * srcWidth * sizeof(GLubyte);
tempImage = malloc(srcWidth * srcHeight * 2 * sizeof(GLubyte));
if (srcFormat != GL_RGB ||
srcType != GL_UNSIGNED_BYTE ||
ctx->_ImageTransferState ||
- srcPacking->RowLength != srcWidth ||
+ ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
srcPacking->SwapBytes) {
/* convert image to RGB/GLubyte */
GLubyte *tempImageSlices[1];
if (srcFormat != GL_RGBA ||
srcType != GL_UNSIGNED_BYTE ||
ctx->_ImageTransferState ||
- srcPacking->RowLength != srcWidth ||
+ ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
srcPacking->SwapBytes) {
/* convert image to RGBA/GLubyte */
GLubyte *tempImageSlices[1];
tempImageSlices[0] = (GLubyte *) tempImage;
_mesa_texstore(ctx, dims,
baseInternalFormat,
- MESA_FORMAT_R8G8B8A8_UNORM,
+ _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+ : MESA_FORMAT_A8B8G8R8_UNORM,
rgbaRowStride, tempImageSlices,
srcWidth, srcHeight, srcDepth,
srcFormat, srcType, srcAddr,
if (srcFormat != GL_RGBA ||
srcType != GL_UNSIGNED_BYTE ||
ctx->_ImageTransferState ||
- srcPacking->RowLength != srcWidth ||
+ ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
srcPacking->SwapBytes) {
/* convert image to RGBA/GLubyte */
GLubyte *tempImageSlices[1];
tempImageSlices[0] = (GLubyte *) tempImage;
_mesa_texstore(ctx, dims,
baseInternalFormat,
- MESA_FORMAT_R8G8B8A8_UNORM,
+ _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+ : MESA_FORMAT_A8B8G8R8_UNORM,
rgbaRowStride, tempImageSlices,
srcWidth, srcHeight, srcDepth,
srcFormat, srcType, srcAddr,
if (srcFormat != GL_RGBA ||
srcType != GL_UNSIGNED_BYTE ||
ctx->_ImageTransferState ||
- srcPacking->RowLength != srcWidth ||
+ ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
srcPacking->SwapBytes) {
/* convert image to RGBA/GLubyte */
GLubyte *tempImageSlices[1];
tempImageSlices[0] = (GLubyte *) tempImage;
_mesa_texstore(ctx, dims,
baseInternalFormat,
- MESA_FORMAT_R8G8B8A8_UNORM,
+ _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+ : MESA_FORMAT_A8B8G8R8_UNORM,
rgbaRowStride, tempImageSlices,
srcWidth, srcHeight, srcDepth,
srcFormat, srcType, srcAddr,
tempSlice, RGBA32_FLOAT, srcStride,
width, height,
needsRebase ? rebaseSwizzle : NULL);
+
+ /* Handle byte swapping if required */
+ if (ctx->Pack.SwapBytes) {
+ _mesa_swap_bytes_2d_image(format, type, &ctx->Pack,
+ width, height, dest, dest);
+ }
+
tempSlice += 4 * width * height;
}
do_swap:
/* Handle byte swapping if required */
- if (ctx->Pack.SwapBytes) {
- GLint swapSize = _mesa_sizeof_packed_type(type);
- if (swapSize == 2 || swapSize == 4) {
- int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
- assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
- if (swapSize == 2)
- _mesa_swap2((GLushort *) dest, width * height * swapsPerPixel);
- else if (swapSize == 4)
- _mesa_swap4((GLuint *) dest, width * height * swapsPerPixel);
- }
- }
+ if (ctx->Pack.SwapBytes)
+ _mesa_swap_bytes_2d_image(format, type, &ctx->Pack,
+ width, height, dest, dest);
/* Unmap the src texture buffer */
ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
}
/**
- * Return the simple base format for a given internal texture format.
- * For example, given GL_LUMINANCE12_ALPHA4, return GL_LUMINANCE_ALPHA.
- *
- * \param ctx GL context.
- * \param internalFormat the internal texture format token or 1, 2, 3, or 4.
- *
- * \return the corresponding \u base internal format (GL_ALPHA, GL_LUMINANCE,
- * GL_LUMANCE_ALPHA, GL_INTENSITY, GL_RGB, or GL_RGBA), or -1 if invalid enum.
- *
- * This is the format which is used during texture application (i.e. the
- * texture format and env mode determine the arithmetic used.
- */
-GLint
-_mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
-{
- switch (internalFormat) {
- case GL_ALPHA:
- case GL_ALPHA4:
- case GL_ALPHA8:
- case GL_ALPHA12:
- case GL_ALPHA16:
- return (ctx->API != API_OPENGL_CORE) ? GL_ALPHA : -1;
- case 1:
- case GL_LUMINANCE:
- case GL_LUMINANCE4:
- case GL_LUMINANCE8:
- case GL_LUMINANCE12:
- case GL_LUMINANCE16:
- return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE : -1;
- case 2:
- case GL_LUMINANCE_ALPHA:
- case GL_LUMINANCE4_ALPHA4:
- case GL_LUMINANCE6_ALPHA2:
- case GL_LUMINANCE8_ALPHA8:
- case GL_LUMINANCE12_ALPHA4:
- case GL_LUMINANCE12_ALPHA12:
- case GL_LUMINANCE16_ALPHA16:
- return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE_ALPHA : -1;
- case GL_INTENSITY:
- case GL_INTENSITY4:
- case GL_INTENSITY8:
- case GL_INTENSITY12:
- case GL_INTENSITY16:
- return (ctx->API != API_OPENGL_CORE) ? GL_INTENSITY : -1;
- case 3:
- return (ctx->API != API_OPENGL_CORE) ? GL_RGB : -1;
- case GL_RGB:
- case GL_R3_G3_B2:
- case GL_RGB4:
- case GL_RGB5:
- case GL_RGB8:
- case GL_RGB10:
- case GL_RGB12:
- case GL_RGB16:
- return GL_RGB;
- case 4:
- return (ctx->API != API_OPENGL_CORE) ? GL_RGBA : -1;
- case GL_RGBA:
- case GL_RGBA2:
- case GL_RGBA4:
- case GL_RGB5_A1:
- case GL_RGBA8:
- case GL_RGB10_A2:
- case GL_RGBA12:
- case GL_RGBA16:
- return GL_RGBA;
- default:
- ; /* fallthrough */
- }
-
- /* GL_BGRA can be an internal format *only* in OpenGL ES (1.x or 2.0).
- */
- if (_mesa_is_gles(ctx)) {
- switch (internalFormat) {
- case GL_BGRA:
- return GL_RGBA;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.ARB_ES2_compatibility) {
- switch (internalFormat) {
- case GL_RGB565:
- return GL_RGB;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.ARB_depth_texture) {
- switch (internalFormat) {
- case GL_DEPTH_COMPONENT:
- case GL_DEPTH_COMPONENT16:
- case GL_DEPTH_COMPONENT24:
- case GL_DEPTH_COMPONENT32:
- return GL_DEPTH_COMPONENT;
- case GL_DEPTH_STENCIL:
- case GL_DEPTH24_STENCIL8:
- return GL_DEPTH_STENCIL;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.ARB_texture_stencil8) {
- switch (internalFormat) {
- case GL_STENCIL_INDEX:
- case GL_STENCIL_INDEX1:
- case GL_STENCIL_INDEX4:
- case GL_STENCIL_INDEX8:
- case GL_STENCIL_INDEX16:
- return GL_STENCIL_INDEX;
- default:
- ; /* fallthrough */
- }
- }
-
- switch (internalFormat) {
- case GL_COMPRESSED_ALPHA:
- return GL_ALPHA;
- case GL_COMPRESSED_LUMINANCE:
- return GL_LUMINANCE;
- case GL_COMPRESSED_LUMINANCE_ALPHA:
- return GL_LUMINANCE_ALPHA;
- case GL_COMPRESSED_INTENSITY:
- return GL_INTENSITY;
- case GL_COMPRESSED_RGB:
- return GL_RGB;
- case GL_COMPRESSED_RGBA:
- return GL_RGBA;
- default:
- ; /* fallthrough */
- }
-
- if (ctx->Extensions.TDFX_texture_compression_FXT1) {
- switch (internalFormat) {
- case GL_COMPRESSED_RGB_FXT1_3DFX:
- return GL_RGB;
- case GL_COMPRESSED_RGBA_FXT1_3DFX:
- return GL_RGBA;
- default:
- ; /* fallthrough */
- }
- }
-
- /* Assume that the ANGLE flag will always be set if the EXT flag is set.
- */
- if (ctx->Extensions.ANGLE_texture_compression_dxt) {
- switch (internalFormat) {
- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
- return GL_RGB;
- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
- return GL_RGBA;
- default:
- ; /* fallthrough */
- }
- }
-
- if (_mesa_is_desktop_gl(ctx)
- && ctx->Extensions.ANGLE_texture_compression_dxt) {
- switch (internalFormat) {
- case GL_RGB_S3TC:
- case GL_RGB4_S3TC:
- return GL_RGB;
- case GL_RGBA_S3TC:
- case GL_RGBA4_S3TC:
- return GL_RGBA;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.MESA_ycbcr_texture) {
- if (internalFormat == GL_YCBCR_MESA)
- return GL_YCBCR_MESA;
- }
-
- if (ctx->Extensions.ARB_texture_float) {
- switch (internalFormat) {
- case GL_ALPHA16F_ARB:
- case GL_ALPHA32F_ARB:
- return GL_ALPHA;
- case GL_RGBA16F_ARB:
- case GL_RGBA32F_ARB:
- return GL_RGBA;
- case GL_RGB16F_ARB:
- case GL_RGB32F_ARB:
- return GL_RGB;
- case GL_INTENSITY16F_ARB:
- case GL_INTENSITY32F_ARB:
- return GL_INTENSITY;
- case GL_LUMINANCE16F_ARB:
- case GL_LUMINANCE32F_ARB:
- return GL_LUMINANCE;
- case GL_LUMINANCE_ALPHA16F_ARB:
- case GL_LUMINANCE_ALPHA32F_ARB:
- return GL_LUMINANCE_ALPHA;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.EXT_texture_snorm) {
- switch (internalFormat) {
- case GL_RED_SNORM:
- case GL_R8_SNORM:
- case GL_R16_SNORM:
- return GL_RED;
- case GL_RG_SNORM:
- case GL_RG8_SNORM:
- case GL_RG16_SNORM:
- return GL_RG;
- case GL_RGB_SNORM:
- case GL_RGB8_SNORM:
- case GL_RGB16_SNORM:
- return GL_RGB;
- case GL_RGBA_SNORM:
- case GL_RGBA8_SNORM:
- case GL_RGBA16_SNORM:
- return GL_RGBA;
- case GL_ALPHA_SNORM:
- case GL_ALPHA8_SNORM:
- case GL_ALPHA16_SNORM:
- return GL_ALPHA;
- case GL_LUMINANCE_SNORM:
- case GL_LUMINANCE8_SNORM:
- case GL_LUMINANCE16_SNORM:
- return GL_LUMINANCE;
- case GL_LUMINANCE_ALPHA_SNORM:
- case GL_LUMINANCE8_ALPHA8_SNORM:
- case GL_LUMINANCE16_ALPHA16_SNORM:
- return GL_LUMINANCE_ALPHA;
- case GL_INTENSITY_SNORM:
- case GL_INTENSITY8_SNORM:
- case GL_INTENSITY16_SNORM:
- return GL_INTENSITY;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.EXT_texture_sRGB) {
- switch (internalFormat) {
- case GL_SRGB_EXT:
- case GL_SRGB8_EXT:
- case GL_COMPRESSED_SRGB_EXT:
- return GL_RGB;
- case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
- return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGB : -1;
- case GL_SRGB_ALPHA_EXT:
- case GL_SRGB8_ALPHA8_EXT:
- case GL_COMPRESSED_SRGB_ALPHA_EXT:
- return GL_RGBA;
- case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
- case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
- case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
- return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGBA : -1;
- case GL_SLUMINANCE_ALPHA_EXT:
- case GL_SLUMINANCE8_ALPHA8_EXT:
- case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
- return GL_LUMINANCE_ALPHA;
- case GL_SLUMINANCE_EXT:
- case GL_SLUMINANCE8_EXT:
- case GL_COMPRESSED_SLUMINANCE_EXT:
- return GL_LUMINANCE;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Version >= 30 ||
- ctx->Extensions.EXT_texture_integer) {
- switch (internalFormat) {
- case GL_RGBA8UI_EXT:
- case GL_RGBA16UI_EXT:
- case GL_RGBA32UI_EXT:
- case GL_RGBA8I_EXT:
- case GL_RGBA16I_EXT:
- case GL_RGBA32I_EXT:
- case GL_RGB10_A2UI:
- return GL_RGBA;
- case GL_RGB8UI_EXT:
- case GL_RGB16UI_EXT:
- case GL_RGB32UI_EXT:
- case GL_RGB8I_EXT:
- case GL_RGB16I_EXT:
- case GL_RGB32I_EXT:
- return GL_RGB;
- }
- }
-
- if (ctx->Extensions.EXT_texture_integer) {
- switch (internalFormat) {
- case GL_ALPHA8UI_EXT:
- case GL_ALPHA16UI_EXT:
- case GL_ALPHA32UI_EXT:
- case GL_ALPHA8I_EXT:
- case GL_ALPHA16I_EXT:
- case GL_ALPHA32I_EXT:
- return GL_ALPHA;
- case GL_INTENSITY8UI_EXT:
- case GL_INTENSITY16UI_EXT:
- case GL_INTENSITY32UI_EXT:
- case GL_INTENSITY8I_EXT:
- case GL_INTENSITY16I_EXT:
- case GL_INTENSITY32I_EXT:
- return GL_INTENSITY;
- case GL_LUMINANCE8UI_EXT:
- case GL_LUMINANCE16UI_EXT:
- case GL_LUMINANCE32UI_EXT:
- case GL_LUMINANCE8I_EXT:
- case GL_LUMINANCE16I_EXT:
- case GL_LUMINANCE32I_EXT:
- return GL_LUMINANCE;
- case GL_LUMINANCE_ALPHA8UI_EXT:
- case GL_LUMINANCE_ALPHA16UI_EXT:
- case GL_LUMINANCE_ALPHA32UI_EXT:
- case GL_LUMINANCE_ALPHA8I_EXT:
- case GL_LUMINANCE_ALPHA16I_EXT:
- case GL_LUMINANCE_ALPHA32I_EXT:
- return GL_LUMINANCE_ALPHA;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.ARB_texture_rg) {
- switch (internalFormat) {
- case GL_R16F:
- case GL_R32F:
- if (!ctx->Extensions.ARB_texture_float)
- break;
- return GL_RED;
- case GL_R8I:
- case GL_R8UI:
- case GL_R16I:
- case GL_R16UI:
- case GL_R32I:
- case GL_R32UI:
- if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
- break;
- /* FALLTHROUGH */
- case GL_R8:
- case GL_R16:
- case GL_RED:
- case GL_COMPRESSED_RED:
- return GL_RED;
-
- case GL_RG16F:
- case GL_RG32F:
- if (!ctx->Extensions.ARB_texture_float)
- break;
- return GL_RG;
- case GL_RG8I:
- case GL_RG8UI:
- case GL_RG16I:
- case GL_RG16UI:
- case GL_RG32I:
- case GL_RG32UI:
- if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
- break;
- /* FALLTHROUGH */
- case GL_RG:
- case GL_RG8:
- case GL_RG16:
- case GL_COMPRESSED_RG:
- return GL_RG;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.EXT_texture_shared_exponent) {
- switch (internalFormat) {
- case GL_RGB9_E5_EXT:
- return GL_RGB;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.EXT_packed_float) {
- switch (internalFormat) {
- case GL_R11F_G11F_B10F_EXT:
- return GL_RGB;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.ARB_depth_buffer_float) {
- switch (internalFormat) {
- case GL_DEPTH_COMPONENT32F:
- return GL_DEPTH_COMPONENT;
- case GL_DEPTH32F_STENCIL8:
- return GL_DEPTH_STENCIL;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.ARB_texture_compression_rgtc) {
- switch (internalFormat) {
- case GL_COMPRESSED_RED_RGTC1:
- case GL_COMPRESSED_SIGNED_RED_RGTC1:
- return GL_RED;
- case GL_COMPRESSED_RG_RGTC2:
- case GL_COMPRESSED_SIGNED_RG_RGTC2:
- return GL_RG;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.EXT_texture_compression_latc) {
- switch (internalFormat) {
- case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
- case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
- return GL_LUMINANCE;
- case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
- case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
- return GL_LUMINANCE_ALPHA;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.ATI_texture_compression_3dc) {
- switch (internalFormat) {
- case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
- return GL_LUMINANCE_ALPHA;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
- switch (internalFormat) {
- case GL_ETC1_RGB8_OES:
- return GL_RGB;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.KHR_texture_compression_astc_ldr &&
- _mesa_is_astc_format(internalFormat))
- return GL_RGBA;
-
- if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
- switch (internalFormat) {
- case GL_COMPRESSED_RGB8_ETC2:
- case GL_COMPRESSED_SRGB8_ETC2:
- return GL_RGB;
- case GL_COMPRESSED_RGBA8_ETC2_EAC:
- case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
- case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
- case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
- return GL_RGBA;
- case GL_COMPRESSED_R11_EAC:
- case GL_COMPRESSED_SIGNED_R11_EAC:
- return GL_RED;
- case GL_COMPRESSED_RG11_EAC:
- case GL_COMPRESSED_SIGNED_RG11_EAC:
- return GL_RG;
- default:
- ; /* fallthrough */
- }
- }
-
- if (_mesa_is_desktop_gl(ctx) &&
- ctx->Extensions.ARB_texture_compression_bptc) {
- switch (internalFormat) {
- case GL_COMPRESSED_RGBA_BPTC_UNORM:
- case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
- return GL_RGBA;
- case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
- case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
- return GL_RGB;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->API == API_OPENGLES) {
- switch (internalFormat) {
- case GL_PALETTE4_RGB8_OES:
- case GL_PALETTE4_R5_G6_B5_OES:
- case GL_PALETTE8_RGB8_OES:
- case GL_PALETTE8_R5_G6_B5_OES:
- return GL_RGB;
- case GL_PALETTE4_RGBA8_OES:
- case GL_PALETTE8_RGB5_A1_OES:
- case GL_PALETTE4_RGBA4_OES:
- case GL_PALETTE4_RGB5_A1_OES:
- case GL_PALETTE8_RGBA8_OES:
- case GL_PALETTE8_RGBA4_OES:
- return GL_RGBA;
- default:
- ; /* fallthrough */
- }
- }
-
- return -1; /* error */
-}
-
-
-/**
* For cube map faces, return a face index in [0,5].
* For other targets return 0;
*/
return GL_TRUE;
}
- /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
- * combinations of format, internalFormat, and type that can be used.
- * Formats and types that require additional extensions (e.g., GL_FLOAT
- * requires GL_OES_texture_float) are filtered elsewhere.
- */
- if (_mesa_is_gles(ctx) &&
- texture_format_error_check_gles(ctx, format, type, internalFormat,
- dimensions, "glTexImage%dD")) {
- return GL_TRUE;
+ /* Check incoming image format and type */
+ err = _mesa_error_check_format_and_type(ctx, format, type);
+ if (err != GL_NO_ERROR) {
+ /* Prior to OpenGL-ES 2.0, an INVALID_VALUE is expected instead of
+ * INVALID_ENUM. From page 73 OpenGL ES 1.1 spec:
+ *
+ * "Specifying a value for internalformat that is not one of the
+ * above (acceptable) values generates the error INVALID VALUE."
+ */
+ if (err == GL_INVALID_ENUM && _mesa_is_gles(ctx) && ctx->Version < 20)
+ err = GL_INVALID_VALUE;
+
+ _mesa_error(ctx, err,
+ "glTexImage%dD(incompatible format = %s, type = %s)",
+ dimensions, _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type));
+ return GL_TRUE;
}
/* Check internalFormat */
return GL_TRUE;
}
- /* Check incoming image format and type */
- err = _mesa_error_check_format_and_type(ctx, format, type);
- if (err != GL_NO_ERROR) {
- _mesa_error(ctx, err,
- "glTexImage%dD(incompatible format = %s, type = %s)",
- dimensions, _mesa_enum_to_string(format),
- _mesa_enum_to_string(type));
+ /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
+ * combinations of format, internalFormat, and type that can be used.
+ * Formats and types that require additional extensions (e.g., GL_FLOAT
+ * requires GL_OES_texture_float) are filtered elsewhere.
+ */
+ if (_mesa_is_gles(ctx) &&
+ texture_format_error_check_gles(ctx, format, type, internalFormat,
+ dimensions, "glTexImage%dD")) {
return GL_TRUE;
}
/** \name Internal functions */
/*@{*/
-extern GLint
-_mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat );
-
-
extern GLboolean
_mesa_is_proxy_texture(GLenum target);
* OTHER DEALINGS IN THE SOFTWARE.
*/
-
/**
* \file texstorage.c
* GL_ARB_texture_storage functions
*/
-
-
#include "glheader.h"
#include "context.h"
#include "enums.h"
/** Helper to get a particular texture image in a texture object */
static struct gl_texture_image *
-get_tex_image(struct gl_context *ctx,
+get_tex_image(struct gl_context *ctx,
struct gl_texture_object *texObj,
GLuint face, GLuint level)
{
0, internalFormat, texFormat);
}
- _mesa_next_mipmap_level_size(target, 0, levelWidth, levelHeight, levelDepth,
+ _mesa_next_mipmap_level_size(target, 0,
+ levelWidth, levelHeight, levelDepth,
&levelWidth, &levelHeight, &levelDepth);
}
return GL_TRUE;
GLboolean
-_mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat)
+_mesa_is_legal_tex_storage_format(const struct gl_context *ctx,
+ GLenum internalformat)
{
/* check internal format - note that only sized formats are allowed */
switch (internalformat) {
}
}
+
/**
* Default ctx->Driver.AllocTextureStorage() handler.
*
"glTex%sStorage%uD(width, height or depth < 1)",
suffix, dims);
return GL_TRUE;
- }
+ }
if (_mesa_is_compressed_format(ctx, internalformat)) {
GLenum err;
_mesa_error(ctx, GL_INVALID_VALUE, "glTex%sStorage%uD(levels < 1)",
suffix, dims);
return GL_TRUE;
- }
+ }
/* check levels against maximum (note different error than above) */
if (levels > (GLint) _mesa_max_texture_levels(ctx, target)) {
return; /* error was recorded */
}
-
texFormat = _mesa_choose_texture_format(ctx, texObj, target, 0,
internalformat, GL_NONE, GL_NONE);
assert(texFormat != MESA_FORMAT_NONE);
}
}
+
/**
* Helper used by _mesa_TexStorage1/2/3D().
*/
struct gl_texture_object *texObj;
GET_CURRENT_CONTEXT(ctx);
- /* target check */
- /* This is done here so that _mesa_texture_storage can receive unsized
- * formats. */
+ /* Check target. This is done here so that _mesa_texture_storage
+ * can receive unsized formats.
+ */
if (!legal_texobj_target(ctx, dims, target)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glTexStorage%uD(illegal target=%s)",
_mesa_enum_to_string(target), levels,
_mesa_enum_to_string(internalformat),
width, height, depth);
+
/* Check the format to make sure it is sized. */
if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) {
_mesa_error(ctx, GL_INVALID_ENUM,
internalformat, width, height, depth, false);
}
+
/**
* Helper used by _mesa_TextureStorage1/2/3D().
*/
return;
}
- /* target check */
- /* This is done here so that _mesa_texture_storage can receive unsized
- * formats. */
+ /* Check target. This is done here so that _mesa_texture_storage
+ * can receive unsized formats.
+ */
if (!legal_texobj_target(ctx, dims, texObj->Target)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glTextureStorage%uD(illegal target=%s)",
levels, internalformat, width, height, depth, true);
}
+
void GLAPIENTRY
_mesa_TexStorage1D(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width)
texstorage(3, target, levels, internalformat, width, height, depth);
}
+
void GLAPIENTRY
_mesa_TextureStorage1D(GLuint texture, GLsizei levels, GLenum internalformat,
GLsizei width)
texturestorage(2, texture, levels, internalformat, width, height, 1);
}
+
void GLAPIENTRY
_mesa_TextureStorage3D(GLuint texture, GLsizei levels, GLenum internalformat,
GLsizei width, GLsizei height, GLsizei depth)
}
-
void GLAPIENTRY
_mesa_TextureStorage3DEXT(GLuint texture, GLenum target, GLsizei levels,
GLenum internalformat,
GLsizei width, GLsizei height, GLsizei depth);
extern GLboolean
-_mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat);
+_mesa_is_legal_tex_storage_format(const struct gl_context *ctx,
+ GLenum internalformat);
extern GLboolean
_mesa_AllocTextureStorage_sw(struct gl_context *ctx,
*/
GLint swapSize = _mesa_sizeof_packed_type(srcType);
if (swapSize == 2 || swapSize == 4) {
- int bytesPerPixel = _mesa_bytes_per_pixel(srcFormat, srcType);
- int swapsPerPixel = bytesPerPixel / swapSize;
- int elementCount = srcWidth * srcHeight * srcDepth;
- assert(bytesPerPixel % swapSize == 0);
- tempImage = malloc(elementCount * bytesPerPixel);
+ int imageStride = _mesa_image_image_stride(srcPacking, srcWidth, srcHeight, srcFormat, srcType);
+ int bufferSize = imageStride * srcDepth;
+ int layer;
+ const uint8_t *src;
+ uint8_t *dst;
+
+ tempImage = malloc(bufferSize);
if (!tempImage)
return GL_FALSE;
- if (swapSize == 2)
- _mesa_swap2_copy(tempImage, (GLushort *) srcAddr,
- elementCount * swapsPerPixel);
- else
- _mesa_swap4_copy(tempImage, (GLuint *) srcAddr,
- elementCount * swapsPerPixel);
+ src = srcAddr;
+ dst = tempImage;
+ for (layer = 0; layer < srcDepth; layer++) {
+ _mesa_swap_bytes_2d_image(srcFormat, srcType,
+ srcPacking,
+ srcWidth, srcHeight,
+ dst, src);
+ src += imageStride;
+ dst += imageStride;
+ }
srcAddr = tempImage;
}
}
GLuint cols, GLuint rows,
GLint location, GLsizei count,
GLboolean transpose,
- const GLvoid *values, GLenum type)
+ const GLvoid *values, enum glsl_base_type basicType)
{
unsigned offset;
unsigned vectors;
return;
}
- assert(type == GL_FLOAT || type == GL_DOUBLE);
- size_mul = type == GL_DOUBLE ? 2 : 1;
+ assert(basicType == GLSL_TYPE_FLOAT || basicType == GLSL_TYPE_DOUBLE);
+ size_mul = basicType == GLSL_TYPE_DOUBLE ? 2 : 1;
assert(!uni->type->is_sampler());
vectors = uni->type->matrix_columns;
}
}
+ /* Section 2.11.7 (Uniform Variables) of the OpenGL 4.2 Core Profile spec
+ * says:
+ *
+ * "If any of the following conditions occur, an INVALID_OPERATION
+ * error is generated by the Uniform* commands, and no uniform values
+ * are changed:
+ *
+ * ...
+ *
+ * - if the uniform declared in the shader is not of type boolean and
+ * the type indicated in the name of the Uniform* command used does
+ * not match the type of the uniform"
+ *
+ * There are no Boolean matrix types, so we do not need to allow
+ * GLSL_TYPE_BOOL here (as _mesa_uniform does).
+ */
+ if (uni->type->base_type != basicType) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glUniformMatrix%ux%u(\"%s\"@%d is %s, not %s)",
+ cols, rows, uni->name, location,
+ glsl_type_name(uni->type->base_type),
+ glsl_type_name(basicType));
+ return;
+ }
+
if (unlikely(ctx->_Shader->Flags & GLSL_UNIFORMS)) {
log_uniform(values, uni->type->base_type, components, vectors, count,
bool(transpose), shProg, location, uni);
if (!transpose) {
memcpy(&uni->storage[elements * offset], values,
sizeof(uni->storage[0]) * elements * count * size_mul);
- } else if (type == GL_FLOAT) {
+ } else if (basicType == GLSL_TYPE_FLOAT) {
/* Copy and transpose the matrix.
*/
const float *src = (const float *)values;
src += elements;
}
} else {
- assert(type == GL_DOUBLE);
+ assert(basicType == GLSL_TYPE_DOUBLE);
const double *src = (const double *)values;
double *dst = (double *)&uni->storage[elements * offset].f;
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 2, 2, location, count, transpose, value, GL_FLOAT);
+ 2, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 3, 3, location, count, transpose, value, GL_FLOAT);
+ 3, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 4, 4, location, count, transpose, value, GL_FLOAT);
+ 4, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
/** Same as above with direct state access **/
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix2fv");
- _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_FLOAT);
+ _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix3fv");
- _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_FLOAT);
+ _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix4fv");
- _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_FLOAT);
+ _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 2, 3, location, count, transpose, value, GL_FLOAT);
+ 2, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 3, 2, location, count, transpose, value, GL_FLOAT);
+ 3, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 2, 4, location, count, transpose, value, GL_FLOAT);
+ 2, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 4, 2, location, count, transpose, value, GL_FLOAT);
+ 4, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 3, 4, location, count, transpose, value, GL_FLOAT);
+ 3, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 4, 3, location, count, transpose, value, GL_FLOAT);
+ 4, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
/** Same as above with direct state access **/
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix2x3fv");
- _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_FLOAT);
+ _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix3x2fv");
- _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_FLOAT);
+ _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix2x4fv");
- _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_FLOAT);
+ _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix4x2fv");
- _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_FLOAT);
+ _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix3x4fv");
- _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_FLOAT);
+ _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix4x3fv");
- _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_FLOAT);
+ _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
}
if (!shProg)
return;
- if (uniformBlockIndex >= shProg->NumUniformBlocks) {
+ if (uniformBlockIndex >= shProg->NumBufferInterfaceBlocks) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glUniformBlockBinding(block index %u >= %u)",
- uniformBlockIndex, shProg->NumUniformBlocks);
+ uniformBlockIndex, shProg->NumBufferInterfaceBlocks);
return;
}
}
}
+/**
+ * Set the shader storage buffer binding point used by one buffer interface
+ * block of a program object.
+ *
+ * GL_INVALID_OPERATION is recorded when ARB_shader_storage_buffer_object is
+ * not enabled for the context.  GL_INVALID_VALUE is recorded when
+ * \p shaderStorageBlockIndex is not below the program's
+ * NumBufferInterfaceBlocks, or when \p shaderStorageBlockBinding is not
+ * below ctx->Const.MaxShaderStorageBufferBindings.  Every error path returns
+ * before the program is modified.
+ *
+ * \param program                    name of the program object to modify
+ * \param shaderStorageBlockIndex    index of the block within the program
+ * \param shaderStorageBlockBinding  binding point to assign to that block
+ */
+void GLAPIENTRY
+_mesa_ShaderStorageBlockBinding(GLuint program,
+                                GLuint shaderStorageBlockIndex,
+                                GLuint shaderStorageBlockBinding)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg;
+   int stage;
+
+   if (!ctx->Extensions.ARB_shader_storage_buffer_object) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glShaderStorageBlockBinding");
+      return;
+   }
+
+   /* NOTE(review): the _err lookup presumably records the GL error itself
+    * for a bad program name -- nothing further is reported here.
+    */
+   shProg = _mesa_lookup_shader_program_err(ctx, program,
+                                            "glShaderStorageBlockBinding");
+   if (!shProg)
+      return;
+
+   if (shaderStorageBlockIndex >= shProg->NumBufferInterfaceBlocks) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glShaderStorageBlockBinding(block index %u >= %u)",
+                  shaderStorageBlockIndex,
+                  shProg->NumBufferInterfaceBlocks);
+      return;
+   }
+
+   if (shaderStorageBlockBinding >=
+       ctx->Const.MaxShaderStorageBufferBindings) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glShaderStorageBlockBinding(block binding %u >= %u)",
+                  shaderStorageBlockBinding,
+                  ctx->Const.MaxShaderStorageBufferBindings);
+      return;
+   }
+
+   /* Nothing to flush or flag when the requested binding is already set. */
+   if (shProg->UniformBlocks[shaderStorageBlockIndex].Binding ==
+       shaderStorageBlockBinding)
+      return;
+
+   FLUSH_VERTICES(ctx, 0);
+   ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+   shProg->UniformBlocks[shaderStorageBlockIndex].Binding =
+      shaderStorageBlockBinding;
+
+   /* Mirror the new binding into the per-stage copy of the block for every
+    * stage; entries whose stage index is -1 are skipped.
+    */
+   for (stage = 0; stage < MESA_SHADER_STAGES; stage++) {
+      const int blockInStage =
+         shProg->UniformBlockStageIndex[stage][shaderStorageBlockIndex];
+
+      if (blockInStage == -1)
+         continue;
+
+      shProg->_LinkedShaders[stage]->UniformBlocks[blockInStage].Binding =
+         shaderStorageBlockBinding;
+   }
+}
/**
* Generic program resource property query.
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 2, 2, location, count, transpose, value, GL_DOUBLE);
+ 2, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 3, 3, location, count, transpose, value, GL_DOUBLE);
+ 3, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 4, 4, location, count, transpose, value, GL_DOUBLE);
+ 4, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 2, 3, location, count, transpose, value, GL_DOUBLE);
+ 2, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 3, 2, location, count, transpose, value, GL_DOUBLE);
+ 3, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 2, 4, location, count, transpose, value, GL_DOUBLE);
+ 2, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 4, 2, location, count, transpose, value, GL_DOUBLE);
+ 4, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 3, 4, location, count, transpose, value, GL_DOUBLE);
+ 3, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
{
GET_CURRENT_CONTEXT(ctx);
_mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
- 4, 3, location, count, transpose, value, GL_DOUBLE);
+ 4, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix2dv");
- _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_DOUBLE);
+ _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix3dv");
- _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_DOUBLE);
+ _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix4dv");
- _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_DOUBLE);
+ _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix2x3dv");
- _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_DOUBLE);
+ _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix3x2dv");
- _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_DOUBLE);
+ _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix2x4dv");
- _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_DOUBLE);
+ _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix4x2dv");
- _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_DOUBLE);
+ _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix3x4dv");
- _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_DOUBLE);
+ _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
void GLAPIENTRY
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
"glProgramUniformMatrix4x3dv");
- _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_DOUBLE);
+ _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
}
GLuint uniformBlockIndex,
GLuint uniformBlockBinding);
void GLAPIENTRY
+_mesa_ShaderStorageBlockBinding(GLuint program,
+ GLuint shaderStorageBlockIndex,
+ GLuint shaderStorageBlockBinding);
+void GLAPIENTRY
_mesa_GetActiveAtomicCounterBufferiv(GLuint program, GLuint bufferIndex,
GLenum pname, GLint *params);
void GLAPIENTRY
GLuint cols, GLuint rows,
GLint location, GLsizei count,
GLboolean transpose,
- const GLvoid *values, GLenum type);
+ const GLvoid *values, enum glsl_base_type basicType);
void
_mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
"glPointSizePointer(ES 1.x only)");
return;
}
-
+
update_array(ctx, "glPointSizePointer", VERT_ATTRIB_POINT_SIZE,
legalTypes, 1, 1,
1, type, stride, GL_FALSE, GL_FALSE, GL_FALSE, ptr);
}
goto error;
case GL_VERTEX_ATTRIB_BINDING:
- if (_mesa_is_desktop_gl(ctx)) {
+ if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles31(ctx)) {
return array->VertexBinding - VERT_ATTRIB_GENERIC0;
}
goto error;
case GL_VERTEX_ATTRIB_RELATIVE_OFFSET:
- if (_mesa_is_desktop_gl(ctx)) {
+ if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles31(ctx)) {
return array->RelativeOffset;
}
goto error;
return NULL;
}
- assert(VERT_ATTRIB_GENERIC(index) < ARRAY_SIZE(ctx->Array.VAO->VertexAttrib));
+ assert(VERT_ATTRIB_GENERIC(index) <
+ ARRAY_SIZE(ctx->Array.VAO->VertexAttrib));
FLUSH_CURRENT(ctx, 0);
return ctx->Current.Attrib[VERT_ATTRIB_GENERIC(index)];
GET_CURRENT_CONTEXT(ctx);
if (pname == GL_CURRENT_VERTEX_ATTRIB_ARB) {
- const GLdouble *v = (const GLdouble *)get_current_attrib(ctx, index, "glGetVertexAttribLdv");
+ const GLdouble *v =
+ (const GLdouble *)get_current_attrib(ctx, index,
+ "glGetVertexAttribLdv");
if (v != NULL) {
params[0] = v[0];
params[1] = v[1];
return;
}
- assert(VERT_ATTRIB_GENERIC(index) < ARRAY_SIZE(ctx->Array.VAO->VertexAttrib));
+ assert(VERT_ATTRIB_GENERIC(index) <
+ ARRAY_SIZE(ctx->Array.VAO->VertexAttrib));
- *pointer = (GLvoid *) ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_GENERIC(index)].Ptr;
+ *pointer = (GLvoid *)
+ ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_GENERIC(index)].Ptr;
}
* required to be the same, so in practice this doesn't matter.
*/
if (index >= ctx->Const.MaxVertexAttribBindings) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexArrayIndexed64iv("
- "index %d >= the value of GL_MAX_VERTEX_ATTRIB_BINDINGS (%d))",
+ /* The literal is split in two; the blank after "index" stays in the
+  * first half so the message still reads "index <n> >= ...".
+  */
+ _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexArrayIndexed64iv(index "
+ "%d >= the value of GL_MAX_VERTEX_ATTRIB_BINDINGS (%d))",
index, ctx->Const.MaxVertexAttribBindings);
return;
}
* GL_ARB_vertex_attrib_binding
*/
static void
-vertex_array_vertex_buffer(struct gl_context *ctx, struct gl_vertex_array_object *vao,
+vertex_array_vertex_buffer(struct gl_context *ctx,
+ struct gl_vertex_array_object *vao,
GLuint bindingIndex, GLuint buffer, GLintptr offset,
GLsizei stride, const char *func)
{
return;
}
- if (ctx->API == API_OPENGL_CORE && ctx->Version >= 44 &&
+ if (((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) || _mesa_is_gles31(ctx)) &&
stride > ctx->Const.MaxVertexAttribStride) {
_mesa_error(ctx, GL_INVALID_VALUE, "%s(stride=%d > "
"GL_MAX_VERTEX_ATTRIB_STRIDE)", func, stride);
return;
}
- if (buffer == vao->VertexBinding[VERT_ATTRIB_GENERIC(bindingIndex)].BufferObj->Name) {
+ if (buffer ==
+ vao->VertexBinding[VERT_ATTRIB_GENERIC(bindingIndex)].BufferObj->Name) {
vbo = vao->VertexBinding[VERT_ATTRIB_GENERIC(bindingIndex)].BufferObj;
} else if (buffer != 0) {
vbo = _mesa_lookup_bufferobj(ctx, buffer);
* Otherwise, we fall back to the same compat profile behavior as other
* object references (automatically gen it).
*/
- if (!_mesa_handle_bind_buffer_gen(ctx, GL_ARRAY_BUFFER, buffer,
- &vbo, func))
+ if (!_mesa_handle_bind_buffer_gen(ctx, buffer, &vbo, func))
return;
} else {
/* The ARB_vertex_attrib_binding spec says:
* "An INVALID_OPERATION error is generated if no vertex array object
* is bound."
*/
- if (ctx->API == API_OPENGL_CORE &&
+ if ((ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) &&
ctx->Array.VAO == ctx->Array.DefaultVAO) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glBindVertexBuffer(No array object bound)");
* is an oversight. In the OpenGL 4.3 (Core Profile) spec, it applies
* to all three functions.
*/
- if (ctx->API == API_OPENGL_CORE &&
+ if ((ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) &&
ctx->Array.VAO == ctx->Array.DefaultVAO) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(No array object bound)", func);
* "An INVALID_OPERATION error is generated if no vertex array object
* is bound."
*/
- if (ctx->API == API_OPENGL_CORE &&
+ if ((ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) &&
ctx->Array.VAO == ctx->Array.DefaultVAO) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glVertexAttribBinding(No array object bound)");
* "An INVALID_OPERATION error is generated if no vertex array object
* is bound."
*/
- if (ctx->API == API_OPENGL_CORE &&
+ if ((ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) &&
ctx->Array.VAO == ctx->Array.DefaultVAO) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glVertexBindingDivisor(No array object bound)");
void GLAPIENTRY
-_mesa_VertexArrayBindingDivisor(GLuint vaobj, GLuint bindingIndex, GLuint divisor)
+_mesa_VertexArrayBindingDivisor(GLuint vaobj, GLuint bindingIndex,
+ GLuint divisor)
{
struct gl_vertex_array_object *vao;
GET_CURRENT_CONTEXT(ctx);
/**
* Initialize vertex array state for given context.
*/
-void
+void
_mesa_init_varray(struct gl_context *ctx)
{
ctx->Array.DefaultVAO = ctx->Driver.NewArrayObject(ctx, 0);
/**
* Free vertex array state for given context.
*/
-void
+void
_mesa_free_varray_data(struct gl_context *ctx)
{
_mesa_HashDeleteAll(ctx->Array.Objects, delete_arrayobj_cb, ctx);
extensions->ARB_compute_shader &&
extensions->ARB_draw_indirect &&
extensions->ARB_explicit_uniform_location &&
- false /*extensions->ARB_framebuffer_no_attachments*/ &&
+ extensions->ARB_framebuffer_no_attachments &&
extensions->ARB_shader_atomic_counters &&
extensions->ARB_shader_image_load_store &&
extensions->ARB_shader_image_size &&
case ir_unop_dFdy_coarse:
case ir_unop_dFdy_fine:
case ir_unop_subroutine_to_int:
+ case ir_unop_get_buffer_size:
assert(!"not supported");
break;
+ case ir_unop_ssbo_unsized_array_length:
case ir_quadop_vector:
/* This operation should have already been handled.
*/
case ir_query_levels:
assert(!"Unexpected ir_query_levels opcode");
break;
+ case ir_texture_samples:
+ unreachable("Unexpected ir_texture_samples opcode");
}
const glsl_type *sampler_type = ir->sampler->type;
if (!ctx->Driver.LinkShader(ctx, prog)) {
prog->LinkStatus = GL_FALSE;
} else {
- build_program_resource_list(ctx, prog);
+ build_program_resource_list(prog);
}
}
static void
ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
- nir_ssa_def *dp3 = nir_fdot3(b, src[0], src[1]);
- ptn_move_dest(b, dest, nir_fadd(b, dp3, ptn_channel(b, src[1], W)));
+   /* A single fdph op takes the place of the removed fdot3 + fadd of
+    * src[1].w sequence.
+    */
+   nir_ssa_def *dph = nir_fdph(b, src[0], src[1]);
+
+   ptn_move_dest(b, dest, dph);
}
static void
c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);
nir_register *reg;
- for (int i = 0; i < c->prog->NumTemporaries; i++) {
+ for (unsigned i = 0; i < c->prog->NumTemporaries; i++) {
reg = nir_local_reg_create(b->impl);
if (!reg) {
c->error = true;
assert(prog);
memset(prog, 0, sizeof(*prog));
+ mtx_init(&prog->Mutex, mtx_plain);
prog->Id = id;
prog->Target = target;
prog->RefCount = 1;
ralloc_free(prog->nir);
}
+ mtx_destroy(&prog->Mutex);
free(prog);
}
if (*ptr) {
GLboolean deleteFlag;
+ struct gl_program *oldProg = *ptr;
- /*mtx_lock(&(*ptr)->Mutex);*/
-#if 0
- printf("Program %p ID=%u Target=%s Refcount-- to %d\n",
- *ptr, (*ptr)->Id,
- ((*ptr)->Target == GL_VERTEX_PROGRAM_ARB ? "VP" :
- ((*ptr)->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")),
- (*ptr)->RefCount - 1);
-#endif
- assert((*ptr)->RefCount > 0);
- (*ptr)->RefCount--;
+ mtx_lock(&oldProg->Mutex);
+ assert(oldProg->RefCount > 0);
+ oldProg->RefCount--;
- deleteFlag = ((*ptr)->RefCount == 0);
- /*mtx_lock(&(*ptr)->Mutex);*/
+ deleteFlag = (oldProg->RefCount == 0);
+ mtx_unlock(&oldProg->Mutex);
if (deleteFlag) {
assert(ctx);
- ctx->Driver.DeleteProgram(ctx, *ptr);
+ ctx->Driver.DeleteProgram(ctx, oldProg);
}
*ptr = NULL;
assert(!*ptr);
if (prog) {
- /*mtx_lock(&prog->Mutex);*/
+ mtx_lock(&prog->Mutex);
prog->RefCount++;
-#if 0
- printf("Program %p ID=%u Target=%s Refcount++ to %d\n",
- prog, prog->Id,
- (prog->Target == GL_VERTEX_PROGRAM_ARB ? "VP" :
- (prog->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")),
- prog->RefCount);
-#endif
- /*mtx_unlock(&prog->Mutex);*/
+ mtx_unlock(&prog->Mutex);
}
*ptr = prog;
unsigned src_format;
array = get_client_array(vp, arrays, attr);
- if (!array)
+ if (!array) {
+ vbuffer[attr].buffer = NULL;
+ vbuffer[attr].user_buffer = NULL;
+ vbuffer[attr].buffer_offset = 0;
continue;
+ }
stride = array->StrideB;
bufobj = array->BufferObj;
return NULL;
size = MIN2(stObj->pt->width0 - base, (unsigned)stObj->base.BufferSize);
- f = ((base * 8) / desc->block.bits) * desc->block.width;
- n = ((size * 8) / desc->block.bits) * desc->block.width;
+ f = (base / (desc->block.bits / 8)) * desc->block.width;
+ n = (size / (desc->block.bits / 8)) * desc->block.width;
if (!n)
return NULL;
templ.u.buf.first_element = f;
tBot = (GLfloat) height;
}
- if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
- vbuf_offset, vbuf, (void **) &vertices) != PIPE_OK) {
+ u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
+ vbuf_offset, vbuf, (void **) &vertices);
+ if (!*vbuf) {
return;
}
st_adjust_blit_for_msaa_resolve(&blit);
st->pipe->blit(st->pipe, &blit);
+ dstRb->defined = true; /* front buffer tracking */
}
}
}
st_adjust_blit_for_msaa_resolve(&blit);
st->pipe->blit(st->pipe, &blit);
+ dstRb->defined = true; /* front buffer tracking */
}
}
}
vb.stride = 8 * sizeof(float);
- if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
- &vb.buffer_offset, &vb.buffer,
- (void **) &vertices) != PIPE_OK) {
+ u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
+ &vb.buffer_offset, &vb.buffer,
+ (void **) &vertices);
+ if (!vb.buffer) {
return;
}
struct pipe_resource *buf = NULL;
unsigned offset;
- if (u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset,
- &buf, (void **) &verts) != PIPE_OK) {
+ u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset,
+ &buf, (void **) &verts);
+ if (!buf) {
return;
}
GLfloat *vbuf = NULL;
GLuint attr;
- if (u_upload_alloc(st->uploader, 0,
- numAttribs * 4 * 4 * sizeof(GLfloat),
- &offset, &vbuffer, (void **) &vbuf) != PIPE_OK) {
+ u_upload_alloc(st->uploader, 0,
+ numAttribs * 4 * 4 * sizeof(GLfloat),
+ &offset, &vbuffer, (void **) &vbuf);
+ if (!vbuffer) {
return;
}
/**
- * Called via ctx->Driver.BindFramebufferEXT().
- */
-static void
-st_bind_framebuffer(struct gl_context *ctx, GLenum target,
- struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
-{
- /* no-op */
-}
-
-
-/**
* Create or update the pipe_surface of a FBO renderbuffer.
* This is usually called after st_finalize_texture.
*/
{
functions->NewFramebuffer = st_new_framebuffer;
functions->NewRenderbuffer = st_new_renderbuffer;
- functions->BindFramebuffer = st_bind_framebuffer;
functions->FramebufferRenderbuffer = _mesa_FramebufferRenderbuffer_sw;
functions->RenderTexture = st_render_texture;
functions->FinishRenderTexture = st_finish_render_texture;
static uint64_t
st_GetTimestamp(struct gl_context *ctx)
{
- struct pipe_screen *screen = st_context(ctx)->pipe->screen;
+   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_screen *screen = pipe->screen;
- return screen->get_timestamp(screen);
+   /* The per-screen hook wins whenever the driver provides one. */
+   if (screen->get_timestamp)
+      return screen->get_timestamp(screen);
+
+   /* Otherwise the per-context hook is used; it is asserted to exist. */
+   assert(pipe->get_timestamp);
+   return pipe->get_timestamp(pipe);
}
GLuint row;
for (row = 0; row < (unsigned) height; row++) {
- GLvoid *dest = _mesa_image_address3d(pack, pixels,
+ GLvoid *dest = _mesa_image_address2d(pack, pixels,
width, height, format,
- type, 0, row, 0);
+ type, row, 0);
memcpy(dest, map, bytesPerRow);
map += tex_xfer->stride;
}
static void
st_CopyImageSubData(struct gl_context *ctx,
struct gl_texture_image *src_image,
+                    struct gl_renderbuffer *src_renderbuffer,
int src_x, int src_y, int src_z,
struct gl_texture_image *dst_image,
+                    struct gl_renderbuffer *dst_renderbuffer,
int dst_x, int dst_y, int dst_z,
int src_width, int src_height)
{
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
- struct st_texture_image *src = st_texture_image(src_image);
- struct st_texture_image *dst = st_texture_image(dst_image);
-
+   struct pipe_resource *src_res;
+   struct pipe_resource *dst_res;
struct pipe_box box;
+   int src_level;
+   int dst_level;
+
+   /* Source side: a texture image supplies its resource and mip level;
+    * without one, the renderbuffer's texture is read at level 0.
+    */
+   if (!src_image) {
+      src_res = st_renderbuffer(src_renderbuffer)->texture;
+      src_level = 0;
+   }
+   else {
+      src_res = st_texture_image(src_image)->pt;
+      src_level = src_image->Level;
+   }
+
+   /* Destination side: same selection as for the source. */
+   if (!dst_image) {
+      dst_res = st_renderbuffer(dst_renderbuffer)->texture;
+      dst_level = 0;
+   }
+   else {
+      dst_res = st_texture_image(dst_image)->pt;
+      dst_level = dst_image->Level;
+   }
u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box);
- pipe->resource_copy_region(pipe, dst->pt, dst_image->Level,
+   pipe->resource_copy_region(pipe, dst_res, dst_level,
dst_x, dst_y, dst_z,
- src->pt, src_image->Level,
+                              src_res, src_level,
&box);
}
}
else if (st->indexbuf_uploader) {
/* upload indexes from user memory into a real buffer */
- if (u_upload_data(st->indexbuf_uploader, 0,
- ib->count * ibuffer->index_size, ib->ptr,
- &ibuffer->offset, &ibuffer->buffer) != PIPE_OK) {
+ u_upload_data(st->indexbuf_uploader, 0,
+ ib->count * ibuffer->index_size, ib->ptr,
+ &ibuffer->offset, &ibuffer->buffer);
+ if (!ibuffer->buffer) {
/* out of memory */
return FALSE;
}
{ o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE },
{ o(ARB_seamless_cube_map), PIPE_CAP_SEAMLESS_CUBE_MAP },
{ o(ARB_shader_stencil_export), PIPE_CAP_SHADER_STENCIL_EXPORT },
+ { o(ARB_shader_texture_image_samples), PIPE_CAP_TGSI_TXQS },
{ o(ARB_shader_texture_lod), PIPE_CAP_SM3 },
{ o(ARB_shadow), PIPE_CAP_TEXTURE_SHADOW_MAP },
{ o(ARB_texture_buffer_object), PIPE_CAP_TEXTURE_BUFFER_OBJECTS },
consts->MaxViewports = screen->get_param(screen, PIPE_CAP_MAX_VIEWPORTS);
if (consts->MaxViewports >= 16) {
- consts->ViewportBounds.Min = -16384.0;
- consts->ViewportBounds.Max = 16384.0;
+ if (glsl_feature_level >= 400) {
+ consts->ViewportBounds.Min = -32768.0;
+ consts->ViewportBounds.Max = 32767.0;
+ } else {
+ consts->ViewportBounds.Min = -16384.0;
+ consts->ViewportBounds.Max = 16383.0;
+ }
extensions->ARB_viewport_array = GL_TRUE;
extensions->ARB_fragment_layer_viewport = GL_TRUE;
if (extensions->AMD_vertex_shader_layer)
#include "main/imports.h"
#include "main/context.h"
+#include "main/enums.h"
+#include "main/formats.h"
#include "main/glformats.h"
#include "main/texgetimage.h"
#include "main/teximage.h"
/* 32-bit float formats */
{
{ GL_RGBA32F_ARB, 0 },
- { PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+ { PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
},
{
{ GL_RGB32F_ARB, 0 },
{ PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32X32_FLOAT,
- PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+ PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
},
{
{ GL_LUMINANCE_ALPHA32F_ARB, 0 },
- { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT,
- PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+ { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
},
{
{ GL_ALPHA32F_ARB, 0 },
{ PIPE_FORMAT_A32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
- PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_A16_FLOAT,
- PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+ PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
},
{
{ GL_INTENSITY32F_ARB, 0 },
{ PIPE_FORMAT_I32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
- PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_I16_FLOAT,
- PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+ PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
},
{
{ GL_LUMINANCE32F_ARB, 0 },
{ PIPE_FORMAT_L32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
- PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_L16_FLOAT,
- PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+ PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
},
{
{ GL_R32F, 0 },
{ PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT,
- PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16_FLOAT,
- PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+ PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
},
{
{ GL_RG32F, 0 },
- { PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT,
- PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+ { PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
},
/* R, RG formats */
{
struct st_context *st = st_context(ctx);
enum pipe_format pFormat;
+ mesa_format mFormat;
unsigned bindings;
enum pipe_texture_target pTarget = gl_target_to_pipe(target);
else if (internalFormat == 3 || internalFormat == 4 ||
internalFormat == GL_RGB || internalFormat == GL_RGBA ||
internalFormat == GL_RGB8 || internalFormat == GL_RGBA8 ||
- internalFormat == GL_BGRA)
+ internalFormat == GL_BGRA ||
+ internalFormat == GL_RGB16F ||
+ internalFormat == GL_RGBA16F ||
+ internalFormat == GL_RGB32F ||
+ internalFormat == GL_RGBA32F)
bindings |= PIPE_BIND_RENDER_TARGET;
/* GLES allows the driver to choose any format which matches
return MESA_FORMAT_NONE;
}
- return st_pipe_format_to_mesa_format(pFormat);
+ mFormat = st_pipe_format_to_mesa_format(pFormat);
+
+ /* Debugging aid */
+ if (0) {
+ debug_printf("%s(intFormat=%s, format=%s, type=%s) -> %s, %s\n",
+ __func__,
+ _mesa_enum_to_string(internalFormat),
+ _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type),
+ util_format_name(pFormat),
+ _mesa_get_format_name(mFormat));
+ }
+
+ return mFormat;
}
case ir_triop_vector_insert:
case ir_binop_carry:
case ir_binop_borrow:
+ case ir_unop_ssbo_unsized_array_length:
/* This operation is not supported, or should have already been handled.
*/
assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
break;
+
+ case ir_unop_get_buffer_size:
+ assert(!"Not implemented yet");
+ break;
}
this->result = result_src;
case ir_lod:
opcode = TGSI_OPCODE_LODQ;
break;
+ case ir_texture_samples:
+ opcode = TGSI_OPCODE_TXQS;
+ break;
}
if (ir->projector) {
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
} else
inst = emit_asm(ir, opcode, result_dst, lod_info);
+ } else if (opcode == TGSI_OPCODE_TXQS) {
+ inst = emit_asm(ir, opcode, result_dst);
} else if (opcode == TGSI_OPCODE_TXF) {
inst = emit_asm(ir, opcode, result_dst, coord);
} else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
*/
for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
if (inst->dst[i].file == PROGRAM_TEMPORARY &&
- !inst->dst[i].reladdr &&
- !inst->saturate) {
+ !inst->dst[i].reladdr) {
for (int c = 0; c < 4; c++) {
if (inst->dst[i].writemask & (1 << c)) {
if (writes[4 * inst->dst[i].index + c]) {
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
case TGSI_OPCODE_TXQ:
+ case TGSI_OPCODE_TXQS:
case TGSI_OPCODE_TXF:
case TGSI_OPCODE_TEX2:
case TGSI_OPCODE_TXB2:
*/
GLint swapSize = _mesa_sizeof_packed_type(type);
if (swapSize == 2 || swapSize == 4) {
- int components = _mesa_components_in_format(format);
- int elementCount = width * height * components;
- tempImage = malloc(elementCount * swapSize);
+ int imageStride = _mesa_image_image_stride(unpack, width, height, format, type);
+
+ tempImage = malloc(imageStride);
if (!tempImage) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
return;
}
- if (swapSize == 2)
- _mesa_swap2_copy(tempImage, (GLushort *) pixels, elementCount);
- else
- _mesa_swap4_copy(tempImage, (GLuint *) pixels, elementCount);
+
+ _mesa_swap_bytes_2d_image(format, type, unpack,
+ width, height, tempImage, pixels);
+
pixels = tempImage;
}
}
},
/* Packed signed/unsigned non-normalized integer formats */
+ FETCH_NULL(A8B8G8R8_UINT),
+ FETCH_NULL(A8R8G8B8_UINT),
+ FETCH_NULL(R8G8B8A8_UINT),
+ FETCH_NULL(B8G8R8A8_UINT),
FETCH_NULL(B10G10R10A2_UINT),
FETCH_NULL(R10G10B10A2_UINT),
FETCH_NULL(A2B10G10R10_UINT),
FETCH_NULL(A2R10G10B10_UINT),
+ FETCH_NULL(B5G6R5_UINT),
+ FETCH_NULL(R5G6B5_UINT),
+ FETCH_NULL(B2G3R3_UINT),
+ FETCH_NULL(R3G3B2_UINT),
+ FETCH_NULL(A4B4G4R4_UINT),
+ FETCH_NULL(R4G4B4A4_UINT),
+ FETCH_NULL(B4G4R4A4_UINT),
+ FETCH_NULL(A4R4G4B4_UINT),
+ FETCH_NULL(A1B5G5R5_UINT),
+ FETCH_NULL(B5G5R5A1_UINT),
+ FETCH_NULL(A1R5G5B5_UINT),
+ FETCH_NULL(R5G5B5A1_UINT),
/* Array signed/unsigned non-normalized integer formats */
FETCH_NULL(A_UINT8),
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
-
+#include <stdbool.h>
/**
* \file t_dd_dmatmp.h
* tristrips, lineloops to linestrips), or to indexed vertices.
*/
-#if !defined(HAVE_TRIANGLES)
-#error "must have at least triangles to use render template"
-#endif
-
-#if !HAVE_ELTS
-#define ELTS_VARS(buf)
-#define ALLOC_ELTS(nr) 0
-#define EMIT_ELT( offset, elt )
-#define EMIT_TWO_ELTS( offset, elt0, elt1 )
-#define INCR_ELTS( nr )
-#define ELT_INIT(prim)
-#define GET_CURRENT_VB_MAX_ELTS() 0
-#define GET_SUBSEQUENT_VB_MAX_ELTS() 0
-#define RELEASE_ELT_VERTS()
-#define EMIT_INDEXED_VERTS( ctx, start, count )
+#if !HAVE_TRIANGLES || !HAVE_LINES || !HAVE_LINE_STRIPS || !HAVE_TRI_STRIPS || !HAVE_TRI_FANS
+#error "must have lines, line strips, triangles, triangle fans, and triangle strips to use render template"
#endif
-#ifndef EMIT_TWO_ELTS
-#define EMIT_TWO_ELTS( offset, elt0, elt1 ) \
-do { \
- EMIT_ELT( offset, elt0 ); \
- EMIT_ELT( offset+1, elt1 ); \
-} while (0)
+#if HAVE_QUAD_STRIPS || HAVE_QUADS || HAVE_ELTS
+#error "ELTs, quads, and quad strips not supported by render template"
#endif
/* Render whole begin/end objects */
/**********************************************************************/
-
-
-
-#if (HAVE_ELTS)
-static void *TAG(emit_elts)( struct gl_context *ctx, GLuint *elts, GLuint nr,
- void *buf)
-{
- GLint i;
- LOCAL_VARS;
- ELTS_VARS(buf);
-
- for ( i = 0 ; i+1 < nr ; i+=2, elts += 2 ) {
- EMIT_TWO_ELTS( 0, elts[0], elts[1] );
- INCR_ELTS( 2 );
- }
-
- if (i < nr) {
- EMIT_ELT( 0, elts[0] );
- INCR_ELTS( 1 );
- }
-
- return (void *)ELTPTR;
-}
-#endif
-
-static __inline void *TAG(emit_verts)( struct gl_context *ctx, GLuint start,
- GLuint count, void *buf )
+static inline void *TAG(emit_verts)(struct gl_context *ctx, GLuint start,
+ GLuint count, void *buf)
{
return EMIT_VERTS(ctx, start, count, buf);
}
* Render non-indexed primitives.
***********************************************************************/
-static void TAG(render_points_verts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
+static void TAG(render_points_verts)(struct gl_context *ctx,
+ GLuint start,
+ GLuint count,
+ GLuint flags)
{
if (HAVE_POINTS) {
LOCAL_VARS;
- int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
- int currentsz;
+ const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+ unsigned currentsz;
GLuint j, nr;
- INIT( GL_POINTS );
+ INIT(GL_POINTS);
currentsz = GET_CURRENT_VB_MAX_VERTS();
if (currentsz < 8)
- currentsz = dmasz;
+ currentsz = dmasz;
- for (j = start; j < count; j += nr ) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
- currentsz = dmasz;
+ for (j = 0; j < count; j += nr) {
+ nr = MIN2(currentsz, count - j);
+ TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+ currentsz = dmasz;
}
-
} else {
fprintf(stderr, "%s - cannot draw primitive\n", __func__);
return;
}
}
-static void TAG(render_lines_verts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
+static void TAG(render_lines_verts)(struct gl_context *ctx,
+ GLuint start,
+ GLuint count,
+ GLuint flags)
{
- if (HAVE_LINES) {
- LOCAL_VARS;
- int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
- int currentsz;
- GLuint j, nr;
+ LOCAL_VARS;
+ const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1;
+ unsigned currentsz;
+ GLuint j, nr;
- INIT( GL_LINES );
+ INIT(GL_LINES);
- /* Emit whole number of lines in total and in each buffer:
- */
- count -= (count-start) & 1;
- currentsz = GET_CURRENT_VB_MAX_VERTS();
- currentsz -= currentsz & 1;
- dmasz -= dmasz & 1;
-
- if (currentsz < 8)
- currentsz = dmasz;
+ /* Emit whole number of lines in total and in each buffer:
+ */
+ count -= count & 1;
+ currentsz = GET_CURRENT_VB_MAX_VERTS();
+ currentsz -= currentsz & 1;
- for (j = start; j < count; j += nr ) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
- currentsz = dmasz;
- }
+ if (currentsz < 8)
+ currentsz = dmasz;
- } else {
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
+ for (j = 0; j < count; j += nr) {
+ nr = MIN2(currentsz, count - j);
+ TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+ currentsz = dmasz;
}
}
-static void TAG(render_line_strip_verts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
+static void TAG(render_line_strip_verts)(struct gl_context *ctx,
+ GLuint start,
+ GLuint count,
+ GLuint flags)
{
- if (HAVE_LINE_STRIPS) {
- LOCAL_VARS;
- int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
- int currentsz;
- GLuint j, nr;
-
- INIT( GL_LINE_STRIP );
+ LOCAL_VARS;
+ const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+ unsigned currentsz;
+ GLuint j, nr;
- currentsz = GET_CURRENT_VB_MAX_VERTS();
- if (currentsz < 8)
- currentsz = dmasz;
+ INIT(GL_LINE_STRIP);
- for (j = start; j + 1 < count; j += nr - 1 ) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
- currentsz = dmasz;
- }
-
- FLUSH();
+ currentsz = GET_CURRENT_VB_MAX_VERTS();
+ if (currentsz < 8)
+ currentsz = dmasz;
- } else {
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
+ for (j = 0; j + 1 < count; j += nr - 1) {
+ nr = MIN2(currentsz, count - j);
+ TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+ currentsz = dmasz;
}
+
+ FLUSH();
}
-static void TAG(render_line_loop_verts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
+static void TAG(render_line_loop_verts)(struct gl_context *ctx,
+ GLuint start,
+ GLuint count,
+ GLuint flags)
{
- if (HAVE_LINE_STRIPS) {
- LOCAL_VARS;
- int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
- int currentsz;
- GLuint j, nr;
-
- INIT( GL_LINE_STRIP );
+ LOCAL_VARS;
+ const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() - 1;
+ unsigned currentsz;
+ GLuint j, nr;
- if (flags & PRIM_BEGIN)
- j = start;
- else
- j = start + 1;
+ INIT(GL_LINE_STRIP);
- /* Ensure last vertex won't wrap buffers:
- */
- currentsz = GET_CURRENT_VB_MAX_VERTS();
- currentsz--;
- dmasz--;
+ j = (flags & PRIM_BEGIN) ? 0 : 1;
- if (currentsz < 8) {
- currentsz = dmasz;
- }
+ /* Ensure last vertex won't wrap buffers:
+ */
+ currentsz = GET_CURRENT_VB_MAX_VERTS();
+ currentsz--;
- if (j + 1 < count) {
- for ( ; j + 1 < count; j += nr - 1 ) {
- nr = MIN2( currentsz, count - j );
-
- if (j + nr >= count &&
- start < count - 1 &&
- (flags & PRIM_END))
- {
- void *tmp;
- tmp = ALLOC_VERTS(nr+1);
- tmp = TAG(emit_verts)( ctx, j, nr, tmp );
- tmp = TAG(emit_verts)( ctx, start, 1, tmp );
- (void) tmp;
- }
- else {
- TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
- currentsz = dmasz;
- }
- }
+ if (currentsz < 8)
+ currentsz = dmasz;
+ if (j + 1 < count) {
+ for (/* empty */; j + 1 < count; j += nr - 1) {
+ nr = MIN2(currentsz, count - j);
+
+ if (j + nr >= count &&
+ count > 1 &&
+ (flags & PRIM_END)) {
+ void *tmp;
+ tmp = ALLOC_VERTS(nr+1);
+ tmp = TAG(emit_verts)(ctx, start + j, nr, tmp);
+ tmp = TAG(emit_verts)( ctx, start, 1, tmp );
+ (void) tmp;
+ } else {
+ TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+ currentsz = dmasz;
+ }
}
- else if (start + 1 < count && (flags & PRIM_END)) {
- void *tmp;
- tmp = ALLOC_VERTS(2);
- tmp = TAG(emit_verts)( ctx, start+1, 1, tmp );
- tmp = TAG(emit_verts)( ctx, start, 1, tmp );
- (void) tmp;
- }
-
- FLUSH();
-
- } else {
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
+ } else if (count > 1 && (flags & PRIM_END)) {
+ void *tmp;
+ tmp = ALLOC_VERTS(2);
+ tmp = TAG(emit_verts)( ctx, start+1, 1, tmp );
+ tmp = TAG(emit_verts)( ctx, start, 1, tmp );
+ (void) tmp;
}
+
+ FLUSH();
}
-static void TAG(render_triangles_verts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
+static void TAG(render_triangles_verts)(struct gl_context *ctx,
+ GLuint start,
+ GLuint count,
+ GLuint flags)
{
LOCAL_VARS;
- int dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS()/3) * 3;
- int currentsz;
+ const unsigned dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS() / 3) * 3;
+ unsigned currentsz;
GLuint j, nr;
INIT(GL_TRIANGLES);
- currentsz = (GET_CURRENT_VB_MAX_VERTS()/3) * 3;
+ currentsz = (GET_CURRENT_VB_MAX_VERTS() / 3) * 3;
/* Emit whole number of tris in total. dmasz is already a multiple
* of 3.
*/
- count -= (count-start)%3;
+ count -= count % 3;
if (currentsz < 8)
currentsz = dmasz;
- for (j = start; j < count; j += nr) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+ for (j = 0; j < count; j += nr) {
+ nr = MIN2(currentsz, count - j);
+ TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
currentsz = dmasz;
}
}
-static void TAG(render_tri_strip_verts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
+static void TAG(render_tri_strip_verts)(struct gl_context *ctx,
+ GLuint start,
+ GLuint count,
+ GLuint flags)
{
- if (HAVE_TRI_STRIPS) {
- LOCAL_VARS;
- GLuint j, nr;
- int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
- int currentsz;
-
- INIT(GL_TRIANGLE_STRIP);
-
- currentsz = GET_CURRENT_VB_MAX_VERTS();
+ LOCAL_VARS;
+ GLuint j, nr;
+ const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1;
+ unsigned currentsz;
- if (currentsz < 8) {
- currentsz = dmasz;
- }
+ INIT(GL_TRIANGLE_STRIP);
- /* From here on emit even numbers of tris when wrapping over buffers:
- */
- dmasz -= (dmasz & 1);
- currentsz -= (currentsz & 1);
+ currentsz = GET_CURRENT_VB_MAX_VERTS();
- for (j = start ; j + 2 < count; j += nr - 2 ) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
- currentsz = dmasz;
- }
+ if (currentsz < 8)
+ currentsz = dmasz;
- FLUSH();
+ /* From here on emit even numbers of tris when wrapping over buffers:
+ */
+ currentsz -= (currentsz & 1);
- } else {
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
+ for (j = 0; j + 2 < count; j += nr - 2) {
+ nr = MIN2(currentsz, count - j);
+ TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+ currentsz = dmasz;
}
+
+ FLUSH();
}
-static void TAG(render_tri_fan_verts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
+static void TAG(render_tri_fan_verts)(struct gl_context *ctx,
+ GLuint start,
+ GLuint count,
+ GLuint flags)
{
- if (HAVE_TRI_FANS) {
- LOCAL_VARS;
- GLuint j, nr;
- int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
- int currentsz;
-
- INIT(GL_TRIANGLE_FAN);
+ LOCAL_VARS;
+ GLuint j, nr;
+ const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+ unsigned currentsz;
- currentsz = GET_CURRENT_VB_MAX_VERTS();
- if (currentsz < 8) {
- currentsz = dmasz;
- }
+ INIT(GL_TRIANGLE_FAN);
- for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
- void *tmp;
- nr = MIN2( currentsz, count - j + 1 );
- tmp = ALLOC_VERTS( nr );
- tmp = TAG(emit_verts)( ctx, start, 1, tmp );
- tmp = TAG(emit_verts)( ctx, j, nr - 1, tmp );
- (void) tmp;
- currentsz = dmasz;
- }
+ currentsz = GET_CURRENT_VB_MAX_VERTS();
+ if (currentsz < 8)
+ currentsz = dmasz;
- FLUSH();
- }
- else {
- /* Could write code to emit these as indexed vertices (for the
- * g400, for instance).
- */
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
+ for (j = 1; j + 1 < count; j += nr - 2) {
+ void *tmp;
+ nr = MIN2(currentsz, count - j + 1);
+ tmp = ALLOC_VERTS(nr);
+ tmp = TAG(emit_verts)(ctx, start, 1, tmp);
+ tmp = TAG(emit_verts)(ctx, start + j, nr - 1, tmp);
+ (void) tmp;
+ currentsz = dmasz;
}
+
+ FLUSH();
}
-static void TAG(render_poly_verts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
+static void TAG(render_poly_verts)(struct gl_context *ctx,
+ GLuint start,
+ GLuint count,
+ GLuint flags)
{
if (HAVE_POLYGONS) {
LOCAL_VARS;
GLuint j, nr;
- int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
- int currentsz;
+ const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
+ unsigned currentsz;
INIT(GL_POLYGON);
currentsz = GET_CURRENT_VB_MAX_VERTS();
if (currentsz < 8) {
- currentsz = dmasz;
+ currentsz = dmasz;
}
- for (j = start + 1 ; j + 1 < count ; j += nr - 2 ) {
- void *tmp;
- nr = MIN2( currentsz, count - j + 1 );
- tmp = ALLOC_VERTS( nr );
- tmp = TAG(emit_verts)( ctx, start, 1, tmp );
- tmp = TAG(emit_verts)( ctx, j, nr - 1, tmp );
- (void) tmp;
- currentsz = dmasz;
+ for (j = 1; j + 1 < count; j += nr - 2) {
+ void *tmp;
+ nr = MIN2(currentsz, count - j + 1);
+ tmp = ALLOC_VERTS(nr);
+ tmp = TAG(emit_verts)(ctx, start, 1, tmp);
+ tmp = TAG(emit_verts)(ctx, start + j, nr - 1, tmp);
+ (void) tmp;
+ currentsz = dmasz;
}
FLUSH();
- }
- else if (HAVE_TRI_FANS && ctx->Light.ShadeModel == GL_SMOOTH) {
+ } else if (ctx->Light.ShadeModel == GL_SMOOTH) {
TAG(render_tri_fan_verts)( ctx, start, count, flags );
} else {
fprintf(stderr, "%s - cannot draw primitive\n", __func__);
}
}
-static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
+static void TAG(render_quad_strip_verts)(struct gl_context *ctx,
+ GLuint start,
+ GLuint count,
+ GLuint flags)
{
GLuint j, nr;
- if (HAVE_QUAD_STRIPS) {
- LOCAL_VARS;
- GLuint j, nr;
- int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
- int currentsz;
-
- INIT(GL_QUAD_STRIP);
-
- currentsz = GET_CURRENT_VB_MAX_VERTS();
- if (currentsz < 8) {
- currentsz = dmasz;
- }
-
- dmasz -= (dmasz & 2);
- currentsz -= (currentsz & 2);
-
- for (j = start ; j + 3 < count; j += nr - 2 ) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
- currentsz = dmasz;
- }
-
- FLUSH();
-
- } else if (HAVE_TRI_STRIPS &&
- ctx->Light.ShadeModel == GL_FLAT &&
- TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) {
- if (HAVE_ELTS) {
- LOCAL_VARS;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
- int currentsz;
- GLuint j, nr;
-
- EMIT_INDEXED_VERTS( ctx, start, count );
-
- /* Simulate flat-shaded quadstrips using indexed vertices:
- */
- ELT_INIT( GL_TRIANGLES );
-
- currentsz = GET_CURRENT_VB_MAX_ELTS();
-
- /* Emit whole number of quads in total, and in each buffer.
- */
- dmasz -= dmasz & 1;
- count -= (count-start) & 1;
- currentsz -= currentsz & 1;
-
- if (currentsz < 12)
- currentsz = dmasz;
-
- currentsz = currentsz/6*2;
- dmasz = dmasz/6*2;
-
- for (j = start; j + 3 < count; j += nr - 2 ) {
- nr = MIN2( currentsz, count - j );
- if (nr >= 4) {
- GLint quads = (nr/2)-1;
- GLint i;
- ELTS_VARS( ALLOC_ELTS( quads*6 ) );
-
- for ( i = j-start ; i < j-start+quads*2 ; i+=2 ) {
- EMIT_TWO_ELTS( 0, (i+0), (i+1) );
- EMIT_TWO_ELTS( 2, (i+2), (i+1) );
- EMIT_TWO_ELTS( 4, (i+3), (i+2) );
- INCR_ELTS( 6 );
- }
-
- FLUSH();
- }
- currentsz = dmasz;
- }
-
- RELEASE_ELT_VERTS();
- FLUSH();
- }
- else {
- /* Vertices won't fit in a single buffer or elts not
- * available - should never happen.
- */
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
- }
- }
- else if (HAVE_TRI_STRIPS) {
+ if (ctx->Light.ShadeModel == GL_FLAT &&
+ TNL_CONTEXT(ctx)->vb.AttribPtr[_TNL_ATTRIB_COLOR0]->stride) {
+ /* Vertices won't fit in a single buffer or elts not available - should
+ * never happen.
+ */
+ fprintf(stderr, "%s - cannot draw primitive\n", __func__);
+ return;
+ } else {
LOCAL_VARS;
- int dmasz = GET_SUBSEQUENT_VB_MAX_VERTS();
- int currentsz;
+ const unsigned dmasz = GET_SUBSEQUENT_VB_MAX_VERTS() & ~1;
+ unsigned currentsz;
/* Emit smooth-shaded quadstrips as tristrips:
*/
FLUSH();
- INIT( GL_TRIANGLE_STRIP );
+ INIT(GL_TRIANGLE_STRIP);
/* Emit whole number of quads in total, and in each buffer.
*/
- dmasz -= dmasz & 1;
currentsz = GET_CURRENT_VB_MAX_VERTS();
currentsz -= currentsz & 1;
- count -= (count-start) & 1;
+ count -= count & 1;
- if (currentsz < 8) {
- currentsz = dmasz;
- }
+ if (currentsz < 8)
+ currentsz = dmasz;
- for (j = start; j + 3 < count; j += nr - 2 ) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
- currentsz = dmasz;
+ for (j = 0; j + 3 < count; j += nr - 2) {
+ nr = MIN2(currentsz, count - j);
+ TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
+ currentsz = dmasz;
}
FLUSH();
-
- } else {
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
}
}
-static void TAG(render_quads_verts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
+static void TAG(render_quads_verts)(struct gl_context *ctx,
+ GLuint start,
+ GLuint count,
+ GLuint flags)
{
- if (HAVE_QUADS) {
- LOCAL_VARS;
- int dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS()/4) * 4;
- int currentsz;
- GLuint j, nr;
-
- INIT(GL_QUADS);
-
- /* Emit whole number of quads in total. dmasz is already a multiple
- * of 4.
- */
- count -= (count-start)%4;
-
- currentsz = (GET_CURRENT_VB_MAX_VERTS()/4) * 4;
- if (currentsz < 8)
- currentsz = dmasz;
-
- for (j = start; j < count; j += nr) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
- currentsz = dmasz;
- }
- }
- else if (HAVE_ELTS) {
- /* Hardware doesn't have a quad primitive type -- try to
- * simulate it using indexed vertices and the triangle
- * primitive:
- */
- LOCAL_VARS;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
- int currentsz;
- GLuint j, nr;
-
- EMIT_INDEXED_VERTS( ctx, start, count );
-
- FLUSH();
- ELT_INIT( GL_TRIANGLES );
- currentsz = GET_CURRENT_VB_MAX_ELTS();
-
- /* Emit whole number of quads in total, and in each buffer.
- */
- dmasz -= dmasz & 3;
- count -= (count-start) & 3;
- currentsz -= currentsz & 3;
+ LOCAL_VARS;
+ GLuint j;
- /* Adjust for rendering as triangles:
- */
- currentsz = currentsz/6*4;
- dmasz = dmasz/6*4;
+ /* Emit whole number of quads in total. */
+ count -= count & 3;
- if (currentsz < 8)
- currentsz = dmasz;
-
- for (j = start; j < count; j += nr ) {
- nr = MIN2( currentsz, count - j );
- if (nr >= 4) {
- GLint quads = nr/4;
- GLint i;
- ELTS_VARS( ALLOC_ELTS( quads*6 ) );
-
- for ( i = j-start ; i < j-start+quads*4 ; i+=4 ) {
- EMIT_TWO_ELTS( 0, (i+0), (i+1) );
- EMIT_TWO_ELTS( 2, (i+3), (i+1) );
- EMIT_TWO_ELTS( 4, (i+2), (i+3) );
- INCR_ELTS( 6 );
- }
-
- FLUSH();
- }
- currentsz = dmasz;
- }
+ /* Hardware doesn't have a quad primitive type -- try to simulate it using
+ * triangle primitive. This is a win for gears, but is it useful in the
+ * broader world?
+ */
+ INIT(GL_TRIANGLES);
- RELEASE_ELT_VERTS();
- }
- else if (HAVE_TRIANGLES) {
- /* Hardware doesn't have a quad primitive type -- try to
- * simulate it using triangle primitive. This is a win for
- * gears, but is it useful in the broader world?
+ for (j = 0; j + 3 < count; j += 4) {
+ void *tmp = ALLOC_VERTS(6);
+ /* Send v0, v1, v3
*/
- LOCAL_VARS;
- GLuint j;
-
- INIT(GL_TRIANGLES);
-
- for (j = start; j < count-3; j += 4) {
- void *tmp = ALLOC_VERTS( 6 );
- /* Send v0, v1, v3
- */
- tmp = EMIT_VERTS(ctx, j, 2, tmp);
- tmp = EMIT_VERTS(ctx, j + 3, 1, tmp);
- /* Send v1, v2, v3
- */
- tmp = EMIT_VERTS(ctx, j + 1, 3, tmp);
- (void) tmp;
- }
- }
- else {
- /* Vertices won't fit in a single buffer, should never happen.
+ tmp = EMIT_VERTS(ctx, start + j, 2, tmp);
+ tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp);
+ /* Send v1, v2, v3
*/
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
+ tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp);
+ (void) tmp;
}
}
-static void TAG(render_noop)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
+static void TAG(render_noop)(struct gl_context *ctx,
+ GLuint start,
+ GLuint count,
+ GLuint flags)
{
+ (void) ctx;
+ (void) start;
+ (void) count;
+ (void) flags;
}
-
-
-
-static tnl_render_func TAG(render_tab_verts)[GL_POLYGON+2] =
+static const tnl_render_func TAG(render_tab_verts)[GL_POLYGON+2] =
{
TAG(render_points_verts),
TAG(render_lines_verts),
TAG(render_noop),
};
-
-/****************************************************************************
- * Render elts using hardware indexed verts *
- ****************************************************************************/
-
-#if (HAVE_ELTS)
-static void TAG(render_points_elts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
-{
- if (HAVE_POINTS) {
- LOCAL_VARS;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
- int currentsz;
- GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
- GLuint j, nr;
-
- ELT_INIT( GL_POINTS );
-
- currentsz = GET_CURRENT_VB_MAX_ELTS();
- if (currentsz < 8)
- currentsz = dmasz;
-
- for (j = start; j < count; j += nr ) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
- FLUSH();
- currentsz = dmasz;
- }
- } else {
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
- }
-}
-
-
-
-static void TAG(render_lines_elts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
-{
- if (HAVE_LINES) {
- LOCAL_VARS;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
- int currentsz;
- GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
- GLuint j, nr;
-
- ELT_INIT( GL_LINES );
-
- /* Emit whole number of lines in total and in each buffer:
- */
- count -= (count-start) & 1;
- currentsz -= currentsz & 1;
- dmasz -= dmasz & 1;
-
- currentsz = GET_CURRENT_VB_MAX_ELTS();
- if (currentsz < 8)
- currentsz = dmasz;
-
- for (j = start; j < count; j += nr ) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
- FLUSH();
- currentsz = dmasz;
- }
- } else {
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
- }
-}
-
-
-static void TAG(render_line_strip_elts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
-{
- if (HAVE_LINE_STRIPS) {
- LOCAL_VARS;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
- int currentsz;
- GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
- GLuint j, nr;
-
- FLUSH(); /* always a new primitive */
- ELT_INIT( GL_LINE_STRIP );
-
- currentsz = GET_CURRENT_VB_MAX_ELTS();
- if (currentsz < 8)
- currentsz = dmasz;
-
- for (j = start; j + 1 < count; j += nr - 1 ) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
- FLUSH();
- currentsz = dmasz;
- }
- } else {
- /* TODO: Try to emit as indexed lines.
- */
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
- }
-}
-
-
-static void TAG(render_line_loop_elts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
-{
- if (HAVE_LINE_STRIPS) {
- LOCAL_VARS;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
- int currentsz;
- GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
- GLuint j, nr;
-
- FLUSH();
- ELT_INIT( GL_LINE_STRIP );
-
- if (flags & PRIM_BEGIN)
- j = start;
- else
- j = start + 1;
-
- currentsz = GET_CURRENT_VB_MAX_ELTS();
- if (currentsz < 8) {
- currentsz = dmasz;
- }
-
- /* Ensure last vertex doesn't wrap:
- */
- currentsz--;
- dmasz--;
-
- if (j + 1 < count) {
- for ( ; j + 1 < count; j += nr - 1 ) {
- nr = MIN2( currentsz, count - j );
-
- if (j + nr >= count &&
- start < count - 1 &&
- (flags & PRIM_END))
- {
- void *tmp;
- tmp = ALLOC_ELTS(nr+1);
- tmp = TAG(emit_elts)( ctx, elts+j, nr, tmp );
- tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
- (void) tmp;
- }
- else {
- TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
- currentsz = dmasz;
- }
- }
-
- }
- else if (start + 1 < count && (flags & PRIM_END)) {
- void *tmp;
- tmp = ALLOC_ELTS(2);
- tmp = TAG(emit_elts)( ctx, elts+start+1, 1, tmp );
- tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
- (void) tmp;
- }
-
- FLUSH();
- } else {
- /* TODO: Try to emit as indexed lines */
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
- }
-}
-
-
-/* For verts, we still eliminate the copy from main memory to dma
- * buffers. For elts, this is probably no better (worse?) than the
- * standard path.
- */
-static void TAG(render_triangles_elts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
-{
- LOCAL_VARS;
- GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS()/3*3;
- int currentsz;
- GLuint j, nr;
-
- FLUSH();
- ELT_INIT( GL_TRIANGLES );
-
- currentsz = GET_CURRENT_VB_MAX_ELTS();
-
- /* Emit whole number of tris in total. dmasz is already a multiple
- * of 3.
- */
- count -= (count-start)%3;
- currentsz -= currentsz%3;
- if (currentsz < 8)
- currentsz = dmasz;
-
- for (j = start; j < count; j += nr) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
- FLUSH();
- currentsz = dmasz;
- }
-}
-
-
-
-static void TAG(render_tri_strip_elts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
-{
- if (HAVE_TRI_STRIPS) {
- LOCAL_VARS;
- GLuint j, nr;
- GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
- int currentsz;
-
- FLUSH();
- ELT_INIT( GL_TRIANGLE_STRIP );
-
- currentsz = GET_CURRENT_VB_MAX_ELTS();
- if (currentsz < 8) {
- currentsz = dmasz;
- }
-
- /* Keep the same winding over multiple buffers:
- */
- dmasz -= (dmasz & 1);
- currentsz -= (currentsz & 1);
-
- for (j = start ; j + 2 < count; j += nr - 2 ) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
- FLUSH();
- currentsz = dmasz;
- }
- } else {
- /* TODO: try to emit as indexed triangles */
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
- }
-}
-
-static void TAG(render_tri_fan_elts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
-{
- if (HAVE_TRI_FANS) {
- LOCAL_VARS;
- GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
- GLuint j, nr;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
- int currentsz;
-
- FLUSH();
- ELT_INIT( GL_TRIANGLE_FAN );
-
- currentsz = GET_CURRENT_VB_MAX_ELTS();
- if (currentsz < 8) {
- currentsz = dmasz;
- }
-
- for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
- void *tmp;
- nr = MIN2( currentsz, count - j + 1 );
- tmp = ALLOC_ELTS( nr );
- tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
- tmp = TAG(emit_elts)( ctx, elts+j, nr - 1, tmp );
- (void) tmp;
- FLUSH();
- currentsz = dmasz;
- }
- } else {
- /* TODO: try to emit as indexed triangles */
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
- }
-}
-
-
-static void TAG(render_poly_elts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
-{
- if (HAVE_POLYGONS) {
- LOCAL_VARS;
- GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
- GLuint j, nr;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
- int currentsz;
-
- FLUSH();
- ELT_INIT( GL_POLYGON );
-
- currentsz = GET_CURRENT_VB_MAX_ELTS();
- if (currentsz < 8) {
- currentsz = dmasz;
- }
-
- for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
- void *tmp;
- nr = MIN2( currentsz, count - j + 1 );
- tmp = ALLOC_ELTS( nr );
- tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
- tmp = TAG(emit_elts)( ctx, elts+j, nr - 1, tmp );
- (void) tmp;
- FLUSH();
- currentsz = dmasz;
- }
- } else if (HAVE_TRI_FANS && ctx->Light.ShadeModel == GL_SMOOTH) {
- TAG(render_tri_fan_verts)( ctx, start, count, flags );
- } else {
- fprintf(stderr, "%s - cannot draw primitive\n", __func__);
- return;
- }
-}
-
-static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
-{
- if (HAVE_QUAD_STRIPS && 0) {
- }
- else if (HAVE_TRI_STRIPS) {
- LOCAL_VARS;
- GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
- int currentsz;
- GLuint j, nr;
-
- FLUSH();
- currentsz = GET_CURRENT_VB_MAX_ELTS();
-
- /* Emit whole number of quads in total, and in each buffer.
- */
- dmasz -= dmasz & 1;
- count -= (count-start) & 1;
- currentsz -= currentsz & 1;
-
- if (currentsz < 12)
- currentsz = dmasz;
-
- if (ctx->Light.ShadeModel == GL_FLAT) {
- ELT_INIT( GL_TRIANGLES );
-
- currentsz = currentsz/6*2;
- dmasz = dmasz/6*2;
-
- for (j = start; j + 3 < count; j += nr - 2 ) {
- nr = MIN2( currentsz, count - j );
-
- if (nr >= 4)
- {
- GLint i;
- GLint quads = (nr/2)-1;
- ELTS_VARS( ALLOC_ELTS( quads*6 ) );
-
- for ( i = j-start ; i < j-start+quads ; i++, elts += 2 ) {
- EMIT_TWO_ELTS( 0, elts[0], elts[1] );
- EMIT_TWO_ELTS( 2, elts[2], elts[1] );
- EMIT_TWO_ELTS( 4, elts[3], elts[2] );
- INCR_ELTS( 6 );
- }
-
- FLUSH();
- }
-
- currentsz = dmasz;
- }
- }
- else {
- ELT_INIT( GL_TRIANGLE_STRIP );
-
- for (j = start; j + 3 < count; j += nr - 2 ) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
- FLUSH();
- currentsz = dmasz;
- }
- }
- }
-}
-
-
-static void TAG(render_quads_elts)( struct gl_context *ctx,
- GLuint start,
- GLuint count,
- GLuint flags )
-{
- if (HAVE_QUADS) {
- LOCAL_VARS;
- GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS()/4*4;
- int currentsz;
- GLuint j, nr;
-
- FLUSH();
- ELT_INIT( GL_TRIANGLES );
-
- currentsz = GET_CURRENT_VB_MAX_ELTS()/4*4;
-
- count -= (count-start)%4;
-
- if (currentsz < 8)
- currentsz = dmasz;
-
- for (j = start; j < count; j += nr) {
- nr = MIN2( currentsz, count - j );
- TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
- FLUSH();
- currentsz = dmasz;
- }
- } else {
- LOCAL_VARS;
- GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
- int dmasz = GET_SUBSEQUENT_VB_MAX_ELTS();
- int currentsz;
- GLuint j, nr;
-
- ELT_INIT( GL_TRIANGLES );
- currentsz = GET_CURRENT_VB_MAX_ELTS();
-
- /* Emit whole number of quads in total, and in each buffer.
- */
- dmasz -= dmasz & 3;
- count -= (count-start) & 3;
- currentsz -= currentsz & 3;
-
- /* Adjust for rendering as triangles:
- */
- currentsz = currentsz/6*4;
- dmasz = dmasz/6*4;
-
- if (currentsz < 8)
- currentsz = dmasz;
-
- for (j = start; j + 3 < count; j += nr - 2 ) {
- nr = MIN2( currentsz, count - j );
-
- if (nr >= 4)
- {
- GLint quads = nr/4;
- GLint i;
- ELTS_VARS( ALLOC_ELTS( quads * 6 ) );
-
- for ( i = j-start ; i < j-start+quads ; i++, elts += 4 ) {
- EMIT_TWO_ELTS( 0, elts[0], elts[1] );
- EMIT_TWO_ELTS( 2, elts[3], elts[1] );
- EMIT_TWO_ELTS( 4, elts[2], elts[3] );
- INCR_ELTS( 6 );
- }
-
- FLUSH();
- }
-
- currentsz = dmasz;
- }
- }
-}
-
-
-
-static tnl_render_func TAG(render_tab_elts)[GL_POLYGON+2] =
-{
- TAG(render_points_elts),
- TAG(render_lines_elts),
- TAG(render_line_loop_elts),
- TAG(render_line_strip_elts),
- TAG(render_triangles_elts),
- TAG(render_tri_strip_elts),
- TAG(render_tri_fan_elts),
- TAG(render_quads_elts),
- TAG(render_quad_strip_elts),
- TAG(render_poly_elts),
- TAG(render_noop),
-};
-
-
-
-#endif
-
-
-
/* Pre-check the primitives in the VB to prevent the need for
* fallbacks later on.
*/
-static GLboolean TAG(validate_render)( struct gl_context *ctx,
- struct vertex_buffer *VB )
+static bool TAG(validate_render)(struct gl_context *ctx,
+ struct vertex_buffer *VB) /* returns true only if every primitive in VB passes the checks below */
{
 GLint i;
 if (VB->ClipOrMask & ~CLIP_CULL_BIT)
- return GL_FALSE;
+ return false; /* a clip bit other than CLIP_CULL_BIT is set */
- if (VB->Elts && !HAVE_ELTS)
- return GL_FALSE;
+ if (VB->Elts)
+ return false; /* any VB carrying Elts is rejected */
 for (i = 0 ; i < VB->PrimitiveCount ; i++) {
 GLuint prim = VB->Primitive[i].mode;
 GLuint count = VB->Primitive[i].count;
- GLboolean ok = GL_FALSE;
+ bool ok = false;
 if (!count)
- continue;
+ continue; /* empty primitives need no check */
 switch (prim & PRIM_MODE_MASK) {
 case GL_POINTS:
- ok = HAVE_POINTS;
- break;
+ ok = HAVE_POINTS;
+ break;
 case GL_LINES:
- ok = HAVE_LINES && !ctx->Line.StippleFlag;
- break;
 case GL_LINE_STRIP:
- ok = HAVE_LINE_STRIPS && !ctx->Line.StippleFlag;
- break;
 case GL_LINE_LOOP:
- ok = HAVE_LINE_STRIPS && !ctx->Line.StippleFlag;
- break;
+ ok = !ctx->Line.StippleFlag; /* shared by GL_LINES, GL_LINE_STRIP and GL_LINE_LOOP */
+ break;
 case GL_TRIANGLES:
- ok = HAVE_TRIANGLES;
- break;
 case GL_TRIANGLE_STRIP:
- ok = HAVE_TRI_STRIPS;
- break;
 case GL_TRIANGLE_FAN:
- ok = HAVE_TRI_FANS;
- break;
+ ok = true; /* shared by GL_TRIANGLES, GL_TRIANGLE_STRIP and GL_TRIANGLE_FAN */
+ break;
 case GL_POLYGON:
- if (HAVE_POLYGONS) {
- ok = GL_TRUE;
- }
- else {
- ok = (HAVE_TRI_FANS && ctx->Light.ShadeModel == GL_SMOOTH);
- }
- break;
+ ok = (HAVE_POLYGONS) || ctx->Light.ShadeModel == GL_SMOOTH; /* without HAVE_POLYGONS, accepted only under smooth shading */
+ break;
 case GL_QUAD_STRIP:
- if (VB->Elts) {
- ok = HAVE_TRI_STRIPS;
- }
- else if (HAVE_QUAD_STRIPS) {
- ok = GL_TRUE;
- } else if (HAVE_TRI_STRIPS &&
- ctx->Light.ShadeModel == GL_FLAT &&
- VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride != 0) {
- if (HAVE_ELTS) {
- ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
- }
- else {
- ok = GL_FALSE;
- }
- }
- else
- ok = HAVE_TRI_STRIPS;
- break;
+ ok = VB->Elts || /* VB->Elts is always NULL here: a non-NULL value already returned false above */
+ (ctx->Light.ShadeModel != GL_FLAT ||
+ VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride == 0); /* i.e. rejected only for flat shading with a non-zero COLOR0 stride */
+ break;
 case GL_QUADS:
- if (HAVE_QUADS) {
- ok = GL_TRUE;
- } else if (HAVE_ELTS) {
- ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
- }
- else {
- ok = HAVE_TRIANGLES; /* flatshading is ok. */
- }
- break;
+ ok = true; /* flatshading is ok. */
+ break;
 default:
- break;
+ break;
 }
 if (!ok) {
-/* fprintf(stderr, "not ok %s\n", _mesa_enum_to_string(prim & PRIM_MODE_MASK)); */
- return GL_FALSE;
+/* fprintf(stderr, "not ok %s\n", _mesa_enum_to_string(prim & PRIM_MODE_MASK)); */
+ return false; /* stop at the first primitive that fails its check */
 }
 }
- return GL_TRUE;
+ return true;
}
$(MESA_UTIL_FILES) \
$(MESA_UTIL_GENERATED_FILES)
-if ENABLE_SHADER_CACHE
-libmesautil_la_SOURCES += $(MESA_UTIL_SHADER_CACHE_FILES)
-
libmesautil_la_LIBADD = $(SHA1_LIBS)
-endif
roundeven_test_LDADD = -lm
CLEANFILES = $(BUILT_SOURCES)
EXTRA_DIST = format_srgb.py SConscript
-format_srgb.c: $(srcdir)/format_srgb.py
- $(AM_V_GEN) $(PYTHON2) $< > $@
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
+format_srgb.c: format_srgb.py
+ $(PYTHON_GEN) $(srcdir)/format_srgb.py > $@
-MESA_UTIL_SHADER_CACHE_FILES := \
- mesa-sha1.c \
- mesa-sha1.h
-
MESA_UTIL_FILES := \
bitset.h \
format_srgb.h \
hash_table.h \
list.h \
macros.h \
+ mesa-sha1.c \
+ mesa-sha1.h \
ralloc.c \
ralloc.h \
register_allocate.c \
set.c \
set.h \
simple_list.h \
+ strndup.c \
+ strndup.h \
strtod.c \
strtod.h \
texcompress_rgtc_tmp.h \
#include "mesa-sha1.h"
+#ifdef HAVE_SHA1
+
#if defined(HAVE_SHA1_IN_LIBMD) /* Use libmd for SHA1 */ \
|| defined(HAVE_SHA1_IN_LIBC) /* Use libc for SHA1 */
return buf;
}
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#if defined(_WIN32)
+#include <stdlib.h>
+#include <string.h>
+#include "strndup.h"
+
+/**
+ * Duplicate at most \p max bytes of \p str into a newly allocated,
+ * NUL-terminated buffer.
+ *
+ * Only the first \p max bytes of \p str are ever examined, so \p str does
+ * not need to be NUL-terminated when it holds at least \p max bytes.
+ *
+ * \param str  bytes to copy; a NULL pointer yields NULL.
+ * \param max  upper bound on the number of bytes copied (terminator excluded).
+ * \return the copy, to be released with free(), or NULL when \p str is NULL
+ *         or the allocation fails.
+ */
+char *
+strndup(const char *str, size_t max)
+{
+   const char *end;
+   size_t n;
+   char *ptr;
+
+   if (!str)
+      return NULL;
+
+   /* Bound the scan to max bytes.  strlen() would keep reading until it
+    * finds a terminator, over-reading buffers that are not NUL-terminated
+    * within max bytes and doing needless work on long strings.
+    */
+   end = (const char *) memchr(str, '\0', max);
+   n = end ? (size_t) (end - str) : max;
+
+   /* calloc() zero-fills, so the byte after the copied data terminates
+    * the result.
+    */
+   ptr = (char *) calloc(n + 1, sizeof(char));
+   if (!ptr)
+      return NULL;
+
+   memcpy(ptr, str, n);
+   return ptr;
+}
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdlib.h> // size_t
+
+#if defined(_WIN32)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+char *strndup(const char *str, size_t max);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
/* Just upload the program verbatim for now. Always send it all
* the inputs it asks for, whether they are varying or not.
*/
- key->base.program_string_id = vp->id;
- brw_setup_vue_key_clip_info(brw, &key->base,
- vp->program.Base.UsesClipDistanceOut);
+ key->program_string_id = vp->id;
/* _NEW_POLYGON */
if (brw->gen < 6) {
/* _NEW_TEXTURE */
brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
- &key->base.tex);
+ &key->tex);
}
static bool
* distance varying slots whenever clipping is enabled, even if the vertex
* shader doesn't write to gl_ClipDistance.
*/
- if (key->base.userclip_active) {
+ if (key->nr_userclip_plane_consts) {
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
}
brw_compute_vue_map(brw->intelScreen->devinfo,
- &prog_data->base.vue_map, outputs_written);
-\
+ &prog_data->base.vue_map, outputs_written,
+ prog ? prog->SeparateShader : false);
+
set_binding_table_layout(&prog_data->base.base, pipeline,
VK_SHADER_STAGE_VERTEX);
memset(key, 0, sizeof(*key));
- key->base.program_string_id = gp->id;
- brw_setup_vue_key_clip_info(brw, &key->base,
- gp->program.Base.UsesClipDistanceOut);
+ key->program_string_id = gp->id;
/* _NEW_TEXTURE */
brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
- &key->base.tex);
-
- struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
-
- /* BRW_NEW_VUE_MAP_VS */
- key->input_varyings = prog_data->base.vue_map.slots_valid;
+ &key->tex);
}
static bool
nir_builder_instr_insert(&state->builder, &new_load->instr);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_src_for_ssa(&new_load->dest.ssa),
- state->shader);
+ nir_src_for_ssa(&new_load->dest.ssa));
nir_instr_remove(&intrin->instr);
}