X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_wm_surface_state.c;h=d8a8d4743834b886d39f6adf2c07722a37760143;hb=e81df76f288943857cef865bb5d6434dc492cf92;hp=0f6abe2e54e7559bd0bab19cc0e60616f0bdcd4c;hpb=d2235b0f4681f75d562131d655a6d7b7033d2d8b;p=android-x86%2Fexternal-mesa.git

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 0f6abe2e54e..d8a8d474383 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -30,6 +30,8 @@
  */
 
 
+#include "main/context.h"
+#include "main/blend.h"
 #include "main/mtypes.h"
 #include "main/samplerobj.h"
 #include "program/prog_parameter.h"
@@ -38,6 +40,7 @@
 #include "intel_batchbuffer.h"
 #include "intel_tex.h"
 #include "intel_fbo.h"
+#include "intel_buffer_objects.h"
 
 #include "brw_context.h"
 #include "brw_state.h"
@@ -57,12 +60,16 @@ translate_tex_target(GLenum target)
 
    case GL_TEXTURE_2D:
    case GL_TEXTURE_2D_ARRAY_EXT:
+   case GL_TEXTURE_EXTERNAL_OES:
+   case GL_TEXTURE_2D_MULTISAMPLE:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
       return BRW_SURFACE_2D;
 
    case GL_TEXTURE_3D:
       return BRW_SURFACE_3D;
 
    case GL_TEXTURE_CUBE_MAP:
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
       return BRW_SURFACE_CUBE;
 
    default:
@@ -106,7 +113,25 @@ struct surface_format_info {
  *   Y^: 60 (gen6)
  *   Y#: 70 (gen7)
  *
+ * The abbreviations in the header below are:
+ *  smpl  - Sampling Engine
+ *  filt  - Sampling Engine Filtering
+ *  shad  - Sampling Engine Shadow Map
+ *  CK    - Sampling Engine Chroma Key
+ *  RT    - Render Target
+ *  AB    - Alpha Blend Render Target
+ *  VB    - Input Vertex Buffer
+ *  SO    - Steamed Output Vertex Buffers (transform feedback)
+ *  color - Color Processing
+ *
  * See page 88 of the Sandybridge PRM VOL4_Part1 PDF.
+ *
+ * As of Ivybridge, the columns are no longer in that table and the
+ * information can be found spread across:
+ *
+ * - VOL2_Part1 section 2.5.11 Format Conversion (vertex fetch).
+ * - VOL4_Part1 section 2.12.2.1.2 Sampler Output Channel Mapping.
+ * - VOL4_Part1 section 3.9.11 Render Target Write.
*/ const struct surface_format_info surface_formats[] = { /* smpl filt shad CK RT AB VB SO color */ @@ -119,6 +144,8 @@ const struct surface_format_info surface_formats[] = { SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32X32_FLOAT) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SSCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_USCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64_PASSTHRU) SF( Y, 50, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_FLOAT) SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_SINT) SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_UINT) @@ -126,6 +153,7 @@ const struct surface_format_info surface_formats[] = { SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SNORM) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SSCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_USCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32_SFIXED) SF( Y, Y, x, x, Y, 45, Y, x, 60, BRW_SURFACEFORMAT_R16G16B16A16_UNORM) SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SNORM) SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SINT) @@ -149,6 +177,8 @@ const struct surface_format_info surface_formats[] = { SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_USCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SSCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_USCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64_PASSTHRU) SF( Y, Y, x, Y, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_B8G8R8A8_UNORM) SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB) /* smpl filt shad CK RT AB VB SO color */ @@ -213,6 +243,8 @@ const struct surface_format_info surface_formats[] = { SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_SINT) SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_UINT) SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16_FLOAT) + SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE1) SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_UNORM) SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_UNORM) SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_UNORM) @@ -229,6 +261,12 @@ const struct surface_format_info surface_formats[] = { /* smpl filt shad CK RT AB VB SO color */ SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_SSCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_USCALED) + SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A1B5G5R5_UNORM) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4B4G4R4_UNORM) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_SINT) SF( Y, Y, x, 45, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8_UNORM) SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8_SNORM) SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_SINT) @@ -240,11 +278,22 @@ const struct surface_format_info surface_formats[] = { SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_SSCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_USCALED) + SF(45, 45, x, x, x, x, x, 
x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE0) SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM_SRGB) + SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_Y8_SNORM) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_SINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_SINT) SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB_SRGB) SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R1_UINT) SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_NORMAL) SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUVY) + SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE0) + SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE1) SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM) SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM) SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM) @@ -267,10 +316,44 @@ const struct surface_format_info surface_formats[] = { SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64_FLOAT) SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_SNORM) SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_SNORM) + SF(50, 50, x, x, x, x, 60, x, x, BRW_SURFACEFORMAT_R16G16B16_FLOAT) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_UNORM) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SNORM) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SSCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_USCALED) + SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_SF16) + SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM) + SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM_SRGB) + SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_UF16) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_PLANAR_420_8) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC1_RGB8) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_R11) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_RG11) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_R11) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_RG11) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_UINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_SINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_UINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU) + SF( x, x, 
x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64_PASSTHRU) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_RGBA8) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_SINT) }; #undef x #undef Y @@ -286,13 +369,15 @@ brw_format_for_mesa_format(gl_format mesa_format) static const uint32_t table[MESA_FORMAT_COUNT] = { [MESA_FORMAT_RGBA8888] = 0, - [MESA_FORMAT_RGBA8888_REV] = 0, + [MESA_FORMAT_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_UNORM, [MESA_FORMAT_ARGB8888] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM, [MESA_FORMAT_ARGB8888_REV] = 0, + [MESA_FORMAT_RGBX8888] = 0, + [MESA_FORMAT_RGBX8888_REV] = BRW_SURFACEFORMAT_R8G8B8X8_UNORM, [MESA_FORMAT_XRGB8888] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM, [MESA_FORMAT_XRGB8888_REV] = 0, [MESA_FORMAT_RGB888] = 0, - [MESA_FORMAT_BGR888] = 0, + [MESA_FORMAT_BGR888] = BRW_SURFACEFORMAT_R8G8B8_UNORM, [MESA_FORMAT_RGB565] = BRW_SURFACEFORMAT_B5G6R5_UNORM, [MESA_FORMAT_RGB565_REV] = 0, [MESA_FORMAT_ARGB4444] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM, @@ -315,17 +400,17 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_YCBCR_REV] = BRW_SURFACEFORMAT_YCRCB_NORMAL, [MESA_FORMAT_YCBCR] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY, [MESA_FORMAT_R8] = BRW_SURFACEFORMAT_R8_UNORM, - [MESA_FORMAT_RG88] = BRW_SURFACEFORMAT_R8G8_UNORM, - [MESA_FORMAT_RG88_REV] = 0, + [MESA_FORMAT_GR88] = BRW_SURFACEFORMAT_R8G8_UNORM, + [MESA_FORMAT_RG88] = 0, [MESA_FORMAT_R16] = BRW_SURFACEFORMAT_R16_UNORM, - [MESA_FORMAT_RG1616] = BRW_SURFACEFORMAT_R16G16_UNORM, - [MESA_FORMAT_RG1616_REV] = 0, + [MESA_FORMAT_GR1616] = BRW_SURFACEFORMAT_R16G16_UNORM, + [MESA_FORMAT_RG1616] = 0, [MESA_FORMAT_ARGB2101010] = BRW_SURFACEFORMAT_B10G10R10A2_UNORM, [MESA_FORMAT_Z24_S8] = 0, [MESA_FORMAT_S8_Z24] = 0, [MESA_FORMAT_Z16] = 0, [MESA_FORMAT_X8_Z24] = 0, - [MESA_FORMAT_Z24_S8] = 0, + [MESA_FORMAT_Z24_X8] = 0, [MESA_FORMAT_Z32] = 0, [MESA_FORMAT_S8] = 0, @@ -348,8 +433,8 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_RGBA_FLOAT32] = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT, [MESA_FORMAT_RGBA_FLOAT16] = BRW_SURFACEFORMAT_R16G16B16A16_FLOAT, - [MESA_FORMAT_RGB_FLOAT32] = 0, - [MESA_FORMAT_RGB_FLOAT16] = 0, + [MESA_FORMAT_RGB_FLOAT32] = BRW_SURFACEFORMAT_R32G32B32_FLOAT, + [MESA_FORMAT_RGB_FLOAT16] = BRW_SURFACEFORMAT_R16G16B16_FLOAT, [MESA_FORMAT_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_A32_FLOAT, [MESA_FORMAT_ALPHA_FLOAT16] = BRW_SURFACEFORMAT_A16_FLOAT, [MESA_FORMAT_LUMINANCE_FLOAT32] = BRW_SURFACEFORMAT_L32_FLOAT, @@ -393,11 +478,11 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_R_INT8] = BRW_SURFACEFORMAT_R8_SINT, [MESA_FORMAT_RG_INT8] = BRW_SURFACEFORMAT_R8G8_SINT, - [MESA_FORMAT_RGB_INT8] = 0, + [MESA_FORMAT_RGB_INT8] = BRW_SURFACEFORMAT_R8G8B8_SINT, [MESA_FORMAT_RGBA_INT8] = BRW_SURFACEFORMAT_R8G8B8A8_SINT, [MESA_FORMAT_R_INT16] = BRW_SURFACEFORMAT_R16_SINT, [MESA_FORMAT_RG_INT16] = BRW_SURFACEFORMAT_R16G16_SINT, - [MESA_FORMAT_RGB_INT16] = 0, + [MESA_FORMAT_RGB_INT16] = BRW_SURFACEFORMAT_R16G16B16_SINT, [MESA_FORMAT_RGBA_INT16] = BRW_SURFACEFORMAT_R16G16B16A16_SINT, [MESA_FORMAT_R_INT32] = BRW_SURFACEFORMAT_R32_SINT, [MESA_FORMAT_RG_INT32] = BRW_SURFACEFORMAT_R32G32_SINT, @@ -406,11 +491,11 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_R_UINT8] = BRW_SURFACEFORMAT_R8_UINT, 
[MESA_FORMAT_RG_UINT8] = BRW_SURFACEFORMAT_R8G8_UINT, - [MESA_FORMAT_RGB_UINT8] = 0, + [MESA_FORMAT_RGB_UINT8] = BRW_SURFACEFORMAT_R8G8B8_UINT, [MESA_FORMAT_RGBA_UINT8] = BRW_SURFACEFORMAT_R8G8B8A8_UINT, [MESA_FORMAT_R_UINT16] = BRW_SURFACEFORMAT_R16_UINT, [MESA_FORMAT_RG_UINT16] = BRW_SURFACEFORMAT_R16G16_UINT, - [MESA_FORMAT_RGB_UINT16] = 0, + [MESA_FORMAT_RGB_UINT16] = BRW_SURFACEFORMAT_R16G16B16_UINT, [MESA_FORMAT_RGBA_UINT16] = BRW_SURFACEFORMAT_R16G16B16A16_UINT, [MESA_FORMAT_R_UINT32] = BRW_SURFACEFORMAT_R32_UINT, [MESA_FORMAT_RG_UINT32] = BRW_SURFACEFORMAT_R32G32_UINT, @@ -425,8 +510,8 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM, [MESA_FORMAT_SIGNED_R16] = BRW_SURFACEFORMAT_R16_SNORM, [MESA_FORMAT_SIGNED_GR1616] = BRW_SURFACEFORMAT_R16G16_SNORM, - [MESA_FORMAT_SIGNED_RGB_16] = 0, - [MESA_FORMAT_SIGNED_RGBA_16] = 0, + [MESA_FORMAT_SIGNED_RGB_16] = BRW_SURFACEFORMAT_R16G16B16_SNORM, + [MESA_FORMAT_SIGNED_RGBA_16] = BRW_SURFACEFORMAT_R16G16B16A16_SNORM, [MESA_FORMAT_RGBA_16] = BRW_SURFACEFORMAT_R16G16B16A16_UNORM, [MESA_FORMAT_RED_RGTC1] = BRW_SURFACEFORMAT_BC4_UNORM, @@ -439,6 +524,18 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_LA_LATC2] = 0, [MESA_FORMAT_SIGNED_LA_LATC2] = 0, + [MESA_FORMAT_ETC1_RGB8] = BRW_SURFACEFORMAT_ETC1_RGB8, + [MESA_FORMAT_ETC2_RGB8] = BRW_SURFACEFORMAT_ETC2_RGB8, + [MESA_FORMAT_ETC2_SRGB8] = BRW_SURFACEFORMAT_ETC2_SRGB8, + [MESA_FORMAT_ETC2_RGBA8_EAC] = BRW_SURFACEFORMAT_ETC2_EAC_RGBA8, + [MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8, + [MESA_FORMAT_ETC2_R11_EAC] = BRW_SURFACEFORMAT_EAC_R11, + [MESA_FORMAT_ETC2_RG11_EAC] = BRW_SURFACEFORMAT_EAC_RG11, + [MESA_FORMAT_ETC2_SIGNED_R11_EAC] = BRW_SURFACEFORMAT_EAC_SIGNED_R11, + [MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = BRW_SURFACEFORMAT_EAC_SIGNED_RG11, + [MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = BRW_SURFACEFORMAT_ETC2_RGB8_PTA, + [MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = BRW_SURFACEFORMAT_ETC2_SRGB8_PTA, + [MESA_FORMAT_SIGNED_A8] = 0, [MESA_FORMAT_SIGNED_L8] = 0, [MESA_FORMAT_SIGNED_AL88] = 0, @@ -453,6 +550,25 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_Z32_FLOAT] = 0, [MESA_FORMAT_Z32_FLOAT_X24S8] = 0, + + [MESA_FORMAT_ARGB2101010_UINT] = BRW_SURFACEFORMAT_B10G10R10A2_UINT, + [MESA_FORMAT_ABGR2101010_UINT] = BRW_SURFACEFORMAT_R10G10B10A2_UINT, + + [MESA_FORMAT_XRGB4444_UNORM] = 0, + [MESA_FORMAT_XRGB1555_UNORM] = BRW_SURFACEFORMAT_B5G5R5X1_UNORM, + [MESA_FORMAT_XBGR8888_SNORM] = 0, + [MESA_FORMAT_XBGR8888_SRGB] = 0, + [MESA_FORMAT_XBGR8888_UINT] = 0, + [MESA_FORMAT_XBGR8888_SINT] = 0, + [MESA_FORMAT_XRGB2101010_UNORM] = BRW_SURFACEFORMAT_B10G10R10X2_UNORM, + [MESA_FORMAT_XBGR16161616_UNORM] = BRW_SURFACEFORMAT_R16G16B16X16_UNORM, + [MESA_FORMAT_XBGR16161616_SNORM] = 0, + [MESA_FORMAT_XBGR16161616_FLOAT] = BRW_SURFACEFORMAT_R16G16B16X16_FLOAT, + [MESA_FORMAT_XBGR16161616_UINT] = 0, + [MESA_FORMAT_XBGR16161616_SINT] = 0, + [MESA_FORMAT_XBGR32323232_FLOAT] = BRW_SURFACEFORMAT_R32G32B32X32_FLOAT, + [MESA_FORMAT_XBGR32323232_UINT] = 0, + [MESA_FORMAT_XBGR32323232_SINT] = 0, }; assert(mesa_format < MESA_FORMAT_COUNT); return table[mesa_format]; @@ -521,14 +637,9 @@ brw_init_surface_formats(struct brw_context *brw) * integer, so we don't need hardware support for blending on it. Other * than that, GL in general requires alpha blending for render targets, * even though we don't support it for some formats. 
- * - * We don't currently support rendering to SNORM textures because some of - * the ARB_color_buffer_float clamping is broken for it - * (piglit arb_color_buffer_float-drawpixels GL_RGBA8_SNORM). */ if (gen >= rinfo->render_target && - (gen >= rinfo->alpha_blend || is_integer) && - _mesa_get_format_datatype(format) != GL_SIGNED_NORMALIZED) { + (gen >= rinfo->alpha_blend || is_integer)) { brw->render_target_format[format] = render; brw->format_supported_as_render_target[format] = true; } @@ -541,30 +652,97 @@ brw_init_surface_formats(struct brw_context *brw) brw->format_supported_as_render_target[MESA_FORMAT_X8_Z24] = true; brw->format_supported_as_render_target[MESA_FORMAT_S8] = true; brw->format_supported_as_render_target[MESA_FORMAT_Z16] = true; + brw->format_supported_as_render_target[MESA_FORMAT_Z32_FLOAT] = true; + brw->format_supported_as_render_target[MESA_FORMAT_Z32_FLOAT_X24S8] = true; /* We remap depth formats to a supported texturing format in * translate_tex_format(). */ ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = true; ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = true; + ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT] = true; + ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT_X24S8] = true; + + /* It appears that Z16 is slower than Z24 (on Intel Ivybridge and newer + * hardware at least), so there's no real reason to prefer it unless you're + * under memory (not memory bandwidth) pressure. Our speculation is that + * this is due to either increased fragment shader execution from + * GL_LEQUAL/GL_EQUAL depth tests at the reduced precision, or due to + * increased depth stalls from a cacheline-based heuristic for detecting + * depth stalls. + * + * However, desktop GL 3.0+ require that you get exactly 16 bits when + * asking for DEPTH_COMPONENT16, so we have to respect that. + */ + if (_mesa_is_desktop_gl(ctx)) + ctx->TextureFormatSupported[MESA_FORMAT_Z16] = true; + + /* On hardware that lacks support for ETC1, we map ETC1 to RGBX + * during glCompressedTexImage2D(). See intel_mipmap_tree::wraps_etc1. + */ + ctx->TextureFormatSupported[MESA_FORMAT_ETC1_RGB8] = true; + + /* On hardware that lacks support for ETC2, we map ETC2 to a suitable + * MESA_FORMAT during glCompressedTexImage2D(). + * See intel_mipmap_tree::wraps_etc2. + */ + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_RGB8] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SRGB8] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_RGBA8_EAC] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_R11_EAC] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_RG11_EAC] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = true; } bool -brw_render_target_supported(struct intel_context *intel, gl_format format) +brw_render_target_supported(struct intel_context *intel, + struct gl_renderbuffer *rb) { struct brw_context *brw = brw_context(&intel->ctx); - /* Not exactly true, as some of those formats are not renderable. - * But at least we know how to translate them. 
+ gl_format format = rb->Format; + + /* Many integer formats are promoted to RGBA (like XRGB8888 is), which means + * we would consider them renderable even though we don't have surface + * support for their alpha behavior and don't have the blending unit + * available to fake it like we do for XRGB8888. Force them to being + * unsupported. */ + if ((rb->_BaseFormat != GL_RGBA && + rb->_BaseFormat != GL_RG && + rb->_BaseFormat != GL_RED) && _mesa_is_format_integer_color(format)) + return false; + + /* Under some conditions, MSAA is not supported for formats whose width is + * more than 64 bits. + */ + if (rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) { + /* Gen6: MSAA on >64 bit formats is unsupported. */ + if (intel->gen <= 6) + return false; + + /* Gen7: 8x MSAA on >64 bit formats is unsupported. */ + if (rb->NumSamples >= 8) + return false; + } + return brw->format_supported_as_render_target[format]; } GLuint -translate_tex_format(gl_format mesa_format, +translate_tex_format(struct intel_context *intel, + gl_format mesa_format, GLenum internal_format, GLenum depth_mode, GLenum srgb_decode) { + struct gl_context *ctx = &intel->ctx; + if (srgb_decode == GL_SKIP_DECODE_EXT) + mesa_format = _mesa_get_srgb_format_linear(mesa_format); + switch( mesa_format ) { case MESA_FORMAT_Z16: @@ -574,17 +752,11 @@ translate_tex_format(gl_format mesa_format, case MESA_FORMAT_X8_Z24: return BRW_SURFACEFORMAT_I24X8_UNORM; - case MESA_FORMAT_SARGB8: - case MESA_FORMAT_SLA8: - case MESA_FORMAT_SL8: - if (srgb_decode == GL_DECODE_EXT) - return brw_format_for_mesa_format(mesa_format); - else if (srgb_decode == GL_SKIP_DECODE_EXT) - return brw_format_for_mesa_format(_mesa_get_srgb_format_linear(mesa_format)); + case MESA_FORMAT_Z32_FLOAT: + return BRW_SURFACEFORMAT_I32_FLOAT; - case MESA_FORMAT_RGBA8888_REV: - /* This format is not renderable? */ - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + case MESA_FORMAT_Z32_FLOAT_X24S8: + return BRW_SURFACEFORMAT_R32G32_FLOAT; case MESA_FORMAT_RGBA_FLOAT32: /* The value of this BRW_SURFACEFORMAT is 0, which tricks the @@ -592,13 +764,24 @@ translate_tex_format(gl_format mesa_format, */ return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + case MESA_FORMAT_SRGB_DXT1: + if (intel->gen == 4 && !intel->is_g4x) { + /* Work around missing SRGB DXT1 support on original gen4 by just + * skipping SRGB decode. It's not worth not supporting sRGB in + * general to prevent this. + */ + WARN_ONCE(true, "Demoting sRGB DXT1 texture to non-sRGB\n"); + mesa_format = MESA_FORMAT_RGB_DXT1; + } + return brw_format_for_mesa_format(mesa_format); + default: assert(brw_format_for_mesa_format(mesa_format) != 0); return brw_format_for_mesa_format(mesa_format); } } -static uint32_t +uint32_t brw_get_surface_tiling_bits(uint32_t tiling) { switch (tiling) { @@ -611,67 +794,295 @@ brw_get_surface_tiling_bits(uint32_t tiling) } } + +uint32_t +brw_get_surface_num_multisamples(unsigned num_samples) +{ + if (num_samples > 1) + return BRW_SURFACE_MULTISAMPLECOUNT_4; + else + return BRW_SURFACE_MULTISAMPLECOUNT_1; +} + + +/** + * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle + * swizzling. 
+ */ +int +brw_get_texture_swizzle(const struct gl_context *ctx, + const struct gl_texture_object *t) +{ + const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + + int swizzles[SWIZZLE_NIL + 1] = { + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, + SWIZZLE_ZERO, + SWIZZLE_ONE, + SWIZZLE_NIL + }; + + if (img->_BaseFormat == GL_DEPTH_COMPONENT || + img->_BaseFormat == GL_DEPTH_STENCIL) { + GLenum depth_mode = t->DepthMode; + + /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures + * with depth component data specified with a sized internal format. + * Otherwise, it's left at the old default, GL_LUMINANCE. + */ + if (_mesa_is_gles3(ctx) && + img->InternalFormat != GL_DEPTH_COMPONENT && + img->InternalFormat != GL_DEPTH_STENCIL) { + depth_mode = GL_RED; + } + + switch (depth_mode) { + case GL_ALPHA: + swizzles[0] = SWIZZLE_ZERO; + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_X; + break; + case GL_LUMINANCE: + swizzles[0] = SWIZZLE_X; + swizzles[1] = SWIZZLE_X; + swizzles[2] = SWIZZLE_X; + swizzles[3] = SWIZZLE_ONE; + break; + case GL_INTENSITY: + swizzles[0] = SWIZZLE_X; + swizzles[1] = SWIZZLE_X; + swizzles[2] = SWIZZLE_X; + swizzles[3] = SWIZZLE_X; + break; + case GL_RED: + swizzles[0] = SWIZZLE_X; + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_ONE; + break; + } + } + + /* If the texture's format is alpha-only, force R, G, and B to + * 0.0. Similarly, if the texture's format has no alpha channel, + * force the alpha value read to 1.0. This allows for the + * implementation to use an RGBA texture for any of these formats + * without leaking any unexpected values. + */ + switch (img->_BaseFormat) { + case GL_ALPHA: + swizzles[0] = SWIZZLE_ZERO; + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + break; + case GL_RED: + case GL_RG: + case GL_RGB: + if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0) + swizzles[3] = SWIZZLE_ONE; + break; + } + + return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)], + swizzles[GET_SWZ(t->_Swizzle, 1)], + swizzles[GET_SWZ(t->_Swizzle, 2)], + swizzles[GET_SWZ(t->_Swizzle, 3)]); +} + + static void -brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) +brw_update_buffer_texture_surface(struct gl_context *ctx, + unsigned unit, + uint32_t *binding_table, + unsigned surf_index) { struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = &brw->intel; struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct intel_mipmap_tree *mt = intelObj->mt; - struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - const GLuint surf_index = SURF_INDEX_TEXTURE(unit); uint32_t *surf; - int width, height, depth; - - intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth); + struct intel_buffer_object *intel_obj = + intel_buffer_object(tObj->BufferObject); + drm_intel_bo *bo = intel_obj ? 
intel_obj->buffer : NULL; + gl_format format = tObj->_BufferObjectFormat; + uint32_t brw_format = brw_format_for_mesa_format(format); + int texel_size = _mesa_get_format_bytes(format); + + if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) { + _mesa_problem(NULL, "bad format %s for texture buffer\n", + _mesa_get_format_name(format)); + } surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - 6 * 4, 32, &brw->bind.surf_offset[surf_index]); + 6 * 4, 32, &binding_table[surf_index]); + + surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | + (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT)); + + if (intel->gen >= 6) + surf[0] |= BRW_SURFACE_RC_READ_WRITE; + + if (bo) { + surf[1] = bo->offset; /* reloc */ + + /* Emit relocation to surface contents. */ + drm_intel_bo_emit_reloc(brw->intel.batch.bo, + binding_table[surf_index] + 4, + bo, 0, I915_GEM_DOMAIN_SAMPLER, 0); + + int w = intel_obj->Base.Size / texel_size; + surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT | + ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT); + surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT | + (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT); + } else { + surf[1] = 0; + surf[2] = 0; + surf[3] = 0; + } + + surf[4] = 0; + surf[5] = 0; +} + +static void +brw_update_texture_component(struct brw_context *brw, + uint32_t *binding_table_slot, + const struct intel_mipmap_tree *mt, + unsigned width, unsigned height, + unsigned depth, unsigned stride, + GLuint target, GLuint tex_format, + uint32_t offset, uint32_t levels, + uint32_t first_level) +{ + uint32_t tile_x, tile_y; + uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, binding_table_slot); - surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | + surf[0] = (target << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | BRW_SURFACE_CUBEFACE_ENABLES | - (translate_tex_format(firstImage->TexFormat, - firstImage->InternalFormat, - sampler->DepthMode, - sampler->sRGBDecode) << - BRW_SURFACE_FORMAT_SHIFT)); + (tex_format << BRW_SURFACE_FORMAT_SHIFT)); - surf[1] = intelObj->mt->region->bo->offset; /* reloc */ + surf[1] = mt->region->bo->offset + offset; + surf[1] += intel_miptree_get_tile_offsets(mt, first_level, 0, + &tile_x, &tile_y); - surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT | + surf[2] = (levels << BRW_SURFACE_LOD_SHIFT | (width - 1) << BRW_SURFACE_WIDTH_SHIFT | (height - 1) << BRW_SURFACE_HEIGHT_SHIFT); - surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) | + surf[3] = (brw_get_surface_tiling_bits(mt->region->tiling) | (depth - 1) << BRW_SURFACE_DEPTH_SHIFT | - ((intelObj->mt->region->pitch * intelObj->mt->cpp) - 1) << - BRW_SURFACE_PITCH_SHIFT); + (stride - 1) << BRW_SURFACE_PITCH_SHIFT); - surf[4] = 0; + surf[4] = brw_get_surface_num_multisamples(mt->num_samples); - surf[5] = (mt->align_h == 4) ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0; + assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. + */ + assert(tile_x % 4 == 0); + assert(tile_y % 2 == 0); + surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | + (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | + (mt->align_h == 4 ? 
BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); /* Emit relocation to surface contents */ drm_intel_bo_emit_reloc(brw->intel.batch.bo, - brw->bind.surf_offset[surf_index] + 4, - intelObj->mt->region->bo, 0, - I915_GEM_DOMAIN_SAMPLER, 0); + *binding_table_slot + 4, + mt->region->bo, + surf[1] - mt->region->bo->offset, + I915_GEM_DOMAIN_SAMPLER, 0); +} + +static void +brw_update_ext_texture_surface(struct brw_context *brw, + uint32_t *binding_table_slots, + const struct intel_texture_image *intel_img) +{ + unsigned i; + const struct intel_region *region = intel_img->mt->region; + const struct intel_image_format *f = intel_img->ext_format; + + for (i = 0; i < f->nplanes; ++i) { + int format = BRW_SURFACEFORMAT_R8_UNORM; + int index = f->planes[i].buffer_index; + + if (f->planes[i].dri_format == __DRI_IMAGE_FORMAT_GR88) + format = BRW_SURFACEFORMAT_R8G8_UNORM; + + brw_update_texture_component(brw, binding_table_slots + i, + intel_img->mt, + region->width >> f->planes[i].width_shift, + region->height >> f->planes[i].height_shift, + intel_img->base.Base.Depth, + intel_img->ext_strides[index], + BRW_SURFACE_2D, + format, + intel_img->ext_offsets[index], + 0 /*levels*/, + 0 /*first_level*/); + } +} + +static void +brw_update_texture_surface(struct gl_context *ctx, + unsigned unit, + uint32_t *binding_table, + unsigned surf_index) +{ + struct brw_context *brw = brw_context(ctx); + struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct intel_mipmap_tree *mt = intelObj->mt; + struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; + const struct intel_texture_image *intel_img = + (const struct intel_texture_image *)firstImage; + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); + int width, height, depth; + + if (tObj->Target == GL_TEXTURE_BUFFER) { + brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index); + return; + } + else if (tObj->Target == GL_TEXTURE_EXTERNAL_OES && intel_img->ext_format) { + brw_update_ext_texture_surface(brw, binding_table + surf_index, + intel_img); + return; + } + + intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth); + + brw_update_texture_component(brw, binding_table + surf_index, + mt, width, height, depth, mt->region->pitch, + translate_tex_target(tObj->Target), + translate_tex_format(intel_context(ctx), mt->format, + firstImage->InternalFormat, tObj->DepthMode, sampler->sRGBDecode), + mt->offset, + intelObj->_MaxLevel - tObj->BaseLevel, + firstImage->Level); } /** * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ -void +static void brw_create_constant_surface(struct brw_context *brw, drm_intel_bo *bo, - int width, - uint32_t *out_offset) + uint32_t offset, + uint32_t size, + uint32_t *out_offset, + bool dword_pitch) { struct intel_context *intel = &brw->intel; - const GLint w = width - 1; + uint32_t stride = dword_pitch ? 
4 : 16; + uint32_t elements = ALIGN(size, stride) / stride; + const GLint w = elements - 1; uint32_t *surf; surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, @@ -684,13 +1095,13 @@ brw_create_constant_surface(struct brw_context *brw, if (intel->gen >= 6) surf[0] |= BRW_SURFACE_RC_READ_WRITE; - surf[1] = bo->offset; /* reloc */ + surf[1] = bo->offset + offset; /* reloc */ - surf[2] = (((w & 0x7f) - 1) << BRW_SURFACE_WIDTH_SHIFT | - (((w >> 7) & 0x1fff) - 1) << BRW_SURFACE_HEIGHT_SHIFT); + surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT | + ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT); - surf[3] = ((((w >> 20) & 0x7f) - 1) << BRW_SURFACE_DEPTH_SHIFT | - (width * 16 - 1) << BRW_SURFACE_PITCH_SHIFT); + surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT | + (stride - 1) << BRW_SURFACE_PITCH_SHIFT); surf[4] = 0; surf[5] = 0; @@ -701,10 +1112,97 @@ brw_create_constant_surface(struct brw_context *brw, */ drm_intel_bo_emit_reloc(brw->intel.batch.bo, *out_offset + 4, - bo, 0, + bo, offset, I915_GEM_DOMAIN_SAMPLER, 0); } +/** + * Set up a binding table entry for use by stream output logic (transform + * feedback). + * + * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES. + */ +void +brw_update_sol_surface(struct brw_context *brw, + struct gl_buffer_object *buffer_obj, + uint32_t *out_offset, unsigned num_vector_components, + unsigned stride_dwords, unsigned offset_dwords) +{ + struct intel_context *intel = &brw->intel; + struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj); + drm_intel_bo *bo = + intel_bufferobj_buffer(intel, intel_bo, INTEL_WRITE_PART); + uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, + out_offset); + uint32_t pitch_minus_1 = 4*stride_dwords - 1; + uint32_t offset_bytes = 4 * offset_dwords; + size_t size_dwords = buffer_obj->Size / 4; + uint32_t buffer_size_minus_1, width, height, depth, surface_format; + + /* FIXME: can we rely on core Mesa to ensure that the buffer isn't + * too big to map using a single binding table entry? + */ + assert((size_dwords - offset_dwords) / stride_dwords + <= BRW_MAX_NUM_BUFFER_ENTRIES); + + if (size_dwords > offset_dwords + num_vector_components) { + /* There is room for at least 1 transform feedback output in the buffer. + * Compute the number of additional transform feedback outputs the + * buffer has room for. + */ + buffer_size_minus_1 = + (size_dwords - offset_dwords - num_vector_components) / stride_dwords; + } else { + /* There isn't even room for a single transform feedback output in the + * buffer. We can't configure the binding table entry to prevent output + * entirely; we'll have to rely on the geometry shader to detect + * overflow. But to minimize the damage in case of a bug, set up the + * binding table entry to just allow a single output. 
+ */ + buffer_size_minus_1 = 0; + } + width = buffer_size_minus_1 & 0x7f; + height = (buffer_size_minus_1 & 0xfff80) >> 7; + depth = (buffer_size_minus_1 & 0x7f00000) >> 20; + + switch (num_vector_components) { + case 1: + surface_format = BRW_SURFACEFORMAT_R32_FLOAT; + break; + case 2: + surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT; + break; + case 3: + surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; + break; + case 4: + surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + break; + default: + assert(!"Invalid vector size for transform feedback output"); + surface_format = BRW_SURFACEFORMAT_R32_FLOAT; + break; + } + + surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | + BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | + surface_format << BRW_SURFACE_FORMAT_SHIFT | + BRW_SURFACE_RC_READ_WRITE; + surf[1] = bo->offset + offset_bytes; /* reloc */ + surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT | + height << BRW_SURFACE_HEIGHT_SHIFT); + surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT | + pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); + surf[4] = 0; + surf[5] = 0; + + /* Emit relocation to surface contents. */ + drm_intel_bo_emit_reloc(brw->intel.batch.bo, + *out_offset + 4, + bo, offset_bytes, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); +} + /* Creates a new WM constant buffer reflecting the current fragment program's * constants, if needed by the fragment program. * @@ -732,8 +1230,8 @@ brw_upload_wm_pull_constants(struct brw_context *brw) if (brw->wm.const_bo) { drm_intel_bo_unreference(brw->wm.const_bo); brw->wm.const_bo = NULL; - brw->bind.surf_offset[surf_index] = 0; - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + brw->wm.surf_offset[surf_index] = 0; + brw->state.dirty.brw |= BRW_NEW_SURFACES; } return; } @@ -746,16 +1244,15 @@ brw_upload_wm_pull_constants(struct brw_context *brw) drm_intel_gem_bo_map_gtt(brw->wm.const_bo); constants = brw->wm.const_bo->virtual; for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) { - constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i], - brw->wm.prog_data->pull_param[i]); + constants[i] = *brw->wm.prog_data->pull_param[i]; } drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo); - intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, - params->NumParameters, - &brw->bind.surf_offset[surf_index]); + intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size, + &brw->wm.surf_offset[surf_index], + true); - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + brw->state.dirty.brw |= BRW_NEW_SURFACES; } const struct brw_tracked_state brw_wm_pull_constants = { @@ -770,13 +1267,66 @@ const struct brw_tracked_state brw_wm_pull_constants = { static void brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) { + /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming + * Notes): + * + * A null surface will be used in instances where an actual surface is + * not bound. When a write message is generated to a null surface, no + * actual surface is written to. When a read message (including any + * sampling engine message) is generated to a null surface, the result + * is all zeros. Note that a null surface type is allowed to be used + * with all messages, even if it is not specificially indicated as + * supported. 
All of the remaining fields in surface state are ignored + * for null surfaces, with the following exceptions: + * + * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the + * depth buffer’s corresponding state for all render target surfaces, + * including null. + * + * - Surface Format must be R8G8B8A8_UNORM. + */ struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; uint32_t *surf; + unsigned surface_type = BRW_SURFACE_NULL; + drm_intel_bo *bo = NULL; + unsigned pitch_minus_1 = 0; + uint32_t multisampling_state = 0; + + /* _NEW_BUFFERS */ + const struct gl_framebuffer *fb = ctx->DrawBuffer; surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - 6 * 4, 32, &brw->bind.surf_offset[unit]); + 6 * 4, 32, &brw->wm.surf_offset[unit]); - surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | + if (fb->Visual.samples > 1) { + /* On Gen6, null render targets seem to cause GPU hangs when + * multisampling. So work around this problem by rendering into dummy + * color buffer. + * + * To decrease the amount of memory needed by the workaround buffer, we + * set its pitch to 128 bytes (the width of a Y tile). This means that + * the amount of memory needed for the workaround buffer is + * (width_in_tiles + height_in_tiles - 1) tiles. + * + * Note that since the workaround buffer will be interpreted by the + * hardware as an interleaved multisampled buffer, we need to compute + * width_in_tiles and height_in_tiles by dividing the width and height + * by 16 rather than the normal Y-tile size of 32. + */ + unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16; + unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16; + unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096; + brw_get_scratch_bo(intel, &brw->wm.multisampled_null_render_target_bo, + size_needed); + bo = brw->wm.multisampled_null_render_target_bo; + surface_type = BRW_SURFACE_2D; + pitch_minus_1 = 127; + multisampling_state = + brw_get_surface_num_multisamples(fb->Visual.samples); + } + + surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); if (intel->gen < 6) { surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT | @@ -784,11 +1334,26 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT | 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT); } - surf[1] = 0; - surf[2] = 0; - surf[3] = 0; - surf[4] = 0; + surf[1] = bo ? 
bo->offset : 0; + surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | + (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); + + /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming + * Notes): + * + * If Surface Type is SURFTYPE_NULL, this field must be TRUE + */ + surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y | + pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); + surf[4] = multisampling_state; surf[5] = 0; + + if (bo) { + drm_intel_bo_emit_reloc(brw->intel.batch.bo, + brw->wm.surf_offset[unit] + 4, + bo, 0, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); + } } /** @@ -799,52 +1364,62 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) static void brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, + bool layered, unsigned int unit) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_mipmap_tree *mt = irb->mt; - struct intel_region *region = irb->mt->region; + struct intel_region *region; uint32_t *surf; uint32_t tile_x, tile_y; uint32_t format = 0; + /* _NEW_BUFFERS */ + gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); - surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - 6 * 4, 32, &brw->bind.surf_offset[unit]); - - switch (irb->Base.Format) { - case MESA_FORMAT_SARGB8: - /* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB - surfaces to the blend/update as sRGB */ - if (ctx->Color.sRGBEnabled) - format = brw_format_for_mesa_format(irb->Base.Format); - else - format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - break; - default: - format = brw->render_target_format[irb->Base.Format]; - if (unlikely(!brw->format_supported_as_render_target[irb->Base.Format])) { - _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", - __FUNCTION__, _mesa_get_format_name(irb->Base.Format)); + assert(!layered); + + if (rb->TexImage && !brw->has_surface_tile_offset) { + intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y); + + if (tile_x != 0 || tile_y != 0) { + /* Original gen4 hardware couldn't draw to a non-tile-aligned + * destination in a miptree unless you actually setup your renderbuffer + * as a miptree and used the fragile lod/array_index/etc. controls to + * select the image. So, instead, we just make a new single-level + * miptree and render into that. 
+ */ + intel_renderbuffer_move_to_temp(intel, irb, false); + mt = irb->mt; } - break; + } + + region = irb->mt->region; + + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[unit]); + + format = brw->render_target_format[rb_format]; + if (unlikely(!brw->format_supported_as_render_target[rb_format])) { + _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", + __FUNCTION__, _mesa_get_format_name(rb_format)); } surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | format << BRW_SURFACE_FORMAT_SHIFT); /* reloc */ - surf[1] = (intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y) + + surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) + region->bo->offset); surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); surf[3] = (brw_get_surface_tiling_bits(region->tiling) | - ((region->pitch * region->cpp) - 1) << BRW_SURFACE_PITCH_SHIFT); + (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT); - surf[4] = 0; + surf[4] = brw_get_surface_num_multisamples(mt->num_samples); assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); /* Note that the low bits of these fields are missing, so @@ -879,7 +1454,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, } drm_intel_bo_emit_reloc(brw->intel.batch.bo, - brw->bind.surf_offset[unit] + 4, + brw->wm.surf_offset[unit] + 4, region->bo, surf[1] - region->bo->offset, I915_GEM_DOMAIN_RENDER, @@ -901,7 +1476,8 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw) if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) { - intel->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i], i); + intel->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i], + ctx->DrawBuffer->Layered, i); } else { intel->vtbl.update_null_renderbuffer_surface(brw, i); } @@ -909,7 +1485,7 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw) } else { intel->vtbl.update_null_renderbuffer_surface(brw, 0); } - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + brw->state.dirty.brw |= BRW_NEW_SURFACES; } const struct brw_tracked_state brw_renderbuffer_surfaces = { @@ -937,67 +1513,155 @@ const struct brw_tracked_state gen6_renderbuffer_surfaces = { static void brw_update_texture_surfaces(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; - for (unsigned i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; - const GLuint surf = SURF_INDEX_TEXTURE(i); + /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM: + * Unfortunately, we're stuck using the gl_program structs until the + * ARB_fragment_program front-end gets converted to GLSL IR. These + * have the downside that SamplerUnits is split and only contains the + * mappings for samplers active in that stage. 
+ */ + struct gl_program *vs = (struct gl_program *) brw->vertex_program; + struct gl_program *fs = (struct gl_program *) brw->fragment_program; + + unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed); + + for (unsigned s = 0; s < num_samplers; s++) { + if (vs->SamplersUsed & (1 << s)) { + const unsigned unit = vs->SamplerUnits[s]; + + /* _NEW_TEXTURE */ + if (ctx->Texture.Unit[unit]._ReallyEnabled) { + intel->vtbl.update_texture_surface(ctx, unit, + brw->vs.surf_offset, + brw_surf_index_vs_texture( + (const struct brw_vertex_program *)brw->vertex_program, + s)); + } + } - /* _NEW_TEXTURE */ - if (texUnit->_ReallyEnabled) { - brw->intel.vtbl.update_texture_surface(ctx, i); - } else { - brw->bind.surf_offset[surf] = 0; + if (fs->SamplersUsed & (1 << s)) { + const unsigned unit = fs->SamplerUnits[s]; + + /* _NEW_TEXTURE */ + if (ctx->Texture.Unit[unit]._ReallyEnabled) { + intel->vtbl.update_texture_surface(ctx, unit, + brw->wm.surf_offset, + brw_surf_index_texture( + (const struct brw_fragment_program *)brw->fragment_program, + s)); + } } } - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + brw->state.dirty.brw |= BRW_NEW_SURFACES; } const struct brw_tracked_state brw_texture_surfaces = { .dirty = { .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH, + .brw = BRW_NEW_BATCH | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_FRAGMENT_PROGRAM, .cache = 0 }, .emit = brw_update_texture_surfaces, }; +void +brw_upload_ubo_surfaces(struct brw_context *brw, + struct gl_shader *shader, + uint32_t *surf_offsets) +{ + struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + + if (!shader) + return; + + for (int i = 0; i < shader->NumUniformBlocks; i++) { + struct gl_uniform_buffer_binding *binding; + struct intel_buffer_object *intel_bo; + + binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding]; + intel_bo = intel_buffer_object(binding->BufferObject); + drm_intel_bo *bo = intel_bufferobj_buffer(intel, intel_bo, INTEL_READ); + + /* Because behavior for referencing outside of the binding's size in the + * glBindBufferRange case is undefined, we can just bind the whole buffer + * glBindBufferBase wants and be a correct implementation. + */ + intel->vtbl.create_constant_surface(brw, bo, binding->Offset, + bo->size - binding->Offset, + &surf_offsets[i], + shader->Type == GL_FRAGMENT_SHADER); + } + + if (shader->NumUniformBlocks) + brw->state.dirty.brw |= BRW_NEW_SURFACES; +} + +static void +brw_upload_wm_ubo_surfaces(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + /* _NEW_PROGRAM */ + struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram; + + if (!prog) + return; + + brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], + &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]); +} + +const struct brw_tracked_state brw_wm_ubo_surfaces = { + .dirty = { + .mesa = _NEW_PROGRAM, + .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER, + .cache = 0, + }, + .emit = brw_upload_wm_ubo_surfaces, +}; + /** * Constructs the binding table for the WM surface state, which maps unit * numbers to surface state objects. */ static void -brw_upload_binding_table(struct brw_context *brw) +brw_upload_wm_binding_table(struct brw_context *brw) { uint32_t *bind; int i; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) { + gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]); + } + /* Might want to calculate nr_surfaces first, to avoid taking up so much * space for the binding table. 
*/ bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, - sizeof(uint32_t) * BRW_MAX_SURFACES, - 32, &brw->bind.bo_offset); + sizeof(uint32_t) * BRW_MAX_WM_SURFACES, + 32, &brw->wm.bind_bo_offset); - /* BRW_NEW_WM_SURFACES and BRW_NEW_VS_CONSTBUF */ - for (i = 0; i < BRW_MAX_SURFACES; i++) { - bind[i] = brw->bind.surf_offset[i]; + /* BRW_NEW_SURFACES */ + for (i = 0; i < BRW_MAX_WM_SURFACES; i++) { + bind[i] = brw->wm.surf_offset[i]; } - brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE; brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE; } -const struct brw_tracked_state brw_binding_table = { +const struct brw_tracked_state brw_wm_binding_table = { .dirty = { .mesa = 0, .brw = (BRW_NEW_BATCH | - BRW_NEW_VS_CONSTBUF | - BRW_NEW_WM_SURFACES), + BRW_NEW_SURFACES), .cache = 0 }, - .emit = brw_upload_binding_table, + .emit = brw_upload_wm_binding_table, }; void