X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_wm_surface_state.c;h=d8a8d4743834b886d39f6adf2c07722a37760143;hb=e81df76f288943857cef865bb5d6434dc492cf92;hp=0f6abe2e54e7559bd0bab19cc0e60616f0bdcd4c;hpb=d2235b0f4681f75d562131d655a6d7b7033d2d8b;p=android-x86%2Fexternal-mesa.git

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 0f6abe2e54e..d8a8d474383 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -30,6 +30,8 @@
  */
 
 
+#include "main/context.h"
+#include "main/blend.h"
 #include "main/mtypes.h"
 #include "main/samplerobj.h"
 #include "program/prog_parameter.h"
@@ -38,6 +40,7 @@
 #include "intel_batchbuffer.h"
 #include "intel_tex.h"
 #include "intel_fbo.h"
+#include "intel_buffer_objects.h"
 
 #include "brw_context.h"
 #include "brw_state.h"
@@ -57,12 +60,16 @@ translate_tex_target(GLenum target)
 
    case GL_TEXTURE_2D:
    case GL_TEXTURE_2D_ARRAY_EXT:
+   case GL_TEXTURE_EXTERNAL_OES:
+   case GL_TEXTURE_2D_MULTISAMPLE:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
       return BRW_SURFACE_2D;
 
    case GL_TEXTURE_3D:
       return BRW_SURFACE_3D;
 
    case GL_TEXTURE_CUBE_MAP:
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
       return BRW_SURFACE_CUBE;
 
    default:
@@ -106,7 +113,25 @@ struct surface_format_info {
  *   Y^: 60 (gen6)
  *   Y#: 70 (gen7)
  *
+ * The abbreviations in the header below are:
+ *  smpl  - Sampling Engine
+ *  filt  - Sampling Engine Filtering
+ *  shad  - Sampling Engine Shadow Map
+ *  CK    - Sampling Engine Chroma Key
+ *  RT    - Render Target
+ *  AB    - Alpha Blend Render Target
+ *  VB    - Input Vertex Buffer
+ *  SO    - Steamed Output Vertex Buffers (transform feedback)
+ *  color - Color Processing
+ *
  * See page 88 of the Sandybridge PRM VOL4_Part1 PDF.
+ *
+ * As of Ivybridge, the columns are no longer in that table and the
+ * information can be found spread across:
+ *
+ * - VOL2_Part1 section 2.5.11 Format Conversion (vertex fetch).
+ * - VOL4_Part1 section 2.12.2.1.2 Sampler Output Channel Mapping.
+ * - VOL4_Part1 section 3.9.11 Render Target Write.
*/ const struct surface_format_info surface_formats[] = { /* smpl filt shad CK RT AB VB SO color */ @@ -119,6 +144,8 @@ const struct surface_format_info surface_formats[] = { SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32X32_FLOAT) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SSCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_USCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64_PASSTHRU) SF( Y, 50, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_FLOAT) SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_SINT) SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_UINT) @@ -126,6 +153,7 @@ const struct surface_format_info surface_formats[] = { SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SNORM) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SSCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_USCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32_SFIXED) SF( Y, Y, x, x, Y, 45, Y, x, 60, BRW_SURFACEFORMAT_R16G16B16A16_UNORM) SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SNORM) SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SINT) @@ -149,6 +177,8 @@ const struct surface_format_info surface_formats[] = { SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_USCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SSCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_USCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64_PASSTHRU) SF( Y, Y, x, Y, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_B8G8R8A8_UNORM) SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB) /* smpl filt shad CK RT AB VB SO color */ @@ -213,6 +243,8 @@ const struct surface_format_info surface_formats[] = { SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_SINT) SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_UINT) SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16_FLOAT) + SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE1) SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_UNORM) SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_UNORM) SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_UNORM) @@ -229,6 +261,12 @@ const struct surface_format_info surface_formats[] = { /* smpl filt shad CK RT AB VB SO color */ SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_SSCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_USCALED) + SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A1B5G5R5_UNORM) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4B4G4R4_UNORM) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_SINT) SF( Y, Y, x, 45, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8_UNORM) SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8_SNORM) SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_SINT) @@ -240,11 +278,22 @@ const struct surface_format_info surface_formats[] = { SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_SSCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_USCALED) + SF(45, 45, x, x, x, x, x, 
x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE0) SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM_SRGB) + SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_Y8_SNORM) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_SINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_SINT) SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB_SRGB) SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R1_UINT) SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_NORMAL) SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUVY) + SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE0) + SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE1) SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM) SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM) SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM) @@ -267,10 +316,44 @@ const struct surface_format_info surface_formats[] = { SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64_FLOAT) SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_SNORM) SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_SNORM) + SF(50, 50, x, x, x, x, 60, x, x, BRW_SURFACEFORMAT_R16G16B16_FLOAT) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_UNORM) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SNORM) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SSCALED) SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_USCALED) + SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_SF16) + SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM) + SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM_SRGB) + SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_UF16) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_PLANAR_420_8) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC1_RGB8) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_R11) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_RG11) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_R11) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_RG11) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_UINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_SINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_UINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU) + SF( x, x, 
x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64_PASSTHRU) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_RGBA8) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UINT) + SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_SINT) }; #undef x #undef Y @@ -286,13 +369,15 @@ brw_format_for_mesa_format(gl_format mesa_format) static const uint32_t table[MESA_FORMAT_COUNT] = { [MESA_FORMAT_RGBA8888] = 0, - [MESA_FORMAT_RGBA8888_REV] = 0, + [MESA_FORMAT_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_UNORM, [MESA_FORMAT_ARGB8888] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM, [MESA_FORMAT_ARGB8888_REV] = 0, + [MESA_FORMAT_RGBX8888] = 0, + [MESA_FORMAT_RGBX8888_REV] = BRW_SURFACEFORMAT_R8G8B8X8_UNORM, [MESA_FORMAT_XRGB8888] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM, [MESA_FORMAT_XRGB8888_REV] = 0, [MESA_FORMAT_RGB888] = 0, - [MESA_FORMAT_BGR888] = 0, + [MESA_FORMAT_BGR888] = BRW_SURFACEFORMAT_R8G8B8_UNORM, [MESA_FORMAT_RGB565] = BRW_SURFACEFORMAT_B5G6R5_UNORM, [MESA_FORMAT_RGB565_REV] = 0, [MESA_FORMAT_ARGB4444] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM, @@ -315,17 +400,17 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_YCBCR_REV] = BRW_SURFACEFORMAT_YCRCB_NORMAL, [MESA_FORMAT_YCBCR] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY, [MESA_FORMAT_R8] = BRW_SURFACEFORMAT_R8_UNORM, - [MESA_FORMAT_RG88] = BRW_SURFACEFORMAT_R8G8_UNORM, - [MESA_FORMAT_RG88_REV] = 0, + [MESA_FORMAT_GR88] = BRW_SURFACEFORMAT_R8G8_UNORM, + [MESA_FORMAT_RG88] = 0, [MESA_FORMAT_R16] = BRW_SURFACEFORMAT_R16_UNORM, - [MESA_FORMAT_RG1616] = BRW_SURFACEFORMAT_R16G16_UNORM, - [MESA_FORMAT_RG1616_REV] = 0, + [MESA_FORMAT_GR1616] = BRW_SURFACEFORMAT_R16G16_UNORM, + [MESA_FORMAT_RG1616] = 0, [MESA_FORMAT_ARGB2101010] = BRW_SURFACEFORMAT_B10G10R10A2_UNORM, [MESA_FORMAT_Z24_S8] = 0, [MESA_FORMAT_S8_Z24] = 0, [MESA_FORMAT_Z16] = 0, [MESA_FORMAT_X8_Z24] = 0, - [MESA_FORMAT_Z24_S8] = 0, + [MESA_FORMAT_Z24_X8] = 0, [MESA_FORMAT_Z32] = 0, [MESA_FORMAT_S8] = 0, @@ -348,8 +433,8 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_RGBA_FLOAT32] = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT, [MESA_FORMAT_RGBA_FLOAT16] = BRW_SURFACEFORMAT_R16G16B16A16_FLOAT, - [MESA_FORMAT_RGB_FLOAT32] = 0, - [MESA_FORMAT_RGB_FLOAT16] = 0, + [MESA_FORMAT_RGB_FLOAT32] = BRW_SURFACEFORMAT_R32G32B32_FLOAT, + [MESA_FORMAT_RGB_FLOAT16] = BRW_SURFACEFORMAT_R16G16B16_FLOAT, [MESA_FORMAT_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_A32_FLOAT, [MESA_FORMAT_ALPHA_FLOAT16] = BRW_SURFACEFORMAT_A16_FLOAT, [MESA_FORMAT_LUMINANCE_FLOAT32] = BRW_SURFACEFORMAT_L32_FLOAT, @@ -393,11 +478,11 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_R_INT8] = BRW_SURFACEFORMAT_R8_SINT, [MESA_FORMAT_RG_INT8] = BRW_SURFACEFORMAT_R8G8_SINT, - [MESA_FORMAT_RGB_INT8] = 0, + [MESA_FORMAT_RGB_INT8] = BRW_SURFACEFORMAT_R8G8B8_SINT, [MESA_FORMAT_RGBA_INT8] = BRW_SURFACEFORMAT_R8G8B8A8_SINT, [MESA_FORMAT_R_INT16] = BRW_SURFACEFORMAT_R16_SINT, [MESA_FORMAT_RG_INT16] = BRW_SURFACEFORMAT_R16G16_SINT, - [MESA_FORMAT_RGB_INT16] = 0, + [MESA_FORMAT_RGB_INT16] = BRW_SURFACEFORMAT_R16G16B16_SINT, [MESA_FORMAT_RGBA_INT16] = BRW_SURFACEFORMAT_R16G16B16A16_SINT, [MESA_FORMAT_R_INT32] = BRW_SURFACEFORMAT_R32_SINT, [MESA_FORMAT_RG_INT32] = BRW_SURFACEFORMAT_R32G32_SINT, @@ -406,11 +491,11 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_R_UINT8] = BRW_SURFACEFORMAT_R8_UINT, 
[MESA_FORMAT_RG_UINT8] = BRW_SURFACEFORMAT_R8G8_UINT, - [MESA_FORMAT_RGB_UINT8] = 0, + [MESA_FORMAT_RGB_UINT8] = BRW_SURFACEFORMAT_R8G8B8_UINT, [MESA_FORMAT_RGBA_UINT8] = BRW_SURFACEFORMAT_R8G8B8A8_UINT, [MESA_FORMAT_R_UINT16] = BRW_SURFACEFORMAT_R16_UINT, [MESA_FORMAT_RG_UINT16] = BRW_SURFACEFORMAT_R16G16_UINT, - [MESA_FORMAT_RGB_UINT16] = 0, + [MESA_FORMAT_RGB_UINT16] = BRW_SURFACEFORMAT_R16G16B16_UINT, [MESA_FORMAT_RGBA_UINT16] = BRW_SURFACEFORMAT_R16G16B16A16_UINT, [MESA_FORMAT_R_UINT32] = BRW_SURFACEFORMAT_R32_UINT, [MESA_FORMAT_RG_UINT32] = BRW_SURFACEFORMAT_R32G32_UINT, @@ -425,8 +510,8 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM, [MESA_FORMAT_SIGNED_R16] = BRW_SURFACEFORMAT_R16_SNORM, [MESA_FORMAT_SIGNED_GR1616] = BRW_SURFACEFORMAT_R16G16_SNORM, - [MESA_FORMAT_SIGNED_RGB_16] = 0, - [MESA_FORMAT_SIGNED_RGBA_16] = 0, + [MESA_FORMAT_SIGNED_RGB_16] = BRW_SURFACEFORMAT_R16G16B16_SNORM, + [MESA_FORMAT_SIGNED_RGBA_16] = BRW_SURFACEFORMAT_R16G16B16A16_SNORM, [MESA_FORMAT_RGBA_16] = BRW_SURFACEFORMAT_R16G16B16A16_UNORM, [MESA_FORMAT_RED_RGTC1] = BRW_SURFACEFORMAT_BC4_UNORM, @@ -439,6 +524,18 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_LA_LATC2] = 0, [MESA_FORMAT_SIGNED_LA_LATC2] = 0, + [MESA_FORMAT_ETC1_RGB8] = BRW_SURFACEFORMAT_ETC1_RGB8, + [MESA_FORMAT_ETC2_RGB8] = BRW_SURFACEFORMAT_ETC2_RGB8, + [MESA_FORMAT_ETC2_SRGB8] = BRW_SURFACEFORMAT_ETC2_SRGB8, + [MESA_FORMAT_ETC2_RGBA8_EAC] = BRW_SURFACEFORMAT_ETC2_EAC_RGBA8, + [MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8, + [MESA_FORMAT_ETC2_R11_EAC] = BRW_SURFACEFORMAT_EAC_R11, + [MESA_FORMAT_ETC2_RG11_EAC] = BRW_SURFACEFORMAT_EAC_RG11, + [MESA_FORMAT_ETC2_SIGNED_R11_EAC] = BRW_SURFACEFORMAT_EAC_SIGNED_R11, + [MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = BRW_SURFACEFORMAT_EAC_SIGNED_RG11, + [MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = BRW_SURFACEFORMAT_ETC2_RGB8_PTA, + [MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = BRW_SURFACEFORMAT_ETC2_SRGB8_PTA, + [MESA_FORMAT_SIGNED_A8] = 0, [MESA_FORMAT_SIGNED_L8] = 0, [MESA_FORMAT_SIGNED_AL88] = 0, @@ -453,6 +550,25 @@ brw_format_for_mesa_format(gl_format mesa_format) [MESA_FORMAT_Z32_FLOAT] = 0, [MESA_FORMAT_Z32_FLOAT_X24S8] = 0, + + [MESA_FORMAT_ARGB2101010_UINT] = BRW_SURFACEFORMAT_B10G10R10A2_UINT, + [MESA_FORMAT_ABGR2101010_UINT] = BRW_SURFACEFORMAT_R10G10B10A2_UINT, + + [MESA_FORMAT_XRGB4444_UNORM] = 0, + [MESA_FORMAT_XRGB1555_UNORM] = BRW_SURFACEFORMAT_B5G5R5X1_UNORM, + [MESA_FORMAT_XBGR8888_SNORM] = 0, + [MESA_FORMAT_XBGR8888_SRGB] = 0, + [MESA_FORMAT_XBGR8888_UINT] = 0, + [MESA_FORMAT_XBGR8888_SINT] = 0, + [MESA_FORMAT_XRGB2101010_UNORM] = BRW_SURFACEFORMAT_B10G10R10X2_UNORM, + [MESA_FORMAT_XBGR16161616_UNORM] = BRW_SURFACEFORMAT_R16G16B16X16_UNORM, + [MESA_FORMAT_XBGR16161616_SNORM] = 0, + [MESA_FORMAT_XBGR16161616_FLOAT] = BRW_SURFACEFORMAT_R16G16B16X16_FLOAT, + [MESA_FORMAT_XBGR16161616_UINT] = 0, + [MESA_FORMAT_XBGR16161616_SINT] = 0, + [MESA_FORMAT_XBGR32323232_FLOAT] = BRW_SURFACEFORMAT_R32G32B32X32_FLOAT, + [MESA_FORMAT_XBGR32323232_UINT] = 0, + [MESA_FORMAT_XBGR32323232_SINT] = 0, }; assert(mesa_format < MESA_FORMAT_COUNT); return table[mesa_format]; @@ -521,14 +637,9 @@ brw_init_surface_formats(struct brw_context *brw) * integer, so we don't need hardware support for blending on it. Other * than that, GL in general requires alpha blending for render targets, * even though we don't support it for some formats. 
- * - * We don't currently support rendering to SNORM textures because some of - * the ARB_color_buffer_float clamping is broken for it - * (piglit arb_color_buffer_float-drawpixels GL_RGBA8_SNORM). */ if (gen >= rinfo->render_target && - (gen >= rinfo->alpha_blend || is_integer) && - _mesa_get_format_datatype(format) != GL_SIGNED_NORMALIZED) { + (gen >= rinfo->alpha_blend || is_integer)) { brw->render_target_format[format] = render; brw->format_supported_as_render_target[format] = true; } @@ -541,30 +652,97 @@ brw_init_surface_formats(struct brw_context *brw) brw->format_supported_as_render_target[MESA_FORMAT_X8_Z24] = true; brw->format_supported_as_render_target[MESA_FORMAT_S8] = true; brw->format_supported_as_render_target[MESA_FORMAT_Z16] = true; + brw->format_supported_as_render_target[MESA_FORMAT_Z32_FLOAT] = true; + brw->format_supported_as_render_target[MESA_FORMAT_Z32_FLOAT_X24S8] = true; /* We remap depth formats to a supported texturing format in * translate_tex_format(). */ ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = true; ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = true; + ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT] = true; + ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT_X24S8] = true; + + /* It appears that Z16 is slower than Z24 (on Intel Ivybridge and newer + * hardware at least), so there's no real reason to prefer it unless you're + * under memory (not memory bandwidth) pressure. Our speculation is that + * this is due to either increased fragment shader execution from + * GL_LEQUAL/GL_EQUAL depth tests at the reduced precision, or due to + * increased depth stalls from a cacheline-based heuristic for detecting + * depth stalls. + * + * However, desktop GL 3.0+ require that you get exactly 16 bits when + * asking for DEPTH_COMPONENT16, so we have to respect that. + */ + if (_mesa_is_desktop_gl(ctx)) + ctx->TextureFormatSupported[MESA_FORMAT_Z16] = true; + + /* On hardware that lacks support for ETC1, we map ETC1 to RGBX + * during glCompressedTexImage2D(). See intel_mipmap_tree::wraps_etc1. + */ + ctx->TextureFormatSupported[MESA_FORMAT_ETC1_RGB8] = true; + + /* On hardware that lacks support for ETC2, we map ETC2 to a suitable + * MESA_FORMAT during glCompressedTexImage2D(). + * See intel_mipmap_tree::wraps_etc2. + */ + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_RGB8] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SRGB8] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_RGBA8_EAC] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_R11_EAC] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_RG11_EAC] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = true; + ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = true; } bool -brw_render_target_supported(struct intel_context *intel, gl_format format) +brw_render_target_supported(struct intel_context *intel, + struct gl_renderbuffer *rb) { struct brw_context *brw = brw_context(&intel->ctx); - /* Not exactly true, as some of those formats are not renderable. - * But at least we know how to translate them. 
+ gl_format format = rb->Format; + + /* Many integer formats are promoted to RGBA (like XRGB8888 is), which means + * we would consider them renderable even though we don't have surface + * support for their alpha behavior and don't have the blending unit + * available to fake it like we do for XRGB8888. Force them to being + * unsupported. */ + if ((rb->_BaseFormat != GL_RGBA && + rb->_BaseFormat != GL_RG && + rb->_BaseFormat != GL_RED) && _mesa_is_format_integer_color(format)) + return false; + + /* Under some conditions, MSAA is not supported for formats whose width is + * more than 64 bits. + */ + if (rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) { + /* Gen6: MSAA on >64 bit formats is unsupported. */ + if (intel->gen <= 6) + return false; + + /* Gen7: 8x MSAA on >64 bit formats is unsupported. */ + if (rb->NumSamples >= 8) + return false; + } + return brw->format_supported_as_render_target[format]; } GLuint -translate_tex_format(gl_format mesa_format, +translate_tex_format(struct intel_context *intel, + gl_format mesa_format, GLenum internal_format, GLenum depth_mode, GLenum srgb_decode) { + struct gl_context *ctx = &intel->ctx; + if (srgb_decode == GL_SKIP_DECODE_EXT) + mesa_format = _mesa_get_srgb_format_linear(mesa_format); + switch( mesa_format ) { case MESA_FORMAT_Z16: @@ -574,17 +752,11 @@ translate_tex_format(gl_format mesa_format, case MESA_FORMAT_X8_Z24: return BRW_SURFACEFORMAT_I24X8_UNORM; - case MESA_FORMAT_SARGB8: - case MESA_FORMAT_SLA8: - case MESA_FORMAT_SL8: - if (srgb_decode == GL_DECODE_EXT) - return brw_format_for_mesa_format(mesa_format); - else if (srgb_decode == GL_SKIP_DECODE_EXT) - return brw_format_for_mesa_format(_mesa_get_srgb_format_linear(mesa_format)); + case MESA_FORMAT_Z32_FLOAT: + return BRW_SURFACEFORMAT_I32_FLOAT; - case MESA_FORMAT_RGBA8888_REV: - /* This format is not renderable? */ - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + case MESA_FORMAT_Z32_FLOAT_X24S8: + return BRW_SURFACEFORMAT_R32G32_FLOAT; case MESA_FORMAT_RGBA_FLOAT32: /* The value of this BRW_SURFACEFORMAT is 0, which tricks the @@ -592,13 +764,24 @@ translate_tex_format(gl_format mesa_format, */ return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + case MESA_FORMAT_SRGB_DXT1: + if (intel->gen == 4 && !intel->is_g4x) { + /* Work around missing SRGB DXT1 support on original gen4 by just + * skipping SRGB decode. It's not worth not supporting sRGB in + * general to prevent this. + */ + WARN_ONCE(true, "Demoting sRGB DXT1 texture to non-sRGB\n"); + mesa_format = MESA_FORMAT_RGB_DXT1; + } + return brw_format_for_mesa_format(mesa_format); + default: assert(brw_format_for_mesa_format(mesa_format) != 0); return brw_format_for_mesa_format(mesa_format); } } -static uint32_t +uint32_t brw_get_surface_tiling_bits(uint32_t tiling) { switch (tiling) { @@ -611,67 +794,295 @@ brw_get_surface_tiling_bits(uint32_t tiling) } } + +uint32_t +brw_get_surface_num_multisamples(unsigned num_samples) +{ + if (num_samples > 1) + return BRW_SURFACE_MULTISAMPLECOUNT_4; + else + return BRW_SURFACE_MULTISAMPLECOUNT_1; +} + + +/** + * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle + * swizzling. 
+ */ +int +brw_get_texture_swizzle(const struct gl_context *ctx, + const struct gl_texture_object *t) +{ + const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + + int swizzles[SWIZZLE_NIL + 1] = { + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, + SWIZZLE_ZERO, + SWIZZLE_ONE, + SWIZZLE_NIL + }; + + if (img->_BaseFormat == GL_DEPTH_COMPONENT || + img->_BaseFormat == GL_DEPTH_STENCIL) { + GLenum depth_mode = t->DepthMode; + + /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures + * with depth component data specified with a sized internal format. + * Otherwise, it's left at the old default, GL_LUMINANCE. + */ + if (_mesa_is_gles3(ctx) && + img->InternalFormat != GL_DEPTH_COMPONENT && + img->InternalFormat != GL_DEPTH_STENCIL) { + depth_mode = GL_RED; + } + + switch (depth_mode) { + case GL_ALPHA: + swizzles[0] = SWIZZLE_ZERO; + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_X; + break; + case GL_LUMINANCE: + swizzles[0] = SWIZZLE_X; + swizzles[1] = SWIZZLE_X; + swizzles[2] = SWIZZLE_X; + swizzles[3] = SWIZZLE_ONE; + break; + case GL_INTENSITY: + swizzles[0] = SWIZZLE_X; + swizzles[1] = SWIZZLE_X; + swizzles[2] = SWIZZLE_X; + swizzles[3] = SWIZZLE_X; + break; + case GL_RED: + swizzles[0] = SWIZZLE_X; + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_ONE; + break; + } + } + + /* If the texture's format is alpha-only, force R, G, and B to + * 0.0. Similarly, if the texture's format has no alpha channel, + * force the alpha value read to 1.0. This allows for the + * implementation to use an RGBA texture for any of these formats + * without leaking any unexpected values. + */ + switch (img->_BaseFormat) { + case GL_ALPHA: + swizzles[0] = SWIZZLE_ZERO; + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + break; + case GL_RED: + case GL_RG: + case GL_RGB: + if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0) + swizzles[3] = SWIZZLE_ONE; + break; + } + + return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)], + swizzles[GET_SWZ(t->_Swizzle, 1)], + swizzles[GET_SWZ(t->_Swizzle, 2)], + swizzles[GET_SWZ(t->_Swizzle, 3)]); +} + + static void -brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) +brw_update_buffer_texture_surface(struct gl_context *ctx, + unsigned unit, + uint32_t *binding_table, + unsigned surf_index) { struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = &brw->intel; struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct intel_mipmap_tree *mt = intelObj->mt; - struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - const GLuint surf_index = SURF_INDEX_TEXTURE(unit); uint32_t *surf; - int width, height, depth; - - intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth); + struct intel_buffer_object *intel_obj = + intel_buffer_object(tObj->BufferObject); + drm_intel_bo *bo = intel_obj ? 
intel_obj->buffer : NULL; + gl_format format = tObj->_BufferObjectFormat; + uint32_t brw_format = brw_format_for_mesa_format(format); + int texel_size = _mesa_get_format_bytes(format); + + if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) { + _mesa_problem(NULL, "bad format %s for texture buffer\n", + _mesa_get_format_name(format)); + } surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - 6 * 4, 32, &brw->bind.surf_offset[surf_index]); + 6 * 4, 32, &binding_table[surf_index]); + + surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | + (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT)); + + if (intel->gen >= 6) + surf[0] |= BRW_SURFACE_RC_READ_WRITE; + + if (bo) { + surf[1] = bo->offset; /* reloc */ + + /* Emit relocation to surface contents. */ + drm_intel_bo_emit_reloc(brw->intel.batch.bo, + binding_table[surf_index] + 4, + bo, 0, I915_GEM_DOMAIN_SAMPLER, 0); + + int w = intel_obj->Base.Size / texel_size; + surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT | + ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT); + surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT | + (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT); + } else { + surf[1] = 0; + surf[2] = 0; + surf[3] = 0; + } + + surf[4] = 0; + surf[5] = 0; +} + +static void +brw_update_texture_component(struct brw_context *brw, + uint32_t *binding_table_slot, + const struct intel_mipmap_tree *mt, + unsigned width, unsigned height, + unsigned depth, unsigned stride, + GLuint target, GLuint tex_format, + uint32_t offset, uint32_t levels, + uint32_t first_level) +{ + uint32_t tile_x, tile_y; + uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, binding_table_slot); - surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | + surf[0] = (target << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | BRW_SURFACE_CUBEFACE_ENABLES | - (translate_tex_format(firstImage->TexFormat, - firstImage->InternalFormat, - sampler->DepthMode, - sampler->sRGBDecode) << - BRW_SURFACE_FORMAT_SHIFT)); + (tex_format << BRW_SURFACE_FORMAT_SHIFT)); - surf[1] = intelObj->mt->region->bo->offset; /* reloc */ + surf[1] = mt->region->bo->offset + offset; + surf[1] += intel_miptree_get_tile_offsets(mt, first_level, 0, + &tile_x, &tile_y); - surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT | + surf[2] = (levels << BRW_SURFACE_LOD_SHIFT | (width - 1) << BRW_SURFACE_WIDTH_SHIFT | (height - 1) << BRW_SURFACE_HEIGHT_SHIFT); - surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) | + surf[3] = (brw_get_surface_tiling_bits(mt->region->tiling) | (depth - 1) << BRW_SURFACE_DEPTH_SHIFT | - ((intelObj->mt->region->pitch * intelObj->mt->cpp) - 1) << - BRW_SURFACE_PITCH_SHIFT); + (stride - 1) << BRW_SURFACE_PITCH_SHIFT); - surf[4] = 0; + surf[4] = brw_get_surface_num_multisamples(mt->num_samples); - surf[5] = (mt->align_h == 4) ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0; + assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. + */ + assert(tile_x % 4 == 0); + assert(tile_y % 2 == 0); + surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | + (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | + (mt->align_h == 4 ? 
BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); /* Emit relocation to surface contents */ drm_intel_bo_emit_reloc(brw->intel.batch.bo, - brw->bind.surf_offset[surf_index] + 4, - intelObj->mt->region->bo, 0, - I915_GEM_DOMAIN_SAMPLER, 0); + *binding_table_slot + 4, + mt->region->bo, + surf[1] - mt->region->bo->offset, + I915_GEM_DOMAIN_SAMPLER, 0); +} + +static void +brw_update_ext_texture_surface(struct brw_context *brw, + uint32_t *binding_table_slots, + const struct intel_texture_image *intel_img) +{ + unsigned i; + const struct intel_region *region = intel_img->mt->region; + const struct intel_image_format *f = intel_img->ext_format; + + for (i = 0; i < f->nplanes; ++i) { + int format = BRW_SURFACEFORMAT_R8_UNORM; + int index = f->planes[i].buffer_index; + + if (f->planes[i].dri_format == __DRI_IMAGE_FORMAT_GR88) + format = BRW_SURFACEFORMAT_R8G8_UNORM; + + brw_update_texture_component(brw, binding_table_slots + i, + intel_img->mt, + region->width >> f->planes[i].width_shift, + region->height >> f->planes[i].height_shift, + intel_img->base.Base.Depth, + intel_img->ext_strides[index], + BRW_SURFACE_2D, + format, + intel_img->ext_offsets[index], + 0 /*levels*/, + 0 /*first_level*/); + } +} + +static void +brw_update_texture_surface(struct gl_context *ctx, + unsigned unit, + uint32_t *binding_table, + unsigned surf_index) +{ + struct brw_context *brw = brw_context(ctx); + struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct intel_mipmap_tree *mt = intelObj->mt; + struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; + const struct intel_texture_image *intel_img = + (const struct intel_texture_image *)firstImage; + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); + int width, height, depth; + + if (tObj->Target == GL_TEXTURE_BUFFER) { + brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index); + return; + } + else if (tObj->Target == GL_TEXTURE_EXTERNAL_OES && intel_img->ext_format) { + brw_update_ext_texture_surface(brw, binding_table + surf_index, + intel_img); + return; + } + + intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth); + + brw_update_texture_component(brw, binding_table + surf_index, + mt, width, height, depth, mt->region->pitch, + translate_tex_target(tObj->Target), + translate_tex_format(intel_context(ctx), mt->format, + firstImage->InternalFormat, tObj->DepthMode, sampler->sRGBDecode), + mt->offset, + intelObj->_MaxLevel - tObj->BaseLevel, + firstImage->Level); } /** * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ -void +static void brw_create_constant_surface(struct brw_context *brw, drm_intel_bo *bo, - int width, - uint32_t *out_offset) + uint32_t offset, + uint32_t size, + uint32_t *out_offset, + bool dword_pitch) { struct intel_context *intel = &brw->intel; - const GLint w = width - 1; + uint32_t stride = dword_pitch ? 
4 : 16; + uint32_t elements = ALIGN(size, stride) / stride; + const GLint w = elements - 1; uint32_t *surf; surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, @@ -684,13 +1095,13 @@ brw_create_constant_surface(struct brw_context *brw, if (intel->gen >= 6) surf[0] |= BRW_SURFACE_RC_READ_WRITE; - surf[1] = bo->offset; /* reloc */ + surf[1] = bo->offset + offset; /* reloc */ - surf[2] = (((w & 0x7f) - 1) << BRW_SURFACE_WIDTH_SHIFT | - (((w >> 7) & 0x1fff) - 1) << BRW_SURFACE_HEIGHT_SHIFT); + surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT | + ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT); - surf[3] = ((((w >> 20) & 0x7f) - 1) << BRW_SURFACE_DEPTH_SHIFT | - (width * 16 - 1) << BRW_SURFACE_PITCH_SHIFT); + surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT | + (stride - 1) << BRW_SURFACE_PITCH_SHIFT); surf[4] = 0; surf[5] = 0; @@ -701,10 +1112,97 @@ brw_create_constant_surface(struct brw_context *brw, */ drm_intel_bo_emit_reloc(brw->intel.batch.bo, *out_offset + 4, - bo, 0, + bo, offset, I915_GEM_DOMAIN_SAMPLER, 0); } +/** + * Set up a binding table entry for use by stream output logic (transform + * feedback). + * + * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES. + */ +void +brw_update_sol_surface(struct brw_context *brw, + struct gl_buffer_object *buffer_obj, + uint32_t *out_offset, unsigned num_vector_components, + unsigned stride_dwords, unsigned offset_dwords) +{ + struct intel_context *intel = &brw->intel; + struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj); + drm_intel_bo *bo = + intel_bufferobj_buffer(intel, intel_bo, INTEL_WRITE_PART); + uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, + out_offset); + uint32_t pitch_minus_1 = 4*stride_dwords - 1; + uint32_t offset_bytes = 4 * offset_dwords; + size_t size_dwords = buffer_obj->Size / 4; + uint32_t buffer_size_minus_1, width, height, depth, surface_format; + + /* FIXME: can we rely on core Mesa to ensure that the buffer isn't + * too big to map using a single binding table entry? + */ + assert((size_dwords - offset_dwords) / stride_dwords + <= BRW_MAX_NUM_BUFFER_ENTRIES); + + if (size_dwords > offset_dwords + num_vector_components) { + /* There is room for at least 1 transform feedback output in the buffer. + * Compute the number of additional transform feedback outputs the + * buffer has room for. + */ + buffer_size_minus_1 = + (size_dwords - offset_dwords - num_vector_components) / stride_dwords; + } else { + /* There isn't even room for a single transform feedback output in the + * buffer. We can't configure the binding table entry to prevent output + * entirely; we'll have to rely on the geometry shader to detect + * overflow. But to minimize the damage in case of a bug, set up the + * binding table entry to just allow a single output. 
+ */ + buffer_size_minus_1 = 0; + } + width = buffer_size_minus_1 & 0x7f; + height = (buffer_size_minus_1 & 0xfff80) >> 7; + depth = (buffer_size_minus_1 & 0x7f00000) >> 20; + + switch (num_vector_components) { + case 1: + surface_format = BRW_SURFACEFORMAT_R32_FLOAT; + break; + case 2: + surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT; + break; + case 3: + surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; + break; + case 4: + surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + break; + default: + assert(!"Invalid vector size for transform feedback output"); + surface_format = BRW_SURFACEFORMAT_R32_FLOAT; + break; + } + + surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | + BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | + surface_format << BRW_SURFACE_FORMAT_SHIFT | + BRW_SURFACE_RC_READ_WRITE; + surf[1] = bo->offset + offset_bytes; /* reloc */ + surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT | + height << BRW_SURFACE_HEIGHT_SHIFT); + surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT | + pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); + surf[4] = 0; + surf[5] = 0; + + /* Emit relocation to surface contents. */ + drm_intel_bo_emit_reloc(brw->intel.batch.bo, + *out_offset + 4, + bo, offset_bytes, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); +} + /* Creates a new WM constant buffer reflecting the current fragment program's * constants, if needed by the fragment program. * @@ -732,8 +1230,8 @@ brw_upload_wm_pull_constants(struct brw_context *brw) if (brw->wm.const_bo) { drm_intel_bo_unreference(brw->wm.const_bo); brw->wm.const_bo = NULL; - brw->bind.surf_offset[surf_index] = 0; - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + brw->wm.surf_offset[surf_index] = 0; + brw->state.dirty.brw |= BRW_NEW_SURFACES; } return; } @@ -746,16 +1244,15 @@ brw_upload_wm_pull_constants(struct brw_context *brw) drm_intel_gem_bo_map_gtt(brw->wm.const_bo); constants = brw->wm.const_bo->virtual; for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) { - constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i], - brw->wm.prog_data->pull_param[i]); + constants[i] = *brw->wm.prog_data->pull_param[i]; } drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo); - intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, - params->NumParameters, - &brw->bind.surf_offset[surf_index]); + intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size, + &brw->wm.surf_offset[surf_index], + true); - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + brw->state.dirty.brw |= BRW_NEW_SURFACES; } const struct brw_tracked_state brw_wm_pull_constants = { @@ -770,13 +1267,66 @@ const struct brw_tracked_state brw_wm_pull_constants = { static void brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) { + /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming + * Notes): + * + * A null surface will be used in instances where an actual surface is + * not bound. When a write message is generated to a null surface, no + * actual surface is written to. When a read message (including any + * sampling engine message) is generated to a null surface, the result + * is all zeros. Note that a null surface type is allowed to be used + * with all messages, even if it is not specificially indicated as + * supported. 
All of the remaining fields in surface state are ignored + * for null surfaces, with the following exceptions: + * + * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the + * depth buffer’s corresponding state for all render target surfaces, + * including null. + * + * - Surface Format must be R8G8B8A8_UNORM. + */ struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; uint32_t *surf; + unsigned surface_type = BRW_SURFACE_NULL; + drm_intel_bo *bo = NULL; + unsigned pitch_minus_1 = 0; + uint32_t multisampling_state = 0; + + /* _NEW_BUFFERS */ + const struct gl_framebuffer *fb = ctx->DrawBuffer; surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - 6 * 4, 32, &brw->bind.surf_offset[unit]); + 6 * 4, 32, &brw->wm.surf_offset[unit]); - surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | + if (fb->Visual.samples > 1) { + /* On Gen6, null render targets seem to cause GPU hangs when + * multisampling. So work around this problem by rendering into dummy + * color buffer. + * + * To decrease the amount of memory needed by the workaround buffer, we + * set its pitch to 128 bytes (the width of a Y tile). This means that + * the amount of memory needed for the workaround buffer is + * (width_in_tiles + height_in_tiles - 1) tiles. + * + * Note that since the workaround buffer will be interpreted by the + * hardware as an interleaved multisampled buffer, we need to compute + * width_in_tiles and height_in_tiles by dividing the width and height + * by 16 rather than the normal Y-tile size of 32. + */ + unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16; + unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16; + unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096; + brw_get_scratch_bo(intel, &brw->wm.multisampled_null_render_target_bo, + size_needed); + bo = brw->wm.multisampled_null_render_target_bo; + surface_type = BRW_SURFACE_2D; + pitch_minus_1 = 127; + multisampling_state = + brw_get_surface_num_multisamples(fb->Visual.samples); + } + + surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); if (intel->gen < 6) { surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT | @@ -784,11 +1334,26 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT | 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT); } - surf[1] = 0; - surf[2] = 0; - surf[3] = 0; - surf[4] = 0; + surf[1] = bo ? 
bo->offset : 0; + surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | + (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); + + /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming + * Notes): + * + * If Surface Type is SURFTYPE_NULL, this field must be TRUE + */ + surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y | + pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); + surf[4] = multisampling_state; surf[5] = 0; + + if (bo) { + drm_intel_bo_emit_reloc(brw->intel.batch.bo, + brw->wm.surf_offset[unit] + 4, + bo, 0, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); + } } /** @@ -799,52 +1364,62 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) static void brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, + bool layered, unsigned int unit) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_mipmap_tree *mt = irb->mt; - struct intel_region *region = irb->mt->region; + struct intel_region *region; uint32_t *surf; uint32_t tile_x, tile_y; uint32_t format = 0; + /* _NEW_BUFFERS */ + gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); - surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - 6 * 4, 32, &brw->bind.surf_offset[unit]); - - switch (irb->Base.Format) { - case MESA_FORMAT_SARGB8: - /* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB - surfaces to the blend/update as sRGB */ - if (ctx->Color.sRGBEnabled) - format = brw_format_for_mesa_format(irb->Base.Format); - else - format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - break; - default: - format = brw->render_target_format[irb->Base.Format]; - if (unlikely(!brw->format_supported_as_render_target[irb->Base.Format])) { - _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", - __FUNCTION__, _mesa_get_format_name(irb->Base.Format)); + assert(!layered); + + if (rb->TexImage && !brw->has_surface_tile_offset) { + intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y); + + if (tile_x != 0 || tile_y != 0) { + /* Original gen4 hardware couldn't draw to a non-tile-aligned + * destination in a miptree unless you actually setup your renderbuffer + * as a miptree and used the fragile lod/array_index/etc. controls to + * select the image. So, instead, we just make a new single-level + * miptree and render into that. 
+ */ + intel_renderbuffer_move_to_temp(intel, irb, false); + mt = irb->mt; } - break; + } + + region = irb->mt->region; + + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[unit]); + + format = brw->render_target_format[rb_format]; + if (unlikely(!brw->format_supported_as_render_target[rb_format])) { + _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", + __FUNCTION__, _mesa_get_format_name(rb_format)); } surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | format << BRW_SURFACE_FORMAT_SHIFT); /* reloc */ - surf[1] = (intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y) + + surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) + region->bo->offset); surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); surf[3] = (brw_get_surface_tiling_bits(region->tiling) | - ((region->pitch * region->cpp) - 1) << BRW_SURFACE_PITCH_SHIFT); + (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT); - surf[4] = 0; + surf[4] = brw_get_surface_num_multisamples(mt->num_samples); assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); /* Note that the low bits of these fields are missing, so @@ -879,7 +1454,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, } drm_intel_bo_emit_reloc(brw->intel.batch.bo, - brw->bind.surf_offset[unit] + 4, + brw->wm.surf_offset[unit] + 4, region->bo, surf[1] - region->bo->offset, I915_GEM_DOMAIN_RENDER, @@ -901,7 +1476,8 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw) if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) { - intel->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i], i); + intel->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i], + ctx->DrawBuffer->Layered, i); } else { intel->vtbl.update_null_renderbuffer_surface(brw, i); } @@ -909,7 +1485,7 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw) } else { intel->vtbl.update_null_renderbuffer_surface(brw, 0); } - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + brw->state.dirty.brw |= BRW_NEW_SURFACES; } const struct brw_tracked_state brw_renderbuffer_surfaces = { @@ -937,67 +1513,155 @@ const struct brw_tracked_state gen6_renderbuffer_surfaces = { static void brw_update_texture_surfaces(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; - for (unsigned i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; - const GLuint surf = SURF_INDEX_TEXTURE(i); + /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM: + * Unfortunately, we're stuck using the gl_program structs until the + * ARB_fragment_program front-end gets converted to GLSL IR. These + * have the downside that SamplerUnits is split and only contains the + * mappings for samplers active in that stage. 
+ */ + struct gl_program *vs = (struct gl_program *) brw->vertex_program; + struct gl_program *fs = (struct gl_program *) brw->fragment_program; + + unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed); + + for (unsigned s = 0; s < num_samplers; s++) { + if (vs->SamplersUsed & (1 << s)) { + const unsigned unit = vs->SamplerUnits[s]; + + /* _NEW_TEXTURE */ + if (ctx->Texture.Unit[unit]._ReallyEnabled) { + intel->vtbl.update_texture_surface(ctx, unit, + brw->vs.surf_offset, + brw_surf_index_vs_texture( + (const struct brw_vertex_program *)brw->vertex_program, + s)); + } + } - /* _NEW_TEXTURE */ - if (texUnit->_ReallyEnabled) { - brw->intel.vtbl.update_texture_surface(ctx, i); - } else { - brw->bind.surf_offset[surf] = 0; + if (fs->SamplersUsed & (1 << s)) { + const unsigned unit = fs->SamplerUnits[s]; + + /* _NEW_TEXTURE */ + if (ctx->Texture.Unit[unit]._ReallyEnabled) { + intel->vtbl.update_texture_surface(ctx, unit, + brw->wm.surf_offset, + brw_surf_index_texture( + (const struct brw_fragment_program *)brw->fragment_program, + s)); + } } } - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + brw->state.dirty.brw |= BRW_NEW_SURFACES; } const struct brw_tracked_state brw_texture_surfaces = { .dirty = { .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH, + .brw = BRW_NEW_BATCH | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_FRAGMENT_PROGRAM, .cache = 0 }, .emit = brw_update_texture_surfaces, }; +void +brw_upload_ubo_surfaces(struct brw_context *brw, + struct gl_shader *shader, + uint32_t *surf_offsets) +{ + struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + + if (!shader) + return; + + for (int i = 0; i < shader->NumUniformBlocks; i++) { + struct gl_uniform_buffer_binding *binding; + struct intel_buffer_object *intel_bo; + + binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding]; + intel_bo = intel_buffer_object(binding->BufferObject); + drm_intel_bo *bo = intel_bufferobj_buffer(intel, intel_bo, INTEL_READ); + + /* Because behavior for referencing outside of the binding's size in the + * glBindBufferRange case is undefined, we can just bind the whole buffer + * glBindBufferBase wants and be a correct implementation. + */ + intel->vtbl.create_constant_surface(brw, bo, binding->Offset, + bo->size - binding->Offset, + &surf_offsets[i], + shader->Type == GL_FRAGMENT_SHADER); + } + + if (shader->NumUniformBlocks) + brw->state.dirty.brw |= BRW_NEW_SURFACES; +} + +static void +brw_upload_wm_ubo_surfaces(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + /* _NEW_PROGRAM */ + struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram; + + if (!prog) + return; + + brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], + &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]); +} + +const struct brw_tracked_state brw_wm_ubo_surfaces = { + .dirty = { + .mesa = _NEW_PROGRAM, + .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER, + .cache = 0, + }, + .emit = brw_upload_wm_ubo_surfaces, +}; + /** * Constructs the binding table for the WM surface state, which maps unit * numbers to surface state objects. */ static void -brw_upload_binding_table(struct brw_context *brw) +brw_upload_wm_binding_table(struct brw_context *brw) { uint32_t *bind; int i; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) { + gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]); + } + /* Might want to calculate nr_surfaces first, to avoid taking up so much * space for the binding table. 
*/ bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, - sizeof(uint32_t) * BRW_MAX_SURFACES, - 32, &brw->bind.bo_offset); + sizeof(uint32_t) * BRW_MAX_WM_SURFACES, + 32, &brw->wm.bind_bo_offset); - /* BRW_NEW_WM_SURFACES and BRW_NEW_VS_CONSTBUF */ - for (i = 0; i < BRW_MAX_SURFACES; i++) { - bind[i] = brw->bind.surf_offset[i]; + /* BRW_NEW_SURFACES */ + for (i = 0; i < BRW_MAX_WM_SURFACES; i++) { + bind[i] = brw->wm.surf_offset[i]; } - brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE; brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE; } -const struct brw_tracked_state brw_binding_table = { +const struct brw_tracked_state brw_wm_binding_table = { .dirty = { .mesa = 0, .brw = (BRW_NEW_BATCH | - BRW_NEW_VS_CONSTBUF | - BRW_NEW_WM_SURFACES), + BRW_NEW_SURFACES), .cache = 0 }, - .emit = brw_upload_binding_table, + .emit = brw_upload_wm_binding_table, }; void