src/mesa/drivers/dri/i965/brw_context.c

   1 /*
   2  Copyright 2003 VMware, Inc.
   3  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   4  Intel funded Tungsten Graphics to
   5  develop this 3D driver.
   6
   7  Permission is hereby granted, free of charge, to any person obtaining
   8  a copy of this software and associated documentation files (the
   9  "Software"), to deal in the Software without restriction, including
  10  without limitation the rights to use, copy, modify, merge, publish,
  11  distribute, sublicense, and/or sell copies of the Software, and to
  12  permit persons to whom the Software is furnished to do so, subject to
  13  the following conditions:
  14
  15  The above copyright notice and this permission notice (including the
  16  next paragraph) shall be included in all copies or substantial
  17  portions of the Software.
  18
  19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  22  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  23  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27  **********************************************************************/
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keithw@vmware.com>
  31   */
  32
  33
  34 #include "main/api_exec.h"
  35 #include "main/context.h"
  36 #include "main/fbobject.h"
  37 #include "main/extensions.h"
  38 #include "main/imports.h"
  39 #include "main/macros.h"
  40 #include "main/points.h"
  41 #include "main/version.h"
  42 #include "main/vtxfmt.h"
  43 #include "main/texobj.h"
  44 #include "main/framebuffer.h"
  45
  46 #include "vbo/vbo_context.h"
  47
  48 #include "drivers/common/driverfuncs.h"
  49 #include "drivers/common/meta.h"
  50 #include "utils.h"
  51
  52 #include "brw_context.h"
  53 #include "brw_defines.h"
  54 #include "brw_compiler.h"
  55 #include "brw_draw.h"
  56 #include "brw_state.h"
  57
  58 #include "intel_batchbuffer.h"
  59 #include "intel_buffer_objects.h"
  60 #include "intel_buffers.h"
  61 #include "intel_fbo.h"
  62 #include "intel_mipmap_tree.h"
  63 #include "intel_pixel.h"
  64 #include "intel_image.h"
  65 #include "intel_tex.h"
  66 #include "intel_tex_obj.h"
  67
  68 #include "swrast_setup/swrast_setup.h"
  69 #include "tnl/tnl.h"
  70 #include "tnl/t_pipeline.h"
  71 #include "util/ralloc.h"
  72 #include "util/debug.h"
  73
  74 /***************************************
  75  * Mesa's Driver Functions
  76  ***************************************/
  77
  78 static size_t
  79 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
  80                              GLenum internalFormat, int samples[16])
  81 {
  82    struct brw_context *brw = brw_context(ctx);
  83
  84    (void) target;
  85
  86    switch (brw->gen) {
  87    case 9:
  88       samples[0] = 16;
  89       samples[1] = 8;
  90       samples[2] = 4;
  91       samples[3] = 2;
  92       return 4;
  93
  94    case 8:
  95       samples[0] = 8;
  96       samples[1] = 4;
  97       samples[2] = 2;
  98       return 3;
  99
 100    case 7:
 101       samples[0] = 8;
 102       samples[1] = 4;
 103       return 2;
 104
 105    case 6:
 106       samples[0] = 4;
 107       return 1;
 108
 109    default:
 110       assert(brw->gen < 6);
 111       samples[0] = 1;
 112       return 1;
 113    }
 114 }
 115
/* Vendor name handed back by intel_get_string() for GL_VENDOR queries. */
const char *const brw_vendor_string = "Intel Open Source Technology Center";
 117
 118 const char *
 119 brw_get_renderer_string(unsigned deviceID)
 120 {
 121    const char *chipset;
 122    static char buffer[128];
 123
 124    switch (deviceID) {
 125 #undef CHIPSET
 126 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
 127 #include "pci_ids/i965_pci_ids.h"
 128    default:
 129       chipset = "Unknown Intel Chipset";
 130       break;
 131    }
 132
 133    (void) driGetRendererString(buffer, chipset, 0);
 134    return buffer;
 135 }
 136
 137 static const GLubyte *
 138 intel_get_string(struct gl_context * ctx, GLenum name)
 139 {
 140    const struct brw_context *const brw = brw_context(ctx);
 141
 142    switch (name) {
 143    case GL_VENDOR:
 144       return (GLubyte *) brw_vendor_string;
 145
 146    case GL_RENDERER:
 147       return
 148          (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);
 149
 150    default:
 151       return NULL;
 152    }
 153 }
 154
 155 static void
 156 intel_viewport(struct gl_context *ctx)
 157 {
 158    struct brw_context *brw = brw_context(ctx);
 159    __DRIcontext *driContext = brw->driContext;
 160
 161    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 162       dri2InvalidateDrawable(driContext->driDrawablePriv);
 163       dri2InvalidateDrawable(driContext->driReadablePriv);
 164    }
 165 }
 166
 167 static void
 168 intel_update_state(struct gl_context * ctx, GLuint new_state)
 169 {
 170    struct brw_context *brw = brw_context(ctx);
 171    struct intel_texture_object *tex_obj;
 172    struct intel_renderbuffer *depth_irb;
 173
 174    if (ctx->swrast_context)
 175       _swrast_InvalidateState(ctx, new_state);
 176    _vbo_InvalidateState(ctx, new_state);
 177
 178    brw->NewGLState |= new_state;
 179
 180    _mesa_unlock_context_textures(ctx);
 181
 182    /* Resolve the depth buffer's HiZ buffer. */
 183    depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
 184    if (depth_irb)
 185       intel_renderbuffer_resolve_hiz(brw, depth_irb);
 186
 187    /* Resolve depth buffer and render cache of each enabled texture. */
 188    int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
 189    for (int i = 0; i <= maxEnabledUnit; i++) {
 190       if (!ctx->Texture.Unit[i]._Current)
 191          continue;
 192       tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
 193       if (!tex_obj || !tex_obj->mt)
 194          continue;
 195       intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
 196       intel_miptree_resolve_color(brw, tex_obj->mt);
 197       brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
 198    }
 199
 200    /* Resolve color for each active shader image. */
 201    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
 202       const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
 203          ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;
 204
 205       if (unlikely(shader && shader->NumImages)) {
 206          for (unsigned j = 0; j < shader->NumImages; j++) {
 207             struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
 208             tex_obj = intel_texture_object(u->TexObj);
 209
 210             if (tex_obj && tex_obj->mt) {
 211                intel_miptree_resolve_color(brw, tex_obj->mt);
 212                brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
 213             }
 214          }
 215       }
 216    }
 217
 218    /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
 219     * single-sampled color renderbuffers because the CCS buffer isn't
 220     * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
 221     * enabled because otherwise the surface state will be programmed with the
 222     * linear equivalent format anyway.
 223     */
 224    if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
 225       struct gl_framebuffer *fb = ctx->DrawBuffer;
 226       for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
 227          struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
 228
 229          if (rb == NULL)
 230             continue;
 231
 232          struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 233          struct intel_mipmap_tree *mt = irb->mt;
 234
 235          if (mt == NULL ||
 236              mt->num_samples > 1 ||
 237              _mesa_get_srgb_format_linear(mt->format) == mt->format)
 238                continue;
 239
 240          intel_miptree_resolve_color(brw, mt);
 241          brw_render_cache_set_check_flush(brw, mt->bo);
 242       }
 243    }
 244
 245    _mesa_lock_context_textures(ctx);
 246 }
 247
 248 #define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
 249
 250 static void
 251 intel_flush_front(struct gl_context *ctx)
 252 {
 253    struct brw_context *brw = brw_context(ctx);
 254    __DRIcontext *driContext = brw->driContext;
 255    __DRIdrawable *driDrawable = driContext->driDrawablePriv;
 256    __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 257
 258    if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 259       if (flushFront(screen) && driDrawable &&
 260           driDrawable->loaderPrivate) {
 261
 262          /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
 263           *
 264           * This potentially resolves both front and back buffer. It
 265           * is unnecessary to resolve the back, but harms nothing except
 266           * performance. And no one cares about front-buffer render
 267           * performance.
 268           */
 269          intel_resolve_for_dri2_flush(brw, driDrawable);
 270          intel_batchbuffer_flush(brw);
 271
 272          flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
 273
 274          /* We set the dirty bit in intel_prepare_render() if we're
 275           * front buffer rendering once we get there.
 276           */
 277          brw->front_buffer_dirty = false;
 278       }
 279    }
 280 }
 281
 282 static void
 283 intel_glFlush(struct gl_context *ctx)
 284 {
 285    struct brw_context *brw = brw_context(ctx);
 286
 287    intel_batchbuffer_flush(brw);
 288    intel_flush_front(ctx);
 289
 290    brw->need_flush_throttle = true;
 291 }
 292
 293 static void
 294 intel_finish(struct gl_context * ctx)
 295 {
 296    struct brw_context *brw = brw_context(ctx);
 297
 298    intel_glFlush(ctx);
 299
 300    if (brw->batch.last_bo)
 301       drm_intel_bo_wait_rendering(brw->batch.last_bo);
 302 }
 303
 304 static void
 305 brw_init_driver_functions(struct brw_context *brw,
 306                           struct dd_function_table *functions)
 307 {
 308    _mesa_init_driver_functions(functions);
 309
 310    /* GLX uses DRI2 invalidate events to handle window resizing.
 311     * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
 312     * which doesn't provide a mechanism for snooping the event queues.
 313     *
 314     * So EGL still relies on viewport hacks to handle window resizing.
 315     * This should go away with DRI3000.
 316     */
 317    if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
 318       functions->Viewport = intel_viewport;
 319
 320    functions->Flush = intel_glFlush;
 321    functions->Finish = intel_finish;
 322    functions->GetString = intel_get_string;
 323    functions->UpdateState = intel_update_state;
 324
 325    intelInitTextureFuncs(functions);
 326    intelInitTextureImageFuncs(functions);
 327    intelInitTextureSubImageFuncs(functions);
 328    intelInitTextureCopyImageFuncs(functions);
 329    intelInitCopyImageFuncs(functions);
 330    intelInitClearFuncs(functions);
 331    intelInitBufferFuncs(functions);
 332    intelInitPixelFuncs(functions);
 333    intelInitBufferObjectFuncs(functions);
 334    intel_init_syncobj_functions(functions);
 335    brw_init_object_purgeable_functions(functions);
 336
 337    brwInitFragProgFuncs( functions );
 338    brw_init_common_queryobj_functions(functions);
 339    if (brw->gen >= 6)
 340       gen6_init_queryobj_functions(functions);
 341    else
 342       gen4_init_queryobj_functions(functions);
 343    brw_init_compute_functions(functions);
 344    if (brw->gen >= 7)
 345       brw_init_conditional_render_functions(functions);
 346
 347    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 348
 349    functions->NewTransformFeedback = brw_new_transform_feedback;
 350    functions->DeleteTransformFeedback = brw_delete_transform_feedback;
 351    functions->GetTransformFeedbackVertexCount =
 352       brw_get_transform_feedback_vertex_count;
 353    if (brw->gen >= 7) {
 354       functions->BeginTransformFeedback = gen7_begin_transform_feedback;
 355       functions->EndTransformFeedback = gen7_end_transform_feedback;
 356       functions->PauseTransformFeedback = gen7_pause_transform_feedback;
 357       functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
 358    } else {
 359       functions->BeginTransformFeedback = brw_begin_transform_feedback;
 360       functions->EndTransformFeedback = brw_end_transform_feedback;
 361    }
 362
 363    if (brw->gen >= 6)
 364       functions->GetSamplePosition = gen6_get_sample_position;
 365 }
 366
 367 static void
 368 brw_initialize_context_constants(struct brw_context *brw)
 369 {
 370    struct gl_context *ctx = &brw->ctx;
 371    const struct brw_compiler *compiler = brw->intelScreen->compiler;
 372
 373    const bool stage_exists[MESA_SHADER_STAGES] = {
 374       [MESA_SHADER_VERTEX] = true,
 375       [MESA_SHADER_TESS_CTRL] = brw->gen >= 8,
 376       [MESA_SHADER_TESS_EVAL] = brw->gen >= 8,
 377       [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
 378       [MESA_SHADER_FRAGMENT] = true,
 379       [MESA_SHADER_COMPUTE] =
 380          (ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
 381          _mesa_extension_override_enables.ARB_compute_shader,
 382    };
 383
 384    unsigned num_stages = 0;
 385    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 386       if (stage_exists[i])
 387          num_stages++;
 388    }
 389
 390    unsigned max_samplers =
 391       brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
 392
 393    ctx->Const.MaxDualSourceDrawBuffers = 1;
 394    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
 395    ctx->Const.MaxCombinedShaderOutputResources =
 396       MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
 397
 398    ctx->Const.QueryCounterBits.Timestamp = 36;
 399
 400    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
 401    ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
 402    ctx->Const.MaxRenderbufferSize = 8192;
 403    ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
 404    ctx->Const.Max3DTextureLevels = 12; /* 2048 */
 405    ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
 406    ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
 407    ctx->Const.MaxTextureMbytes = 1536;
 408    ctx->Const.MaxTextureRectSize = 1 << 12;
 409    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 410    ctx->Const.StripTextureBorder = true;
 411    if (brw->gen >= 7)
 412       ctx->Const.MaxProgramTextureGatherComponents = 4;
 413    else if (brw->gen == 6)
 414       ctx->Const.MaxProgramTextureGatherComponents = 1;
 415
 416    ctx->Const.MaxUniformBlockSize = 65536;
 417
 418    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 419       struct gl_program_constants *prog = &ctx->Const.Program[i];
 420
 421       if (!stage_exists[i])
 422          continue;
 423
 424       prog->MaxTextureImageUnits = max_samplers;
 425
 426       prog->MaxUniformBlocks = BRW_MAX_UBO;
 427       prog->MaxCombinedUniformComponents =
 428          prog->MaxUniformComponents +
 429          ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
 430
 431       prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
 432       prog->MaxAtomicBuffers = BRW_MAX_ABO;
 433       prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
 434       prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
 435    }
 436
 437    ctx->Const.MaxTextureUnits =
 438       MIN2(ctx->Const.MaxTextureCoordUnits,
 439            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
 440
 441    ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
 442    ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
 443    ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
 444    ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
 445    ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
 446    ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
 447    ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
 448
 449
 450    /* Hardware only supports a limited number of transform feedback buffers.
 451     * So we need to override the Mesa default (which is based only on software
 452     * limits).
 453     */
 454    ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
 455
 456    /* On Gen6, in the worst case, we use up one binding table entry per
 457     * transform feedback component (see comments above the definition of
 458     * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
 459     * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
 460     * BRW_MAX_SOL_BINDINGS.
 461     *
 462     * In "separate components" mode, we need to divide this value by
 463     * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
 464     * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
 465     */
 466    ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
 467    ctx->Const.MaxTransformFeedbackSeparateComponents =
 468       BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
 469
 470    ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;
 471
 472    int max_samples;
 473    const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
 474    const int clamp_max_samples =
 475       driQueryOptioni(&brw->optionCache, "clamp_max_samples");
 476
 477    if (clamp_max_samples < 0) {
 478       max_samples = msaa_modes[0];
 479    } else {
 480       /* Select the largest supported MSAA mode that does not exceed
 481        * clamp_max_samples.
 482        */
 483       max_samples = 0;
 484       for (int i = 0; msaa_modes[i] != 0; ++i) {
 485          if (msaa_modes[i] <= clamp_max_samples) {
 486             max_samples = msaa_modes[i];
 487             break;
 488          }
 489       }
 490    }
 491
 492    ctx->Const.MaxSamples = max_samples;
 493    ctx->Const.MaxColorTextureSamples = max_samples;
 494    ctx->Const.MaxDepthTextureSamples = max_samples;
 495    ctx->Const.MaxIntegerSamples = max_samples;
 496    ctx->Const.MaxImageSamples = 0;
 497
 498    /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
 499     * to map indices of rectangular grid to sample numbers within a pixel.
 500     * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
 501     * extension implementation. For more details see the comment above
 502     * gen6_set_sample_maps() definition.
 503     */
 504    gen6_set_sample_maps(ctx);
 505
 506    ctx->Const.MinLineWidth = 1.0;
 507    ctx->Const.MinLineWidthAA = 1.0;
 508    if (brw->gen >= 6) {
 509       ctx->Const.MaxLineWidth = 7.375;
 510       ctx->Const.MaxLineWidthAA = 7.375;
 511       ctx->Const.LineWidthGranularity = 0.125;
 512    } else {
 513       ctx->Const.MaxLineWidth = 7.0;
 514       ctx->Const.MaxLineWidthAA = 7.0;
 515       ctx->Const.LineWidthGranularity = 0.5;
 516    }
 517
 518    /* For non-antialiased lines, we have to round the line width to the
 519     * nearest whole number. Make sure that we don't advertise a line
 520     * width that, when rounded, will be beyond the actual hardware
 521     * maximum.
 522     */
 523    assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
 524
 525    ctx->Const.MinPointSize = 1.0;
 526    ctx->Const.MinPointSizeAA = 1.0;
 527    ctx->Const.MaxPointSize = 255.0;
 528    ctx->Const.MaxPointSizeAA = 255.0;
 529    ctx->Const.PointSizeGranularity = 1.0;
 530
 531    if (brw->gen >= 5 || brw->is_g4x)
 532       ctx->Const.MaxClipPlanes = 8;
 533
 534    ctx->Const.LowerTessLevel = true;
 535
 536    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
 537    ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
 538    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
 539    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
 540    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
 541    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
 542    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
 543    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
 544    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
 545    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
 546    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
 547    ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
 548       MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
 549            ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
 550
 551    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
 552    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
 553    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
 554    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
 555    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
 556    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
 557    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
 558    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
 559    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
 560       MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
 561            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
 562
 563    /* Fragment shaders use real, 32-bit twos-complement integers for all
 564     * integer types.
 565     */
 566    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
 567    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
 568    ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
 569    ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 570    ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
 571
 572    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
 573    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
 574    ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
 575    ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
 576    ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
 577
 578    /* Gen6 converts quads to polygon in beginning of 3D pipeline,
 579     * but we're not sure how it's actually done for vertex order,
 580     * that affect provoking vertex decision. Always use last vertex
 581     * convention for quad primitive which works as expected for now.
 582     */
 583    if (brw->gen >= 6)
 584       ctx->Const.QuadsFollowProvokingVertexConvention = false;
 585
 586    ctx->Const.NativeIntegers = true;
 587    ctx->Const.VertexID_is_zero_based = true;
 588
 589    /* Regarding the CMP instruction, the Ivybridge PRM says:
 590     *
 591     *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
 592     *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
 593     *    0xFFFFFFFF) is assigned to dst."
 594     *
 595     * but PRMs for earlier generations say
 596     *
 597     *   "In dword format, one GRF may store up to 8 results. When the register
 598     *    is used later as a vector of Booleans, as only LSB at each channel
 599     *    contains meaning [sic] data, software should make sure all higher bits
 600     *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
 601     *
 602     * We select the representation of a true boolean uniform to be ~0, and fix
 603     * the results of Gen <= 5 CMP instruction's with -(result & 1).
 604     */
 605    ctx->Const.UniformBooleanTrue = ~0;
 606
 607    /* From the gen4 PRM, volume 4 page 127:
 608     *
 609     *     "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
 610     *      the base address of the first element of the surface, computed in
 611     *      software by adding the surface base address to the byte offset of
 612     *      the element in the buffer."
 613     *
 614     * However, unaligned accesses are slower, so enforce buffer alignment.
 615     */
 616    ctx->Const.UniformBufferOffsetAlignment = 16;
 617
 618    /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
 619     * that we can safely have the CPU and GPU writing the same SSBO on
 620     * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
 621     * writes, so there's no problem. For an SSBO, the GPU and the CPU can
 622     * be updating disjoint regions of the buffer simultaneously and that will
 623     * break if the regions overlap the same cacheline.
 624     */
 625    ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
 626    ctx->Const.TextureBufferOffsetAlignment = 16;
 627    ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
 628
 629    if (brw->gen >= 6) {
 630       ctx->Const.MaxVarying = 32;
 631       ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
 632       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
 633       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
 634       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
 635       ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
 636       ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
 637       ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
 638       ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
 639    }
 640
 641    /* We want the GLSL compiler to emit code that uses condition codes */
 642    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 643       ctx->Const.ShaderCompilerOptions[i] =
 644          brw->intelScreen->compiler->glsl_compiler_options[i];
 645    }
 646
 647    /* ARB_viewport_array */
 648    if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
 649       ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
 650       ctx->Const.ViewportSubpixelBits = 0;
 651
 652       /* Cast to float before negating because MaxViewportWidth is unsigned.
 653        */
 654       ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
 655       ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
 656    }
 657
 658    /* ARB_gpu_shader5 */
 659    if (brw->gen >= 7)
 660       ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
 661
 662    /* ARB_framebuffer_no_attachments */
 663    ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
 664    ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
 665    ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
 666    ctx->Const.MaxFramebufferSamples = max_samples;
 667 }
 668
 669 static void
 670 brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
 671 {
 672    struct gl_context *ctx = &brw->ctx;
 673
 674    /* For ES, we set these constants based on SIMD8.
 675     *
 676     * TODO: Once we can always generate SIMD16, we should update this.
 677     *
 678     * For GL, we assume we can generate a SIMD16 program, but this currently
 679     * is not always true. This allows us to run more test cases, and will be
 680     * required based on desktop GL compute shader requirements.
 681     */
 682    const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;
 683
 684    const uint32_t max_invocations = simd_size * max_threads;
 685    ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
 686    ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
 687    ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
 688    ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
 689 }
 690
 691 /**
 692  * Process driconf (drirc) options, setting appropriate context flags.
 693  *
 694  * intelInitExtensions still pokes at optionCache directly, in order to
 695  * avoid advertising various extensions.  No flags are set, so it makes
 696  * sense to continue doing that there.
 697  */
 698 static void
 699 brw_process_driconf_options(struct brw_context *brw)
 700 {
 701    struct gl_context *ctx = &brw->ctx;
 702
 703    driOptionCache *options = &brw->optionCache;
 704    driParseConfigFiles(options, &brw->intelScreen->optionCache,
 705                        brw->driContext->driScreenPriv->myNum, "i965");
 706
 707    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
 708    switch (bo_reuse_mode) {
 709    case DRI_CONF_BO_REUSE_DISABLED:
 710       break;
 711    case DRI_CONF_BO_REUSE_ALL:
 712       intel_bufmgr_gem_enable_reuse(brw->bufmgr);
 713       break;
 714    }
 715
 716    if (!driQueryOptionb(options, "hiz")) {
 717        brw->has_hiz = false;
 718        /* On gen6, you can only do separate stencil with HIZ. */
 719        if (brw->gen == 6)
 720           brw->has_separate_stencil = false;
 721    }
 722
 723    if (driQueryOptionb(options, "always_flush_batch")) {
 724       fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
 725       brw->always_flush_batch = true;
 726    }
 727
 728    if (driQueryOptionb(options, "always_flush_cache")) {
 729       fprintf(stderr, "flushing GPU caches before/after each draw call\n");
 730       brw->always_flush_cache = true;
 731    }
 732
 733    if (driQueryOptionb(options, "disable_throttling")) {
 734       fprintf(stderr, "disabling flush throttling\n");
 735       brw->disable_throttling = true;
 736    }
 737
 738    brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 739
 740    ctx->Const.ForceGLSLExtensionsWarn =
 741       driQueryOptionb(options, "force_glsl_extensions_warn");
 742
 743    ctx->Const.DisableGLSLLineContinuations =
 744       driQueryOptionb(options, "disable_glsl_line_continuations");
 745
 746    ctx->Const.AllowGLSLExtensionDirectiveMidShader =
 747       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 748 }
 749
 750 GLboolean
 751 brwCreateContext(gl_api api,
 752                  const struct gl_config *mesaVis,
 753                  __DRIcontext *driContextPriv,
 754                  unsigned major_version,
 755                  unsigned minor_version,
 756                  uint32_t flags,
 757                  bool notify_reset,
 758                  unsigned *dri_ctx_error,
 759                  void *sharedContextPrivate)
 760 {
 761    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
 762    struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
 763    struct intel_screen *screen = sPriv->driverPrivate;
 764    const struct brw_device_info *devinfo = screen->devinfo;
 765    struct dd_function_table functions;
 766
 767    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
 768     * provides us with context reset notifications.
 769     */
 770    uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
 771       | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;
 772
 773    if (screen->has_context_reset_notification)
 774       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
 775
 776    if (flags & ~allowed_flags) {
 777       *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
 778       return false;
 779    }
 780
 781    struct brw_context *brw = rzalloc(NULL, struct brw_context);
 782    if (!brw) {
 783       fprintf(stderr, "%s: failed to alloc context\n", __func__);
 784       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 785       return false;
 786    }
 787
 788    driContextPriv->driverPrivate = brw;
 789    brw->driContext = driContextPriv;
 790    brw->intelScreen = screen;
 791    brw->bufmgr = screen->bufmgr;
 792
 793    brw->gen = devinfo->gen;
 794    brw->gt = devinfo->gt;
 795    brw->is_g4x = devinfo->is_g4x;
 796    brw->is_baytrail = devinfo->is_baytrail;
 797    brw->is_haswell = devinfo->is_haswell;
 798    brw->is_cherryview = devinfo->is_cherryview;
 799    brw->is_broxton = devinfo->is_broxton;
 800    brw->has_llc = devinfo->has_llc;
 801    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
 802    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
 803    brw->has_pln = devinfo->has_pln;
 804    brw->has_compr4 = devinfo->has_compr4;
 805    brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
 806    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
 807    brw->needs_unlit_centroid_workaround =
 808       devinfo->needs_unlit_centroid_workaround;
 809
 810    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
 811    brw->has_swizzling = screen->hw_has_swizzling;
 812
 813    brw->vs.base.stage = MESA_SHADER_VERTEX;
 814    brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
 815    brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
 816    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
 817    brw->wm.base.stage = MESA_SHADER_FRAGMENT;
 818    if (brw->gen >= 8) {
 819       gen8_init_vtable_surface_functions(brw);
 820       brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
 821    } else if (brw->gen >= 7) {
 822       gen7_init_vtable_surface_functions(brw);
 823       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
 824    } else if (brw->gen >= 6) {
 825       gen6_init_vtable_surface_functions(brw);
 826       brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
 827    } else {
 828       gen4_init_vtable_surface_functions(brw);
 829       brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
 830    }
 831
 832    brw_init_driver_functions(brw, &functions);
 833
 834    if (notify_reset)
 835       functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
 836
 837    struct gl_context *ctx = &brw->ctx;
 838
 839    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
 840       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 841       fprintf(stderr, "%s: failed to init mesa context\n", __func__);
 842       intelDestroyContext(driContextPriv);
 843       return false;
 844    }
 845
 846    driContextSetFlags(ctx, flags);
 847
 848    /* Initialize the software rasterizer and helper modules.
 849     *
 850     * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
 851     * software fallbacks (which we have to support on legacy GL to do weird
 852     * glDrawPixels(), glBitmap(), and other functions).
 853     */
 854    if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
 855       _swrast_CreateContext(ctx);
 856    }
 857
 858    _vbo_CreateContext(ctx);
 859    if (ctx->swrast_context) {
 860       _tnl_CreateContext(ctx);
 861       TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
 862       _swsetup_CreateContext(ctx);
 863
 864       /* Configure swrast to match hardware characteristics: */
 865       _swrast_allow_pixel_fog(ctx, false);
 866       _swrast_allow_vertex_fog(ctx, true);
 867    }
 868
 869    _mesa_meta_init(ctx);
 870
 871    brw_process_driconf_options(brw);
 872
 873    if (INTEL_DEBUG & DEBUG_PERF)
 874       brw->perf_debug = true;
 875
 876    brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
 877    brw_initialize_context_constants(brw);
 878
 879    ctx->Const.ResetStrategy = notify_reset
 880       ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
 881
 882    /* Reinitialize the context point state.  It depends on ctx->Const values. */
 883    _mesa_init_point(ctx);
 884
 885    intel_fbo_init(brw);
 886
 887    intel_batchbuffer_init(brw);
 888
 889    if (brw->gen >= 6) {
 890       /* Create a new hardware context.  Using a hardware context means that
 891        * our GPU state will be saved/restored on context switch, allowing us
 892        * to assume that the GPU is in the same state we left it in.
 893        *
 894        * This is required for transform feedback buffer offsets, query objects,
 895        * and also allows us to reduce how much state we have to emit.
 896        */
 897       brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 898
 899       if (!brw->hw_ctx) {
 900          fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
 901          intelDestroyContext(driContextPriv);
 902          return false;
 903       }
 904    }
 905
 906    if (brw_init_pipe_control(brw, devinfo)) {
 907       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
 908       intelDestroyContext(driContextPriv);
 909       return false;
 910    }
 911
 912    brw_init_state(brw);
 913
 914    intelInitExtensions(ctx);
 915
 916    brw_init_surface_formats(brw);
 917
 918    brw->max_vs_threads = devinfo->max_vs_threads;
 919    brw->max_hs_threads = devinfo->max_hs_threads;
 920    brw->max_ds_threads = devinfo->max_ds_threads;
 921    brw->max_gs_threads = devinfo->max_gs_threads;
 922    brw->max_wm_threads = devinfo->max_wm_threads;
 923    brw->max_cs_threads = devinfo->max_cs_threads;
 924    brw->urb.size = devinfo->urb.size;
 925    brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 926    brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 927    brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
 928    brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
 929    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 930
 931    /* Estimate the size of the mappable aperture into the GTT.  There's an
 932     * ioctl to get the whole GTT size, but not one to get the mappable subset.
 933     * It turns out it's basically always 256MB, though some ancient hardware
 934     * was smaller.
 935     */
 936    uint32_t gtt_size = 256 * 1024 * 1024;
 937
 938    /* We don't want to map two objects such that a memcpy between them would
 939     * just fault one mapping in and then the other over and over forever.  So
 940     * we would need to divide the GTT size by 2.  Additionally, some GTT is
 941     * taken up by things like the framebuffer and the ringbuffer and such, so
 942     * be more conservative.
 943     */
 944    brw->max_gtt_map_object_size = gtt_size / 4;
 945
 946    if (brw->gen == 6)
 947       brw->urb.gs_present = false;
 948
 949    brw->prim_restart.in_progress = false;
 950    brw->prim_restart.enable_cut_index = false;
 951    brw->gs.enabled = false;
 952    brw->sf.viewport_transform_enable = true;
 953
 954    brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
 955
 956    brw->use_resource_streamer = screen->has_resource_streamer &&
 957       (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
 958        env_var_as_boolean("INTEL_USE_GATHER", false));
 959
 960    ctx->VertexProgram._MaintainTnlProgram = true;
 961    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 962
 963    brw_draw_init( brw );
 964
 965    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
 966       /* Turn on some extra GL_ARB_debug_output generation. */
 967       brw->perf_debug = true;
 968    }
 969
 970    if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
 971       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
 972
 973    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
 974       brw_init_shader_time(brw);
 975
 976    _mesa_compute_version(ctx);
 977
 978    _mesa_initialize_dispatch_tables(ctx);
 979    _mesa_initialize_vbo_vtxfmt(ctx);
 980
 981    if (ctx->Extensions.AMD_performance_monitor) {
 982       brw_init_performance_monitors(brw);
 983    }
 984
 985    vbo_use_buffer_objects(ctx);
 986    vbo_always_unmap_buffers(ctx);
 987
 988    return true;
 989 }
 990
 991 void
 992 intelDestroyContext(__DRIcontext * driContextPriv)
 993 {
 994    struct brw_context *brw =
 995       (struct brw_context *) driContextPriv->driverPrivate;
 996    struct gl_context *ctx = &brw->ctx;
 997
 998    /* Dump a final BMP in case the application doesn't call SwapBuffers */
 999    if (INTEL_DEBUG & DEBUG_AUB) {
1000       intel_batchbuffer_flush(brw);
1001       aub_dump_bmp(&brw->ctx);
1002    }
1003
1004    _mesa_meta_free(&brw->ctx);
1005    brw_meta_fast_clear_free(brw);
1006
1007    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1008       /* Force a report. */
1009       brw->shader_time.report_time = 0;
1010
1011       brw_collect_and_report_shader_time(brw);
1012       brw_destroy_shader_time(brw);
1013    }
1014
1015    brw_destroy_state(brw);
1016    brw_draw_destroy(brw);
1017
1018    drm_intel_bo_unreference(brw->curbe.curbe_bo);
1019    if (brw->vs.base.scratch_bo)
1020       drm_intel_bo_unreference(brw->vs.base.scratch_bo);
1021    if (brw->gs.base.scratch_bo)
1022       drm_intel_bo_unreference(brw->gs.base.scratch_bo);
1023    if (brw->wm.base.scratch_bo)
1024       drm_intel_bo_unreference(brw->wm.base.scratch_bo);
1025
1026    gen7_reset_hw_bt_pool_offsets(brw);
1027    drm_intel_bo_unreference(brw->hw_bt_pool.bo);
1028    brw->hw_bt_pool.bo = NULL;
1029
1030    drm_intel_gem_context_destroy(brw->hw_ctx);
1031
1032    if (ctx->swrast_context) {
1033       _swsetup_DestroyContext(&brw->ctx);
1034       _tnl_DestroyContext(&brw->ctx);
1035    }
1036    _vbo_DestroyContext(&brw->ctx);
1037
1038    if (ctx->swrast_context)
1039       _swrast_DestroyContext(&brw->ctx);
1040
1041    brw_fini_pipe_control(brw);
1042    intel_batchbuffer_free(brw);
1043
1044    drm_intel_bo_unreference(brw->throttle_batch[1]);
1045    drm_intel_bo_unreference(brw->throttle_batch[0]);
1046    brw->throttle_batch[1] = NULL;
1047    brw->throttle_batch[0] = NULL;
1048
1049    driDestroyOptionCache(&brw->optionCache);
1050
1051    /* free the Mesa context */
1052    _mesa_free_context_data(&brw->ctx);
1053
1054    ralloc_free(brw);
1055    driContextPriv->driverPrivate = NULL;
1056 }
1057
1058 GLboolean
1059 intelUnbindContext(__DRIcontext * driContextPriv)
1060 {
1061    /* Unset current context and dispath table */
1062    _mesa_make_current(NULL, NULL, NULL);
1063
1064    return true;
1065 }
1066
1067 /**
1068  * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior
1069  * on window system framebuffers.
1070  *
1071  * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1072  * your renderbuffer can do sRGB encode, and you can flip a switch that does
1073  * sRGB encode if the renderbuffer can handle it.  You can ask specifically
1074  * for a visual where you're guaranteed to be capable, but it turns out that
1075  * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1076  * incapable ones, because there's no difference between the two in resources
1077  * used.  Applications thus get built that accidentally rely on the default
1078  * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
1079  * great...
1080  *
1081  * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1082  * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1083  * So they removed the enable knob and made it "if the renderbuffer is sRGB
1084  * capable, do sRGB encode".  Then, for your window system renderbuffers, you
1085  * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1086  * and get no sRGB encode (assuming that both kinds of visual are available).
1087  * Thus our choice to support sRGB by default on our visuals for desktop would
1088  * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1089  *
1090  * Unfortunately, renderbuffer setup happens before a context is created.  So
1091  * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1092  * context (without an sRGB visual, though we don't have sRGB visuals exposed
1093  * yet), we go turn that back off before anyone finds out.
1094  */
1095 static void
1096 intel_gles3_srgb_workaround(struct brw_context *brw,
1097                             struct gl_framebuffer *fb)
1098 {
1099    struct gl_context *ctx = &brw->ctx;
1100
1101    if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1102       return;
1103
1104    /* Some day when we support the sRGB capable bit on visuals available for
1105     * GLES, we'll need to respect that and not disable things here.
1106     */
1107    fb->Visual.sRGBCapable = false;
1108    for (int i = 0; i < BUFFER_COUNT; i++) {
1109       if (fb->Attachment[i].Renderbuffer &&
1110           fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
1111          fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
1112       }
1113    }
1114 }
1115
1116 GLboolean
1117 intelMakeCurrent(__DRIcontext * driContextPriv,
1118                  __DRIdrawable * driDrawPriv,
1119                  __DRIdrawable * driReadPriv)
1120 {
1121    struct brw_context *brw;
1122    GET_CURRENT_CONTEXT(curCtx);
1123
1124    if (driContextPriv)
1125       brw = (struct brw_context *) driContextPriv->driverPrivate;
1126    else
1127       brw = NULL;
1128
1129    /* According to the glXMakeCurrent() man page: "Pending commands to
1130     * the previous context, if any, are flushed before it is released."
1131     * But only flush if we're actually changing contexts.
1132     */
1133    if (brw_context(curCtx) && brw_context(curCtx) != brw) {
1134       _mesa_flush(curCtx);
1135    }
1136
1137    if (driContextPriv) {
1138       struct gl_context *ctx = &brw->ctx;
1139       struct gl_framebuffer *fb, *readFb;
1140
1141       if (driDrawPriv == NULL) {
1142          fb = _mesa_get_incomplete_framebuffer();
1143       } else {
1144          fb = driDrawPriv->driverPrivate;
1145          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1146       }
1147
1148       if (driReadPriv == NULL) {
1149          readFb = _mesa_get_incomplete_framebuffer();
1150       } else {
1151          readFb = driReadPriv->driverPrivate;
1152          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1153       }
1154
1155       /* The sRGB workaround changes the renderbuffer's format. We must change
1156        * the format before the renderbuffer's miptree get's allocated, otherwise
1157        * the formats of the renderbuffer and its miptree will differ.
1158        */
1159       intel_gles3_srgb_workaround(brw, fb);
1160       intel_gles3_srgb_workaround(brw, readFb);
1161
1162       /* If the context viewport hasn't been initialized, force a call out to
1163        * the loader to get buffers so we have a drawable size for the initial
1164        * viewport. */
1165       if (!brw->ctx.ViewportInitialized)
1166          intel_prepare_render(brw);
1167
1168       _mesa_make_current(ctx, fb, readFb);
1169    } else {
1170       _mesa_make_current(NULL, NULL, NULL);
1171    }
1172
1173    return true;
1174 }
1175
1176 void
1177 intel_resolve_for_dri2_flush(struct brw_context *brw,
1178                              __DRIdrawable *drawable)
1179 {
1180    if (brw->gen < 6) {
1181       /* MSAA and fast color clear are not supported, so don't waste time
1182        * checking whether a resolve is needed.
1183        */
1184       return;
1185    }
1186
1187    struct gl_framebuffer *fb = drawable->driverPrivate;
1188    struct intel_renderbuffer *rb;
1189
1190    /* Usually, only the back buffer will need to be downsampled. However,
1191     * the front buffer will also need it if the user has rendered into it.
1192     */
1193    static const gl_buffer_index buffers[2] = {
1194          BUFFER_BACK_LEFT,
1195          BUFFER_FRONT_LEFT,
1196    };
1197
1198    for (int i = 0; i < 2; ++i) {
1199       rb = intel_get_renderbuffer(fb, buffers[i]);
1200       if (rb == NULL || rb->mt == NULL)
1201          continue;
1202       if (rb->mt->num_samples <= 1)
1203          intel_miptree_resolve_color(brw, rb->mt);
1204       else
1205          intel_renderbuffer_downsample(brw, rb);
1206    }
1207 }
1208
/** Return the size of one pixel of \p rb's format, in bits. */
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   const int bytes_per_pixel = _mesa_get_format_bytes(intel_rb_format(rb));

   return bytes_per_pixel * 8;
}
1214
1215 static void
1216 intel_query_dri2_buffers(struct brw_context *brw,
1217                          __DRIdrawable *drawable,
1218                          __DRIbuffer **buffers,
1219                          int *count);
1220
1221 static void
1222 intel_process_dri2_buffer(struct brw_context *brw,
1223                           __DRIdrawable *drawable,
1224                           __DRIbuffer *buffer,
1225                           struct intel_renderbuffer *rb,
1226                           const char *buffer_name);
1227
1228 static void
1229 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1230
1231 static void
1232 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1233 {
1234    struct gl_framebuffer *fb = drawable->driverPrivate;
1235    struct intel_renderbuffer *rb;
1236    __DRIbuffer *buffers = NULL;
1237    int i, count;
1238    const char *region_name;
1239
1240    /* Set this up front, so that in case our buffers get invalidated
1241     * while we're getting new buffers, we don't clobber the stamp and
1242     * thus ignore the invalidate. */
1243    drawable->lastStamp = drawable->dri2.stamp;
1244
1245    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1246       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1247
1248    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1249
1250    if (buffers == NULL)
1251       return;
1252
1253    for (i = 0; i < count; i++) {
1254        switch (buffers[i].attachment) {
1255        case __DRI_BUFFER_FRONT_LEFT:
1256            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1257            region_name = "dri2 front buffer";
1258            break;
1259
1260        case __DRI_BUFFER_FAKE_FRONT_LEFT:
1261            rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1262            region_name = "dri2 fake front buffer";
1263            break;
1264
1265        case __DRI_BUFFER_BACK_LEFT:
1266            rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1267            region_name = "dri2 back buffer";
1268            break;
1269
1270        case __DRI_BUFFER_DEPTH:
1271        case __DRI_BUFFER_HIZ:
1272        case __DRI_BUFFER_DEPTH_STENCIL:
1273        case __DRI_BUFFER_STENCIL:
1274        case __DRI_BUFFER_ACCUM:
1275        default:
1276            fprintf(stderr,
1277                    "unhandled buffer attach event, attachment type %d\n",
1278                    buffers[i].attachment);
1279            return;
1280        }
1281
1282        intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1283    }
1284
1285 }
1286
1287 void
1288 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1289 {
1290    struct brw_context *brw = context->driverPrivate;
1291    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1292
1293    /* Set this up front, so that in case our buffers get invalidated
1294     * while we're getting new buffers, we don't clobber the stamp and
1295     * thus ignore the invalidate. */
1296    drawable->lastStamp = drawable->dri2.stamp;
1297
1298    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1299       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1300
1301    if (screen->image.loader)
1302       intel_update_image_buffers(brw, drawable);
1303    else
1304       intel_update_dri2_buffers(brw, drawable);
1305
1306    driUpdateFramebufferSize(&brw->ctx, drawable);
1307 }
1308
1309 /**
1310  * intel_prepare_render should be called anywhere that curent read/drawbuffer
1311  * state is required.
1312  */
1313 void
1314 intel_prepare_render(struct brw_context *brw)
1315 {
1316    struct gl_context *ctx = &brw->ctx;
1317    __DRIcontext *driContext = brw->driContext;
1318    __DRIdrawable *drawable;
1319
1320    drawable = driContext->driDrawablePriv;
1321    if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1322       if (drawable->lastStamp != drawable->dri2.stamp)
1323          intel_update_renderbuffers(driContext, drawable);
1324       driContext->dri2.draw_stamp = drawable->dri2.stamp;
1325    }
1326
1327    drawable = driContext->driReadablePriv;
1328    if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1329       if (drawable->lastStamp != drawable->dri2.stamp)
1330          intel_update_renderbuffers(driContext, drawable);
1331       driContext->dri2.read_stamp = drawable->dri2.stamp;
1332    }
1333
1334    /* If we're currently rendering to the front buffer, the rendering
1335     * that will happen next will probably dirty the front buffer.  So
1336     * mark it as dirty here.
1337     */
1338    if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
1339       brw->front_buffer_dirty = true;
1340 }
1341
1342 /**
1343  * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1344  *
1345  * To determine which DRI buffers to request, examine the renderbuffers
1346  * attached to the drawable's framebuffer. Then request the buffers with
1347  * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1348  *
1349  * This is called from intel_update_renderbuffers().
1350  *
1351  * \param drawable      Drawable whose buffers are queried.
1352  * \param buffers       [out] List of buffers returned by DRI2 query.
1353  * \param buffer_count  [out] Number of buffers returned.
1354  *
1355  * \see intel_update_renderbuffers()
1356  * \see DRI2GetBuffers()
1357  * \see DRI2GetBuffersWithFormat()
1358  */
1359 static void
1360 intel_query_dri2_buffers(struct brw_context *brw,
1361                          __DRIdrawable *drawable,
1362                          __DRIbuffer **buffers,
1363                          int *buffer_count)
1364 {
1365    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1366    struct gl_framebuffer *fb = drawable->driverPrivate;
1367    int i = 0;
1368    unsigned attachments[8];
1369
1370    struct intel_renderbuffer *front_rb;
1371    struct intel_renderbuffer *back_rb;
1372
1373    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1374    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1375
1376    memset(attachments, 0, sizeof(attachments));
1377    if ((_mesa_is_front_buffer_drawing(fb) ||
1378         _mesa_is_front_buffer_reading(fb) ||
1379         !back_rb) && front_rb) {
1380       /* If a fake front buffer is in use, then querying for
1381        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1382        * the real front buffer to the fake front buffer.  So before doing the
1383        * query, we need to make sure all the pending drawing has landed in the
1384        * real front buffer.
1385        */
1386       intel_batchbuffer_flush(brw);
1387       intel_flush_front(&brw->ctx);
1388
1389       attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1390       attachments[i++] = intel_bits_per_pixel(front_rb);
1391    } else if (front_rb && brw->front_buffer_dirty) {
1392       /* We have pending front buffer rendering, but we aren't querying for a
1393        * front buffer.  If the front buffer we have is a fake front buffer,
1394        * the X server is going to throw it away when it processes the query.
1395        * So before doing the query, make sure all the pending drawing has
1396        * landed in the real front buffer.
1397        */
1398       intel_batchbuffer_flush(brw);
1399       intel_flush_front(&brw->ctx);
1400    }
1401
1402    if (back_rb) {
1403       attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1404       attachments[i++] = intel_bits_per_pixel(back_rb);
1405    }
1406
1407    assert(i <= ARRAY_SIZE(attachments));
1408
1409    *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
1410                                                         &drawable->w,
1411                                                         &drawable->h,
1412                                                         attachments, i / 2,
1413                                                         buffer_count,
1414                                                         drawable->loaderPrivate);
1415 }
1416
1417 /**
1418  * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1419  *
1420  * This is called from intel_update_renderbuffers().
1421  *
1422  * \par Note:
1423  *    DRI buffers whose attachment point is DRI2BufferStencil or
1424  *    DRI2BufferDepthStencil are handled as special cases.
1425  *
1426  * \param buffer_name is a human readable name, such as "dri2 front buffer",
1427  *        that is passed to drm_intel_bo_gem_create_from_name().
1428  *
1429  * \see intel_update_renderbuffers()
1430  */
1431 static void
1432 intel_process_dri2_buffer(struct brw_context *brw,
1433                           __DRIdrawable *drawable,
1434                           __DRIbuffer *buffer,
1435                           struct intel_renderbuffer *rb,
1436                           const char *buffer_name)
1437 {
1438    struct gl_framebuffer *fb = drawable->driverPrivate;
1439    drm_intel_bo *bo;
1440
1441    if (!rb)
1442       return;
1443
1444    unsigned num_samples = rb->Base.Base.NumSamples;
1445
1446    /* We try to avoid closing and reopening the same BO name, because the first
1447     * use of a mapping of the buffer involves a bunch of page faulting which is
1448     * moderately expensive.
1449     */
1450    struct intel_mipmap_tree *last_mt;
1451    if (num_samples == 0)
1452       last_mt = rb->mt;
1453    else
1454       last_mt = rb->singlesample_mt;
1455
1456    uint32_t old_name = 0;
1457    if (last_mt) {
1458        /* The bo already has a name because the miptree was created by a
1459         * previous call to intel_process_dri2_buffer(). If a bo already has a
1460         * name, then drm_intel_bo_flink() is a low-cost getter.  It does not
1461         * create a new name.
1462         */
1463       drm_intel_bo_flink(last_mt->bo, &old_name);
1464    }
1465
1466    if (old_name == buffer->name)
1467       return;
1468
1469    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1470       fprintf(stderr,
1471               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1472               buffer->name, buffer->attachment,
1473               buffer->cpp, buffer->pitch);
1474    }
1475
1476    bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1477                                           buffer->name);
1478    if (!bo) {
1479       fprintf(stderr,
1480               "Failed to open BO for returned DRI2 buffer "
1481               "(%dx%d, %s, named %d).\n"
1482               "This is likely a bug in the X Server that will lead to a "
1483               "crash soon.\n",
1484               drawable->w, drawable->h, buffer_name, buffer->name);
1485       return;
1486    }
1487
1488    intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
1489                                             drawable->w, drawable->h,
1490                                             buffer->pitch);
1491
1492    if (_mesa_is_front_buffer_drawing(fb) &&
1493        (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1494         buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1495        rb->Base.Base.NumSamples > 1) {
1496       intel_renderbuffer_upsample(brw, rb);
1497    }
1498
1499    assert(rb->mt);
1500
1501    drm_intel_bo_unreference(bo);
1502 }
1503
1504 /**
1505  * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1506  *
1507  * To determine which DRI buffers to request, examine the renderbuffers
1508  * attached to the drawable's framebuffer. Then request the buffers from
1509  * the image loader
1510  *
1511  * This is called from intel_update_renderbuffers().
1512  *
1513  * \param drawable      Drawable whose buffers are queried.
1514  * \param buffers       [out] List of buffers returned by DRI2 query.
1515  * \param buffer_count  [out] Number of buffers returned.
1516  *
1517  * \see intel_update_renderbuffers()
1518  */
1519
1520 static void
1521 intel_update_image_buffer(struct brw_context *intel,
1522                           __DRIdrawable *drawable,
1523                           struct intel_renderbuffer *rb,
1524                           __DRIimage *buffer,
1525                           enum __DRIimageBufferMask buffer_type)
1526 {
1527    struct gl_framebuffer *fb = drawable->driverPrivate;
1528
1529    if (!rb || !buffer->bo)
1530       return;
1531
1532    unsigned num_samples = rb->Base.Base.NumSamples;
1533
1534    /* Check and see if we're already bound to the right
1535     * buffer object
1536     */
1537    struct intel_mipmap_tree *last_mt;
1538    if (num_samples == 0)
1539       last_mt = rb->mt;
1540    else
1541       last_mt = rb->singlesample_mt;
1542
1543    if (last_mt && last_mt->bo == buffer->bo)
1544       return;
1545
1546    intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
1547                                             buffer->width, buffer->height,
1548                                             buffer->pitch);
1549
1550    if (_mesa_is_front_buffer_drawing(fb) &&
1551        buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1552        rb->Base.Base.NumSamples > 1) {
1553       intel_renderbuffer_upsample(intel, rb);
1554    }
1555 }
1556
1557 static void
1558 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1559 {
1560    struct gl_framebuffer *fb = drawable->driverPrivate;
1561    __DRIscreen *screen = brw->intelScreen->driScrnPriv;
1562    struct intel_renderbuffer *front_rb;
1563    struct intel_renderbuffer *back_rb;
1564    struct __DRIimageList images;
1565    unsigned int format;
1566    uint32_t buffer_mask = 0;
1567
1568    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1569    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1570
1571    if (back_rb)
1572       format = intel_rb_format(back_rb);
1573    else if (front_rb)
1574       format = intel_rb_format(front_rb);
1575    else
1576       return;
1577
1578    if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1579                     _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1580       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1581    }
1582
1583    if (back_rb)
1584       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1585
1586    (*screen->image.loader->getBuffers) (drawable,
1587                                         driGLFormatToImageFormat(format),
1588                                         &drawable->dri2.stamp,
1589                                         drawable->loaderPrivate,
1590                                         buffer_mask,
1591                                         &images);
1592
1593    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1594       drawable->w = images.front->width;
1595       drawable->h = images.front->height;
1596       intel_update_image_buffer(brw,
1597                                 drawable,
1598                                 front_rb,
1599                                 images.front,
1600                                 __DRI_IMAGE_BUFFER_FRONT);
1601    }
1602    if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1603       drawable->w = images.back->width;
1604       drawable->h = images.back->height;
1605       intel_update_image_buffer(brw,
1606                                 drawable,
1607                                 back_rb,
1608                                 images.back,
1609                                 __DRI_IMAGE_BUFFER_BACK);
1610    }
1611 }