OSDN Git Service

e6f20595b67386956f9153ef95f258c0f6fd2d60
[tomoyo/tomoyo-test1.git] / drivers / gpu / drm / i915 / gt / intel_lrc.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Ben Widawsky <ben@bwidawsk.net>
25  *    Michel Thierry <michel.thierry@intel.com>
26  *    Thomas Daniel <thomas.daniel@intel.com>
27  *    Oscar Mateo <oscar.mateo@intel.com>
28  *
29  */
30
31 /**
32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
33  *
34  * Motivation:
35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36  * These expanded contexts enable a number of new abilities, especially
37  * "Execlists" (also implemented in this file).
38  *
39  * One of the main differences with the legacy HW contexts is that logical
40  * ring contexts incorporate many more things to the context's state, like
41  * PDPs or ringbuffer control registers:
42  *
43  * The reason why PDPs are included in the context is straightforward: as
44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45  * contained there mean you don't need to do a ppgtt->switch_mm yourself,
46  * instead, the GPU will do it for you on the context switch.
47  *
48  * But, what about the ringbuffer control registers (head, tail, etc..)?
49  * shouldn't we just need a set of those per engine command streamer? This is
50  * where the name "Logical Rings" starts to make sense: by virtualizing the
51  * rings, the engine cs shifts to a new "ring buffer" with every context
52  * switch. When you want to submit a workload to the GPU you: A) choose your
53  * context, B) find its appropriate virtualized ring, C) write commands to it
54  * and then, finally, D) tell the GPU to switch to that context.
55  *
56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57  * to a contexts is via a context execution list, ergo "Execlists".
58  *
59  * LRC implementation:
60  * Regarding the creation of contexts, we have:
61  *
62  * - One global default context.
63  * - One local default context for each opened fd.
64  * - One local extra context for each context create ioctl call.
65  *
66  * Now that ringbuffers belong per-context (and not per-engine, like before)
67  * and that contexts are uniquely tied to a given engine (and not reusable,
68  * like before) we need:
69  *
70  * - One ringbuffer per-engine inside each context.
71  * - One backing object per-engine inside each context.
72  *
73  * The global default context starts its life with these new objects fully
74  * allocated and populated. The local default context for each opened fd is
75  * more complex, because we don't know at creation time which engine is going
76  * to use them. To handle this, we have implemented a deferred creation of LR
77  * contexts:
78  *
79  * The local context starts its life as a hollow or blank holder, that only
80  * gets populated for a given engine once we receive an execbuffer. If later
81  * on we receive another execbuffer ioctl for the same context but a different
82  * engine, we allocate/populate a new ringbuffer and context backing object and
83  * so on.
84  *
85  * Finally, regarding local contexts created using the ioctl call: as they are
86  * only allowed with the render ring, we can allocate & populate them right
87  * away (no need to defer anything, at least for now).
88  *
89  * Execlists implementation:
90  * Execlists are the new method by which, on gen8+ hardware, workloads are
91  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92  * This method works as follows:
93  *
94  * When a request is committed, its commands (the BB start and any leading or
95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96  * for the appropriate context. The tail pointer in the hardware context is not
97  * updated at this time, but instead, kept by the driver in the ringbuffer
98  * structure. A structure representing this request is added to a request queue
99  * for the appropriate engine: this structure contains a copy of the context's
100  * tail after the request was written to the ring buffer and a pointer to the
101  * context itself.
102  *
103  * If the engine's request queue was empty before the request was added, the
104  * queue is processed immediately. Otherwise the queue will be processed during
105  * a context switch interrupt. In any case, elements on the queue will get sent
106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107  * globally unique 20-bits submission ID.
108  *
109  * When execution of a request completes, the GPU updates the context status
110  * buffer with a context complete event and generates a context switch interrupt.
111  * During the interrupt handling, the driver examines the events in the buffer:
112  * for each context complete event, if the announced ID matches that on the head
113  * of the request queue, then that request is retired and removed from the queue.
114  *
115  * After processing, if any requests were retired and the queue is not empty
116  * then a new execution list can be submitted. The two requests at the front of
117  * the queue are next to be submitted but since a context may not occur twice in
118  * an execution list, if subsequent requests have the same ID as the first then
119  * the two requests must be combined. This is done simply by discarding requests
120  * at the head of the queue until either only one requests is left (in which case
121  * we use a NULL second context) or the first two requests have unique IDs.
122  *
123  * By always executing the first two requests in the queue the driver ensures
124  * that the GPU is kept as busy as possible. In the case where a single context
125  * completes but a second context is still executing, the request for this second
126  * context will be at the head of the queue when we remove the first one. This
127  * request will then be resubmitted along with a new request for a different context,
128  * which will cause the hardware to continue executing the second request and queue
129  * the new request (the GPU detects the condition of a context getting preempted
130  * with the same context and optimizes the context switch flow by not doing
131  * preemption, but just sampling the new tail pointer).
132  *
133  */
134 #include <linux/interrupt.h>
135
136 #include "i915_drv.h"
137 #include "i915_perf.h"
138 #include "i915_trace.h"
139 #include "i915_vgpu.h"
140 #include "intel_context.h"
141 #include "intel_engine_pm.h"
142 #include "intel_gt.h"
143 #include "intel_gt_pm.h"
144 #include "intel_gt_requests.h"
145 #include "intel_lrc_reg.h"
146 #include "intel_mocs.h"
147 #include "intel_reset.h"
148 #include "intel_ring.h"
149 #include "intel_workarounds.h"
150
151 #define RING_EXECLIST_QFULL             (1 << 0x2)
152 #define RING_EXECLIST1_VALID            (1 << 0x3)
153 #define RING_EXECLIST0_VALID            (1 << 0x4)
154 #define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
155 #define RING_EXECLIST1_ACTIVE           (1 << 0x11)
156 #define RING_EXECLIST0_ACTIVE           (1 << 0x12)
157
158 #define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
159 #define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
160 #define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
161 #define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
162 #define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
163 #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
164
165 #define GEN8_CTX_STATUS_COMPLETED_MASK \
166          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
167
168 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
169
170 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE  (0x1) /* lower csb dword */
171 #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
172 #define GEN12_CSB_SW_CTX_ID_MASK                GENMASK(25, 15)
173 #define GEN12_IDLE_CTX_ID               0x7FF
174 #define GEN12_CSB_CTX_VALID(csb_dw) \
175         (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
176
177 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
178 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
179
180 struct virtual_engine {
181         struct intel_engine_cs base;
182         struct intel_context context;
183
184         /*
185          * We allow only a single request through the virtual engine at a time
186          * (each request in the timeline waits for the completion fence of
187          * the previous before being submitted). By restricting ourselves to
188          * only submitting a single request, each request is placed on to a
189          * physical to maximise load spreading (by virtue of the late greedy
190          * scheduling -- each real engine takes the next available request
191          * upon idling).
192          */
193         struct i915_request *request;
194
195         /*
196          * We keep a rbtree of available virtual engines inside each physical
197          * engine, sorted by priority. Here we preallocate the nodes we need
198          * for the virtual engine, indexed by physical_engine->id.
199          */
200         struct ve_node {
201                 struct rb_node rb;
202                 int prio;
203         } nodes[I915_NUM_ENGINES];
204
205         /*
206          * Keep track of bonded pairs -- restrictions upon on our selection
207          * of physical engines any particular request may be submitted to.
208          * If we receive a submit-fence from a master engine, we will only
209          * use one of sibling_mask physical engines.
210          */
211         struct ve_bond {
212                 const struct intel_engine_cs *master;
213                 intel_engine_mask_t sibling_mask;
214         } *bonds;
215         unsigned int num_bonds;
216
217         /* And finally, which physical engines this virtual engine maps onto. */
218         unsigned int num_siblings;
219         struct intel_engine_cs *siblings[0];
220 };
221
222 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
223 {
224         GEM_BUG_ON(!intel_engine_is_virtual(engine));
225         return container_of(engine, struct virtual_engine, base);
226 }
227
228 static int __execlists_context_alloc(struct intel_context *ce,
229                                      struct intel_engine_cs *engine);
230
231 static void execlists_init_reg_state(u32 *reg_state,
232                                      const struct intel_context *ce,
233                                      const struct intel_engine_cs *engine,
234                                      const struct intel_ring *ring,
235                                      bool close);
236 static void
237 __execlists_update_reg_state(const struct intel_context *ce,
238                              const struct intel_engine_cs *engine,
239                              u32 head);
240
241 static void mark_eio(struct i915_request *rq)
242 {
243         if (i915_request_completed(rq))
244                 return;
245
246         GEM_BUG_ON(i915_request_signaled(rq));
247
248         i915_request_set_error_once(rq, -EIO);
249         i915_request_mark_complete(rq);
250 }
251
252 static struct i915_request *
253 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
254 {
255         struct i915_request *active = rq;
256
257         rcu_read_lock();
258         list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
259                 if (i915_request_completed(rq))
260                         break;
261
262                 active = rq;
263         }
264         rcu_read_unlock();
265
266         return active;
267 }
268
269 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
270 {
271         return (i915_ggtt_offset(engine->status_page.vma) +
272                 I915_GEM_HWS_PREEMPT_ADDR);
273 }
274
275 static inline void
276 ring_set_paused(const struct intel_engine_cs *engine, int state)
277 {
278         /*
279          * We inspect HWS_PREEMPT with a semaphore inside
280          * engine->emit_fini_breadcrumb. If the dword is true,
281          * the ring is paused as the semaphore will busywait
282          * until the dword is false.
283          */
284         engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
285         if (state)
286                 wmb();
287 }
288
289 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
290 {
291         return rb_entry(rb, struct i915_priolist, node);
292 }
293
294 static inline int rq_prio(const struct i915_request *rq)
295 {
296         return rq->sched.attr.priority;
297 }
298
299 static int effective_prio(const struct i915_request *rq)
300 {
301         int prio = rq_prio(rq);
302
303         /*
304          * If this request is special and must not be interrupted at any
305          * cost, so be it. Note we are only checking the most recent request
306          * in the context and so may be masking an earlier vip request. It
307          * is hoped that under the conditions where nopreempt is used, this
308          * will not matter (i.e. all requests to that context will be
309          * nopreempt for as long as desired).
310          */
311         if (i915_request_has_nopreempt(rq))
312                 prio = I915_PRIORITY_UNPREEMPTABLE;
313
314         /*
315          * On unwinding the active request, we give it a priority bump
316          * if it has completed waiting on any semaphore. If we know that
317          * the request has already started, we can prevent an unwanted
318          * preempt-to-idle cycle by taking that into account now.
319          */
320         if (__i915_request_has_started(rq))
321                 prio |= I915_PRIORITY_NOSEMAPHORE;
322
323         /* Restrict mere WAIT boosts from triggering preemption */
324         BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
325         return prio | __NO_PREEMPTION;
326 }
327
328 static int queue_prio(const struct intel_engine_execlists *execlists)
329 {
330         struct i915_priolist *p;
331         struct rb_node *rb;
332
333         rb = rb_first_cached(&execlists->queue);
334         if (!rb)
335                 return INT_MIN;
336
337         /*
338          * As the priolist[] are inverted, with the highest priority in [0],
339          * we have to flip the index value to become priority.
340          */
341         p = to_priolist(rb);
342         return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
343 }
344
345 static inline bool need_preempt(const struct intel_engine_cs *engine,
346                                 const struct i915_request *rq,
347                                 struct rb_node *rb)
348 {
349         int last_prio;
350
351         if (!intel_engine_has_semaphores(engine))
352                 return false;
353
354         /*
355          * Check if the current priority hint merits a preemption attempt.
356          *
357          * We record the highest value priority we saw during rescheduling
358          * prior to this dequeue, therefore we know that if it is strictly
359          * less than the current tail of ESLP[0], we do not need to force
360          * a preempt-to-idle cycle.
361          *
362          * However, the priority hint is a mere hint that we may need to
363          * preempt. If that hint is stale or we may be trying to preempt
364          * ourselves, ignore the request.
365          *
366          * More naturally we would write
367          *      prio >= max(0, last);
368          * except that we wish to prevent triggering preemption at the same
369          * priority level: the task that is running should remain running
370          * to preserve FIFO ordering of dependencies.
371          */
372         last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
373         if (engine->execlists.queue_priority_hint <= last_prio)
374                 return false;
375
376         /*
377          * Check against the first request in ELSP[1], it will, thanks to the
378          * power of PI, be the highest priority of that context.
379          */
380         if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
381             rq_prio(list_next_entry(rq, sched.link)) > last_prio)
382                 return true;
383
384         if (rb) {
385                 struct virtual_engine *ve =
386                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
387                 bool preempt = false;
388
389                 if (engine == ve->siblings[0]) { /* only preempt one sibling */
390                         struct i915_request *next;
391
392                         rcu_read_lock();
393                         next = READ_ONCE(ve->request);
394                         if (next)
395                                 preempt = rq_prio(next) > last_prio;
396                         rcu_read_unlock();
397                 }
398
399                 if (preempt)
400                         return preempt;
401         }
402
403         /*
404          * If the inflight context did not trigger the preemption, then maybe
405          * it was the set of queued requests? Pick the highest priority in
406          * the queue (the first active priolist) and see if it deserves to be
407          * running instead of ELSP[0].
408          *
409          * The highest priority request in the queue can not be either
410          * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
411          * context, it's priority would not exceed ELSP[0] aka last_prio.
412          */
413         return queue_prio(&engine->execlists) > last_prio;
414 }
415
416 __maybe_unused static inline bool
417 assert_priority_queue(const struct i915_request *prev,
418                       const struct i915_request *next)
419 {
420         /*
421          * Without preemption, the prev may refer to the still active element
422          * which we refuse to let go.
423          *
424          * Even with preemption, there are times when we think it is better not
425          * to preempt and leave an ostensibly lower priority request in flight.
426          */
427         if (i915_request_is_active(prev))
428                 return true;
429
430         return rq_prio(prev) >= rq_prio(next);
431 }
432
433 /*
434  * The context descriptor encodes various attributes of a context,
435  * including its GTT address and some flags. Because it's fairly
436  * expensive to calculate, we'll just do it once and cache the result,
437  * which remains valid until the context is unpinned.
438  *
439  * This is what a descriptor looks like, from LSB to MSB::
440  *
441  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
442  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
443  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
444  *      bits 53-54:    mbz, reserved for use by hardware
445  *      bits 55-63:    group ID, currently unused and set to 0
446  *
447  * Starting from Gen11, the upper dword of the descriptor has a new format:
448  *
449  *      bits 32-36:    reserved
450  *      bits 37-47:    SW context ID
451  *      bits 48:53:    engine instance
452  *      bit 54:        mbz, reserved for use by hardware
453  *      bits 55-60:    SW counter
454  *      bits 61-63:    engine class
455  *
456  * engine info, SW context ID and SW counter need to form a unique number
457  * (Context ID) per lrc.
458  */
459 static u64
460 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
461 {
462         u64 desc;
463
464         desc = INTEL_LEGACY_32B_CONTEXT;
465         if (i915_vm_is_4lvl(ce->vm))
466                 desc = INTEL_LEGACY_64B_CONTEXT;
467         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
468
469         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
470         if (IS_GEN(engine->i915, 8))
471                 desc |= GEN8_CTX_L3LLC_COHERENT;
472
473         desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
474         /*
475          * The following 32bits are copied into the OA reports (dword 2).
476          * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
477          * anything below.
478          */
479         if (INTEL_GEN(engine->i915) >= 11) {
480                 desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
481                                                                 /* bits 48-53 */
482
483                 desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
484                                                                 /* bits 61-63 */
485         }
486
487         return desc;
488 }
489
490 static inline unsigned int dword_in_page(void *addr)
491 {
492         return offset_in_page(addr) / sizeof(u32);
493 }
494
495 static void set_offsets(u32 *regs,
496                         const u8 *data,
497                         const struct intel_engine_cs *engine,
498                         bool clear)
499 #define NOP(x) (BIT(7) | (x))
500 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
501 #define POSTED BIT(0)
502 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
503 #define REG16(x) \
504         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
505         (((x) >> 2) & 0x7f)
506 #define END(x) 0, (x)
507 {
508         const u32 base = engine->mmio_base;
509
510         while (*data) {
511                 u8 count, flags;
512
513                 if (*data & BIT(7)) { /* skip */
514                         count = *data++ & ~BIT(7);
515                         if (clear)
516                                 memset32(regs, MI_NOOP, count);
517                         regs += count;
518                         continue;
519                 }
520
521                 count = *data & 0x3f;
522                 flags = *data >> 6;
523                 data++;
524
525                 *regs = MI_LOAD_REGISTER_IMM(count);
526                 if (flags & POSTED)
527                         *regs |= MI_LRI_FORCE_POSTED;
528                 if (INTEL_GEN(engine->i915) >= 11)
529                         *regs |= MI_LRI_CS_MMIO;
530                 regs++;
531
532                 GEM_BUG_ON(!count);
533                 do {
534                         u32 offset = 0;
535                         u8 v;
536
537                         do {
538                                 v = *data++;
539                                 offset <<= 7;
540                                 offset |= v & ~BIT(7);
541                         } while (v & BIT(7));
542
543                         regs[0] = base + (offset << 2);
544                         if (clear)
545                                 regs[1] = 0;
546                         regs += 2;
547                 } while (--count);
548         }
549
550         if (clear) {
551                 u8 count = *++data;
552
553                 /* Clear past the tail for HW access */
554                 GEM_BUG_ON(dword_in_page(regs) > count);
555                 memset32(regs, MI_NOOP, count - dword_in_page(regs));
556
557                 /* Close the batch; used mainly by live_lrc_layout() */
558                 *regs = MI_BATCH_BUFFER_END;
559                 if (INTEL_GEN(engine->i915) >= 10)
560                         *regs |= BIT(0);
561         }
562 }
563
564 static const u8 gen8_xcs_offsets[] = {
565         NOP(1),
566         LRI(11, 0),
567         REG16(0x244),
568         REG(0x034),
569         REG(0x030),
570         REG(0x038),
571         REG(0x03c),
572         REG(0x168),
573         REG(0x140),
574         REG(0x110),
575         REG(0x11c),
576         REG(0x114),
577         REG(0x118),
578
579         NOP(9),
580         LRI(9, 0),
581         REG16(0x3a8),
582         REG16(0x28c),
583         REG16(0x288),
584         REG16(0x284),
585         REG16(0x280),
586         REG16(0x27c),
587         REG16(0x278),
588         REG16(0x274),
589         REG16(0x270),
590
591         NOP(13),
592         LRI(2, 0),
593         REG16(0x200),
594         REG(0x028),
595
596         END(80)
597 };
598
599 static const u8 gen9_xcs_offsets[] = {
600         NOP(1),
601         LRI(14, POSTED),
602         REG16(0x244),
603         REG(0x034),
604         REG(0x030),
605         REG(0x038),
606         REG(0x03c),
607         REG(0x168),
608         REG(0x140),
609         REG(0x110),
610         REG(0x11c),
611         REG(0x114),
612         REG(0x118),
613         REG(0x1c0),
614         REG(0x1c4),
615         REG(0x1c8),
616
617         NOP(3),
618         LRI(9, POSTED),
619         REG16(0x3a8),
620         REG16(0x28c),
621         REG16(0x288),
622         REG16(0x284),
623         REG16(0x280),
624         REG16(0x27c),
625         REG16(0x278),
626         REG16(0x274),
627         REG16(0x270),
628
629         NOP(13),
630         LRI(1, POSTED),
631         REG16(0x200),
632
633         NOP(13),
634         LRI(44, POSTED),
635         REG(0x028),
636         REG(0x09c),
637         REG(0x0c0),
638         REG(0x178),
639         REG(0x17c),
640         REG16(0x358),
641         REG(0x170),
642         REG(0x150),
643         REG(0x154),
644         REG(0x158),
645         REG16(0x41c),
646         REG16(0x600),
647         REG16(0x604),
648         REG16(0x608),
649         REG16(0x60c),
650         REG16(0x610),
651         REG16(0x614),
652         REG16(0x618),
653         REG16(0x61c),
654         REG16(0x620),
655         REG16(0x624),
656         REG16(0x628),
657         REG16(0x62c),
658         REG16(0x630),
659         REG16(0x634),
660         REG16(0x638),
661         REG16(0x63c),
662         REG16(0x640),
663         REG16(0x644),
664         REG16(0x648),
665         REG16(0x64c),
666         REG16(0x650),
667         REG16(0x654),
668         REG16(0x658),
669         REG16(0x65c),
670         REG16(0x660),
671         REG16(0x664),
672         REG16(0x668),
673         REG16(0x66c),
674         REG16(0x670),
675         REG16(0x674),
676         REG16(0x678),
677         REG16(0x67c),
678         REG(0x068),
679
680         END(176)
681 };
682
683 static const u8 gen12_xcs_offsets[] = {
684         NOP(1),
685         LRI(13, POSTED),
686         REG16(0x244),
687         REG(0x034),
688         REG(0x030),
689         REG(0x038),
690         REG(0x03c),
691         REG(0x168),
692         REG(0x140),
693         REG(0x110),
694         REG(0x1c0),
695         REG(0x1c4),
696         REG(0x1c8),
697         REG(0x180),
698         REG16(0x2b4),
699
700         NOP(5),
701         LRI(9, POSTED),
702         REG16(0x3a8),
703         REG16(0x28c),
704         REG16(0x288),
705         REG16(0x284),
706         REG16(0x280),
707         REG16(0x27c),
708         REG16(0x278),
709         REG16(0x274),
710         REG16(0x270),
711
712         END(80)
713 };
714
715 static const u8 gen8_rcs_offsets[] = {
716         NOP(1),
717         LRI(14, POSTED),
718         REG16(0x244),
719         REG(0x034),
720         REG(0x030),
721         REG(0x038),
722         REG(0x03c),
723         REG(0x168),
724         REG(0x140),
725         REG(0x110),
726         REG(0x11c),
727         REG(0x114),
728         REG(0x118),
729         REG(0x1c0),
730         REG(0x1c4),
731         REG(0x1c8),
732
733         NOP(3),
734         LRI(9, POSTED),
735         REG16(0x3a8),
736         REG16(0x28c),
737         REG16(0x288),
738         REG16(0x284),
739         REG16(0x280),
740         REG16(0x27c),
741         REG16(0x278),
742         REG16(0x274),
743         REG16(0x270),
744
745         NOP(13),
746         LRI(1, 0),
747         REG(0x0c8),
748
749         END(80)
750 };
751
752 static const u8 gen9_rcs_offsets[] = {
753         NOP(1),
754         LRI(14, POSTED),
755         REG16(0x244),
756         REG(0x34),
757         REG(0x30),
758         REG(0x38),
759         REG(0x3c),
760         REG(0x168),
761         REG(0x140),
762         REG(0x110),
763         REG(0x11c),
764         REG(0x114),
765         REG(0x118),
766         REG(0x1c0),
767         REG(0x1c4),
768         REG(0x1c8),
769
770         NOP(3),
771         LRI(9, POSTED),
772         REG16(0x3a8),
773         REG16(0x28c),
774         REG16(0x288),
775         REG16(0x284),
776         REG16(0x280),
777         REG16(0x27c),
778         REG16(0x278),
779         REG16(0x274),
780         REG16(0x270),
781
782         NOP(13),
783         LRI(1, 0),
784         REG(0xc8),
785
786         NOP(13),
787         LRI(44, POSTED),
788         REG(0x28),
789         REG(0x9c),
790         REG(0xc0),
791         REG(0x178),
792         REG(0x17c),
793         REG16(0x358),
794         REG(0x170),
795         REG(0x150),
796         REG(0x154),
797         REG(0x158),
798         REG16(0x41c),
799         REG16(0x600),
800         REG16(0x604),
801         REG16(0x608),
802         REG16(0x60c),
803         REG16(0x610),
804         REG16(0x614),
805         REG16(0x618),
806         REG16(0x61c),
807         REG16(0x620),
808         REG16(0x624),
809         REG16(0x628),
810         REG16(0x62c),
811         REG16(0x630),
812         REG16(0x634),
813         REG16(0x638),
814         REG16(0x63c),
815         REG16(0x640),
816         REG16(0x644),
817         REG16(0x648),
818         REG16(0x64c),
819         REG16(0x650),
820         REG16(0x654),
821         REG16(0x658),
822         REG16(0x65c),
823         REG16(0x660),
824         REG16(0x664),
825         REG16(0x668),
826         REG16(0x66c),
827         REG16(0x670),
828         REG16(0x674),
829         REG16(0x678),
830         REG16(0x67c),
831         REG(0x68),
832
833         END(176)
834 };
835
836 static const u8 gen11_rcs_offsets[] = {
837         NOP(1),
838         LRI(15, POSTED),
839         REG16(0x244),
840         REG(0x034),
841         REG(0x030),
842         REG(0x038),
843         REG(0x03c),
844         REG(0x168),
845         REG(0x140),
846         REG(0x110),
847         REG(0x11c),
848         REG(0x114),
849         REG(0x118),
850         REG(0x1c0),
851         REG(0x1c4),
852         REG(0x1c8),
853         REG(0x180),
854
855         NOP(1),
856         LRI(9, POSTED),
857         REG16(0x3a8),
858         REG16(0x28c),
859         REG16(0x288),
860         REG16(0x284),
861         REG16(0x280),
862         REG16(0x27c),
863         REG16(0x278),
864         REG16(0x274),
865         REG16(0x270),
866
867         LRI(1, POSTED),
868         REG(0x1b0),
869
870         NOP(10),
871         LRI(1, 0),
872         REG(0x0c8),
873
874         END(80)
875 };
876
877 static const u8 gen12_rcs_offsets[] = {
878         NOP(1),
879         LRI(13, POSTED),
880         REG16(0x244),
881         REG(0x034),
882         REG(0x030),
883         REG(0x038),
884         REG(0x03c),
885         REG(0x168),
886         REG(0x140),
887         REG(0x110),
888         REG(0x1c0),
889         REG(0x1c4),
890         REG(0x1c8),
891         REG(0x180),
892         REG16(0x2b4),
893
894         NOP(5),
895         LRI(9, POSTED),
896         REG16(0x3a8),
897         REG16(0x28c),
898         REG16(0x288),
899         REG16(0x284),
900         REG16(0x280),
901         REG16(0x27c),
902         REG16(0x278),
903         REG16(0x274),
904         REG16(0x270),
905
906         LRI(3, POSTED),
907         REG(0x1b0),
908         REG16(0x5a8),
909         REG16(0x5ac),
910
911         NOP(6),
912         LRI(1, 0),
913         REG(0x0c8),
914
915         END(80)
916 };
917
918 #undef END
919 #undef REG16
920 #undef REG
921 #undef LRI
922 #undef NOP
923
924 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
925 {
926         /*
927          * The gen12+ lists only have the registers we program in the basic
928          * default state. We rely on the context image using relative
929          * addressing to automatic fixup the register state between the
930          * physical engines for virtual engine.
931          */
932         GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
933                    !intel_engine_has_relative_mmio(engine));
934
935         if (engine->class == RENDER_CLASS) {
936                 if (INTEL_GEN(engine->i915) >= 12)
937                         return gen12_rcs_offsets;
938                 else if (INTEL_GEN(engine->i915) >= 11)
939                         return gen11_rcs_offsets;
940                 else if (INTEL_GEN(engine->i915) >= 9)
941                         return gen9_rcs_offsets;
942                 else
943                         return gen8_rcs_offsets;
944         } else {
945                 if (INTEL_GEN(engine->i915) >= 12)
946                         return gen12_xcs_offsets;
947                 else if (INTEL_GEN(engine->i915) >= 9)
948                         return gen9_xcs_offsets;
949                 else
950                         return gen8_xcs_offsets;
951         }
952 }
953
954 static struct i915_request *
955 __unwind_incomplete_requests(struct intel_engine_cs *engine)
956 {
957         struct i915_request *rq, *rn, *active = NULL;
958         struct list_head *uninitialized_var(pl);
959         int prio = I915_PRIORITY_INVALID;
960
961         lockdep_assert_held(&engine->active.lock);
962
963         list_for_each_entry_safe_reverse(rq, rn,
964                                          &engine->active.requests,
965                                          sched.link) {
966                 if (i915_request_completed(rq))
967                         continue; /* XXX */
968
969                 __i915_request_unsubmit(rq);
970
971                 /*
972                  * Push the request back into the queue for later resubmission.
973                  * If this request is not native to this physical engine (i.e.
974                  * it came from a virtual source), push it back onto the virtual
975                  * engine so that it can be moved across onto another physical
976                  * engine as load dictates.
977                  */
978                 if (likely(rq->execution_mask == engine->mask)) {
979                         GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
980                         if (rq_prio(rq) != prio) {
981                                 prio = rq_prio(rq);
982                                 pl = i915_sched_lookup_priolist(engine, prio);
983                         }
984                         GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
985
986                         list_move(&rq->sched.link, pl);
987                         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
988
989                         active = rq;
990                 } else {
991                         struct intel_engine_cs *owner = rq->context->engine;
992
993                         /*
994                          * Decouple the virtual breadcrumb before moving it
995                          * back to the virtual engine -- we don't want the
996                          * request to complete in the background and try
997                          * and cancel the breadcrumb on the virtual engine
998                          * (instead of the old engine where it is linked)!
999                          */
1000                         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1001                                      &rq->fence.flags)) {
1002                                 spin_lock_nested(&rq->lock,
1003                                                  SINGLE_DEPTH_NESTING);
1004                                 i915_request_cancel_breadcrumb(rq);
1005                                 spin_unlock(&rq->lock);
1006                         }
1007                         rq->engine = owner;
1008                         owner->submit_request(rq);
1009                         active = NULL;
1010                 }
1011         }
1012
1013         return active;
1014 }
1015
1016 struct i915_request *
1017 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1018 {
1019         struct intel_engine_cs *engine =
1020                 container_of(execlists, typeof(*engine), execlists);
1021
1022         return __unwind_incomplete_requests(engine);
1023 }
1024
1025 static inline void
1026 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1027 {
1028         /*
1029          * Only used when GVT-g is enabled now. When GVT-g is disabled,
1030          * The compiler should eliminate this function as dead-code.
1031          */
1032         if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1033                 return;
1034
1035         atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1036                                    status, rq);
1037 }
1038
1039 static void intel_engine_context_in(struct intel_engine_cs *engine)
1040 {
1041         unsigned long flags;
1042
1043         if (READ_ONCE(engine->stats.enabled) == 0)
1044                 return;
1045
1046         write_seqlock_irqsave(&engine->stats.lock, flags);
1047
1048         if (engine->stats.enabled > 0) {
1049                 if (engine->stats.active++ == 0)
1050                         engine->stats.start = ktime_get();
1051                 GEM_BUG_ON(engine->stats.active == 0);
1052         }
1053
1054         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1055 }
1056
1057 static void intel_engine_context_out(struct intel_engine_cs *engine)
1058 {
1059         unsigned long flags;
1060
1061         if (READ_ONCE(engine->stats.enabled) == 0)
1062                 return;
1063
1064         write_seqlock_irqsave(&engine->stats.lock, flags);
1065
1066         if (engine->stats.enabled > 0) {
1067                 ktime_t last;
1068
1069                 if (engine->stats.active && --engine->stats.active == 0) {
1070                         /*
1071                          * Decrement the active context count and in case GPU
1072                          * is now idle add up to the running total.
1073                          */
1074                         last = ktime_sub(ktime_get(), engine->stats.start);
1075
1076                         engine->stats.total = ktime_add(engine->stats.total,
1077                                                         last);
1078                 } else if (engine->stats.active == 0) {
1079                         /*
1080                          * After turning on engine stats, context out might be
1081                          * the first event in which case we account from the
1082                          * time stats gathering was turned on.
1083                          */
1084                         last = ktime_sub(ktime_get(), engine->stats.enabled_at);
1085
1086                         engine->stats.total = ktime_add(engine->stats.total,
1087                                                         last);
1088                 }
1089         }
1090
1091         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1092 }
1093
1094 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
1095 {
1096         if (INTEL_GEN(engine->i915) >= 12)
1097                 return 0x60;
1098         else if (INTEL_GEN(engine->i915) >= 9)
1099                 return 0x54;
1100         else if (engine->class == RENDER_CLASS)
1101                 return 0x58;
1102         else
1103                 return -1;
1104 }
1105
1106 static void
1107 execlists_check_context(const struct intel_context *ce,
1108                         const struct intel_engine_cs *engine)
1109 {
1110         const struct intel_ring *ring = ce->ring;
1111         u32 *regs = ce->lrc_reg_state;
1112         bool valid = true;
1113         int x;
1114
1115         if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1116                 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1117                        engine->name,
1118                        regs[CTX_RING_START],
1119                        i915_ggtt_offset(ring->vma));
1120                 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1121                 valid = false;
1122         }
1123
1124         if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1125             (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1126                 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1127                        engine->name,
1128                        regs[CTX_RING_CTL],
1129                        (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1130                 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1131                 valid = false;
1132         }
1133
1134         x = lrc_ring_mi_mode(engine);
1135         if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1136                 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1137                        engine->name, regs[x + 1]);
1138                 regs[x + 1] &= ~STOP_RING;
1139                 regs[x + 1] |= STOP_RING << 16;
1140                 valid = false;
1141         }
1142
1143         WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1144 }
1145
1146 static void restore_default_state(struct intel_context *ce,
1147                                   struct intel_engine_cs *engine)
1148 {
1149         u32 *regs = ce->lrc_reg_state;
1150
1151         if (engine->pinned_default_state)
1152                 memcpy(regs, /* skip restoring the vanilla PPHWSP */
1153                        engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
1154                        engine->context_size - PAGE_SIZE);
1155
1156         execlists_init_reg_state(regs, ce, engine, ce->ring, false);
1157 }
1158
1159 static void reset_active(struct i915_request *rq,
1160                          struct intel_engine_cs *engine)
1161 {
1162         struct intel_context * const ce = rq->context;
1163         u32 head;
1164
1165         /*
1166          * The executing context has been cancelled. We want to prevent
1167          * further execution along this context and propagate the error on
1168          * to anything depending on its results.
1169          *
1170          * In __i915_request_submit(), we apply the -EIO and remove the
1171          * requests' payloads for any banned requests. But first, we must
1172          * rewind the context back to the start of the incomplete request so
1173          * that we do not jump back into the middle of the batch.
1174          *
1175          * We preserve the breadcrumbs and semaphores of the incomplete
1176          * requests so that inter-timeline dependencies (i.e other timelines)
1177          * remain correctly ordered. And we defer to __i915_request_submit()
1178          * so that all asynchronous waits are correctly handled.
1179          */
1180         ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1181                      rq->fence.context, rq->fence.seqno);
1182
1183         /* On resubmission of the active request, payload will be scrubbed */
1184         if (i915_request_completed(rq))
1185                 head = rq->tail;
1186         else
1187                 head = active_request(ce->timeline, rq)->head;
1188         head = intel_ring_wrap(ce->ring, head);
1189
1190         /* Scrub the context image to prevent replaying the previous batch */
1191         restore_default_state(ce, engine);
1192         __execlists_update_reg_state(ce, engine, head);
1193
1194         /* We've switched away, so this should be a no-op, but intent matters */
1195         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
1196 }
1197
1198 static u32 intel_context_get_runtime(const struct intel_context *ce)
1199 {
1200         /*
1201          * We can use either ppHWSP[16] which is recorded before the context
1202          * switch (and so excludes the cost of context switches) or use the
1203          * value from the context image itself, which is saved/restored earlier
1204          * and so includes the cost of the save.
1205          */
1206         return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
1207 }
1208
1209 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1210 {
1211 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1212         ce->runtime.num_underflow += dt < 0;
1213         ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1214 #endif
1215 }
1216
1217 static void intel_context_update_runtime(struct intel_context *ce)
1218 {
1219         u32 old;
1220         s32 dt;
1221
1222         if (intel_context_is_barrier(ce))
1223                 return;
1224
1225         old = ce->runtime.last;
1226         ce->runtime.last = intel_context_get_runtime(ce);
1227         dt = ce->runtime.last - old;
1228
1229         if (unlikely(dt <= 0)) {
1230                 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1231                          old, ce->runtime.last, dt);
1232                 st_update_runtime_underflow(ce, dt);
1233                 return;
1234         }
1235
1236         ewma_runtime_add(&ce->runtime.avg, dt);
1237         ce->runtime.total += dt;
1238 }
1239
1240 static inline struct intel_engine_cs *
1241 __execlists_schedule_in(struct i915_request *rq)
1242 {
1243         struct intel_engine_cs * const engine = rq->engine;
1244         struct intel_context * const ce = rq->context;
1245
1246         intel_context_get(ce);
1247
1248         if (unlikely(intel_context_is_banned(ce)))
1249                 reset_active(rq, engine);
1250
1251         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1252                 execlists_check_context(ce, engine);
1253
1254         ce->lrc_desc &= ~GENMASK_ULL(47, 37);
1255         if (ce->tag) {
1256                 /* Use a fixed tag for OA and friends */
1257                 ce->lrc_desc |= (u64)ce->tag << 32;
1258         } else {
1259                 /* We don't need a strict matching tag, just different values */
1260                 ce->lrc_desc |=
1261                         (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
1262                         GEN11_SW_CTX_ID_SHIFT;
1263                 BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
1264         }
1265
1266         __intel_gt_pm_get(engine->gt);
1267         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1268         intel_engine_context_in(engine);
1269
1270         return engine;
1271 }
1272
1273 static inline struct i915_request *
1274 execlists_schedule_in(struct i915_request *rq, int idx)
1275 {
1276         struct intel_context * const ce = rq->context;
1277         struct intel_engine_cs *old;
1278
1279         GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1280         trace_i915_request_in(rq, idx);
1281
1282         old = READ_ONCE(ce->inflight);
1283         do {
1284                 if (!old) {
1285                         WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1286                         break;
1287                 }
1288         } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1289
1290         GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1291         return i915_request_get(rq);
1292 }
1293
1294 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1295 {
1296         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1297         struct i915_request *next = READ_ONCE(ve->request);
1298
1299         if (next && next->execution_mask & ~rq->execution_mask)
1300                 tasklet_schedule(&ve->base.execlists.tasklet);
1301 }
1302
1303 static inline void
1304 __execlists_schedule_out(struct i915_request *rq,
1305                          struct intel_engine_cs * const engine)
1306 {
1307         struct intel_context * const ce = rq->context;
1308
1309         /*
1310          * NB process_csb() is not under the engine->active.lock and hence
1311          * schedule_out can race with schedule_in meaning that we should
1312          * refrain from doing non-trivial work here.
1313          */
1314
1315         /*
1316          * If we have just completed this context, the engine may now be
1317          * idle and we want to re-enter powersaving.
1318          */
1319         if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
1320             i915_request_completed(rq))
1321                 intel_engine_add_retire(engine, ce->timeline);
1322
1323         intel_context_update_runtime(ce);
1324         intel_engine_context_out(engine);
1325         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1326         intel_gt_pm_put_async(engine->gt);
1327
1328         /*
1329          * If this is part of a virtual engine, its next request may
1330          * have been blocked waiting for access to the active context.
1331          * We have to kick all the siblings again in case we need to
1332          * switch (e.g. the next request is not runnable on this
1333          * engine). Hopefully, we will already have submitted the next
1334          * request before the tasklet runs and do not need to rebuild
1335          * each virtual tree and kick everyone again.
1336          */
1337         if (ce->engine != engine)
1338                 kick_siblings(rq, ce);
1339
1340         intel_context_put(ce);
1341 }
1342
1343 static inline void
1344 execlists_schedule_out(struct i915_request *rq)
1345 {
1346         struct intel_context * const ce = rq->context;
1347         struct intel_engine_cs *cur, *old;
1348
1349         trace_i915_request_out(rq);
1350
1351         old = READ_ONCE(ce->inflight);
1352         do
1353                 cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1354         while (!try_cmpxchg(&ce->inflight, &old, cur));
1355         if (!cur)
1356                 __execlists_schedule_out(rq, old);
1357
1358         i915_request_put(rq);
1359 }
1360
1361 static u64 execlists_update_context(struct i915_request *rq)
1362 {
1363         struct intel_context *ce = rq->context;
1364         u64 desc = ce->lrc_desc;
1365         u32 tail, prev;
1366
1367         /*
1368          * WaIdleLiteRestore:bdw,skl
1369          *
1370          * We should never submit the context with the same RING_TAIL twice
1371          * just in case we submit an empty ring, which confuses the HW.
1372          *
1373          * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1374          * the normal request to be able to always advance the RING_TAIL on
1375          * subsequent resubmissions (for lite restore). Should that fail us,
1376          * and we try and submit the same tail again, force the context
1377          * reload.
1378          *
1379          * If we need to return to a preempted context, we need to skip the
1380          * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1381          * HW has a tendency to ignore us rewinding the TAIL to the end of
1382          * an earlier request.
1383          */
1384         tail = intel_ring_set_tail(rq->ring, rq->tail);
1385         prev = ce->lrc_reg_state[CTX_RING_TAIL];
1386         if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1387                 desc |= CTX_DESC_FORCE_RESTORE;
1388         ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1389         rq->tail = rq->wa_tail;
1390
1391         /*
1392          * Make sure the context image is complete before we submit it to HW.
1393          *
1394          * Ostensibly, writes (including the WCB) should be flushed prior to
1395          * an uncached write such as our mmio register access, the empirical
1396          * evidence (esp. on Braswell) suggests that the WC write into memory
1397          * may not be visible to the HW prior to the completion of the UC
1398          * register write and that we may begin execution from the context
1399          * before its image is complete leading to invalid PD chasing.
1400          */
1401         wmb();
1402
1403         ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
1404         return desc;
1405 }
1406
1407 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1408 {
1409         if (execlists->ctrl_reg) {
1410                 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1411                 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1412         } else {
1413                 writel(upper_32_bits(desc), execlists->submit_reg);
1414                 writel(lower_32_bits(desc), execlists->submit_reg);
1415         }
1416 }
1417
1418 static __maybe_unused void
1419 trace_ports(const struct intel_engine_execlists *execlists,
1420             const char *msg,
1421             struct i915_request * const *ports)
1422 {
1423         const struct intel_engine_cs *engine =
1424                 container_of(execlists, typeof(*engine), execlists);
1425
1426         if (!ports[0])
1427                 return;
1428
1429         ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
1430                      ports[0]->fence.context,
1431                      ports[0]->fence.seqno,
1432                      i915_request_completed(ports[0]) ? "!" :
1433                      i915_request_started(ports[0]) ? "*" :
1434                      "",
1435                      ports[1] ? ports[1]->fence.context : 0,
1436                      ports[1] ? ports[1]->fence.seqno : 0);
1437 }
1438
1439 static inline bool
1440 reset_in_progress(const struct intel_engine_execlists *execlists)
1441 {
1442         return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1443 }
1444
1445 static __maybe_unused bool
1446 assert_pending_valid(const struct intel_engine_execlists *execlists,
1447                      const char *msg)
1448 {
1449         struct i915_request * const *port, *rq;
1450         struct intel_context *ce = NULL;
1451         bool sentinel = false;
1452
1453         trace_ports(execlists, msg, execlists->pending);
1454
1455         /* We may be messing around with the lists during reset, lalala */
1456         if (reset_in_progress(execlists))
1457                 return true;
1458
1459         if (!execlists->pending[0]) {
1460                 GEM_TRACE_ERR("Nothing pending for promotion!\n");
1461                 return false;
1462         }
1463
1464         if (execlists->pending[execlists_num_ports(execlists)]) {
1465                 GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
1466                               execlists_num_ports(execlists));
1467                 return false;
1468         }
1469
1470         for (port = execlists->pending; (rq = *port); port++) {
1471                 unsigned long flags;
1472                 bool ok = true;
1473
1474                 GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1475                 GEM_BUG_ON(!i915_request_is_active(rq));
1476
1477                 if (ce == rq->context) {
1478                         GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
1479                                       ce->timeline->fence_context,
1480                                       port - execlists->pending);
1481                         return false;
1482                 }
1483                 ce = rq->context;
1484
1485                 /*
1486                  * Sentinels are supposed to be lonely so they flush the
1487                  * current exection off the HW. Check that they are the
1488                  * only request in the pending submission.
1489                  */
1490                 if (sentinel) {
1491                         GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n",
1492                                       ce->timeline->fence_context,
1493                                       port - execlists->pending);
1494                         return false;
1495                 }
1496
1497                 sentinel = i915_request_has_sentinel(rq);
1498                 if (sentinel && port != execlists->pending) {
1499                         GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n",
1500                                       ce->timeline->fence_context,
1501                                       port - execlists->pending);
1502                         return false;
1503                 }
1504
1505                 /* Hold tightly onto the lock to prevent concurrent retires! */
1506                 if (!spin_trylock_irqsave(&rq->lock, flags))
1507                         continue;
1508
1509                 if (i915_request_completed(rq))
1510                         goto unlock;
1511
1512                 if (i915_active_is_idle(&ce->active) &&
1513                     !intel_context_is_barrier(ce)) {
1514                         GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
1515                                       ce->timeline->fence_context,
1516                                       port - execlists->pending);
1517                         ok = false;
1518                         goto unlock;
1519                 }
1520
1521                 if (!i915_vma_is_pinned(ce->state)) {
1522                         GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
1523                                       ce->timeline->fence_context,
1524                                       port - execlists->pending);
1525                         ok = false;
1526                         goto unlock;
1527                 }
1528
1529                 if (!i915_vma_is_pinned(ce->ring->vma)) {
1530                         GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
1531                                       ce->timeline->fence_context,
1532                                       port - execlists->pending);
1533                         ok = false;
1534                         goto unlock;
1535                 }
1536
1537 unlock:
1538                 spin_unlock_irqrestore(&rq->lock, flags);
1539                 if (!ok)
1540                         return false;
1541         }
1542
1543         return ce;
1544 }
1545
1546 static void execlists_submit_ports(struct intel_engine_cs *engine)
1547 {
1548         struct intel_engine_execlists *execlists = &engine->execlists;
1549         unsigned int n;
1550
1551         GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1552
1553         /*
1554          * We can skip acquiring intel_runtime_pm_get() here as it was taken
1555          * on our behalf by the request (see i915_gem_mark_busy()) and it will
1556          * not be relinquished until the device is idle (see
1557          * i915_gem_idle_work_handler()). As a precaution, we make sure
1558          * that all ELSP are drained, i.e. we have processed the CSB,
1559          * before allowing ourselves to idle and calling intel_runtime_pm_put().
1560          */
1561         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1562
1563         /*
1564          * ELSQ note: the submit queue is not cleared after being submitted
1565          * to the HW so we need to make sure we always clean it up. This is
1566          * currently ensured by the fact that we always write the same number
1567          * of elsq entries, keep this in mind before changing the loop below.
1568          */
1569         for (n = execlists_num_ports(execlists); n--; ) {
1570                 struct i915_request *rq = execlists->pending[n];
1571
1572                 write_desc(execlists,
1573                            rq ? execlists_update_context(rq) : 0,
1574                            n);
1575         }
1576
1577         /* we need to manually load the submit queue */
1578         if (execlists->ctrl_reg)
1579                 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1580 }
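/*
 * Illustrative sketch (not part of the driver): on a two-port engine the
 * loop above always issues exactly two writes, e.g.
 *
 *	write_desc(execlists, execlists_update_context(pending[1]), 1);
 *	write_desc(execlists, execlists_update_context(pending[0]), 0);
 *
 * with an empty pending[1] written as a zero descriptor, so any stale ELSQ
 * entry from a previous submission is always overwritten before
 * EL_CTRL_LOAD is poked.
 */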
1581
1582 static bool ctx_single_port_submission(const struct intel_context *ce)
1583 {
1584         return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1585                 intel_context_force_single_submission(ce));
1586 }
1587
1588 static bool can_merge_ctx(const struct intel_context *prev,
1589                           const struct intel_context *next)
1590 {
1591         if (prev != next)
1592                 return false;
1593
1594         if (ctx_single_port_submission(prev))
1595                 return false;
1596
1597         return true;
1598 }
1599
1600 static unsigned long i915_request_flags(const struct i915_request *rq)
1601 {
1602         return READ_ONCE(rq->fence.flags);
1603 }
1604
1605 static bool can_merge_rq(const struct i915_request *prev,
1606                          const struct i915_request *next)
1607 {
1608         GEM_BUG_ON(prev == next);
1609         GEM_BUG_ON(!assert_priority_queue(prev, next));
1610
1611         /*
1612          * We do not submit known completed requests. Therefore if the next
1613          * request is already completed, we can pretend to merge it in
1614          * with the previous context (and we will skip updating the ELSP
1615          * and tracking). Thus hopefully keeping the ELSP full with active
1616          * contexts, despite the best efforts of preempt-to-busy to confuse
1617          * us.
1618          */
1619         if (i915_request_completed(next))
1620                 return true;
1621
1622         if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
1623                      (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1624                       BIT(I915_FENCE_FLAG_SENTINEL))))
1625                 return false;
1626
1627         if (!can_merge_ctx(prev->context, next->context))
1628                 return false;
1629
1630         GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
1631         return true;
1632 }
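/*
 * Illustrative example (hypothetical requests rq_A1, rq_A2 and rq_B1 on
 * contexts A and B):
 *
 *	can_merge_rq(rq_A1, rq_A2) -> true: same context and no
 *	NOPREEMPT/SENTINEL mismatch, so both run from one ELSP port with a
 *	single RING_TAIL update;
 *
 *	can_merge_rq(rq_A1, rq_B1) -> false: different contexts must occupy
 *	different ports (or wait for the next dequeue).
 */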
1633
1634 static void virtual_update_register_offsets(u32 *regs,
1635                                             struct intel_engine_cs *engine)
1636 {
1637         set_offsets(regs, reg_offsets(engine), engine, false);
1638 }
1639
1640 static bool virtual_matches(const struct virtual_engine *ve,
1641                             const struct i915_request *rq,
1642                             const struct intel_engine_cs *engine)
1643 {
1644         const struct intel_engine_cs *inflight;
1645
1646         if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1647                 return false;
1648
1649         /*
1650          * We track when the HW has completed saving the context image
1651          * (i.e. when we have seen the final CS event switching out of
1652          * the context) and must not overwrite the context image before
1653          * then. This restricts us to only using the active engine
1654          * while the previous virtualized request is inflight (so
1655          * we reuse the register offsets). This is a very small
1656          * hysteresis on the greedy selection algorithm.
1657          */
1658         inflight = intel_context_inflight(&ve->context);
1659         if (inflight && inflight != engine)
1660                 return false;
1661
1662         return true;
1663 }
1664
1665 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
1666                                      struct intel_engine_cs *engine)
1667 {
1668         struct intel_engine_cs *old = ve->siblings[0];
1669
1670         /* All unattached (rq->engine == old) must already be completed */
1671
1672         spin_lock(&old->breadcrumbs.irq_lock);
1673         if (!list_empty(&ve->context.signal_link)) {
1674                 list_move_tail(&ve->context.signal_link,
1675                                &engine->breadcrumbs.signalers);
1676                 intel_engine_signal_breadcrumbs(engine);
1677         }
1678         spin_unlock(&old->breadcrumbs.irq_lock);
1679 }
1680
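/*
 * Find the oldest request that is still executing: skip over entries at the
 * head of the active[] array that have already completed (preempt-to-busy
 * may leave them visible there for a while) and return the first one that
 * has not, or NULL if everything in flight has finished.
 */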
1681 static struct i915_request *
1682 last_active(const struct intel_engine_execlists *execlists)
1683 {
1684         struct i915_request * const *last = READ_ONCE(execlists->active);
1685
1686         while (*last && i915_request_completed(*last))
1687                 last++;
1688
1689         return *last;
1690 }
1691
1692 #define for_each_waiter(p__, rq__) \
1693         list_for_each_entry_lockless(p__, \
1694                                      &(rq__)->sched.waiters_list, \
1695                                      wait_link)
1696
1697 #define for_each_signaler(p__, rq__) \
1698         list_for_each_entry_rcu(p__, \
1699                                 &(rq__)->sched.signalers_list, \
1700                                 signal_link)
1701
1702 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1703 {
1704         LIST_HEAD(list);
1705
1706         /*
1707          * We want to move the interrupted request to the back of
1708          * the round-robin list (i.e. its priority level), but
1709          * in doing so, we must also move all requests that were in
1710          * flight waiting for the interrupted request, so that they
1711          * run after it again.
1712          */
1713         do {
1714                 struct i915_dependency *p;
1715
1716                 GEM_BUG_ON(i915_request_is_active(rq));
1717                 list_move_tail(&rq->sched.link, pl);
1718
1719                 for_each_waiter(p, rq) {
1720                         struct i915_request *w =
1721                                 container_of(p->waiter, typeof(*w), sched);
1722
1723                         /* Leave semaphores spinning on the other engines */
1724                         if (w->engine != rq->engine)
1725                                 continue;
1726
1727                         /* No waiter should start before its signaler */
1728                         GEM_BUG_ON(i915_request_started(w) &&
1729                                    !i915_request_completed(rq));
1730
1731                         GEM_BUG_ON(i915_request_is_active(w));
1732                         if (!i915_request_is_ready(w))
1733                                 continue;
1734
1735                         if (rq_prio(w) < rq_prio(rq))
1736                                 continue;
1737
1738                         GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1739                         list_move_tail(&w->sched.link, &list);
1740                 }
1741
1742                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1743         } while (rq);
1744 }
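/*
 * Illustrative example (hypothetical requests): if request A is interrupted
 * by a timeslice expiry and request B, at the same priority on the same
 * engine, was submitted waiting on A, then defer_request(A, pl) first moves
 * A to the tail of its priority list and then walks A's waiters, moving B
 * behind it as well so that B cannot be resubmitted ahead of its signaler.
 */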
1745
1746 static void defer_active(struct intel_engine_cs *engine)
1747 {
1748         struct i915_request *rq;
1749
1750         rq = __unwind_incomplete_requests(engine);
1751         if (!rq)
1752                 return;
1753
1754         defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1755 }
1756
1757 static bool
1758 need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
1759 {
1760         int hint;
1761
1762         if (!intel_engine_has_timeslices(engine))
1763                 return false;
1764
1765         hint = engine->execlists.queue_priority_hint;
1766         if (!list_is_last(&rq->sched.link, &engine->active.requests))
1767                 hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1768
1769         return hint >= effective_prio(rq);
1770 }
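/*
 * Illustrative example (hypothetical priorities): with the current request
 * running at effective priority 0 and another ready request queued at
 * priority 0 for this engine, the hint >= effective_prio(rq) test above
 * holds; once the engine's timeslice timer expires, execlists_dequeue()
 * rotates the active context to the back of its priority level (see
 * defer_active()) so that its equal-priority neighbour gets a turn.
 */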
1771
1772 static int
1773 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1774 {
1775         if (list_is_last(&rq->sched.link, &engine->active.requests))
1776                 return INT_MIN;
1777
1778         return rq_prio(list_next_entry(rq, sched.link));
1779 }
1780
1781 static inline unsigned long
1782 timeslice(const struct intel_engine_cs *engine)
1783 {
1784         return READ_ONCE(engine->props.timeslice_duration_ms);
1785 }
1786
1787 static unsigned long
1788 active_timeslice(const struct intel_engine_cs *engine)
1789 {
1790         const struct intel_engine_execlists *execlists = &engine->execlists;
1791         const struct i915_request *rq = *execlists->active;
1792
1793         if (!rq || i915_request_completed(rq))
1794                 return 0;
1795
1796         if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
1797                 return 0;
1798
1799         return timeslice(engine);
1800 }
1801
1802 static void set_timeslice(struct intel_engine_cs *engine)
1803 {
1804         if (!intel_engine_has_timeslices(engine))
1805                 return;
1806
1807         set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
1808 }
1809
1810 static void start_timeslice(struct intel_engine_cs *engine)
1811 {
1812         struct intel_engine_execlists *execlists = &engine->execlists;
1813         int prio = queue_prio(execlists);
1814
1815         WRITE_ONCE(execlists->switch_priority_hint, prio);
1816         if (prio == INT_MIN)
1817                 return;
1818
1819         if (timer_pending(&execlists->timer))
1820                 return;
1821
1822         set_timer_ms(&execlists->timer, timeslice(engine));
1823 }
1824
1825 static void record_preemption(struct intel_engine_execlists *execlists)
1826 {
1827         (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
1828 }
1829
1830 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
1831 {
1832         struct i915_request *rq;
1833
1834         rq = last_active(&engine->execlists);
1835         if (!rq)
1836                 return 0;
1837
1838         /* Force a fast reset for terminated contexts (ignoring sysfs!) */
1839         if (unlikely(intel_context_is_banned(rq->context)))
1840                 return 1;
1841
1842         return READ_ONCE(engine->props.preempt_timeout_ms);
1843 }
1844
1845 static void set_preempt_timeout(struct intel_engine_cs *engine)
1846 {
1847         if (!intel_engine_has_preempt_reset(engine))
1848                 return;
1849
1850         set_timer_ms(&engine->execlists.preempt,
1851                      active_preempt_timeout(engine));
1852 }
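/*
 * The timer armed here is the safety net for a preemption issued by
 * execlists_dequeue(): if the CS has not acknowledged the new ELSP (i.e.
 * execlists.pending[0] is still set) by the time it fires, preempt_timeout()
 * reports the stall to the submission tasklet, which falls back to a full
 * engine reset via execlists_reset().
 */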
1853
1854 static inline void clear_ports(struct i915_request **ports, int count)
1855 {
1856         memset_p((void **)ports, NULL, count);
1857 }
1858
1859 static void execlists_dequeue(struct intel_engine_cs *engine)
1860 {
1861         struct intel_engine_execlists * const execlists = &engine->execlists;
1862         struct i915_request **port = execlists->pending;
1863         struct i915_request ** const last_port = port + execlists->port_mask;
1864         struct i915_request *last;
1865         struct rb_node *rb;
1866         bool submit = false;
1867
1868         /*
1869          * Hardware submission is through 2 ports. Conceptually each port
1870          * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
1871          * static for a context, and unique to each, so we only execute
1872          * requests belonging to a single context from each ring. RING_HEAD
1873          * is maintained by the CS in the context image, it marks the place
1874          * where it got up to last time, and through RING_TAIL we tell the CS
1875          * where we want to execute up to this time.
1876          *
1877          * In this list the requests are in order of execution. Consecutive
1878          * requests from the same context are adjacent in the ringbuffer. We
1879          * can combine these requests into a single RING_TAIL update:
1880          *
1881          *              RING_HEAD...req1...req2
1882          *                                    ^- RING_TAIL
1883          * since to execute req2 the CS must first execute req1.
1884          *
1885          * Our goal then is to point each port to the end of a consecutive
1886          * sequence of requests as the optimal (fewest wake-ups
1887          * and context switches) submission.
1888          */
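	/*
	 * Illustrative example (hypothetical requests): given a queue of
	 * A1, A2, B1 (two requests on context A followed by one on context
	 * B), the dequeue below places context A in ELSP[0] with RING_TAIL
	 * pointing past A2, and B1 in ELSP[1]; a later A3 cannot be merged
	 * behind B1 and must wait for the next dequeue.
	 */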
1889
1890         for (rb = rb_first_cached(&execlists->virtual); rb; ) {
1891                 struct virtual_engine *ve =
1892                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1893                 struct i915_request *rq = READ_ONCE(ve->request);
1894
1895                 if (!rq) { /* lazily cleanup after another engine handled rq */
1896                         rb_erase_cached(rb, &execlists->virtual);
1897                         RB_CLEAR_NODE(rb);
1898                         rb = rb_first_cached(&execlists->virtual);
1899                         continue;
1900                 }
1901
1902                 if (!virtual_matches(ve, rq, engine)) {
1903                         rb = rb_next(rb);
1904                         continue;
1905                 }
1906
1907                 break;
1908         }
1909
1910         /*
1911          * If the queue is higher priority than the last
1912          * request in the currently active context, submit afresh.
1913          * We will resubmit again afterwards in case we need to split
1914          * the active context to interject the preemption request,
1915          * i.e. we will retrigger preemption following the ack in case
1916          * of trouble.
1917          */
1918         last = last_active(execlists);
1919         if (last) {
1920                 if (need_preempt(engine, last, rb)) {
1921                         ENGINE_TRACE(engine,
1922                                      "preempting last=%llx:%lld, prio=%d, hint=%d\n",
1923                                      last->fence.context,
1924                                      last->fence.seqno,
1925                                      last->sched.attr.priority,
1926                                      execlists->queue_priority_hint);
1927                         record_preemption(execlists);
1928
1929                         /*
1930                          * Don't let the RING_HEAD advance past the breadcrumb
1931                          * as we unwind (and until we resubmit) so that we do
1932                          * not accidentally tell it to go backwards.
1933                          */
1934                         ring_set_paused(engine, 1);
1935
1936                         /*
1937                          * Note that we have not stopped the GPU at this point,
1938                          * so we are unwinding the incomplete requests as they
1939                          * remain inflight and so by the time we do complete
1940                          * the preemption, some of the unwound requests may
1941                          * complete!
1942                          */
1943                         __unwind_incomplete_requests(engine);
1944
1945                         last = NULL;
1946                 } else if (need_timeslice(engine, last) &&
1947                            timer_expired(&engine->execlists.timer)) {
1948                         ENGINE_TRACE(engine,
1949                                      "expired last=%llx:%lld, prio=%d, hint=%d\n",
1950                                      last->fence.context,
1951                                      last->fence.seqno,
1952                                      last->sched.attr.priority,
1953                                      execlists->queue_priority_hint);
1954
1955                         ring_set_paused(engine, 1);
1956                         defer_active(engine);
1957
1958                         /*
1959                          * Unlike for preemption, if we rewind and continue
1960                          * executing the same context as previously active,
1961                          * the order of execution will remain the same and
1962                          * the tail will only advance. We do not need to
1963                          * force a full context restore, as a lite-restore
1964                          * is sufficient to resample the monotonic TAIL.
1965                          *
1966                          * If we switch to any other context, similarly we
1967                          * will not rewind TAIL of current context, and
1968                          * normal save/restore will preserve state and allow
1969                          * us to later continue executing the same request.
1970                          */
1971                         last = NULL;
1972                 } else {
1973                         /*
1974                          * Otherwise if we already have a request pending
1975                          * for execution after the current one, we can
1976                          * just wait until the next CS event before
1977                          * queuing more. In either case we will force a
1978                          * lite-restore preemption event, but if we wait
1979                          * we hopefully coalesce several updates into a single
1980                          * submission.
1981                          */
1982                         if (!list_is_last(&last->sched.link,
1983                                           &engine->active.requests)) {
1984                                 /*
1985                                  * Even if ELSP[1] is occupied and not worthy
1986                                  * of timeslices, our queue might be.
1987                                  */
1988                                 start_timeslice(engine);
1989                                 return;
1990                         }
1991                 }
1992         }
1993
1994         while (rb) { /* XXX virtual is always taking precedence */
1995                 struct virtual_engine *ve =
1996                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1997                 struct i915_request *rq;
1998
1999                 spin_lock(&ve->base.active.lock);
2000
2001                 rq = ve->request;
2002                 if (unlikely(!rq)) { /* lost the race to a sibling */
2003                         spin_unlock(&ve->base.active.lock);
2004                         rb_erase_cached(rb, &execlists->virtual);
2005                         RB_CLEAR_NODE(rb);
2006                         rb = rb_first_cached(&execlists->virtual);
2007                         continue;
2008                 }
2009
2010                 GEM_BUG_ON(rq != ve->request);
2011                 GEM_BUG_ON(rq->engine != &ve->base);
2012                 GEM_BUG_ON(rq->context != &ve->context);
2013
2014                 if (rq_prio(rq) >= queue_prio(execlists)) {
2015                         if (!virtual_matches(ve, rq, engine)) {
2016                                 spin_unlock(&ve->base.active.lock);
2017                                 rb = rb_next(rb);
2018                                 continue;
2019                         }
2020
2021                         if (last && !can_merge_rq(last, rq)) {
2022                                 spin_unlock(&ve->base.active.lock);
2023                                 start_timeslice(engine);
2024                                 return; /* leave this for another sibling */
2025                         }
2026
2027                         ENGINE_TRACE(engine,
2028                                      "virtual rq=%llx:%lld%s, new engine? %s\n",
2029                                      rq->fence.context,
2030                                      rq->fence.seqno,
2031                                      i915_request_completed(rq) ? "!" :
2032                                      i915_request_started(rq) ? "*" :
2033                                      "",
2034                                      yesno(engine != ve->siblings[0]));
2035
2036                         ve->request = NULL;
2037                         ve->base.execlists.queue_priority_hint = INT_MIN;
2038                         rb_erase_cached(rb, &execlists->virtual);
2039                         RB_CLEAR_NODE(rb);
2040
2041                         GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2042                         rq->engine = engine;
2043
2044                         if (engine != ve->siblings[0]) {
2045                                 u32 *regs = ve->context.lrc_reg_state;
2046                                 unsigned int n;
2047
2048                                 GEM_BUG_ON(READ_ONCE(ve->context.inflight));
2049
2050                                 if (!intel_engine_has_relative_mmio(engine))
2051                                         virtual_update_register_offsets(regs,
2052                                                                         engine);
2053
2054                                 if (!list_empty(&ve->context.signals))
2055                                         virtual_xfer_breadcrumbs(ve, engine);
2056
2057                                 /*
2058                                  * Move the bound engine to the top of the list
2059                                  * for future execution. We then kick this
2060                                  * tasklet first before checking others, so that
2061                                  * we preferentially reuse this set of bound
2062                                  * registers.
2063                                  */
2064                                 for (n = 1; n < ve->num_siblings; n++) {
2065                                         if (ve->siblings[n] == engine) {
2066                                                 swap(ve->siblings[n],
2067                                                      ve->siblings[0]);
2068                                                 break;
2069                                         }
2070                                 }
2071
2072                                 GEM_BUG_ON(ve->siblings[0] != engine);
2073                         }
2074
2075                         if (__i915_request_submit(rq)) {
2076                                 submit = true;
2077                                 last = rq;
2078                         }
2079                         i915_request_put(rq);
2080
2081                         /*
2082                          * Hmm, we have a bunch of virtual engine requests,
2083                          * but the first one was already completed (thanks
2084                          * preempt-to-busy!). Keep looking at the veng queue
2085                          * until we have no more relevant requests (i.e.
2086                          * the normal submit queue has higher priority).
2087                          */
2088                         if (!submit) {
2089                                 spin_unlock(&ve->base.active.lock);
2090                                 rb = rb_first_cached(&execlists->virtual);
2091                                 continue;
2092                         }
2093                 }
2094
2095                 spin_unlock(&ve->base.active.lock);
2096                 break;
2097         }
2098
2099         while ((rb = rb_first_cached(&execlists->queue))) {
2100                 struct i915_priolist *p = to_priolist(rb);
2101                 struct i915_request *rq, *rn;
2102                 int i;
2103
2104                 priolist_for_each_request_consume(rq, rn, p, i) {
2105                         bool merge = true;
2106
2107                         /*
2108                          * Can we combine this request with the current port?
2109                          * It has to be the same context/ringbuffer and not
2110                          * have any exceptions (e.g. GVT saying never to
2111                          * combine contexts).
2112                          *
2113                          * If we can combine the requests, we can execute both
2114                          * by updating the RING_TAIL to point to the end of the
2115                          * second request, and so we never need to tell the
2116                          * hardware about the first.
2117                          */
2118                         if (last && !can_merge_rq(last, rq)) {
2119                                 /*
2120                                  * If we are on the second port and cannot
2121                                  * combine this request with the last, then we
2122                                  * are done.
2123                                  */
2124                                 if (port == last_port)
2125                                         goto done;
2126
2127                                 /*
2128                                  * We must not populate both ELSP[] with the
2129                                  * same LRCA, i.e. we must submit 2 different
2130                                  * contexts if we submit 2 ELSP.
2131                                  */
2132                                 if (last->context == rq->context)
2133                                         goto done;
2134
2135                                 if (i915_request_has_sentinel(last))
2136                                         goto done;
2137
2138                                 /*
2139                                  * If GVT overrides us we only ever submit
2140                                  * port[0], leaving port[1] empty. Note that we
2141                                  * also have to be careful that we don't queue
2142                                  * the same context (even though a different
2143                                  * request) to the second port.
2144                                  */
2145                                 if (ctx_single_port_submission(last->context) ||
2146                                     ctx_single_port_submission(rq->context))
2147                                         goto done;
2148
2149                                 merge = false;
2150                         }
2151
2152                         if (__i915_request_submit(rq)) {
2153                                 if (!merge) {
2154                                         *port = execlists_schedule_in(last, port - execlists->pending);
2155                                         port++;
2156                                         last = NULL;
2157                                 }
2158
2159                                 GEM_BUG_ON(last &&
2160                                            !can_merge_ctx(last->context,
2161                                                           rq->context));
2162                                 GEM_BUG_ON(last &&
2163                                            i915_seqno_passed(last->fence.seqno,
2164                                                              rq->fence.seqno));
2165
2166                                 submit = true;
2167                                 last = rq;
2168                         }
2169                 }
2170
2171                 rb_erase_cached(&p->node, &execlists->queue);
2172                 i915_priolist_free(p);
2173         }
2174
2175 done:
2176         /*
2177          * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2178          *
2179          * We choose the priority hint such that if we add a request of greater
2180          * priority than this, we kick the submission tasklet to decide on
2181          * the right order of submitting the requests to hardware. We must
2182          * also be prepared to reorder requests as they are in-flight on the
2183          * HW. We derive the priority hint then as the first "hole" in
2184          * the HW submission ports and if there are no available slots,
2185          * the priority of the lowest executing request, i.e. last.
2186          *
2187          * When we do receive a higher priority request ready to run from the
2188          * user, see queue_request(), the priority hint is bumped to that
2189          * request triggering preemption on the next dequeue (or subsequent
2190          * interrupt for secondary ports).
2191          */
2192         execlists->queue_priority_hint = queue_prio(execlists);
2193
2194         if (submit) {
2195                 *port = execlists_schedule_in(last, port - execlists->pending);
2196                 execlists->switch_priority_hint =
2197                         switch_prio(engine, *execlists->pending);
2198
2199                 /*
2200                  * Skip if we ended up with exactly the same set of requests,
2201                  * e.g. trying to timeslice a pair of ordered contexts
2202                  */
2203                 if (!memcmp(execlists->active, execlists->pending,
2204                             (port - execlists->pending + 1) * sizeof(*port))) {
2205                         do
2206                                 execlists_schedule_out(fetch_and_zero(port));
2207                         while (port-- != execlists->pending);
2208
2209                         goto skip_submit;
2210                 }
2211                 clear_ports(port + 1, last_port - port);
2212
2213                 execlists_submit_ports(engine);
2214                 set_preempt_timeout(engine);
2215         } else {
2216 skip_submit:
2217                 ring_set_paused(engine, 0);
2218         }
2219 }
2220
2221 static void
2222 cancel_port_requests(struct intel_engine_execlists * const execlists)
2223 {
2224         struct i915_request * const *port;
2225
2226         for (port = execlists->pending; *port; port++)
2227                 execlists_schedule_out(*port);
2228         clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2229
2230         /* Mark the end of active before we overwrite *active */
2231         for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2232                 execlists_schedule_out(*port);
2233         clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2234
2235         WRITE_ONCE(execlists->active, execlists->inflight);
2236 }
2237
2238 static inline void
2239 invalidate_csb_entries(const u32 *first, const u32 *last)
2240 {
2241         clflush((void *)first);
2242         clflush((void *)last);
2243 }
2244
2245 /*
2246  * Starting with Gen12, the status has a new format:
2247  *
2248  *     bit  0:     switched to new queue
2249  *     bit  1:     reserved
2250  *     bit  2:     semaphore wait mode (poll or signal), only valid when
2251  *                 switch detail is set to "wait on semaphore"
2252  *     bits 3-5:   engine class
2253  *     bits 6-11:  engine instance
2254  *     bits 12-14: reserved
2255  *     bits 15-25: sw context id of the lrc the GT switched to
2256  *     bits 26-31: sw counter of the lrc the GT switched to
2257  *     bits 32-35: context switch detail
2258  *                  - 0: ctx complete
2259  *                  - 1: wait on sync flip
2260  *                  - 2: wait on vblank
2261  *                  - 3: wait on scanline
2262  *                  - 4: wait on semaphore
2263  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2264  *                       WAIT_FOR_EVENT)
2265  *     bit  36:    reserved
2266  *     bits 37-43: wait detail (for switch detail 1 to 4)
2267  *     bits 44-46: reserved
2268  *     bits 47-57: sw context id of the lrc the GT switched away from
2269  *     bits 58-63: sw counter of the lrc the GT switched away from
2270  */
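/*
 * Illustrative decode (hypothetical value): csb[0] = 0x00008001 has bit 0
 * set ("switched to new queue") and a valid sw context id in bits 15-25,
 * so gen12_csb_parse() below treats it as a promotion of the pending[]
 * set regardless of the away-context in csb[1]. A plain "ctx complete"
 * event instead carries a valid away-context with switch detail 0 and is
 * reported as a completion.
 */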
2271 static inline bool
2272 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2273 {
2274         u32 lower_dw = csb[0];
2275         u32 upper_dw = csb[1];
2276         bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
2277         bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
2278         bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2279
2280         /*
2281          * The context switch detail is not guaranteed to be 5 when a preemption
2282          * occurs, so we can't just check for that. The check below works for
2283          * all the cases we care about, including preemptions of WAIT
2284          * instructions and lite-restore. Preempt-to-idle via the CTRL register
2285          * would require some extra handling, but we don't support that.
2286          */
2287         if (!ctx_away_valid || new_queue) {
2288                 GEM_BUG_ON(!ctx_to_valid);
2289                 return true;
2290         }
2291
2292         /*
2293          * switch detail = 5 is covered by the case above and we do not expect a
2294          * context switch on an unsuccessful wait instruction since we always
2295          * use polling mode.
2296          */
2297         GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
2298         return false;
2299 }
2300
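/*
 * Before Gen12 the decision is simpler: a promotion of the pending[] set is
 * signalled either by an idle->active transition or by an explicit
 * preemption event, hence the two status bits tested below.
 */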
2301 static inline bool
2302 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2303 {
2304         return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2305 }
2306
2307 static void process_csb(struct intel_engine_cs *engine)
2308 {
2309         struct intel_engine_execlists * const execlists = &engine->execlists;
2310         const u32 * const buf = execlists->csb_status;
2311         const u8 num_entries = execlists->csb_size;
2312         u8 head, tail;
2313
2314         /*
2315          * As we modify our execlists state tracking we require exclusive
2316          * access. Either we are inside the tasklet, or the tasklet is disabled
2317          * and we assume that is only inside the reset paths and so serialised.
2318          */
2319         GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2320                    !reset_in_progress(execlists));
2321         GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2322
2323         /*
2324          * Note that csb_write, csb_status may be either in HWSP or mmio.
2325          * When reading from the csb_write mmio register, we have to be
2326          * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2327          * the low 4 bits. As it happens we know the next 4 bits are always
2328          * zero and so we can simply mask off the low u8 of the register
2329          * and treat it identically to reading from the HWSP (without having
2330          * to use explicit shifting and masking, and probably bifurcating
2331          * the code to handle the legacy mmio read).
2332          */
2333         head = execlists->csb_head;
2334         tail = READ_ONCE(*execlists->csb_write);
2335         if (unlikely(head == tail))
2336                 return;
2337
2338         /*
2339          * Hopefully paired with a wmb() in HW!
2340          *
2341          * We must complete the read of the write pointer before any reads
2342          * from the CSB, so that we do not see stale values. Without an rmb
2343          * (lfence) the HW may speculatively perform the CSB[] reads *before*
2344          * we perform the READ_ONCE(*csb_write).
2345          */
2346         rmb();
2347
2348         ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2349         do {
2350                 bool promote;
2351
2352                 if (++head == num_entries)
2353                         head = 0;
2354
2355                 /*
2356                  * We are flying near dragons again.
2357                  *
2358                  * We hold a reference to the request in execlist_port[]
2359                  * but no more than that. We are operating in softirq
2360                  * context and so cannot hold any mutex or sleep. That
2361                  * prevents us stopping the requests we are processing
2362                  * in port[] from being retired simultaneously (the
2363                  * breadcrumb will be complete before we see the
2364                  * context-switch). As we only hold the reference to the
2365                  * request, any pointer chasing underneath the request
2366                  * is subject to a potential use-after-free. Thus we
2367                  * store all of the bookkeeping within port[] as
2368                  * required, and avoid using unguarded pointers beneath
2369                  * request itself. The same applies to the atomic
2370                  * status notifier.
2371                  */
2372
2373                 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2374                              head, buf[2 * head + 0], buf[2 * head + 1]);
2375
2376                 if (INTEL_GEN(engine->i915) >= 12)
2377                         promote = gen12_csb_parse(execlists, buf + 2 * head);
2378                 else
2379                         promote = gen8_csb_parse(execlists, buf + 2 * head);
2380                 if (promote) {
2381                         struct i915_request * const *old = execlists->active;
2382
2383                         GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2384
2385                         ring_set_paused(engine, 0);
2386
2387                         /* Point active to the new ELSP; prevent overwriting */
2388                         WRITE_ONCE(execlists->active, execlists->pending);
2389
2390                         /* cancel old inflight, prepare for switch */
2391                         trace_ports(execlists, "preempted", old);
2392                         while (*old)
2393                                 execlists_schedule_out(*old++);
2394
2395                         /* switch pending to inflight */
2396                         WRITE_ONCE(execlists->active,
2397                                    memcpy(execlists->inflight,
2398                                           execlists->pending,
2399                                           execlists_num_ports(execlists) *
2400                                           sizeof(*execlists->pending)));
2401
2402                         WRITE_ONCE(execlists->pending[0], NULL);
2403                 } else {
2404                         GEM_BUG_ON(!*execlists->active);
2405
2406                         /* port0 completed, advanced to port1 */
2407                         trace_ports(execlists, "completed", execlists->active);
2408
2409                         /*
2410                          * We rely on the hardware being strongly
2411                          * ordered, that the breadcrumb write is
2412                          * coherent (visible from the CPU) before the
2413                          * user interrupt and CSB is processed.
2414                          */
2415                         if (GEM_SHOW_DEBUG() &&
2416                             !i915_request_completed(*execlists->active) &&
2417                             !reset_in_progress(execlists)) {
2418                                 struct i915_request *rq __maybe_unused =
2419                                         *execlists->active;
2420                                 const u32 *regs __maybe_unused =
2421                                         rq->context->lrc_reg_state;
2422
2423                                 ENGINE_TRACE(engine,
2424                                              "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
2425                                              ENGINE_READ(engine, RING_START),
2426                                              ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
2427                                              ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
2428                                              ENGINE_READ(engine, RING_CTL),
2429                                              ENGINE_READ(engine, RING_MI_MODE));
2430                                 ENGINE_TRACE(engine,
2431                                              "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
2432                                              i915_ggtt_offset(rq->ring->vma),
2433                                              rq->head, rq->tail,
2434                                              rq->fence.context,
2435                                              lower_32_bits(rq->fence.seqno),
2436                                              hwsp_seqno(rq));
2437                                 ENGINE_TRACE(engine,
2438                                              "ctx:{start:%08x, head:%04x, tail:%04x}, ",
2439                                              regs[CTX_RING_START],
2440                                              regs[CTX_RING_HEAD],
2441                                              regs[CTX_RING_TAIL]);
2442
2443                                 GEM_BUG_ON("context completed before request");
2444                         }
2445
2446                         execlists_schedule_out(*execlists->active++);
2447
2448                         GEM_BUG_ON(execlists->active - execlists->inflight >
2449                                    execlists_num_ports(execlists));
2450                 }
2451         } while (head != tail);
2452
2453         execlists->csb_head = head;
2454         set_timeslice(engine);
2455
2456         /*
2457          * Gen11 has proven to fail wrt global observation point between
2458          * entry and tail update, failing on the ordering and thus
2459          * we see an old entry in the context status buffer.
2460          *
2461          * Forcibly evict out entries for the next gpu csb update,
2462          * to increase the odds that we get fresh entries with non-working
2463          * hardware. The cost of doing so comes out mostly in the wash, as
2464          * the hardware, working or not, will need to do the invalidation
2465          * before writing the next entry anyway.
2466          */
2467         invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2468 }
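/*
 * Illustrative example (hypothetical pointers): with csb_size = 6,
 * csb_head = 4 and a hardware write pointer of 0, the loop above advances
 * head to 5, consumes that entry, wraps head to 0 and consumes entry 0
 * (each entry being a pair of dwords in buf[]) before recording
 * csb_head = 0 for the next invocation.
 */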
2469
2470 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2471 {
2472         lockdep_assert_held(&engine->active.lock);
2473         if (!READ_ONCE(engine->execlists.pending[0])) {
2474                 rcu_read_lock(); /* protect peeking at execlists->active */
2475                 execlists_dequeue(engine);
2476                 rcu_read_unlock();
2477         }
2478 }
2479
2480 static void __execlists_hold(struct i915_request *rq)
2481 {
2482         LIST_HEAD(list);
2483
2484         do {
2485                 struct i915_dependency *p;
2486
2487                 if (i915_request_is_active(rq))
2488                         __i915_request_unsubmit(rq);
2489
2490                 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2491                 list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2492                 i915_request_set_hold(rq);
2493                 RQ_TRACE(rq, "on hold\n");
2494
2495                 for_each_waiter(p, rq) {
2496                         struct i915_request *w =
2497                                 container_of(p->waiter, typeof(*w), sched);
2498
2499                         /* Leave semaphores spinning on the other engines */
2500                         if (w->engine != rq->engine)
2501                                 continue;
2502
2503                         if (!i915_request_is_ready(w))
2504                                 continue;
2505
2506                         if (i915_request_completed(w))
2507                                 continue;
2508
2509                         if (i915_request_on_hold(w))
2510                                 continue;
2511
2512                         list_move_tail(&w->sched.link, &list);
2513                 }
2514
2515                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2516         } while (rq);
2517 }
2518
2519 static bool execlists_hold(struct intel_engine_cs *engine,
2520                            struct i915_request *rq)
2521 {
2522         spin_lock_irq(&engine->active.lock);
2523
2524         if (i915_request_completed(rq)) { /* too late! */
2525                 rq = NULL;
2526                 goto unlock;
2527         }
2528
2529         if (rq->engine != engine) { /* preempted virtual engine */
2530                 struct virtual_engine *ve = to_virtual_engine(rq->engine);
2531
2532                 /*
2533                  * intel_context_inflight() is only protected by virtue
2534                  * of process_csb() being called only by the tasklet (or
2535                  * directly from inside reset while the tasklet is suspended).
2536                  * Assert that neither of those are allowed to run while we
2537                  * poke at the request queues.
2538                  */
2539                 GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2540
2541                 /*
2542                  * An unsubmitted request along a virtual engine will
2543                  * remain on the active (this) engine until we are able
2544                  * to process the context switch away (and so mark the
2545                  * context as no longer in flight). That cannot have happened
2546                  * yet, otherwise we would not be hanging!
2547                  */
2548                 spin_lock(&ve->base.active.lock);
2549                 GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2550                 GEM_BUG_ON(ve->request != rq);
2551                 ve->request = NULL;
2552                 spin_unlock(&ve->base.active.lock);
2553                 i915_request_put(rq);
2554
2555                 rq->engine = engine;
2556         }
2557
2558         /*
2559          * Transfer this request onto the hold queue to prevent it
2560          * being resubmitted to HW (and potentially completed) before we have
2561          * released it. Since we may have already submitted following
2562          * requests, we need to remove those as well.
2563          */
2564         GEM_BUG_ON(i915_request_on_hold(rq));
2565         GEM_BUG_ON(rq->engine != engine);
2566         __execlists_hold(rq);
2567         GEM_BUG_ON(list_empty(&engine->active.hold));
2568
2569 unlock:
2570         spin_unlock_irq(&engine->active.lock);
2571         return rq;
2572 }
2573
2574 static bool hold_request(const struct i915_request *rq)
2575 {
2576         struct i915_dependency *p;
2577         bool result = false;
2578
2579         /*
2580          * If one of our ancestors is on hold, we must also be on hold,
2581          * otherwise we will bypass it and execute before it.
2582          */
2583         rcu_read_lock();
2584         for_each_signaler(p, rq) {
2585                 const struct i915_request *s =
2586                         container_of(p->signaler, typeof(*s), sched);
2587
2588                 if (s->engine != rq->engine)
2589                         continue;
2590
2591                 result = i915_request_on_hold(s);
2592                 if (result)
2593                         break;
2594         }
2595         rcu_read_unlock();
2596
2597         return result;
2598 }
2599
2600 static void __execlists_unhold(struct i915_request *rq)
2601 {
2602         LIST_HEAD(list);
2603
2604         do {
2605                 struct i915_dependency *p;
2606
2607                 RQ_TRACE(rq, "hold release\n");
2608
2609                 GEM_BUG_ON(!i915_request_on_hold(rq));
2610                 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2611
2612                 i915_request_clear_hold(rq);
2613                 list_move_tail(&rq->sched.link,
2614                                i915_sched_lookup_priolist(rq->engine,
2615                                                           rq_prio(rq)));
2616                 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2617
2618                 /* Also release any children on this engine that are ready */
2619                 for_each_waiter(p, rq) {
2620                         struct i915_request *w =
2621                                 container_of(p->waiter, typeof(*w), sched);
2622
2623                         /* Propagate any change in error status */
2624                         if (rq->fence.error)
2625                                 i915_request_set_error_once(w, rq->fence.error);
2626
2627                         if (w->engine != rq->engine)
2628                                 continue;
2629
2630                         if (!i915_request_on_hold(w))
2631                                 continue;
2632
2633                         /* Check that no other parents are also on hold */
2634                         if (hold_request(w))
2635                                 continue;
2636
2637                         list_move_tail(&w->sched.link, &list);
2638                 }
2639
2640                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2641         } while (rq);
2642 }
2643
2644 static void execlists_unhold(struct intel_engine_cs *engine,
2645                              struct i915_request *rq)
2646 {
2647         spin_lock_irq(&engine->active.lock);
2648
2649         /*
2650          * Move this request back to the priority queue, and all of its
2651          * children and grandchildren that were suspended along with it.
2652          */
2653         __execlists_unhold(rq);
2654
2655         if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2656                 engine->execlists.queue_priority_hint = rq_prio(rq);
2657                 tasklet_hi_schedule(&engine->execlists.tasklet);
2658         }
2659
2660         spin_unlock_irq(&engine->active.lock);
2661 }
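/*
 * execlists_hold() and execlists_unhold() bracket the error-capture path
 * below: the suspect request, and everything on this engine that depends on
 * it, is parked on engine->active.hold while the coredump worker runs, and
 * only afterwards is it returned to the priority queue for signaling.
 */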
2662
2663 struct execlists_capture {
2664         struct work_struct work;
2665         struct i915_request *rq;
2666         struct i915_gpu_coredump *error;
2667 };
2668
2669 static void execlists_capture_work(struct work_struct *work)
2670 {
2671         struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2672         const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2673         struct intel_engine_cs *engine = cap->rq->engine;
2674         struct intel_gt_coredump *gt = cap->error->gt;
2675         struct intel_engine_capture_vma *vma;
2676
2677         /* Compress all the objects attached to the request, slow! */
2678         vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2679         if (vma) {
2680                 struct i915_vma_compress *compress =
2681                         i915_vma_capture_prepare(gt);
2682
2683                 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2684                 i915_vma_capture_finish(gt, compress);
2685         }
2686
2687         gt->simulated = gt->engine->simulated;
2688         cap->error->simulated = gt->simulated;
2689
2690         /* Publish the error state, and announce it to the world */
2691         i915_error_state_store(cap->error);
2692         i915_gpu_coredump_put(cap->error);
2693
2694         /* Return this request and all that depend upon it for signaling */
2695         execlists_unhold(engine, cap->rq);
2696         i915_request_put(cap->rq);
2697
2698         kfree(cap);
2699 }
2700
2701 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2702 {
2703         const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2704         struct execlists_capture *cap;
2705
2706         cap = kmalloc(sizeof(*cap), gfp);
2707         if (!cap)
2708                 return NULL;
2709
2710         cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2711         if (!cap->error)
2712                 goto err_cap;
2713
2714         cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2715         if (!cap->error->gt)
2716                 goto err_gpu;
2717
2718         cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2719         if (!cap->error->gt->engine)
2720                 goto err_gt;
2721
2722         return cap;
2723
2724 err_gt:
2725         kfree(cap->error->gt);
2726 err_gpu:
2727         kfree(cap->error);
2728 err_cap:
2729         kfree(cap);
2730         return NULL;
2731 }
2732
2733 static bool execlists_capture(struct intel_engine_cs *engine)
2734 {
2735         struct execlists_capture *cap;
2736
2737         if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
2738                 return true;
2739
2740         /*
2741          * We need to _quickly_ capture the engine state before we reset.
2742          * We are inside an atomic section (softirq) here and we are delaying
2743          * the forced preemption event.
2744          */
2745         cap = capture_regs(engine);
2746         if (!cap)
2747                 return true;
2748
2749         spin_lock_irq(&engine->active.lock);
2750         cap->rq = execlists_active(&engine->execlists);
2751         if (cap->rq) {
2752                 cap->rq = active_request(cap->rq->context->timeline, cap->rq);
2753                 cap->rq = i915_request_get_rcu(cap->rq);
2754         }
2755         spin_unlock_irq(&engine->active.lock);
2756         if (!cap->rq)
2757                 goto err_free;
2758
2759         /*
2760          * Remove the request from the execlists queue, and take ownership
2761          * of the request. We pass it to our worker who will _slowly_ compress
2762          * all the pages the _user_ requested for debugging their batch, after
2763          * which we return it to the queue for signaling.
2764          *
2765          * By removing them from the execlists queue, we also remove the
2766          * requests from being processed by __unwind_incomplete_requests()
2767          * during the intel_engine_reset(), and so they will *not* be replayed
2768          * afterwards.
2769          *
2770          * Note that because we have not yet reset the engine at this point,
2771          * it is possible that the request we have identified as being
2772          * guilty did in fact complete, and we will then hit an arbitration
2773          * point allowing the outstanding preemption to succeed. The likelihood
2774          * of that is very low (as capturing of the engine registers should be
2775          * fast enough to run inside an irq-off atomic section!), so we will
2776          * simply hold that request accountable for being non-preemptible
2777          * long enough to force the reset.
2778          */
2779         if (!execlists_hold(engine, cap->rq))
2780                 goto err_rq;
2781
2782         INIT_WORK(&cap->work, execlists_capture_work);
2783         schedule_work(&cap->work);
2784         return true;
2785
2786 err_rq:
2787         i915_request_put(cap->rq);
2788 err_free:
2789         i915_gpu_coredump_put(cap->error);
2790         kfree(cap);
2791         return false;
2792 }
2793
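/*
 * Trigger a per-engine reset from the submission tasklet: claim the
 * I915_RESET_ENGINE bit for this engine (backing off if a reset is
 * already in flight), disable the tasklet, freeze the current request
 * with ring_set_paused(), capture the error state and then reset the
 * engine before releasing the bit and waking any waiters.
 */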
2794 static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
2795 {
2796         const unsigned int bit = I915_RESET_ENGINE + engine->id;
2797         unsigned long *lock = &engine->gt->reset.flags;
2798
2799         if (!intel_has_reset_engine(engine->gt))
2800                 return;
2801
2802         if (test_and_set_bit(bit, lock))
2803                 return;
2804
2805         ENGINE_TRACE(engine, "reset for %s\n", msg);
2806
2807         /* Mark this tasklet as disabled to avoid waiting for it to complete */
2808         tasklet_disable_nosync(&engine->execlists.tasklet);
2809
2810         ring_set_paused(engine, 1); /* Freeze the current request in place */
2811         if (execlists_capture(engine))
2812                 intel_engine_reset(engine, msg);
2813         else
2814                 ring_set_paused(engine, 0);
2815
2816         tasklet_enable(&engine->execlists.tasklet);
2817         clear_and_wake_up_bit(bit, lock);
2818 }
2819
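/*
 * A preemption timeout has occurred only if the preempt timer has
 * expired while we still have a pending ELSP submission, i.e. the
 * outstanding preemption never completed and must now be forced.
 */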
2820 static bool preempt_timeout(const struct intel_engine_cs *const engine)
2821 {
2822         const struct timer_list *t = &engine->execlists.preempt;
2823
2824         if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2825                 return false;
2826
2827         if (!timer_expired(t))
2828                 return false;
2829
2830         return READ_ONCE(engine->execlists.pending[0]);
2831 }
2832
2833 /*
2834  * Check the unread Context Status Buffers and manage the submission of new
2835  * contexts to the ELSP accordingly.
2836  */
2837 static void execlists_submission_tasklet(unsigned long data)
2838 {
2839         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
2840         bool timeout = preempt_timeout(engine);
2841
2842         process_csb(engine);
2843
2844         if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
2845                 engine->execlists.error_interrupt = 0;
2846                 if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */
2847                         execlists_reset(engine, "CS error");
2848         }
2849
2850         if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
2851                 unsigned long flags;
2852
2853                 spin_lock_irqsave(&engine->active.lock, flags);
2854                 __execlists_submission_tasklet(engine);
2855                 spin_unlock_irqrestore(&engine->active.lock, flags);
2856
2857                 /* Recheck after serialising with direct-submission */
2858                 if (unlikely(timeout && preempt_timeout(engine)))
2859                         execlists_reset(engine, "preemption timeout");
2860         }
2861 }
2862
2863 static void __execlists_kick(struct intel_engine_execlists *execlists)
2864 {
2865         /* Kick the tasklet for some interrupt coalescing and reset handling */
2866         tasklet_hi_schedule(&execlists->tasklet);
2867 }
2868
2869 #define execlists_kick(t, member) \
2870         __execlists_kick(container_of(t, struct intel_engine_execlists, member))
2871
2872 static void execlists_timeslice(struct timer_list *timer)
2873 {
2874         execlists_kick(timer, timer);
2875 }
2876
2877 static void execlists_preempt(struct timer_list *timer)
2878 {
2879         execlists_kick(timer, preempt);
2880 }
2881
2882 static void queue_request(struct intel_engine_cs *engine,
2883                           struct i915_request *rq)
2884 {
2885         GEM_BUG_ON(!list_empty(&rq->sched.link));
2886         list_add_tail(&rq->sched.link,
2887                       i915_sched_lookup_priolist(engine, rq_prio(rq)));
2888         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2889 }
2890
2891 static void __submit_queue_imm(struct intel_engine_cs *engine)
2892 {
2893         struct intel_engine_execlists * const execlists = &engine->execlists;
2894
2895         if (reset_in_progress(execlists))
2896                 return; /* defer until we restart the engine following reset */
2897
2898         if (execlists->tasklet.func == execlists_submission_tasklet)
2899                 __execlists_submission_tasklet(engine);
2900         else
2901                 tasklet_hi_schedule(&execlists->tasklet);
2902 }
2903
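/*
 * Only kick submission if this request is of higher priority than
 * anything we have already decided to submit; queue_priority_hint
 * tracks the highest priority seen, so lower or equal priority
 * requests simply wait their turn in the priolist.
 */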
2904 static void submit_queue(struct intel_engine_cs *engine,
2905                          const struct i915_request *rq)
2906 {
2907         struct intel_engine_execlists *execlists = &engine->execlists;
2908
2909         if (rq_prio(rq) <= execlists->queue_priority_hint)
2910                 return;
2911
2912         execlists->queue_priority_hint = rq_prio(rq);
2913         __submit_queue_imm(engine);
2914 }
2915
2916 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
2917                              const struct i915_request *rq)
2918 {
2919         GEM_BUG_ON(i915_request_on_hold(rq));
2920         return !list_empty(&engine->active.hold) && hold_request(rq);
2921 }
2922
2923 static void execlists_submit_request(struct i915_request *request)
2924 {
2925         struct intel_engine_cs *engine = request->engine;
2926         unsigned long flags;
2927
2928         /* Will be called from irq-context when using foreign fences. */
2929         spin_lock_irqsave(&engine->active.lock, flags);
2930
2931         if (unlikely(ancestor_on_hold(engine, request))) {
2932                 RQ_TRACE(request, "ancestor on hold\n");
2933                 list_add_tail(&request->sched.link, &engine->active.hold);
2934                 i915_request_set_hold(request);
2935         } else {
2936                 queue_request(engine, request);
2937
2938                 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
2939                 GEM_BUG_ON(list_empty(&request->sched.link));
2940
2941                 submit_queue(engine, request);
2942         }
2943
2944         spin_unlock_irqrestore(&engine->active.lock, flags);
2945 }
2946
2947 static void __execlists_context_fini(struct intel_context *ce)
2948 {
2949         intel_ring_put(ce->ring);
2950         i915_vma_put(ce->state);
2951 }
2952
2953 static void execlists_context_destroy(struct kref *kref)
2954 {
2955         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2956
2957         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
2958         GEM_BUG_ON(intel_context_is_pinned(ce));
2959
2960         if (ce->state)
2961                 __execlists_context_fini(ce);
2962
2963         intel_context_fini(ce);
2964         intel_context_free(ce);
2965 }
2966
2967 static void
2968 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
2969 {
2970         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2971                 return;
2972
2973         vaddr += engine->context_size;
2974
2975         memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
2976 }
2977
2978 static void
2979 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
2980 {
2981         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2982                 return;
2983
2984         vaddr += engine->context_size;
2985
2986         if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
2987                 dev_err_once(engine->i915->drm.dev,
2988                              "%s context redzone overwritten!\n",
2989                              engine->name);
2990 }
2991
2992 static void execlists_context_unpin(struct intel_context *ce)
2993 {
2994         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
2995                       ce->engine);
2996
2997         i915_gem_object_unpin_map(ce->state->obj);
2998 }
2999
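/*
 * Write the current ring state (start, head, tail and control) back into
 * the logical ring context image so that the next context restore resumes
 * from the right position; the render engine additionally refreshes its
 * power clock state (RPCS) and OA configuration.
 */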
3000 static void
3001 __execlists_update_reg_state(const struct intel_context *ce,
3002                              const struct intel_engine_cs *engine,
3003                              u32 head)
3004 {
3005         struct intel_ring *ring = ce->ring;
3006         u32 *regs = ce->lrc_reg_state;
3007
3008         GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
3009         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
3010
3011         regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
3012         regs[CTX_RING_HEAD] = head;
3013         regs[CTX_RING_TAIL] = ring->tail;
3014         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3015
3016         /* RPCS */
3017         if (engine->class == RENDER_CLASS) {
3018                 regs[CTX_R_PWR_CLK_STATE] =
3019                         intel_sseu_make_rpcs(engine->i915, &ce->sseu);
3020
3021                 i915_oa_init_reg_state(ce, engine);
3022         }
3023 }
3024
3025 static int
3026 __execlists_context_pin(struct intel_context *ce,
3027                         struct intel_engine_cs *engine)
3028 {
3029         void *vaddr;
3030
3031         GEM_BUG_ON(!ce->state);
3032         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3033
3034         vaddr = i915_gem_object_pin_map(ce->state->obj,
3035                                         i915_coherent_map_type(engine->i915) |
3036                                         I915_MAP_OVERRIDE);
3037         if (IS_ERR(vaddr))
3038                 return PTR_ERR(vaddr);
3039
3040         ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
3041         ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
3042         __execlists_update_reg_state(ce, engine, ce->ring->tail);
3043
3044         return 0;
3045 }
3046
3047 static int execlists_context_pin(struct intel_context *ce)
3048 {
3049         return __execlists_context_pin(ce, ce->engine);
3050 }
3051
3052 static int execlists_context_alloc(struct intel_context *ce)
3053 {
3054         return __execlists_context_alloc(ce, ce->engine);
3055 }
3056
3057 static void execlists_context_reset(struct intel_context *ce)
3058 {
3059         CE_TRACE(ce, "reset\n");
3060         GEM_BUG_ON(!intel_context_is_pinned(ce));
3061
3062         intel_ring_reset(ce->ring, ce->ring->emit);
3063
3064         /* Scrub away the garbage */
3065         execlists_init_reg_state(ce->lrc_reg_state,
3066                                  ce, ce->engine, ce->ring, true);
3067         __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3068
3069         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
3070 }
3071
3072 static const struct intel_context_ops execlists_context_ops = {
3073         .alloc = execlists_context_alloc,
3074
3075         .pin = execlists_context_pin,
3076         .unpin = execlists_context_unpin,
3077
3078         .enter = intel_context_enter_engine,
3079         .exit = intel_context_exit_engine,
3080
3081         .reset = execlists_context_reset,
3082         .destroy = execlists_context_destroy,
3083 };
3084
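/*
 * Emit the initial breadcrumb at the head of the request: an arbitration
 * check so we can be preempted before the payload even starts, followed
 * by a GGTT write of seqno - 1 into the timeline HWSP, which is what
 * makes i915_request_started() report true from here on.
 */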
3085 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3086 {
3087         u32 *cs;
3088
3089         if (!i915_request_timeline(rq)->has_initial_breadcrumb)
3090                 return 0;
3091
3092         cs = intel_ring_begin(rq, 6);
3093         if (IS_ERR(cs))
3094                 return PTR_ERR(cs);
3095
3096         /*
3097          * Check if we have been preempted before we even get started.
3098          *
3099          * After this point i915_request_started() reports true, even if
3100          * we get preempted and so are no longer running.
3101          */
3102         *cs++ = MI_ARB_CHECK;
3103         *cs++ = MI_NOOP;
3104
3105         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3106         *cs++ = i915_request_timeline(rq)->hwsp_offset;
3107         *cs++ = 0;
3108         *cs++ = rq->fence.seqno - 1;
3109
3110         intel_ring_advance(rq, cs);
3111
3112         /* Record the updated position of the request's payload */
3113         rq->infix = intel_ring_offset(rq, cs);
3114
3115         return 0;
3116 }
3117
3118 static int execlists_request_alloc(struct i915_request *request)
3119 {
3120         int ret;
3121
3122         GEM_BUG_ON(!intel_context_is_pinned(request->context));
3123
3124         /*
3125          * Flush enough space to reduce the likelihood of waiting after
3126          * we start building the request - in which case we will just
3127          * have to repeat work.
3128          */
3129         request->reserved_space += EXECLISTS_REQUEST_SIZE;
3130
3131         /*
3132          * Note that after this point, we have committed to using
3133          * this request as it is being used to both track the
3134          * state of engine initialisation and liveness of the
3135          * golden renderstate above. Think twice before you try
3136          * to cancel/unwind this request now.
3137          */
3138
3139         /* Unconditionally invalidate GPU caches and TLBs. */
3140         ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3141         if (ret)
3142                 return ret;
3143
3144         request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3145         return 0;
3146 }
3147
3148 /*
3149  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
3150  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3151  * but there is a slight complication as this is applied in a WA batch where the
3152  * values are only initialized once so we cannot take the register value at the
3153  * beginning and reuse it further; hence we save its value to memory, upload a
3154  * constant value with bit21 set and then we restore it back with the saved value.
3155  * To simplify the WA, a constant value is formed by using the default value
3156  * of this register. This shouldn't be a problem because we are only modifying
3157  * it for a short period and this batch is non-preemptible. We can of course
3158  * use additional instructions that read the actual value of the register
3159  * at that time and set our bit of interest, but that makes the WA complicated.
3160  *
3161  * This WA is also required for Gen9 so extracting as a function avoids
3162  * code duplication.
3163  */
3164 static u32 *
3165 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3166 {
3167         /* NB no one else is allowed to scribble over scratch + 256! */
3168         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3169         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3170         *batch++ = intel_gt_scratch_offset(engine->gt,
3171                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3172         *batch++ = 0;
3173
3174         *batch++ = MI_LOAD_REGISTER_IMM(1);
3175         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3176         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3177
3178         batch = gen8_emit_pipe_control(batch,
3179                                        PIPE_CONTROL_CS_STALL |
3180                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
3181                                        0);
3182
3183         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3184         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3185         *batch++ = intel_gt_scratch_offset(engine->gt,
3186                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3187         *batch++ = 0;
3188
3189         return batch;
3190 }
3191
3192 /*
3193  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3194  * initialized at the beginning and shared across all contexts but this field
3195  * helps us to have multiple batches at different offsets and select them based
3196  * on a criterion. At the moment this batch always starts at the beginning of the
3197  * page and at this point we don't have multiple wa_ctx batch buffers.
3198  *
3199  * The number of WAs applied is not known at the beginning; we use this field
3200  * to return the number of DWORDS written.
3201  *
3202  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3203  * so it adds NOOPs as padding to make it cacheline aligned.
3204  * MI_BATCH_BUFFER_END will be added to the per-ctx batch and both of them
3205  * together make a complete batch buffer.
3206  */
3207 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3208 {
3209         /* WaDisableCtxRestoreArbitration:bdw,chv */
3210         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3211
3212         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3213         if (IS_BROADWELL(engine->i915))
3214                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3215
3216         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3217         /* Actual scratch location is at 128 bytes offset */
3218         batch = gen8_emit_pipe_control(batch,
3219                                        PIPE_CONTROL_FLUSH_L3 |
3220                                        PIPE_CONTROL_STORE_DATA_INDEX |
3221                                        PIPE_CONTROL_CS_STALL |
3222                                        PIPE_CONTROL_QW_WRITE,
3223                                        LRC_PPHWSP_SCRATCH_ADDR);
3224
3225         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3226
3227         /* Pad to end of cacheline */
3228         while ((unsigned long)batch % CACHELINE_BYTES)
3229                 *batch++ = MI_NOOP;
3230
3231         /*
3232          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3233          * execution depends on the length specified in terms of cache lines
3234          * in the register CTX_RCS_INDIRECT_CTX
3235          */
3236
3237         return batch;
3238 }
3239
3240 struct lri {
3241         i915_reg_t reg;
3242         u32 value;
3243 };
3244
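/*
 * Emit a single MI_LOAD_REGISTER_IMM covering all @count register/value
 * pairs (at most 63 per command), with a trailing MI_NOOP to keep the
 * emitted dword count even.
 */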
3245 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3246 {
3247         GEM_BUG_ON(!count || count > 63);
3248
3249         *batch++ = MI_LOAD_REGISTER_IMM(count);
3250         do {
3251                 *batch++ = i915_mmio_reg_offset(lri->reg);
3252                 *batch++ = lri->value;
3253         } while (lri++, --count);
3254         *batch++ = MI_NOOP;
3255
3256         return batch;
3257 }
3258
3259 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3260 {
3261         static const struct lri lri[] = {
3262                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3263                 {
3264                         COMMON_SLICE_CHICKEN2,
3265                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3266                                        0),
3267                 },
3268
3269                 /* BSpec: 11391 */
3270                 {
3271                         FF_SLICE_CHICKEN,
3272                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3273                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3274                 },
3275
3276                 /* BSpec: 11299 */
3277                 {
3278                         _3D_CHICKEN3,
3279                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3280                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3281                 }
3282         };
3283
3284         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3285
3286         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3287         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3288
3289         /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3290         batch = gen8_emit_pipe_control(batch,
3291                                        PIPE_CONTROL_FLUSH_L3 |
3292                                        PIPE_CONTROL_STORE_DATA_INDEX |
3293                                        PIPE_CONTROL_CS_STALL |
3294                                        PIPE_CONTROL_QW_WRITE,
3295                                        LRC_PPHWSP_SCRATCH_ADDR);
3296
3297         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3298
3299         /* WaMediaPoolStateCmdInWABB:bxt,glk */
3300         if (HAS_POOLED_EU(engine->i915)) {
3301                 /*
3302                  * The EU pool configuration is set up along with the golden
3303                  * context during context initialization. This value depends on
3304                  * the device type (2x6 or 3x6) and needs to be updated based
3305                  * on which subslice is disabled, especially for 2x6 devices.
3306                  * However, it is safe to load the default configuration of a
3307                  * 3x6 device instead of masking off the corresponding bits,
3308                  * because the HW ignores the bits of a disabled subslice and
3309                  * drops down to the appropriate config. Please see
3310                  * render_state_setup() in i915_gem_render_state.c for the
3311                  * possible configurations; to avoid duplication they are not
3312                  * shown here again.
3313                  */
3314                 *batch++ = GEN9_MEDIA_POOL_STATE;
3315                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
3316                 *batch++ = 0x00777000;
3317                 *batch++ = 0;
3318                 *batch++ = 0;
3319                 *batch++ = 0;
3320         }
3321
3322         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3323
3324         /* Pad to end of cacheline */
3325         while ((unsigned long)batch % CACHELINE_BYTES)
3326                 *batch++ = MI_NOOP;
3327
3328         return batch;
3329 }
3330
3331 static u32 *
3332 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3333 {
3334         int i;
3335
3336         /*
3337          * WaPipeControlBefore3DStateSamplePattern: cnl
3338          *
3339          * Ensure the engine is idle prior to programming a
3340          * 3DSTATE_SAMPLE_PATTERN during a context restore.
3341          */
3342         batch = gen8_emit_pipe_control(batch,
3343                                        PIPE_CONTROL_CS_STALL,
3344                                        0);
3345         /*
3346          * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3347          * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3348          * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3349          * confusing. Since gen8_emit_pipe_control() already advances the
3350          * batch by 6 dwords, we advance the other 10 here, completing a
3351          * cacheline. It's not clear if the workaround requires this padding
3352          * before other commands, or if it's just the regular padding we would
3353          * already have for the workaround bb, so leave it here for now.
3354          */
3355         for (i = 0; i < 10; i++)
3356                 *batch++ = MI_NOOP;
3357
3358         /* Pad to end of cacheline */
3359         while ((unsigned long)batch % CACHELINE_BYTES)
3360                 *batch++ = MI_NOOP;
3361
3362         return batch;
3363 }
3364
3365 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3366
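/*
 * Allocate a single page for the per-engine workaround batch buffers and
 * pin it high in the global GTT; the indirect-ctx and per-ctx pointers
 * programmed into the context image reference this fixed GGTT address.
 */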
3367 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3368 {
3369         struct drm_i915_gem_object *obj;
3370         struct i915_vma *vma;
3371         int err;
3372
3373         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3374         if (IS_ERR(obj))
3375                 return PTR_ERR(obj);
3376
3377         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3378         if (IS_ERR(vma)) {
3379                 err = PTR_ERR(vma);
3380                 goto err;
3381         }
3382
3383         err = i915_ggtt_pin(vma, 0, PIN_HIGH);
3384         if (err)
3385                 goto err;
3386
3387         engine->wa_ctx.vma = vma;
3388         return 0;
3389
3390 err:
3391         i915_gem_object_put(obj);
3392         return err;
3393 }
3394
3395 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3396 {
3397         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3398 }
3399
3400 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3401
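/*
 * Build the render engine's context workaround batches: pick the per-gen
 * indirect-ctx emitter (gen8/9/10; gen11+ need none), map the backing
 * page and record each batch's offset and size in engine->wa_ctx, with
 * every batch required to start on a cacheline boundary.
 */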
3402 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3403 {
3404         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3405         struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3406                                             &wa_ctx->per_ctx };
3407         wa_bb_func_t wa_bb_fn[2];
3408         struct page *page;
3409         void *batch, *batch_ptr;
3410         unsigned int i;
3411         int ret;
3412
3413         if (engine->class != RENDER_CLASS)
3414                 return 0;
3415
3416         switch (INTEL_GEN(engine->i915)) {
3417         case 12:
3418         case 11:
3419                 return 0;
3420         case 10:
3421                 wa_bb_fn[0] = gen10_init_indirectctx_bb;
3422                 wa_bb_fn[1] = NULL;
3423                 break;
3424         case 9:
3425                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
3426                 wa_bb_fn[1] = NULL;
3427                 break;
3428         case 8:
3429                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
3430                 wa_bb_fn[1] = NULL;
3431                 break;
3432         default:
3433                 MISSING_CASE(INTEL_GEN(engine->i915));
3434                 return 0;
3435         }
3436
3437         ret = lrc_setup_wa_ctx(engine);
3438         if (ret) {
3439                 DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
3440                 return ret;
3441         }
3442
3443         page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
3444         batch = batch_ptr = kmap_atomic(page);
3445
3446         /*
3447          * Emit the two workaround batch buffers, recording the offset from the
3448          * start of the workaround batch buffer object for each and their
3449          * respective sizes.
3450          */
3451         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3452                 wa_bb[i]->offset = batch_ptr - batch;
3453                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3454                                                   CACHELINE_BYTES))) {
3455                         ret = -EINVAL;
3456                         break;
3457                 }
3458                 if (wa_bb_fn[i])
3459                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
3460                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
3461         }
3462
3463         BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
3464
3465         kunmap_atomic(batch);
3466         if (ret)
3467                 lrc_destroy_wa_ctx(engine);
3468
3469         return ret;
3470 }
3471
3472 static void enable_error_interrupt(struct intel_engine_cs *engine)
3473 {
3474         u32 status;
3475
3476         engine->execlists.error_interrupt = 0;
3477         ENGINE_WRITE(engine, RING_EMR, ~0u);
3478         ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
3479
3480         status = ENGINE_READ(engine, RING_ESR);
3481         if (unlikely(status)) {
3482                 dev_err(engine->i915->drm.dev,
3483                         "engine '%s' resumed still in error: %08x\n",
3484                         engine->name, status);
3485                 __intel_gt_reset(engine->gt, engine->mask);
3486         }
3487
3488         /*
3489          * On current gen8+, we have 2 signals to play with
3490          *
3491          * - I915_ERROR_INSTRUCTION (bit 0)
3492          *
3493          *    Generate an error if the command parser encounters an invalid
3494          *    instruction
3495          *
3496          *    This is a fatal error.
3497          *
3498          * - CP_PRIV (bit 2)
3499          *
3500          *    Generate an error on privilege violation (where the CP replaces
3501          *    the instruction with a no-op). This also fires for writes into
3502          *    read-only scratch pages.
3503          *
3504          *    This is a non-fatal error, parsing continues.
3505          *
3506          * * there are a few others defined for odd HW that we do not use
3507          *
3508          * Since CP_PRIV fires for cases where we have chosen to ignore the
3509          * error (as the HW is validating and suppressing the mistakes), we
3510          * only unmask the instruction error bit.
3511          */
3512         ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
3513 }
3514
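/*
 * Program the engine for execlist submission: unmask HWSP writes, select
 * the run-list/execlist mode in RING_MODE (or disable legacy mode on
 * gen11+), clear STOP_RING, point RING_HWS_PGA at the status page and
 * unmask the command parser error interrupt.
 */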
3515 static void enable_execlists(struct intel_engine_cs *engine)
3516 {
3517         u32 mode;
3518
3519         assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3520
3521         intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3522
3523         if (INTEL_GEN(engine->i915) >= 11)
3524                 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
3525         else
3526                 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
3527         ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
3528
3529         ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
3530
3531         ENGINE_WRITE_FW(engine,
3532                         RING_HWS_PGA,
3533                         i915_ggtt_offset(engine->status_page.vma));
3534         ENGINE_POSTING_READ(engine, RING_HWS_PGA);
3535
3536         enable_error_interrupt(engine);
3537
3538         engine->context_tag = 0;
3539 }
3540
3541 static bool unexpected_starting_state(struct intel_engine_cs *engine)
3542 {
3543         bool unexpected = false;
3544
3545         if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
3546                 DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
3547                 unexpected = true;
3548         }
3549
3550         return unexpected;
3551 }
3552
3553 static int execlists_resume(struct intel_engine_cs *engine)
3554 {
3555         intel_mocs_init_engine(engine);
3556
3557         intel_engine_reset_breadcrumbs(engine);
3558
3559         if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
3560                 struct drm_printer p = drm_debug_printer(__func__);
3561
3562                 intel_engine_dump(engine, &p, NULL);
3563         }
3564
3565         enable_execlists(engine);
3566
3567         return 0;
3568 }
3569
3570 static void execlists_reset_prepare(struct intel_engine_cs *engine)
3571 {
3572         struct intel_engine_execlists * const execlists = &engine->execlists;
3573         unsigned long flags;
3574
3575         ENGINE_TRACE(engine, "depth<-%d\n",
3576                      atomic_read(&execlists->tasklet.count));
3577
3578         /*
3579          * Prevent request submission to the hardware until we have
3580          * completed the reset in i915_gem_reset_finish(). If a request
3581          * is completed by one engine, it may then queue a request
3582          * to a second via its execlists->tasklet *just* as we are
3583          * calling engine->resume() and also writing the ELSP.
3584          * Turning off the execlists->tasklet until the reset is over
3585          * prevents the race.
3586          */
3587         __tasklet_disable_sync_once(&execlists->tasklet);
3588         GEM_BUG_ON(!reset_in_progress(execlists));
3589
3590         /* And flush any current direct submission. */
3591         spin_lock_irqsave(&engine->active.lock, flags);
3592         spin_unlock_irqrestore(&engine->active.lock, flags);
3593
3594         /*
3595          * We stop the engines, otherwise we might get a failed reset and
3596          * a dead gpu (on elk). Also, a gpu as modern as kbl can suffer a
3597          * system hang if a batchbuffer is progressing when the reset is
3598          * issued, regardless of the READY_TO_RESET ack. Thus assume it is
3599          * best to stop the engines on all gens where we have a gpu
3600          * reset.
3601          *
3602          * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
3603          *
3604          * FIXME: Wa for more modern gens needs to be validated
3605          */
3606         intel_engine_stop_cs(engine);
3607 }
3608
3609 static void reset_csb_pointers(struct intel_engine_cs *engine)
3610 {
3611         struct intel_engine_execlists * const execlists = &engine->execlists;
3612         const unsigned int reset_value = execlists->csb_size - 1;
3613
3614         ring_set_paused(engine, 0);
3615
3616         /*
3617          * After a reset, the HW starts writing into CSB entry [0]. We
3618          * therefore have to set our HEAD pointer back one entry so that
3619          * the *first* entry we check is entry 0. To complicate this further,
3620          * as we don't wait for the first interrupt after reset, we have to
3621          * fake the HW write to point back to the last entry so that our
3622          * inline comparison of our cached head position against the last HW
3623          * write works even before the first interrupt.
3624          */
3625         execlists->csb_head = reset_value;
3626         WRITE_ONCE(*execlists->csb_write, reset_value);
3627         wmb(); /* Make sure this is visible to HW (paranoia?) */
3628
3629         /*
3630          * Sometimes Icelake forgets to reset its pointers on a GPU reset.
3631          * Bludgeon them with a mmio update to be sure.
3632          */
3633         ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3634                      reset_value << 8 | reset_value);
3635         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3636
3637         invalidate_csb_entries(&execlists->csb_status[0],
3638                                &execlists->csb_status[reset_value]);
3639 }
3640
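/*
 * RING_MI_MODE is a masked register (write-enable mask in the high word,
 * value in the low word). Clear the STOP_RING bit in the saved context
 * image so the engine is not left halted when this context is restored
 * after the reset.
 */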
3641 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
3642 {
3643         int x;
3644
3645         x = lrc_ring_mi_mode(engine);
3646         if (x != -1) {
3647                 regs[x + 1] &= ~STOP_RING;
3648                 regs[x + 1] |= STOP_RING << 16;
3649         }
3650 }
3651
3652 static void __execlists_reset_reg_state(const struct intel_context *ce,
3653                                         const struct intel_engine_cs *engine)
3654 {
3655         u32 *regs = ce->lrc_reg_state;
3656
3657         __reset_stop_ring(regs, engine);
3658 }
3659
3660 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
3661 {
3662         struct intel_engine_execlists * const execlists = &engine->execlists;
3663         struct intel_context *ce;
3664         struct i915_request *rq;
3665         u32 head;
3666
3667         mb(); /* paranoia: read the CSB pointers from after the reset */
3668         clflush(execlists->csb_write);
3669         mb();
3670
3671         process_csb(engine); /* drain preemption events */
3672
3673         /* Following the reset, we need to reload the CSB read/write pointers */
3674         reset_csb_pointers(engine);
3675
3676         /*
3677          * Save the currently executing context, even if we completed
3678          * its request, it was still running at the time of the
3679          * reset and will have been clobbered.
3680          */
3681         rq = execlists_active(execlists);
3682         if (!rq)
3683                 goto unwind;
3684
3685         ce = rq->context;
3686         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3687
3688         if (i915_request_completed(rq)) {
3689                 /* Idle context; tidy up the ring so we can restart afresh */
3690                 head = intel_ring_wrap(ce->ring, rq->tail);
3691                 goto out_replay;
3692         }
3693
3694         /* We still have requests in-flight; the engine should be active */
3695         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
3696
3697         /* Context has requests still in-flight; it should not be idle! */
3698         GEM_BUG_ON(i915_active_is_idle(&ce->active));
3699
3700         rq = active_request(ce->timeline, rq);
3701         head = intel_ring_wrap(ce->ring, rq->head);
3702         GEM_BUG_ON(head == ce->ring->tail);
3703
3704         /*
3705          * If this request hasn't started yet, e.g. it is waiting on a
3706          * semaphore, we need to avoid skipping the request or else we
3707          * break the signaling chain. However, if the context is corrupt
3708          * the request will not restart and we will be stuck with a wedged
3709          * device. It is quite often the case that if we issue a reset
3710          * while the GPU is loading the context image, that the context
3711          * image becomes corrupt.
3712          *
3713          * Otherwise, if we have not started yet, the request should replay
3714          * perfectly and we do not need to flag the result as being erroneous.
3715          */
3716         if (!i915_request_started(rq))
3717                 goto out_replay;
3718
3719         /*
3720          * If the request was innocent, we leave the request in the ELSP
3721          * and will try to replay it on restarting. The context image may
3722          * have been corrupted by the reset, in which case we may have
3723          * to service a new GPU hang, but more likely we can continue on
3724          * without impact.
3725          *
3726          * If the request was guilty, we presume the context is corrupt
3727          * and have to at least restore the RING register in the context
3728          * image back to the expected values to skip over the guilty request.
3729          */
3730         __i915_request_reset(rq, stalled);
3731         if (!stalled)
3732                 goto out_replay;
3733
3734         /*
3735          * We want a simple context + ring to execute the breadcrumb update.
3736          * We cannot rely on the context being intact across the GPU hang,
3737          * so clear it and rebuild just what we need for the breadcrumb.
3738          * All pending requests for this context will be zapped, and any
3739          * future request will be after userspace has had the opportunity
3740          * to recreate its own state.
3741          */
3742         GEM_BUG_ON(!intel_context_is_pinned(ce));
3743         restore_default_state(ce, engine);
3744
3745 out_replay:
3746         ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
3747                      head, ce->ring->tail);
3748         __execlists_reset_reg_state(ce, engine);
3749         __execlists_update_reg_state(ce, engine, head);
3750         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
3751
3752 unwind:
3753         /* Push back any incomplete requests for replay after the reset. */
3754         cancel_port_requests(execlists);
3755         __unwind_incomplete_requests(engine);
3756 }
3757
3758 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
3759 {
3760         unsigned long flags;
3761
3762         ENGINE_TRACE(engine, "\n");
3763
3764         spin_lock_irqsave(&engine->active.lock, flags);
3765
3766         __execlists_reset(engine, stalled);
3767
3768         spin_unlock_irqrestore(&engine->active.lock, flags);
3769 }
3770
3771 static void nop_submission_tasklet(unsigned long data)
3772 {
3773         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
3774
3775         /* The driver is wedged; don't process any more events. */
3776         WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
3777 }
3778
3779 static void execlists_reset_cancel(struct intel_engine_cs *engine)
3780 {
3781         struct intel_engine_execlists * const execlists = &engine->execlists;
3782         struct i915_request *rq, *rn;
3783         struct rb_node *rb;
3784         unsigned long flags;
3785
3786         ENGINE_TRACE(engine, "\n");
3787
3788         /*
3789          * Before we call engine->cancel_requests(), we should have exclusive
3790          * access to the submission state. This is arranged for us by the
3791          * caller disabling the interrupt generation, the tasklet and other
3792          * threads that may then access the same state, giving us a free hand
3793          * to reset state. However, we still need to let lockdep be aware that
3794          * we know this state may be accessed in hardirq context, so we
3795          * disable the irq around this manipulation and we want to keep
3796          * the spinlock focused on its duties and not accidentally conflate
3797          * coverage to the submission's irq state. (Similarly, although we
3798          * shouldn't need to disable irq around the manipulation of the
3799          * submission's irq state, we also wish to remind ourselves that
3800          * it is irq state.)
3801          */
3802         spin_lock_irqsave(&engine->active.lock, flags);
3803
3804         __execlists_reset(engine, true);
3805
3806         /* Mark all executing requests as skipped. */
3807         list_for_each_entry(rq, &engine->active.requests, sched.link)
3808                 mark_eio(rq);
3809
3810         /* Flush the queued requests to the timeline list (for retiring). */
3811         while ((rb = rb_first_cached(&execlists->queue))) {
3812                 struct i915_priolist *p = to_priolist(rb);
3813                 int i;
3814
3815                 priolist_for_each_request_consume(rq, rn, p, i) {
3816                         mark_eio(rq);
3817                         __i915_request_submit(rq);
3818                 }
3819
3820                 rb_erase_cached(&p->node, &execlists->queue);
3821                 i915_priolist_free(p);
3822         }
3823
3824         /* On-hold requests will be flushed to timeline upon their release */
3825         list_for_each_entry(rq, &engine->active.hold, sched.link)
3826                 mark_eio(rq);
3827
3828         /* Cancel all attached virtual engines */
3829         while ((rb = rb_first_cached(&execlists->virtual))) {
3830                 struct virtual_engine *ve =
3831                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
3832
3833                 rb_erase_cached(rb, &execlists->virtual);
3834                 RB_CLEAR_NODE(rb);
3835
3836                 spin_lock(&ve->base.active.lock);
3837                 rq = fetch_and_zero(&ve->request);
3838                 if (rq) {
3839                         mark_eio(rq);
3840
3841                         rq->engine = engine;
3842                         __i915_request_submit(rq);
3843                         i915_request_put(rq);
3844
3845                         ve->base.execlists.queue_priority_hint = INT_MIN;
3846                 }
3847                 spin_unlock(&ve->base.active.lock);
3848         }
3849
3850         /* Remaining _unready_ requests will be nop'ed when submitted */
3851
3852         execlists->queue_priority_hint = INT_MIN;
3853         execlists->queue = RB_ROOT_CACHED;
3854
3855         GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
3856         execlists->tasklet.func = nop_submission_tasklet;
3857
3858         spin_unlock_irqrestore(&engine->active.lock, flags);
3859 }
3860
3861 static void execlists_reset_finish(struct intel_engine_cs *engine)
3862 {
3863         struct intel_engine_execlists * const execlists = &engine->execlists;
3864
3865         /*
3866          * After a GPU reset, we may have requests to replay. Do so now while
3867          * we still have the forcewake to be sure that the GPU is not allowed
3868          * to sleep before we restart and reload a context.
3869          */
3870         GEM_BUG_ON(!reset_in_progress(execlists));
3871         if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
3872                 execlists->tasklet.func(execlists->tasklet.data);
3873
3874         if (__tasklet_enable(&execlists->tasklet))
3875                 /* And kick in case we missed a new request submission. */
3876                 tasklet_hi_schedule(&execlists->tasklet);
3877         ENGINE_TRACE(engine, "depth->%d\n",
3878                      atomic_read(&execlists->tasklet.count));
3879 }
3880
3881 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
3882                                     u64 offset, u32 len,
3883                                     const unsigned int flags)
3884 {
3885         u32 *cs;
3886
3887         cs = intel_ring_begin(rq, 4);
3888         if (IS_ERR(cs))
3889                 return PTR_ERR(cs);
3890
3891         /*
3892          * WaDisableCtxRestoreArbitration:bdw,chv
3893          *
3894          * We don't need to perform MI_ARB_ENABLE as often as we do (in
3895          * particular all the gens that do not need the w/a at all!), if we
3896          * took care to make sure that on every switch into this context
3897          * (both ordinary and for preemption) arbitration was enabled, we
3898          * would be fine.  However, for gen8 there is another w/a that
3899          * requires us to not preempt inside GPGPU execution, so we keep
3900          * arbitration disabled for gen8 batches. Arbitration will be
3901          * re-enabled before we close the request
3902          * (engine->emit_fini_breadcrumb).
3903          */
3904         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3905
3906         /* FIXME(BDW+): Address space and security selectors. */
3907         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3908                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3909         *cs++ = lower_32_bits(offset);
3910         *cs++ = upper_32_bits(offset);
3911
3912         intel_ring_advance(rq, cs);
3913
3914         return 0;
3915 }
3916
3917 static int gen8_emit_bb_start(struct i915_request *rq,
3918                               u64 offset, u32 len,
3919                               const unsigned int flags)
3920 {
3921         u32 *cs;
3922
3923         cs = intel_ring_begin(rq, 6);
3924         if (IS_ERR(cs))
3925                 return PTR_ERR(cs);
3926
3927         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3928
3929         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3930                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3931         *cs++ = lower_32_bits(offset);
3932         *cs++ = upper_32_bits(offset);
3933
3934         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3935         *cs++ = MI_NOOP;
3936
3937         intel_ring_advance(rq, cs);
3938
3939         return 0;
3940 }
3941
3942 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
3943 {
3944         ENGINE_WRITE(engine, RING_IMR,
3945                      ~(engine->irq_enable_mask | engine->irq_keep_mask));
3946         ENGINE_POSTING_READ(engine, RING_IMR);
3947 }
3948
3949 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
3950 {
3951         ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
3952 }
3953
3954 static int gen8_emit_flush(struct i915_request *request, u32 mode)
3955 {
3956         u32 cmd, *cs;
3957
3958         cs = intel_ring_begin(request, 4);
3959         if (IS_ERR(cs))
3960                 return PTR_ERR(cs);
3961
3962         cmd = MI_FLUSH_DW + 1;
3963
3964         /* We always require a command barrier so that subsequent
3965          * commands, such as breadcrumb interrupts, are strictly ordered
3966          * wrt the contents of the write cache being flushed to memory
3967          * (and thus being coherent from the CPU).
3968          */
3969         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
3970
3971         if (mode & EMIT_INVALIDATE) {
3972                 cmd |= MI_INVALIDATE_TLB;
3973                 if (request->engine->class == VIDEO_DECODE_CLASS)
3974                         cmd |= MI_INVALIDATE_BSD;
3975         }
3976
3977         *cs++ = cmd;
3978         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
3979         *cs++ = 0; /* upper addr */
3980         *cs++ = 0; /* value */
3981         intel_ring_advance(request, cs);
3982
3983         return 0;
3984 }
3985
3986 static int gen8_emit_flush_render(struct i915_request *request,
3987                                   u32 mode)
3988 {
3989         bool vf_flush_wa = false, dc_flush_wa = false;
3990         u32 *cs, flags = 0;
3991         int len;
3992
3993         flags |= PIPE_CONTROL_CS_STALL;
3994
3995         if (mode & EMIT_FLUSH) {
3996                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3997                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3998                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3999                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4000         }
4001
4002         if (mode & EMIT_INVALIDATE) {
4003                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4004                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4005                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4006                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4007                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4008                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4009                 flags |= PIPE_CONTROL_QW_WRITE;
4010                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4011
4012                 /*
4013                  * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
4014                  * pipe control.
4015                  */
4016                 if (IS_GEN(request->i915, 9))
4017                         vf_flush_wa = true;
4018
4019                 /* WaForGAMHang:kbl */
4020                 if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
4021                         dc_flush_wa = true;
4022         }
4023
4024         len = 6;
4025
4026         if (vf_flush_wa)
4027                 len += 6;
4028
4029         if (dc_flush_wa)
4030                 len += 12;
4031
4032         cs = intel_ring_begin(request, len);
4033         if (IS_ERR(cs))
4034                 return PTR_ERR(cs);
4035
4036         if (vf_flush_wa)
4037                 cs = gen8_emit_pipe_control(cs, 0, 0);
4038
4039         if (dc_flush_wa)
4040                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
4041                                             0);
4042
4043         cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4044
4045         if (dc_flush_wa)
4046                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
4047
4048         intel_ring_advance(request, cs);
4049
4050         return 0;
4051 }
4052
4053 static int gen11_emit_flush_render(struct i915_request *request,
4054                                    u32 mode)
4055 {
4056         if (mode & EMIT_FLUSH) {
4057                 u32 *cs;
4058                 u32 flags = 0;
4059
4060                 flags |= PIPE_CONTROL_CS_STALL;
4061
4062                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4063                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4064                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4065                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4066                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4067                 flags |= PIPE_CONTROL_QW_WRITE;
4068                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4069
4070                 cs = intel_ring_begin(request, 6);
4071                 if (IS_ERR(cs))
4072                         return PTR_ERR(cs);
4073
4074                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4075                 intel_ring_advance(request, cs);
4076         }
4077
4078         if (mode & EMIT_INVALIDATE) {
4079                 u32 *cs;
4080                 u32 flags = 0;
4081
4082                 flags |= PIPE_CONTROL_CS_STALL;
4083
4084                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4085                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4086                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4087                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4088                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4089                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4090                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4091                 flags |= PIPE_CONTROL_QW_WRITE;
4092                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4093
4094                 cs = intel_ring_begin(request, 6);
4095                 if (IS_ERR(cs))
4096                         return PTR_ERR(cs);
4097
4098                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4099                 intel_ring_advance(request, cs);
4100         }
4101
4102         return 0;
4103 }
4104
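/*
 * Build the gen12 MI_ARB_CHECK variant that toggles the command
 * pre-parser: bit 8 selects the pre-fetch disable field and bit 0
 * carries the requested state.
 */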
4105 static u32 preparser_disable(bool state)
4106 {
4107         return MI_ARB_CHECK | 1 << 8 | state;
4108 }
4109
4110 static int gen12_emit_flush_render(struct i915_request *request,
4111                                    u32 mode)
4112 {
4113         if (mode & EMIT_FLUSH) {
4114                 u32 flags = 0;
4115                 u32 *cs;
4116
4117                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4118                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4119                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4120                 /* Wa_1409600907:tgl */
4121                 flags |= PIPE_CONTROL_DEPTH_STALL;
4122                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4123                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4124                 flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
4125
4126                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4127                 flags |= PIPE_CONTROL_QW_WRITE;
4128
4129                 flags |= PIPE_CONTROL_CS_STALL;
4130
4131                 cs = intel_ring_begin(request, 6);
4132                 if (IS_ERR(cs))
4133                         return PTR_ERR(cs);
4134
4135                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4136                 intel_ring_advance(request, cs);
4137         }
4138
4139         if (mode & EMIT_INVALIDATE) {
4140                 u32 flags = 0;
4141                 u32 *cs;
4142
4143                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4144                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4145                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4146                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4147                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4148                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4149                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4150                 flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
4151
4152                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4153                 flags |= PIPE_CONTROL_QW_WRITE;
4154
4155                 flags |= PIPE_CONTROL_CS_STALL;
4156
4157                 cs = intel_ring_begin(request, 8);
4158                 if (IS_ERR(cs))
4159                         return PTR_ERR(cs);
4160
4161                 /*
4162                  * Prevent the pre-parser from skipping past the TLB
4163                  * invalidate and loading a stale page for the batch
4164                  * buffer / request payload.
4165                  */
4166                 *cs++ = preparser_disable(true);
4167
4168                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4169
4170                 *cs++ = preparser_disable(false);
4171                 intel_ring_advance(request, cs);
4172
4173                 /*
4174                  * Wa_1604544889:tgl
4175                  */
4176                 if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
4177                         flags = 0;
4178                         flags |= PIPE_CONTROL_CS_STALL;
4179                         flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
4180
4181                         flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4182                         flags |= PIPE_CONTROL_QW_WRITE;
4183
4184                         cs = intel_ring_begin(request, 6);
4185                         if (IS_ERR(cs))
4186                                 return PTR_ERR(cs);
4187
4188                         cs = gen8_emit_pipe_control(cs, flags,
4189                                                     LRC_PPHWSP_SCRATCH_ADDR);
4190                         intel_ring_advance(request, cs);
4191                 }
4192         }
4193
4194         return 0;
4195 }
4196
4197 /*
4198  * Reserve space for 2 NOOPs at the end of each request to be
4199  * used as a workaround for not being allowed to do lite
4200  * restore with HEAD==TAIL (WaIdleLiteRestore).
4201  */
4202 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4203 {
4204         /* Ensure there's always at least one preemption point per-request. */
4205         *cs++ = MI_ARB_CHECK;
4206         *cs++ = MI_NOOP;
4207         request->wa_tail = intel_ring_offset(request, cs);
4208
4209         return cs;
4210 }
4211
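     /*
      * Spin on the per-engine preemption semaphore in the HWSP: the
      * MI_SEMAPHORE_WAIT polls the GGTT address until the dword there reads
      * zero, holding the ring at this arbitration point while
      * preempt-to-busy swaps out the context.
      */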
4212 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4213 {
4214         *cs++ = MI_SEMAPHORE_WAIT |
4215                 MI_SEMAPHORE_GLOBAL_GTT |
4216                 MI_SEMAPHORE_POLL |
4217                 MI_SEMAPHORE_SAD_EQ_SDD;
4218         *cs++ = 0;
4219         *cs++ = intel_hws_preempt_address(request->engine);
4220         *cs++ = 0;
4221
4222         return cs;
4223 }
4224
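     /*
      * Common tail of the final breadcrumb: raise the user interrupt,
      * re-enable arbitration, optionally insert the preemption busywait,
      * then record the ring tail before appending the WaIdleLiteRestore
      * NOOPs.
      */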
4225 static __always_inline u32*
4226 gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
4227                                  u32 *cs)
4228 {
4229         *cs++ = MI_USER_INTERRUPT;
4230
4231         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4232         if (intel_engine_has_semaphores(request->engine))
4233                 cs = emit_preempt_busywait(request, cs);
4234
4235         request->tail = intel_ring_offset(request, cs);
4236         assert_ring_tail_valid(request->ring, request->tail);
4237
4238         return gen8_emit_wa_tail(request, cs);
4239 }
4240
4241 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4242 {
4243         cs = gen8_emit_ggtt_write(cs,
4244                                   request->fence.seqno,
4245                                   i915_request_active_timeline(request)->hwsp_offset,
4246                                   0);
4247
4248         return gen8_emit_fini_breadcrumb_footer(request, cs);
4249 }
4250
4251 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4252 {
4253         cs = gen8_emit_pipe_control(cs,
4254                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4255                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4256                                     PIPE_CONTROL_DC_FLUSH_ENABLE,
4257                                     0);
4258
4259         /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4260         cs = gen8_emit_ggtt_write_rcs(cs,
4261                                       request->fence.seqno,
4262                                       i915_request_active_timeline(request)->hwsp_offset,
4263                                       PIPE_CONTROL_FLUSH_ENABLE |
4264                                       PIPE_CONTROL_CS_STALL);
4265
4266         return gen8_emit_fini_breadcrumb_footer(request, cs);
4267 }
4268
4269 static u32 *
4270 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4271 {
4272         cs = gen8_emit_ggtt_write_rcs(cs,
4273                                       request->fence.seqno,
4274                                       i915_request_active_timeline(request)->hwsp_offset,
4275                                       PIPE_CONTROL_CS_STALL |
4276                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
4277                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4278                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4279                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
4280                                       PIPE_CONTROL_FLUSH_ENABLE);
4281
4282         return gen8_emit_fini_breadcrumb_footer(request, cs);
4283 }
4284
4285 /*
4286  * Note that the CS instruction pre-parser will not stall on the breadcrumb
4287  * flush and will continue pre-fetching the instructions after it before the
4288  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4289  * BB_START/END instructions, so, even though we might pre-fetch the preamble
4290  * of the next request before the memory has been flushed, we're guaranteed that
4291  * we won't access the batch itself too early.
4292  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4293  * so, if the current request is modifying an instruction in the next request on
4294  * the same intel_context, we might pre-fetch and then execute the pre-update
4295  * instruction. To avoid this, the users of self-modifying code should either
4296  * disable the parser around the code emitting the memory writes, via a new flag
4297  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4298  * the in-kernel use-cases we've opted to use a separate context, see
4299  * reloc_gpu() as an example.
4300  * All the above applies only to the instructions themselves. Non-inline data
4301  * used by the instructions is not pre-fetched.
4302  */
4303
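     /*
      * Gen12 flavour of the preemption busywait, using the longer
      * MI_SEMAPHORE_WAIT_TOKEN encoding and an MI_NOOP of padding to keep
      * the emission qword aligned.
      */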
4304 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4305 {
4306         *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4307                 MI_SEMAPHORE_GLOBAL_GTT |
4308                 MI_SEMAPHORE_POLL |
4309                 MI_SEMAPHORE_SAD_EQ_SDD;
4310         *cs++ = 0;
4311         *cs++ = intel_hws_preempt_address(request->engine);
4312         *cs++ = 0;
4313         *cs++ = 0;
4314         *cs++ = MI_NOOP;
4315
4316         return cs;
4317 }
4318
4319 static __always_inline u32*
4320 gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
4321 {
4322         *cs++ = MI_USER_INTERRUPT;
4323
4324         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4325         if (intel_engine_has_semaphores(request->engine))
4326                 cs = gen12_emit_preempt_busywait(request, cs);
4327
4328         request->tail = intel_ring_offset(request, cs);
4329         assert_ring_tail_valid(request->ring, request->tail);
4330
4331         return gen8_emit_wa_tail(request, cs);
4332 }
4333
4334 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4335 {
4336         cs = gen8_emit_ggtt_write(cs,
4337                                   request->fence.seqno,
4338                                   i915_request_active_timeline(request)->hwsp_offset,
4339                                   0);
4340
4341         return gen12_emit_fini_breadcrumb_footer(request, cs);
4342 }
4343
4344 static u32 *
4345 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4346 {
4347         cs = gen8_emit_ggtt_write_rcs(cs,
4348                                       request->fence.seqno,
4349                                       i915_request_active_timeline(request)->hwsp_offset,
4350                                       PIPE_CONTROL_CS_STALL |
4351                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
4352                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4353                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4354                                       /* Wa_1409600907:tgl */
4355                                       PIPE_CONTROL_DEPTH_STALL |
4356                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
4357                                       PIPE_CONTROL_FLUSH_ENABLE |
4358                                       PIPE_CONTROL_HDC_PIPELINE_FLUSH);
4359
4360         return gen12_emit_fini_breadcrumb_footer(request, cs);
4361 }
4362
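     /* The engine is idling: stop the timeslice and preemption-timeout timers. */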
4363 static void execlists_park(struct intel_engine_cs *engine)
4364 {
4365         cancel_timer(&engine->execlists.timer);
4366         cancel_timer(&engine->execlists.preempt);
4367 }
4368
4369 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
4370 {
4371         engine->submit_request = execlists_submit_request;
4372         engine->schedule = i915_schedule;
4373         engine->execlists.tasklet.func = execlists_submission_tasklet;
4374
4375         engine->reset.prepare = execlists_reset_prepare;
4376         engine->reset.rewind = execlists_reset_rewind;
4377         engine->reset.cancel = execlists_reset_cancel;
4378         engine->reset.finish = execlists_reset_finish;
4379
4380         engine->park = execlists_park;
4381         engine->unpark = NULL;
4382
4383         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4384         if (!intel_vgpu_active(engine->i915)) {
4385                 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4386                 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
4387                         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4388         }
4389
4390         if (INTEL_GEN(engine->i915) >= 12)
4391                 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
4392
4393         if (intel_engine_has_preemption(engine))
4394                 engine->emit_bb_start = gen8_emit_bb_start;
4395         else
4396                 engine->emit_bb_start = gen8_emit_bb_start_noarb;
4397 }
4398
4399 static void execlists_shutdown(struct intel_engine_cs *engine)
4400 {
4401         /* Synchronise with residual timers and any softirq they raise */
4402         del_timer_sync(&engine->execlists.timer);
4403         del_timer_sync(&engine->execlists.preempt);
4404         tasklet_kill(&engine->execlists.tasklet);
4405 }
4406
4407 static void execlists_release(struct intel_engine_cs *engine)
4408 {
4409         execlists_shutdown(engine);
4410
4411         intel_engine_cleanup_common(engine);
4412         lrc_destroy_wa_ctx(engine);
4413 }
4414
4415 static void
4416 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
4417 {
4418         /* Default vfuncs which can be overridden by each engine. */
4419
4420         engine->resume = execlists_resume;
4421
4422         engine->cops = &execlists_context_ops;
4423         engine->request_alloc = execlists_request_alloc;
4424
4425         engine->emit_flush = gen8_emit_flush;
4426         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4427         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
4428         if (INTEL_GEN(engine->i915) >= 12)
4429                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
4430
4431         engine->set_default_submission = intel_execlists_set_default_submission;
4432
4433         if (INTEL_GEN(engine->i915) < 11) {
4434                 engine->irq_enable = gen8_logical_ring_enable_irq;
4435                 engine->irq_disable = gen8_logical_ring_disable_irq;
4436         } else {
4437                 /*
4438                  * TODO: On Gen11 interrupt masks need to be clear
4439                  * to allow C6 entry. Keep interrupts enabled and
4440                  * take the hit of generating extra interrupts
4441                  * until a more refined solution exists.
4442                  */
4443         }
4444 }
4445
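     /*
      * Pick the interrupt bit positions for this engine: gen8-10 pack the
      * per-engine bits into shared GT registers (hence the per-engine
      * shift), whereas gen11+ report engine interrupts individually and
      * need no shift.
      */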
4446 static inline void
4447 logical_ring_default_irqs(struct intel_engine_cs *engine)
4448 {
4449         unsigned int shift = 0;
4450
4451         if (INTEL_GEN(engine->i915) < 11) {
4452                 const u8 irq_shifts[] = {
4453                         [RCS0]  = GEN8_RCS_IRQ_SHIFT,
4454                         [BCS0]  = GEN8_BCS_IRQ_SHIFT,
4455                         [VCS0]  = GEN8_VCS0_IRQ_SHIFT,
4456                         [VCS1]  = GEN8_VCS1_IRQ_SHIFT,
4457                         [VECS0] = GEN8_VECS_IRQ_SHIFT,
4458                 };
4459
4460                 shift = irq_shifts[engine->id];
4461         }
4462
4463         engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
4464         engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
4465         engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
4466 }
4467
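     /* The render engine needs the heavier, per-gen flush and breadcrumb emitters. */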
4468 static void rcs_submission_override(struct intel_engine_cs *engine)
4469 {
4470         switch (INTEL_GEN(engine->i915)) {
4471         case 12:
4472                 engine->emit_flush = gen12_emit_flush_render;
4473                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4474                 break;
4475         case 11:
4476                 engine->emit_flush = gen11_emit_flush_render;
4477                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4478                 break;
4479         default:
4480                 engine->emit_flush = gen8_emit_flush_render;
4481                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4482                 break;
4483         }
4484 }
4485
4486 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
4487 {
4488         struct intel_engine_execlists * const execlists = &engine->execlists;
4489         struct drm_i915_private *i915 = engine->i915;
4490         struct intel_uncore *uncore = engine->uncore;
4491         u32 base = engine->mmio_base;
4492
4493         tasklet_init(&engine->execlists.tasklet,
4494                      execlists_submission_tasklet, (unsigned long)engine);
4495         timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
4496         timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
4497
4498         logical_ring_default_vfuncs(engine);
4499         logical_ring_default_irqs(engine);
4500
4501         if (engine->class == RENDER_CLASS)
4502                 rcs_submission_override(engine);
4503
4504         if (intel_init_workaround_bb(engine))
4505                 /*
4506                  * We continue even if we fail to initialize the WA batch
4507                  * because we only expect rare glitches and nothing
4508                  * critical enough to prevent us from using the GPU.
4509                  */
4510                 DRM_ERROR("WA batch buffer initialization failed\n");
4511
4512         if (HAS_LOGICAL_RING_ELSQ(i915)) {
4513                 execlists->submit_reg = uncore->regs +
4514                         i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
4515                 execlists->ctrl_reg = uncore->regs +
4516                         i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
4517         } else {
4518                 execlists->submit_reg = uncore->regs +
4519                         i915_mmio_reg_offset(RING_ELSP(base));
4520         }
4521
4522         execlists->csb_status =
4523                 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
4524
4525         execlists->csb_write =
4526                 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
4527
4528         if (INTEL_GEN(i915) < 11)
4529                 execlists->csb_size = GEN8_CSB_ENTRIES;
4530         else
4531                 execlists->csb_size = GEN11_CSB_ENTRIES;
4532
4533         reset_csb_pointers(engine);
4534
4535         /* Finally, take ownership and responsibility for cleanup! */
4536         engine->release = execlists_release;
4537
4538         return 0;
4539 }
4540
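     /*
      * Per-gen default for the RCS INDIRECT_CTX offset field; the raw value
      * is shifted into its register slot by init_wa_bb_reg_state().
      */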
4541 static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
4542 {
4543         u32 indirect_ctx_offset;
4544
4545         switch (INTEL_GEN(engine->i915)) {
4546         default:
4547                 MISSING_CASE(INTEL_GEN(engine->i915));
4548                 /* fall through */
4549         case 12:
4550                 indirect_ctx_offset =
4551                         GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4552                 break;
4553         case 11:
4554                 indirect_ctx_offset =
4555                         GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4556                 break;
4557         case 10:
4558                 indirect_ctx_offset =
4559                         GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4560                 break;
4561         case 9:
4562                 indirect_ctx_offset =
4563                         GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4564                 break;
4565         case 8:
4566                 indirect_ctx_offset =
4567                         GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4568                 break;
4569         }
4570
4571         return indirect_ctx_offset;
4572 }
4573
4574
4575 static void init_common_reg_state(u32 * const regs,
4576                                   const struct intel_engine_cs *engine,
4577                                   const struct intel_ring *ring,
4578                                   bool inhibit)
4579 {
4580         u32 ctl;
4581
4582         ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
4583         ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
4584         if (inhibit)
4585                 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
4586         if (INTEL_GEN(engine->i915) < 11)
4587                 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
4588                                            CTX_CTRL_RS_CTX_ENABLE);
4589         regs[CTX_CONTEXT_CONTROL] = ctl;
4590
4591         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
4592 }
4593
4594 static void init_wa_bb_reg_state(u32 * const regs,
4595                                  const struct intel_engine_cs *engine,
4596                                  u32 pos_bb_per_ctx)
4597 {
4598         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
4599
4600         if (wa_ctx->per_ctx.size) {
4601                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4602
4603                 regs[pos_bb_per_ctx] =
4604                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
4605         }
4606
4607         if (wa_ctx->indirect_ctx.size) {
4608                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4609
4610                 regs[pos_bb_per_ctx + 2] =
4611                         (ggtt_offset + wa_ctx->indirect_ctx.offset) |
4612                         (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
4613
4614                 regs[pos_bb_per_ctx + 4] =
4615                         intel_lr_indirect_ctx_offset(engine) << 6;
4616         }
4617 }
4618
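     /*
      * Point the context at its PPGTT: the PML4 for 4-level address spaces,
      * otherwise all four PDP descriptors.
      */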
4619 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
4620 {
4621         if (i915_vm_is_4lvl(&ppgtt->vm)) {
4622                 /*
4623                  * 64b PPGTT (48bit canonical): PDP0_DESCRIPTOR contains the
4624                  * base address of the PML4; the other PDP descriptors are ignored.
4625                  */
4626                 ASSIGN_CTX_PML4(ppgtt, regs);
4627         } else {
4628                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
4629                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
4630                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
4631                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
4632         }
4633 }
4634
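     /*
      * A context bound to the GGTT uses the aliasing ppgtt hanging off it;
      * otherwise the address space is already a full ppgtt.
      */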
4635 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
4636 {
4637         if (i915_is_ggtt(vm))
4638                 return i915_vm_to_ggtt(vm)->alias;
4639         else
4640                 return i915_vm_to_ppgtt(vm);
4641 }
4642
4643 static void execlists_init_reg_state(u32 *regs,
4644                                      const struct intel_context *ce,
4645                                      const struct intel_engine_cs *engine,
4646                                      const struct intel_ring *ring,
4647                                      bool inhibit)
4648 {
4649         /*
4650          * A context is actually a big batch buffer with several
4651          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
4652          * values we are setting here are only for the first context restore:
4653          * on a subsequent save, the GPU will recreate this batchbuffer with new
4654          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
4655          * we are not initializing here).
4656          *
4657          * Must keep consistent with virtual_update_register_offsets().
4658          */
4659         set_offsets(regs, reg_offsets(engine), engine, inhibit);
4660
4661         init_common_reg_state(regs, engine, ring, inhibit);
4662         init_ppgtt_reg_state(regs, vm_alias(ce->vm));
4663
4664         init_wa_bb_reg_state(regs, engine,
4665                              INTEL_GEN(engine->i915) >= 12 ?
4666                              GEN12_CTX_BB_PER_CTX_PTR :
4667                              CTX_BB_PER_CTX_PTR);
4668
4669         __reset_stop_ring(regs, engine);
4670 }
4671
4672 static int
4673 populate_lr_context(struct intel_context *ce,
4674                     struct drm_i915_gem_object *ctx_obj,
4675                     struct intel_engine_cs *engine,
4676                     struct intel_ring *ring)
4677 {
4678         bool inhibit = true;
4679         void *vaddr;
4680         int ret;
4681
4682         vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
4683         if (IS_ERR(vaddr)) {
4684                 ret = PTR_ERR(vaddr);
4685                 DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
4686                 return ret;
4687         }
4688
4689         set_redzone(vaddr, engine);
4690
4691         if (engine->default_state) {
4692                 void *defaults;
4693
4694                 defaults = i915_gem_object_pin_map(engine->default_state,
4695                                                    I915_MAP_WB);
4696                 if (IS_ERR(defaults)) {
4697                         ret = PTR_ERR(defaults);
4698                         goto err_unpin_ctx;
4699                 }
4700
4701                 memcpy(vaddr, defaults, engine->context_size);
4702                 i915_gem_object_unpin_map(engine->default_state);
4703                 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
4704                 inhibit = false;
4705         }
4706
4707         /* Clear the ppHWSP (inc. per-context counters) */
4708         memset(vaddr, 0, PAGE_SIZE);
4709
4710         /*
4711          * The second page of the context object contains some registers which
4712          * must be set up prior to the first execution.
4713          */
4714         execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
4715                                  ce, engine, ring, inhibit);
4716
4717         ret = 0;
4718 err_unpin_ctx:
4719         __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
4720         i915_gem_object_unpin_map(ctx_obj);
4721         return ret;
4722 }
4723
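     /*
      * Allocate everything backing a logical ring context: the state object
      * (plus a redzone page under CONFIG_DRM_I915_DEBUG_GEM), a timeline
      * (reusing the global HWSP for kernel/barrier contexts) and the ring,
      * then populate the default register state.
      */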
4724 static int __execlists_context_alloc(struct intel_context *ce,
4725                                      struct intel_engine_cs *engine)
4726 {
4727         struct drm_i915_gem_object *ctx_obj;
4728         struct intel_ring *ring;
4729         struct i915_vma *vma;
4730         u32 context_size;
4731         int ret;
4732
4733         GEM_BUG_ON(ce->state);
4734         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
4735
4736         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4737                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
4738
4739         ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
4740         if (IS_ERR(ctx_obj))
4741                 return PTR_ERR(ctx_obj);
4742
4743         vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
4744         if (IS_ERR(vma)) {
4745                 ret = PTR_ERR(vma);
4746                 goto error_deref_obj;
4747         }
4748
4749         if (!ce->timeline) {
4750                 struct intel_timeline *tl;
4751                 struct i915_vma *hwsp;
4752
4753                 /*
4754                  * Use the static global HWSP for the kernel context, and
4755                  * a dynamically allocated cacheline for everyone else.
4756                  */
4757                 hwsp = NULL;
4758                 if (unlikely(intel_context_is_barrier(ce)))
4759                         hwsp = engine->status_page.vma;
4760
4761                 tl = intel_timeline_create(engine->gt, hwsp);
4762                 if (IS_ERR(tl)) {
4763                         ret = PTR_ERR(tl);
4764                         goto error_deref_obj;
4765                 }
4766
4767                 ce->timeline = tl;
4768         }
4769
4770         ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
4771         if (IS_ERR(ring)) {
4772                 ret = PTR_ERR(ring);
4773                 goto error_deref_obj;
4774         }
4775
4776         ret = populate_lr_context(ce, ctx_obj, engine, ring);
4777         if (ret) {
4778                 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
4779                 goto error_ring_free;
4780         }
4781
4782         ce->ring = ring;
4783         ce->state = vma;
4784
4785         return 0;
4786
4787 error_ring_free:
4788         intel_ring_put(ring);
4789 error_deref_obj:
4790         i915_gem_object_put(ctx_obj);
4791         return ret;
4792 }
4793
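     /*
      * A virtual engine carries at most one ready request at a time; this
      * is the list head it sits on while waiting to be taken by a sibling.
      */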
4794 static struct list_head *virtual_queue(struct virtual_engine *ve)
4795 {
4796         return &ve->base.execlists.default_priolist.requests[0];
4797 }
4798
4799 static void virtual_context_destroy(struct kref *kref)
4800 {
4801         struct virtual_engine *ve =
4802                 container_of(kref, typeof(*ve), context.ref);
4803         unsigned int n;
4804
4805         GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4806         GEM_BUG_ON(ve->request);
4807         GEM_BUG_ON(ve->context.inflight);
4808
4809         for (n = 0; n < ve->num_siblings; n++) {
4810                 struct intel_engine_cs *sibling = ve->siblings[n];
4811                 struct rb_node *node = &ve->nodes[sibling->id].rb;
4812                 unsigned long flags;
4813
4814                 if (RB_EMPTY_NODE(node))
4815                         continue;
4816
4817                 spin_lock_irqsave(&sibling->active.lock, flags);
4818
4819                 /* Detachment is lazily performed in the execlists tasklet */
4820                 if (!RB_EMPTY_NODE(node))
4821                         rb_erase_cached(node, &sibling->execlists.virtual);
4822
4823                 spin_unlock_irqrestore(&sibling->active.lock, flags);
4824         }
4825         GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
4826
4827         if (ve->context.state)
4828                 __execlists_context_fini(&ve->context);
4829         intel_context_fini(&ve->context);
4830
4831         kfree(ve->bonds);
4832         kfree(ve);
4833 }
4834
4835 static void virtual_engine_initial_hint(struct virtual_engine *ve)
4836 {
4837         int swp;
4838
4839         /*
4840          * Pick a random sibling on starting to help spread the load around.
4841          *
4842          * New contexts are typically created with exactly the same order
4843          * of siblings, and often started in batches. Due to the way we iterate
4844          * the array of siblings when submitting requests, sibling[0] is
4845          * prioritised for dequeuing. If we make sure that sibling[0] is fairly
4846          * randomised across the system, we also help spread the load by the
4847          * first engine we inspect being different each time.
4848          *
4849          * NB: This does not force us to execute on this engine; it will just
4850          * typically be the first we inspect for submission.
4851          */
4852         swp = prandom_u32_max(ve->num_siblings);
4853         if (!swp)
4854                 return;
4855
4856         swap(ve->siblings[swp], ve->siblings[0]);
4857         if (!intel_engine_has_relative_mmio(ve->siblings[0]))
4858                 virtual_update_register_offsets(ve->context.lrc_reg_state,
4859                                                 ve->siblings[0]);
4860 }
4861
4862 static int virtual_context_alloc(struct intel_context *ce)
4863 {
4864         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4865
4866         return __execlists_context_alloc(ce, ve->siblings[0]);
4867 }
4868
4869 static int virtual_context_pin(struct intel_context *ce)
4870 {
4871         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4872         int err;
4873
4874         /* Note: we must use a real engine class for setting up reg state */
4875         err = __execlists_context_pin(ce, ve->siblings[0]);
4876         if (err)
4877                 return err;
4878
4879         virtual_engine_initial_hint(ve);
4880         return 0;
4881 }
4882
4883 static void virtual_context_enter(struct intel_context *ce)
4884 {
4885         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4886         unsigned int n;
4887
4888         for (n = 0; n < ve->num_siblings; n++)
4889                 intel_engine_pm_get(ve->siblings[n]);
4890
4891         intel_timeline_enter(ce->timeline);
4892 }
4893
4894 static void virtual_context_exit(struct intel_context *ce)
4895 {
4896         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4897         unsigned int n;
4898
4899         intel_timeline_exit(ce->timeline);
4900
4901         for (n = 0; n < ve->num_siblings; n++)
4902                 intel_engine_pm_put(ve->siblings[n]);
4903 }
4904
4905 static const struct intel_context_ops virtual_context_ops = {
4906         .alloc = virtual_context_alloc,
4907
4908         .pin = virtual_context_pin,
4909         .unpin = execlists_context_unpin,
4910
4911         .enter = virtual_context_enter,
4912         .exit = virtual_context_exit,
4913
4914         .destroy = virtual_context_destroy,
4915 };
4916
4917 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
4918 {
4919         struct i915_request *rq;
4920         intel_engine_mask_t mask;
4921
4922         rq = READ_ONCE(ve->request);
4923         if (!rq)
4924                 return 0;
4925
4926         /* The rq is ready for submission; rq->execution_mask is now stable. */
4927         mask = rq->execution_mask;
4928         if (unlikely(!mask)) {
4929                 /* Invalid selection, submit to a random engine in error */
4930                 i915_request_set_error_once(rq, -ENODEV);
4931                 mask = ve->siblings[0]->mask;
4932         }
4933
4934         ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
4935                      rq->fence.context, rq->fence.seqno,
4936                      mask, ve->base.execlists.queue_priority_hint);
4937
4938         return mask;
4939 }
4940
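     /*
      * Offer the pending virtual request to every allowed sibling: insert
      * this virtual engine's node into each sibling's priority-sorted
      * rbtree and kick its tasklet; whichever physical engine dequeues the
      * request first executes it.
      */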
4941 static void virtual_submission_tasklet(unsigned long data)
4942 {
4943         struct virtual_engine * const ve = (struct virtual_engine *)data;
4944         const int prio = ve->base.execlists.queue_priority_hint;
4945         intel_engine_mask_t mask;
4946         unsigned int n;
4947
4948         rcu_read_lock();
4949         mask = virtual_submission_mask(ve);
4950         rcu_read_unlock();
4951         if (unlikely(!mask))
4952                 return;
4953
4954         local_irq_disable();
4955         for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
4956                 struct intel_engine_cs *sibling = ve->siblings[n];
4957                 struct ve_node * const node = &ve->nodes[sibling->id];
4958                 struct rb_node **parent, *rb;
4959                 bool first;
4960
4961                 if (unlikely(!(mask & sibling->mask))) {
4962                         if (!RB_EMPTY_NODE(&node->rb)) {
4963                                 spin_lock(&sibling->active.lock);
4964                                 rb_erase_cached(&node->rb,
4965                                                 &sibling->execlists.virtual);
4966                                 RB_CLEAR_NODE(&node->rb);
4967                                 spin_unlock(&sibling->active.lock);
4968                         }
4969                         continue;
4970                 }
4971
4972                 spin_lock(&sibling->active.lock);
4973
4974                 if (!RB_EMPTY_NODE(&node->rb)) {
4975                         /*
4976                          * Cheat and avoid rebalancing the tree if we can
4977                          * reuse this node in situ.
4978                          */
4979                         first = rb_first_cached(&sibling->execlists.virtual) ==
4980                                 &node->rb;
4981                         if (prio == node->prio || (prio > node->prio && first))
4982                                 goto submit_engine;
4983
4984                         rb_erase_cached(&node->rb, &sibling->execlists.virtual);
4985                 }
4986
4987                 rb = NULL;
4988                 first = true;
4989                 parent = &sibling->execlists.virtual.rb_root.rb_node;
4990                 while (*parent) {
4991                         struct ve_node *other;
4992
4993                         rb = *parent;
4994                         other = rb_entry(rb, typeof(*other), rb);
4995                         if (prio > other->prio) {
4996                                 parent = &rb->rb_left;
4997                         } else {
4998                                 parent = &rb->rb_right;
4999                                 first = false;
5000                         }
5001                 }
5002
5003                 rb_link_node(&node->rb, rb, parent);
5004                 rb_insert_color_cached(&node->rb,
5005                                        &sibling->execlists.virtual,
5006                                        first);
5007
5008 submit_engine:
5009                 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
5010                 node->prio = prio;
5011                 if (first && prio > sibling->execlists.queue_priority_hint) {
5012                         sibling->execlists.queue_priority_hint = prio;
5013                         tasklet_hi_schedule(&sibling->execlists.tasklet);
5014                 }
5015
5016                 spin_unlock(&sibling->active.lock);
5017         }
5018         local_irq_enable();
5019 }
5020
5021 static void virtual_submit_request(struct i915_request *rq)
5022 {
5023         struct virtual_engine *ve = to_virtual_engine(rq->engine);
5024         struct i915_request *old;
5025         unsigned long flags;
5026
5027         ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
5028                      rq->fence.context,
5029                      rq->fence.seqno);
5030
5031         GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
5032
5033         spin_lock_irqsave(&ve->base.active.lock, flags);
5034
5035         old = ve->request;
5036         if (old) { /* background completion event from preempt-to-busy */
5037                 GEM_BUG_ON(!i915_request_completed(old));
5038                 __i915_request_submit(old);
5039                 i915_request_put(old);
5040         }
5041
5042         if (i915_request_completed(rq)) {
5043                 __i915_request_submit(rq);
5044
5045                 ve->base.execlists.queue_priority_hint = INT_MIN;
5046                 ve->request = NULL;
5047         } else {
5048                 ve->base.execlists.queue_priority_hint = rq_prio(rq);
5049                 ve->request = i915_request_get(rq);
5050
5051                 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5052                 list_move_tail(&rq->sched.link, virtual_queue(ve));
5053
5054                 tasklet_schedule(&ve->base.execlists.tasklet);
5055         }
5056
5057         spin_unlock_irqrestore(&ve->base.active.lock, flags);
5058 }
5059
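     /* Look up the sibling mask registered against @master, if any. */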
5060 static struct ve_bond *
5061 virtual_find_bond(struct virtual_engine *ve,
5062                   const struct intel_engine_cs *master)
5063 {
5064         int i;
5065
5066         for (i = 0; i < ve->num_bonds; i++) {
5067                 if (ve->bonds[i].master == master)
5068                         return &ve->bonds[i];
5069         }
5070
5071         return NULL;
5072 }
5073
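     /*
      * Bonding callback, invoked once the paired master request is ready to
      * execute: restrict where this bonded request may run based on the
      * sibling mask registered against the master's physical engine.
      */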
5074 static void
5075 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
5076 {
5077         struct virtual_engine *ve = to_virtual_engine(rq->engine);
5078         intel_engine_mask_t allowed, exec;
5079         struct ve_bond *bond;
5080
5081         allowed = ~to_request(signal)->engine->mask;
5082
5083         bond = virtual_find_bond(ve, to_request(signal)->engine);
5084         if (bond)
5085                 allowed &= bond->sibling_mask;
5086
5087         /* Restrict the bonded request to run on only the available engines */
5088         exec = READ_ONCE(rq->execution_mask);
5089         while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
5090                 ;
5091
5092         /* Prevent the master from being re-run on the bonded engines */
5093         to_request(signal)->execution_mask &= ~allowed;
5094 }
5095
5096 struct intel_context *
5097 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
5098                                unsigned int count)
5099 {
5100         struct virtual_engine *ve;
5101         unsigned int n;
5102         int err;
5103
5104         if (count == 0)
5105                 return ERR_PTR(-EINVAL);
5106
5107         if (count == 1)
5108                 return intel_context_create(siblings[0]);
5109
5110         ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
5111         if (!ve)
5112                 return ERR_PTR(-ENOMEM);
5113
5114         ve->base.i915 = siblings[0]->i915;
5115         ve->base.gt = siblings[0]->gt;
5116         ve->base.uncore = siblings[0]->uncore;
5117         ve->base.id = -1;
5118
5119         ve->base.class = OTHER_CLASS;
5120         ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5121         ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5122         ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5123
5124         /*
5125          * The decision on whether to submit a request using semaphores
5126          * depends on the saturated state of the engine. We only compute
5127          * this during HW submission of the request, and we need for this
5128          * state to be globally applied to all requests being submitted
5129          * to this engine. Virtual engines encompass more than one physical
5130          * engine and so we cannot accurately tell in advance if one of those
5131          * engines is already saturated and so cannot afford to use a semaphore
5132          * and be pessimized in priority for doing so -- if we are the only
5133          * context using semaphores after all other clients have stopped, we
5134          * will be starved on the saturated system. Such a global switch for
5135          * semaphores is less than ideal, but alas is the current compromise.
5136          */
5137         ve->base.saturated = ALL_ENGINES;
5138
5139         snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5140
5141         intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
5142         intel_engine_init_breadcrumbs(&ve->base);
5143         intel_engine_init_execlists(&ve->base);
5144
5145         ve->base.cops = &virtual_context_ops;
5146         ve->base.request_alloc = execlists_request_alloc;
5147
5148         ve->base.schedule = i915_schedule;
5149         ve->base.submit_request = virtual_submit_request;
5150         ve->base.bond_execute = virtual_bond_execute;
5151
5152         INIT_LIST_HEAD(virtual_queue(ve));
5153         ve->base.execlists.queue_priority_hint = INT_MIN;
5154         tasklet_init(&ve->base.execlists.tasklet,
5155                      virtual_submission_tasklet,
5156                      (unsigned long)ve);
5157
5158         intel_context_init(&ve->context, &ve->base);
5159
5160         for (n = 0; n < count; n++) {
5161                 struct intel_engine_cs *sibling = siblings[n];
5162
5163                 GEM_BUG_ON(!is_power_of_2(sibling->mask));
5164                 if (sibling->mask & ve->base.mask) {
5165                         DRM_DEBUG("duplicate %s entry in load balancer\n",
5166                                   sibling->name);
5167                         err = -EINVAL;
5168                         goto err_put;
5169                 }
5170
5171                 /*
5172                  * The virtual engine implementation is tightly coupled to
5173                  * the execlists backend -- we push requests directly
5174                  * into a tree inside each physical engine. We could support
5175                  * layering if we handle cloning of the requests and
5176                  * submitting a copy into each backend.
5177                  */
5178                 if (sibling->execlists.tasklet.func !=
5179                     execlists_submission_tasklet) {
5180                         err = -ENODEV;
5181                         goto err_put;
5182                 }
5183
5184                 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
5185                 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
5186
5187                 ve->siblings[ve->num_siblings++] = sibling;
5188                 ve->base.mask |= sibling->mask;
5189
5190                 /*
5191                  * All physical engines must be compatible for their emission
5192                  * functions (as we build the instructions during request
5193                  * construction and do not alter them before submission
5194                  * on the physical engine). We use the engine class as a guide
5195                  * here, although that could be refined.
5196                  */
5197                 if (ve->base.class != OTHER_CLASS) {
5198                         if (ve->base.class != sibling->class) {
5199                                 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5200                                           sibling->class, ve->base.class);
5201                                 err = -EINVAL;
5202                                 goto err_put;
5203                         }
5204                         continue;
5205                 }
5206
5207                 ve->base.class = sibling->class;
5208                 ve->base.uabi_class = sibling->uabi_class;
5209                 snprintf(ve->base.name, sizeof(ve->base.name),
5210                          "v%dx%d", ve->base.class, count);
5211                 ve->base.context_size = sibling->context_size;
5212
5213                 ve->base.emit_bb_start = sibling->emit_bb_start;
5214                 ve->base.emit_flush = sibling->emit_flush;
5215                 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5216                 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5217                 ve->base.emit_fini_breadcrumb_dw =
5218                         sibling->emit_fini_breadcrumb_dw;
5219
5220                 ve->base.flags = sibling->flags;
5221         }
5222
5223         ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5224
5225         return &ve->context;
5226
5227 err_put:
5228         intel_context_put(&ve->context);
5229         return ERR_PTR(err);
5230 }
5231
5232 struct intel_context *
5233 intel_execlists_clone_virtual(struct intel_engine_cs *src)
5234 {
5235         struct virtual_engine *se = to_virtual_engine(src);
5236         struct intel_context *dst;
5237
5238         dst = intel_execlists_create_virtual(se->siblings,
5239                                              se->num_siblings);
5240         if (IS_ERR(dst))
5241                 return dst;
5242
5243         if (se->num_bonds) {
5244                 struct virtual_engine *de = to_virtual_engine(dst->engine);
5245
5246                 de->bonds = kmemdup(se->bonds,
5247                                     sizeof(*se->bonds) * se->num_bonds,
5248                                     GFP_KERNEL);
5249                 if (!de->bonds) {
5250                         intel_context_put(dst);
5251                         return ERR_PTR(-ENOMEM);
5252                 }
5253
5254                 de->num_bonds = se->num_bonds;
5255         }
5256
5257         return dst;
5258 }
5259
5260 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5261                                      const struct intel_engine_cs *master,
5262                                      const struct intel_engine_cs *sibling)
5263 {
5264         struct virtual_engine *ve = to_virtual_engine(engine);
5265         struct ve_bond *bond;
5266         int n;
5267
5268         /* Sanity check the sibling is part of the virtual engine */
5269         for (n = 0; n < ve->num_siblings; n++)
5270                 if (sibling == ve->siblings[n])
5271                         break;
5272         if (n == ve->num_siblings)
5273                 return -EINVAL;
5274
5275         bond = virtual_find_bond(ve, master);
5276         if (bond) {
5277                 bond->sibling_mask |= sibling->mask;
5278                 return 0;
5279         }
5280
5281         bond = krealloc(ve->bonds,
5282                         sizeof(*bond) * (ve->num_bonds + 1),
5283                         GFP_KERNEL);
5284         if (!bond)
5285                 return -ENOMEM;
5286
5287         bond[ve->num_bonds].master = master;
5288         bond[ve->num_bonds].sibling_mask = sibling->mask;
5289
5290         ve->bonds = bond;
5291         ve->num_bonds++;
5292
5293         return 0;
5294 }
5295
5296 struct intel_engine_cs *
5297 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
5298                                  unsigned int sibling)
5299 {
5300         struct virtual_engine *ve = to_virtual_engine(engine);
5301
5302         if (sibling >= ve->num_siblings)
5303                 return NULL;
5304
5305         return ve->siblings[sibling];
5306 }
5307
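     /*
      * Pretty-print the executing, queued and virtual requests on this
      * engine for debugfs and error-state dumps.
      */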
5308 void intel_execlists_show_requests(struct intel_engine_cs *engine,
5309                                    struct drm_printer *m,
5310                                    void (*show_request)(struct drm_printer *m,
5311                                                         struct i915_request *rq,
5312                                                         const char *prefix),
5313                                    unsigned int max)
5314 {
5315         const struct intel_engine_execlists *execlists = &engine->execlists;
5316         struct i915_request *rq, *last;
5317         unsigned long flags;
5318         unsigned int count;
5319         struct rb_node *rb;
5320
5321         spin_lock_irqsave(&engine->active.lock, flags);
5322
5323         last = NULL;
5324         count = 0;
5325         list_for_each_entry(rq, &engine->active.requests, sched.link) {
5326                 if (count++ < max - 1)
5327                         show_request(m, rq, "\t\tE ");
5328                 else
5329                         last = rq;
5330         }
5331         if (last) {
5332                 if (count > max) {
5333                         drm_printf(m,
5334                                    "\t\t...skipping %d executing requests...\n",
5335                                    count - max);
5336                 }
5337                 show_request(m, last, "\t\tE ");
5338         }
5339
5340         if (execlists->switch_priority_hint != INT_MIN)
5341                 drm_printf(m, "\t\tSwitch priority hint: %d\n",
5342                            READ_ONCE(execlists->switch_priority_hint));
5343         if (execlists->queue_priority_hint != INT_MIN)
5344                 drm_printf(m, "\t\tQueue priority hint: %d\n",
5345                            READ_ONCE(execlists->queue_priority_hint));
5346
5347         last = NULL;
5348         count = 0;
5349         for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
5350                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
5351                 int i;
5352
5353                 priolist_for_each_request(rq, p, i) {
5354                         if (count++ < max - 1)
5355                                 show_request(m, rq, "\t\tQ ");
5356                         else
5357                                 last = rq;
5358                 }
5359         }
5360         if (last) {
5361                 if (count > max) {
5362                         drm_printf(m,
5363                                    "\t\t...skipping %d queued requests...\n",
5364                                    count - max);
5365                 }
5366                 show_request(m, last, "\t\tQ ");
5367         }
5368
5369         last = NULL;
5370         count = 0;
5371         for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
5372                 struct virtual_engine *ve =
5373                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
5374                 struct i915_request *rq = READ_ONCE(ve->request);
5375
5376                 if (rq) {
5377                         if (count++ < max - 1)
5378                                 show_request(m, rq, "\t\tV ");
5379                         else
5380                                 last = rq;
5381                 }
5382         }
5383         if (last) {
5384                 if (count > max) {
5385                         drm_printf(m,
5386                                    "\t\t...skipping %d virtual requests...\n",
5387                                    count - max);
5388                 }
5389                 show_request(m, last, "\t\tV ");
5390         }
5391
5392         spin_unlock_irqrestore(&engine->active.lock, flags);
5393 }
5394
5395 void intel_lr_context_reset(struct intel_engine_cs *engine,
5396                             struct intel_context *ce,
5397                             u32 head,
5398                             bool scrub)
5399 {
5400         GEM_BUG_ON(!intel_context_is_pinned(ce));
5401
5402         /*
5403          * We want a simple context + ring to execute the breadcrumb update.
5404          * We cannot rely on the context being intact across the GPU hang,
5405          * so clear it and rebuild just what we need for the breadcrumb.
5406          * All pending requests for this context will be zapped, and any
5407          * future request will be after userspace has had the opportunity
5408          * to recreate its own state.
5409          */
5410         if (scrub)
5411                 restore_default_state(ce, engine);
5412
5413         /* Rerun the request; its payload has been neutered (if guilty). */
5414         __execlists_update_reg_state(ce, engine, head);
5415 }
5416
5417 bool
5418 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
5419 {
5420         return engine->set_default_submission ==
5421                intel_execlists_set_default_submission;
5422 }
5423
5424 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5425 #include "selftest_lrc.c"
5426 #endif