drivers/gpu/drm/i915/gt/intel_engine_types.h

   1 /* SPDX-License-Identifier: MIT */
   2 /*
   3  * Copyright © 2019 Intel Corporation
   4  */
   5
   6 #ifndef __INTEL_ENGINE_TYPES__
   7 #define __INTEL_ENGINE_TYPES__
   8
   9 #include <linux/average.h>
  10 #include <linux/hashtable.h>
  11 #include <linux/irq_work.h>
  12 #include <linux/kref.h>
  13 #include <linux/list.h>
  14 #include <linux/llist.h>
  15 #include <linux/rbtree.h>
  16 #include <linux/timer.h>
  17 #include <linux/types.h>
  18 #include <linux/workqueue.h>
  19
  20 #include "i915_gem.h"
  21 #include "i915_pmu.h"
  22 #include "i915_priolist_types.h"
  23 #include "i915_selftest.h"
  24 #include "intel_sseu.h"
  25 #include "intel_timeline_types.h"
  26 #include "intel_uncore.h"
  27 #include "intel_wakeref.h"
  28 #include "intel_workarounds_types.h"
  29
  30 /* HW Engine class + instance */
  31 #define RENDER_CLASS            0
  32 #define VIDEO_DECODE_CLASS      1
  33 #define VIDEO_ENHANCEMENT_CLASS 2
  34 #define COPY_ENGINE_CLASS       3
  35 #define OTHER_CLASS             4
  36 #define COMPUTE_CLASS           5
     /* Same value as COMPUTE_CLASS, the largest class number defined above. */
  37 #define MAX_ENGINE_CLASS        5
     /* NOTE(review): presumably the highest per-class instance index (cf. BCS8 below) - confirm. */
  38 #define MAX_ENGINE_INSTANCE     8
  39
  40 #define I915_MAX_SLICES 3
     /* Used as the second dimension of the per-subslice arrays in struct intel_instdone. */
  41 #define I915_MAX_SUBSLICES 8
  42
     /* Order argument given to DECLARE_HASHTABLE() for intel_engine_cs.cmd_hash. */
  43 #define I915_CMD_HASH_ORDER 9
  44
  45 struct dma_fence; /* first of a run of forward-declared struct tags */
  46 struct drm_i915_gem_object;
  47 struct drm_i915_reg_table;
  48 struct i915_gem_context;
  49 struct i915_request;
  50 struct i915_sched_attr;
  51 struct i915_sched_engine;
  52 struct intel_gt;
  53 struct intel_ring;
  54 struct intel_uncore;
  55 struct intel_breadcrumbs;
  56 struct intel_engine_cs; /* fully defined later in this header */
  57 struct i915_perf_group;
  58
  59 typedef u32 intel_engine_mask_t; /* type of intel_engine_cs.mask, .logical_mask and .saturated */
     /* ~0ul truncated to the mask type: every bit set, including the VIRTUAL_ENGINES bit. */
  60 #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
     /* Bit index is (width of intel_engine_mask_t in bits) - 1, i.e. the top bit of the mask type. */
  61 #define VIRTUAL_ENGINES BIT(BITS_PER_TYPE(intel_engine_mask_t) - 1)
  62
  63 struct intel_hw_status_page {
             /* Embedded in struct intel_engine_cs as the status_page member. */
  64         struct list_head timelines;
  65         struct i915_vma *vma;
             /* NOTE(review): presumably the CPU-visible address of the page behind @vma - confirm. */
  66         u32 *addr;
  67 };
  68
  69 struct intel_instdone {
             /*
              * Container of u32 "instdone" values. The two-dimensional members
              * are sized [GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; GEN_MAX_GSLICES
              * is not defined in this header (I915_MAX_SLICES is not used here).
              */
  70         u32 instdone;
  71         /* The following exist only in the RCS engine */
  72         u32 slice_common;
  73         u32 slice_common_extra[2];
  74         u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
  75         u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
  76
  77         /* Added in XeHPG */
  78         u32 geom_svg[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
  79 };
  80
  81 /*
  82  * We use a single page to load ctx workarounds, so all of these
  83  * values are expressed in dwords.
  84  *
  85  * struct i915_wa_ctx_bb:
  86  *  offset: specifies the batch starting position; also helpful in case
  87  *    we want to have multiple batches at different offsets based on
  88  *    some criteria. It is not a requirement at the moment but provides
  89  *    an option for future use.
  90  *  size: size of the batch in DWORDS
  91  */
  92 struct i915_ctx_workarounds {
             /* Two offset/size descriptors of the same type: indirect_ctx and per_ctx. */
  93         struct i915_wa_ctx_bb {
  94                 u32 offset;
  95                 u32 size;
  96         } indirect_ctx, per_ctx;
             /* NOTE(review): presumably the vma of the single page mentioned above - confirm. */
  97         struct i915_vma *vma;
  98 };
  99
 100 #define I915_MAX_VCS    8 /* matches VCS0..VCS7 in enum intel_engine_id */
 101 #define I915_MAX_VECS   4 /* matches VECS0..VECS3 */
 102 #define I915_MAX_SFC    (I915_MAX_VCS / 2) /* half of I915_MAX_VCS, i.e. 4 */
 103 #define I915_MAX_CCS    4 /* matches CCS0..CCS3 */
 104 #define I915_MAX_RCS    1 /* matches RCS0 */
 105 #define I915_MAX_BCS    9 /* matches BCS0..BCS8 */
 106
 107 /*
 108  * Engine IDs definitions.
 109  * Keep instances of the same type engine together.
 110  *
 110  * The _BCS()/_VCS()/_VECS()/_CCS() helpers below compute an ID as
 110  * "first ID of the type + n", which relies on that contiguity.
 110  * With the enumerators listed here, I915_NUM_ENGINES evaluates to 27.
 110  */
 111 enum intel_engine_id {
 112         RCS0 = 0,
 113         BCS0,
 114         BCS1,
 115         BCS2,
 116         BCS3,
 117         BCS4,
 118         BCS5,
 119         BCS6,
 120         BCS7,
 121         BCS8,
 122 #define _BCS(n) (BCS0 + (n))
 123         VCS0,
 124         VCS1,
 125         VCS2,
 126         VCS3,
 127         VCS4,
 128         VCS5,
 129         VCS6,
 130         VCS7,
 131 #define _VCS(n) (VCS0 + (n))
 132         VECS0,
 133         VECS1,
 134         VECS2,
 135         VECS3,
 136 #define _VECS(n) (VECS0 + (n))
 137         CCS0,
 138         CCS1,
 139         CCS2,
 140         CCS3,
 141 #define _CCS(n) (CCS0 + (n))
 142         GSC0,
 143         I915_NUM_ENGINES /* count of the IDs above; not itself an engine */
 144 #define INVALID_ENGINE ((enum intel_engine_id)-1)
 145 };
 146
 147 /* A simple estimator for the round-trip latency of an engine */
     /*
      * The resulting type, struct ewma__engine_latency, is used for
      * intel_engine_cs.latency. NOTE(review): 6 and 4 are presumably the
      * precision and weight-reciprocal arguments of DECLARE_EWMA() from
      * <linux/average.h> - confirm against that header.
      */
 148 DECLARE_EWMA(_engine_latency, 6, 4)
 149
 150 struct st_preempt_hang {
             /*
              * Declared inside struct intel_engine_execlists through
              * I915_SELFTEST_DECLARE() as the preempt_hang member.
              */
 151         struct completion completion;
 152         unsigned int count;
 153 };
 154
 155 /**
 156  * struct intel_engine_execlists - execlist submission queue and port state
 157  *
 158  * The struct intel_engine_execlists represents the combined logical state of
 159  * driver and the hardware state for execlist mode of submission.
 160  */
 161 struct intel_engine_execlists {
 162         /**
 163          * @timer: kick the current context if its timeslice expires
 164          */
 165         struct timer_list timer;
 166
 167         /**
 168          * @preempt: reset the current context if it fails to give way
 169          */
 170         struct timer_list preempt;
 171
 172         /**
 173          * @preempt_target: active request at the time of the preemption request
 174          *
 175          * We force a preemption to occur if the pending contexts have not
 176          * been promoted to active upon receipt of the CS ack event within
 177          * the timeout. This timeout may be chosen based on the target,
 178          * using a very short timeout if the context is no longer schedulable.
 179          * That short timeout may not be applicable to other contexts, so
 180          * if a context switch should happen before the preemption
 181          * timeout, we may shoot early at an innocent context. To prevent this,
 182          * we record which context was active at the time of the preemption
 183          * request and only reset that context upon the timeout.
 184          */
 185         const struct i915_request *preempt_target;
 186
 187         /**
 188          * @ccid: identifier for contexts submitted to this engine
 189          */
 190         u32 ccid;
 191
 192         /**
 193          * @yield: CCID at the time of the last semaphore-wait interrupt.
 194          *
 195          * Instead of leaving a semaphore busy-spinning on an engine, we would
 196          * like to switch to another ready context, i.e. yielding the semaphore
 197          * timeslice.
 198          */
 199         u32 yield;
 200
 201         /**
 202          * @error_interrupt: CS Master EIR
 203          *
 204          * The CS generates an interrupt when it detects an error. We capture
 205          * the first error interrupt, record the EIR and schedule the tasklet.
 206          * In the tasklet, we process the pending CS events to ensure we have
 207          * the guilty request, and then reset the engine.
 208          *
 209          * Low 16b are used by HW, with the upper 16b used as the enabling mask.
 210          * Reserve the upper 16b for tracking internal errors.
 211          * ERROR_CSB and ERROR_PREEMPT below are bits 31 and 30 of this field.
 212          */
 212         u32 error_interrupt;
 213 #define ERROR_CSB       BIT(31)
 214 #define ERROR_PREEMPT   BIT(30)
 215
 216         /**
 217          * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset
 218          */
 219         u32 reset_ccid;
 220
 221         /**
 222          * @submit_reg: gen-specific execlist submission register
 223          * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
 224          * the ExecList Submission Queue Contents register array for Gen11+
 225          */
 226         u32 __iomem *submit_reg;
 227
 228         /**
 229          * @ctrl_reg: the enhanced execlists control register, used to load the
 230          * submit queue on the HW and to request preemptions to idle
 231          */
 232         u32 __iomem *ctrl_reg;
 233
 234 #define EXECLIST_MAX_PORTS 2
 235         /**
 236          * @active: the currently known context executing on HW
 237          */
 238         struct i915_request * const *active;
 239         /**
 240          * @inflight: the set of contexts submitted and acknowledged by HW
 241          *
 242          * The set of inflight contexts is managed by reading CS events
 243          * from the HW. On a context-switch event (not preemption), we
 244          * know the HW has transitioned from port0 to port1, and we
 245          * advance our inflight/active tracking accordingly.
 246          */
 247         struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
 248         /**
 249          * @pending: the next set of contexts submitted to ELSP
 250          *
 251          * We store the array of contexts that we submit to HW (via ELSP) and
 252          * promote them to the inflight array once HW has signaled the
 253          * preemption or idle-to-active event.
 254          */
 255         struct i915_request *pending[EXECLIST_MAX_PORTS + 1];
 256
 257         /**
 258          * @port_mask: number of execlist ports - 1
 259          */
 260         unsigned int port_mask;
 261
 262         /**
 263          * @virtual: Queue of requests on a virtual engine, sorted by priority.
 264          * Each RB entry is a struct i915_priolist containing a list of requests
 265          * of the same priority.
 266          */
 267         struct rb_root_cached virtual;
 268
 269         /**
 270          * @csb_write: control register for Context Switch buffer
 271          *
 272          * Note this register may be either mmio or HWSP shadow.
 273          */
 274         u32 *csb_write;
 275
 276         /**
 277          * @csb_status: status array for Context Switch buffer
 278          *
 279          * Note these registers may be either mmio or HWSP shadow.
 280          */
 281         u64 *csb_status;
 282
 283         /**
 284          * @csb_size: context status buffer FIFO size
 285          */
 286         u8 csb_size;
 287
 288         /**
 289          * @csb_head: context status buffer head
 290          */
 291         u8 csb_head;
 292
 293         /* private: selftest */
 294         I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
 295 };
 296
 297 #define INTEL_ENGINE_CS_MAX_NAME 8 /* size in bytes of the intel_engine_cs.name[] array */
 298
 299 struct intel_engine_execlists_stats {
             /*
              * One arm of the anonymous union inside intel_engine_cs.stats
              * (member name: execlists); the other arm is
              * struct intel_engine_guc_stats.
              */
 300         /**
 301          * @active: Number of contexts currently scheduled in.
 302          */
 303         unsigned int active;
 304
 305         /**
 306          * @lock: Lock protecting the below fields.
 307          */
 308         seqcount_t lock;
 309
 310         /**
 311          * @total: Total time this engine was busy.
 312          *
 313          * Accumulated time not counting the most recent block in cases where
 314          * engine is currently busy (active > 0).
 315          */
 316         ktime_t total;
 317
 318         /**
 319          * @start: Timestamp of the last idle to active transition.
 320          *
 321          * Idle is defined as active == 0, active is active > 0.
 322          */
 323         ktime_t start;
 324 };
 325
 326 struct intel_engine_guc_stats {
             /*
              * One arm of the anonymous union inside intel_engine_cs.stats
              * (member name: guc); the other arm is
              * struct intel_engine_execlists_stats.
              */
 327         /**
 328          * @running: Active state of the engine when busyness was last sampled.
 329          */
 330         bool running;
 331
 332         /**
 333          * @prev_total: Previous value of total runtime clock cycles.
 334          */
 335         u32 prev_total;
 336
 337         /**
 338          * @total_gt_clks: Total gt clock cycles this engine was busy.
 339          */
 340         u64 total_gt_clks;
 341
 342         /**
 343          * @start_gt_clk: GT clock time of last idle to active transition.
 344          */
 345         u64 start_gt_clk;
 346 };
 347
 348 union intel_engine_tlb_inv_reg {
             /*
              * Overlays two register handle types; used as the reg member of
              * struct intel_engine_tlb_inv.
              */
 349         i915_reg_t      reg;
 350         i915_mcr_reg_t  mcr_reg;
 351 };
 352
 353 struct intel_engine_tlb_inv {
             /* Embedded in struct intel_engine_cs as the tlb_inv member. */
             /* NOTE(review): presumably selects which union arm of @reg is valid (mcr_reg vs reg) - confirm. */
 354         bool mcr;
 355         union intel_engine_tlb_inv_reg reg;
             /* NOTE(review): request/done look like register values or bit masks - semantics not visible here. */
 356         u32 request;
 357         u32 done;
 358 };
 359
 360 struct intel_engine_cs {
             /*
              * Per-engine state. Groups the engine's identity (name, id, class,
              * instance, masks), per-engine function pointers (irq_*, reset.*,
              * park/unpark, emit_*, submit_request, ...), the embedded execlists
              * state, command-parser tables, busyness stats and the tunables
              * held in props/defaults.
              */
 361         struct drm_i915_private *i915;
 362         struct intel_gt *gt;
 363         struct intel_uncore *uncore;
 364         char name[INTEL_ENGINE_CS_MAX_NAME];
 365
 366         enum intel_engine_id id;
 367         enum intel_engine_id legacy_idx;
 368
 369         unsigned int guc_id;
 370
 371         intel_engine_mask_t mask;
 372         u32 reset_domain;
 373         /**
 374          * @logical_mask: logical mask of engine, reported to user space via
 375          * query IOCTL and used to communicate with the GuC in logical space.
 376          * The logical instance of a physical engine can change based on product
 377          * and fusing.
 378          */
 379         intel_engine_mask_t logical_mask;
 380
 381         u8 class;
 382         u8 instance;
 383
 384         u16 uabi_class;
 385         u16 uabi_instance;
 386
 387         u32 uabi_capabilities;
 388         u32 context_size;
 389         u32 mmio_base;
 390
 391         struct intel_engine_tlb_inv tlb_inv;
 392
 393         /*
 394          * Some w/a require forcewake to be held (which prevents RC6) while
 395          * a particular engine is active. If so, we set fw_domain to which
 396          * domains need to be held for the duration of request activity,
 397          * and 0 if none. We try to limit the duration of the hold as much
 398          * as possible.
 399          */
 400         enum forcewake_domains fw_domain;
 401         unsigned int fw_active;
 402
 403         unsigned long context_tag;
 404
 405         struct rb_node uabi_node;
 406
 407         struct intel_sseu sseu;
 408
 409         struct i915_sched_engine *sched_engine;
 410
 411         /* keep a request in reserve for a [pm] barrier under oom */
 412         struct i915_request *request_pool;
 413
 414         struct intel_context *hung_ce;
 415
 416         struct llist_head barrier_tasks;
 417
 418         struct intel_context *kernel_context; /* pinned */
 419
 420         /**
 421          * @pinned_contexts_list: List of pinned contexts. This list is only
 422          * assumed to be manipulated during driver load or unload time and
 423          * therefore does not have any additional protection.
 424          */
 425         struct list_head pinned_contexts_list;
 426
 427         intel_engine_mask_t saturated; /* submitting semaphores too late? */
 428
 429         struct {
 430                 struct delayed_work work;
 431                 struct i915_request *systole;
 432                 unsigned long blocked;
 433         } heartbeat;
 434
 435         unsigned long serial;
 436
 437         unsigned long wakeref_serial;
 438         struct intel_wakeref wakeref;
 439         struct file *default_state;
 440
 441         struct {
 442                 struct intel_ring *ring;
 443                 struct intel_timeline *timeline;
 444         } legacy;
 445
 446         /*
 447          * We track the average duration of the idle pulse on parking the
 448          * engine to keep an estimate of how fast the engine is
 449          * under ideal conditions.
 450          */
 451         struct ewma__engine_latency latency;
 452
 453         /* Keep track of all the seqno used, a trail of breadcrumbs */
 454         struct intel_breadcrumbs *breadcrumbs;
 455
 456         struct intel_engine_pmu {
 457                 /**
 458                  * @enable: Bitmask of enable sample events on this engine.
 459                  *
 460                  * Bits correspond to sample event types, for instance
 461                  * I915_SAMPLE_QUEUED is bit 0 etc.
 462                  */
 463                 u32 enable;
 464                 /**
 465                  * @enable_count: Reference count for the enabled samplers.
 466                  *
 467                  * Index number corresponds to @enum drm_i915_pmu_engine_sample.
 468                  */
 469                 unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
 470                 /**
 471                  * @sample: Counter values for sampling events.
 472                  *
 473                  * Our internal timer stores the current counters in this field.
 474                  *
 475                  * Index number corresponds to @enum drm_i915_pmu_engine_sample.
 476                  */
 477                 struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
 478         } pmu;
 479
 480         struct intel_hw_status_page status_page;
 481         struct i915_ctx_workarounds wa_ctx;
 482         struct i915_wa_list ctx_wa_list;
 483         struct i915_wa_list wa_list;
 484         struct i915_wa_list whitelist;
 485
 486         u32             irq_keep_mask; /* always keep these interrupts */
 487         u32             irq_enable_mask; /* bitmask to enable ring interrupt */
 488         void            (*irq_enable)(struct intel_engine_cs *engine);
 489         void            (*irq_disable)(struct intel_engine_cs *engine);
 490         void            (*irq_handler)(struct intel_engine_cs *engine, u16 iir);
 491
 492         void            (*sanitize)(struct intel_engine_cs *engine);
 493         int             (*resume)(struct intel_engine_cs *engine);
 494
 495         struct {
 496                 void (*prepare)(struct intel_engine_cs *engine);
 497
 498                 void (*rewind)(struct intel_engine_cs *engine, bool stalled);
 499                 void (*cancel)(struct intel_engine_cs *engine);
 500
 501                 void (*finish)(struct intel_engine_cs *engine);
 502         } reset;
 503
 504         void            (*park)(struct intel_engine_cs *engine);
 505         void            (*unpark)(struct intel_engine_cs *engine);
 506
 507         void            (*bump_serial)(struct intel_engine_cs *engine);
 508
 509         void            (*set_default_submission)(struct intel_engine_cs *engine);
 510
 511         const struct intel_context_ops *cops;
 512
 513         int             (*request_alloc)(struct i915_request *rq);
 514
             /* The EMIT_* values below are the bits accepted in @mode. */
 515         int             (*emit_flush)(struct i915_request *request, u32 mode);
 516 #define EMIT_INVALIDATE BIT(0)
 517 #define EMIT_FLUSH      BIT(1)
 518 #define EMIT_BARRIER    (EMIT_INVALIDATE | EMIT_FLUSH)
 519         int             (*emit_bb_start)(struct i915_request *rq,
 520                                          u64 offset, u32 length,
 521                                          unsigned int dispatch_flags);
 522 #define I915_DISPATCH_SECURE BIT(0)
 523 #define I915_DISPATCH_PINNED BIT(1)
 524         int              (*emit_init_breadcrumb)(struct i915_request *rq);
 525         u32             *(*emit_fini_breadcrumb)(struct i915_request *rq,
 526                                                  u32 *cs);
 527         unsigned int    emit_fini_breadcrumb_dw;
 528
 529         /* Pass the request to the hardware queue (e.g. directly into
 530          * the legacy ringbuffer or to the end of an execlist).
 531          *
 532          * This is called from an atomic context with irqs disabled; must
 533          * be irq safe.
 534          */
 535         void            (*submit_request)(struct i915_request *rq);
 536
 537         void            (*release)(struct intel_engine_cs *engine);
 538
 539         /*
 540          * Add / remove request from engine active tracking
 541          */
 542         void            (*add_active_request)(struct i915_request *rq);
 543         void            (*remove_active_request)(struct i915_request *rq);
 544
 545         /*
 546          * Get engine busyness and the time at which the busyness was sampled.
 547          */
 548         ktime_t         (*busyness)(struct intel_engine_cs *engine,
 549                                     ktime_t *now);
 550
 551         struct intel_engine_execlists execlists;
 552
 553         /*
 554          * Keep track of completed timelines on this engine for early
 555          * retirement with the goal of quickly enabling powersaving as
 556          * soon as the engine is idle.
 557          */
 558         struct intel_timeline *retire;
 559         struct work_struct retire_work;
 560
 561         /* context_status_notifier: list of callbacks for context-switch changes */
 562         struct atomic_notifier_head context_status_notifier;
 563
             /*
              * Bit definitions for @flags. Most are tested by the
              * intel_engine_*() inline helpers that follow this struct.
              */
 564 #define I915_ENGINE_USING_CMD_PARSER BIT(0)
 565 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 566 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 567 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 568 #define I915_ENGINE_HAS_TIMESLICES   BIT(4)
 569 #define I915_ENGINE_IS_VIRTUAL       BIT(5)
 570 #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
 571 #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
 572 #define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8)
 573 #define I915_ENGINE_HAS_RCS_REG_STATE  BIT(9)
 574 #define I915_ENGINE_HAS_EU_PRIORITY    BIT(10)
 575 #define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11)
 576 #define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12)
 577         unsigned int flags;
 578
 579         /*
 580          * Table of commands the command parser needs to know about
 581          * for this engine.
 582          */
 583         DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);
 584
 585         /*
 586          * Table of registers allowed in commands that read/write registers.
 587          */
 588         const struct drm_i915_reg_table *reg_tables;
 589         int reg_table_count;
 590
 591         /*
 592          * Returns the bitmask for the length field of the specified command.
 593          * Return 0 for an unrecognized/invalid command.
 594          *
 595          * If the command parser finds an entry for a command in the engine's
 596          * cmd_tables, it gets the command's length based on the table entry.
 597          * If not, it calls this function to determine the per-engine length
 598          * field encoding for the command (i.e. different opcode ranges use
 599          * certain bits to encode the command length in the header).
 600          */
 601         u32 (*get_cmd_length_mask)(u32 cmd_header);
 602
 603         struct {
                     /* The two stats layouts share storage; only one arm is meaningful at a time. */
 604                 union {
 605                         struct intel_engine_execlists_stats execlists;
 606                         struct intel_engine_guc_stats guc;
 607                 };
 608
 609                 /**
 610                  * @rps: Utilisation at last RPS sampling.
 611                  */
 612                 ktime_t rps;
 613         } stats;
 614
             /* Two instances of the same tunables struct: props and defaults. */
 615         struct {
 616                 unsigned long heartbeat_interval_ms;
 617                 unsigned long max_busywait_duration_ns;
 618                 unsigned long preempt_timeout_ms;
 619                 unsigned long stop_timeout_ms;
 620                 unsigned long timeslice_duration_ms;
 621         } props, defaults;
 622
 623         I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
 624
 625         /*
 626          * The perf group maps to one OA unit which controls one OA buffer. All
 627          * reports corresponding to this engine will be reported to this OA
 628          * buffer. An engine will map to a single OA unit, but a single OA unit
 629          * can generate reports for multiple engines.
 630          */
 631         struct i915_perf_group *oa_group;
 632 };
 633
 634 static inline bool
 635 intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
 636 {
             /* True when the I915_ENGINE_USING_CMD_PARSER bit is set in engine->flags. */
 637         return (engine->flags & I915_ENGINE_USING_CMD_PARSER) != 0;
 638 }
 639
 640 static inline bool
 641 intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
 642 {
             /* True when the I915_ENGINE_REQUIRES_CMD_PARSER bit is set in engine->flags. */
 643         return (engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER) != 0;
 644 }
 645
 646 static inline bool
 647 intel_engine_supports_stats(const struct intel_engine_cs *engine)
 648 {
             /* True when the I915_ENGINE_SUPPORTS_STATS bit is set in engine->flags. */
 649         return (engine->flags & I915_ENGINE_SUPPORTS_STATS) != 0;
 650 }
 651
 652 static inline bool
 653 intel_engine_has_preemption(const struct intel_engine_cs *engine)
 654 {
             /* True when the I915_ENGINE_HAS_PREEMPTION bit is set in engine->flags. */
 655         return (engine->flags & I915_ENGINE_HAS_PREEMPTION) != 0;
 656 }
 657
 658 static inline bool
 659 intel_engine_has_semaphores(const struct intel_engine_cs *engine)
 660 {
             /* True when the I915_ENGINE_HAS_SEMAPHORES bit is set in engine->flags. */
 661         return (engine->flags & I915_ENGINE_HAS_SEMAPHORES) != 0;
 662 }
 663
 664 static inline bool
 665 intel_engine_has_timeslices(const struct intel_engine_cs *engine)
 666 {
             /* A zero CONFIG_DRM_I915_TIMESLICE_DURATION overrides the per-engine flag. */
 667         if (CONFIG_DRM_I915_TIMESLICE_DURATION == 0)
 668                 return false;
 669
             /* Otherwise report the I915_ENGINE_HAS_TIMESLICES bit of engine->flags. */
 670         return (engine->flags & I915_ENGINE_HAS_TIMESLICES) != 0;
 671 }
 672
 673 static inline bool
 674 intel_engine_is_virtual(const struct intel_engine_cs *engine)
 675 {
             /* True when the I915_ENGINE_IS_VIRTUAL bit is set in engine->flags. */
 676         return (engine->flags & I915_ENGINE_IS_VIRTUAL) != 0;
 677 }
 678
 679 static inline bool
 680 intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
 681 {
             /* True when the I915_ENGINE_HAS_RELATIVE_MMIO bit is set in engine->flags. */
 682         return (engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO) != 0;
 683 }
 684
 685 /* Wa_14014475959:dg2 */
 686 static inline bool
 687 intel_engine_uses_wa_hold_ccs_switchout(const struct intel_engine_cs *engine)
 688 {
             /*
              * Read-only test of the I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT bit in
              * engine->flags. @engine is const-qualified, matching the other flag
              * helpers in this header; callers holding a non-const pointer are
              * unaffected.
              */
 689         return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
 690 }
 691
 692 #endif /* __INTEL_ENGINE_TYPES__ */