1 /* SPDX-License-Identifier: MIT */
3 * Copyright © 2019 Intel Corporation
6 #ifndef _I915_PERF_TYPES_H_
7 #define _I915_PERF_TYPES_H_
9 #include <linux/atomic.h>
10 #include <linux/device.h>
11 #include <linux/hrtimer.h>
12 #include <linux/llist.h>
13 #include <linux/poll.h>
14 #include <linux/sysfs.h>
15 #include <linux/types.h>
16 #include <linux/uuid.h>
17 #include <linux/wait.h>
20 #include "intel_wakeref.h"
22 struct drm_i915_private;
24 struct i915_gem_context;
28 struct intel_engine_cs;
30 struct i915_oa_format {
40 struct i915_oa_config {
41 struct i915_perf *perf;
43 char uuid[UUID_STRING_LEN + 1];
46 const struct i915_oa_reg *mux_regs;
48 const struct i915_oa_reg *b_counter_regs;
49 u32 b_counter_regs_len;
50 const struct i915_oa_reg *flex_regs;
53 struct attribute_group sysfs_metric;
54 struct attribute *attrs[2];
55 struct device_attribute sysfs_metric_id;
61 struct i915_perf_stream;
64 * struct i915_perf_stream_ops - the OPs to support a specific stream type
66 struct i915_perf_stream_ops {
68 * @enable: Enables the collection of HW samples, either in response to
69 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
70 * without `I915_PERF_FLAG_DISABLED`.
72 void (*enable)(struct i915_perf_stream *stream);
75 * @disable: Disables the collection of HW samples, either in response
76 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
79 void (*disable)(struct i915_perf_stream *stream);
82 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
83 * once there is something ready to read() for the stream
85 void (*poll_wait)(struct i915_perf_stream *stream,
90 * @wait_unlocked: For handling a blocking read, wait until there is
91 * something ready to read() for the stream. E.g. wait on the same
92 * wait queue that would be passed to poll_wait().
94 int (*wait_unlocked)(struct i915_perf_stream *stream);
97 * @read: Copy buffered metrics as records to userspace
98 * **buf**: the userspace destination buffer
99 * **count**: the number of bytes to copy, requested by userspace
100 * **offset**: zero at the start of the read, updated as the read
101 * proceeds, it represents how many bytes have been copied so far and
102 * the buffer offset for copying the next record.
104 * Copy as many buffered i915 perf samples and records for this stream
105 * to userspace as will fit in the given buffer.
107 * Only write complete records; returning -%ENOSPC if there isn't room
108 * for a complete record.
110 * Return any error condition that results in a short read such as
111 * -%ENOSPC or -%EFAULT, even though these may be squashed before
112 * returning to userspace.
114 int (*read)(struct i915_perf_stream *stream,
120 * @destroy: Cleanup any stream specific resources.
122 * The stream will always be disabled before this is called.
124 void (*destroy)(struct i915_perf_stream *stream);
128 * struct i915_perf_stream - state for a single open stream FD
130 struct i915_perf_stream {
132 * @perf: i915_perf backpointer
134 struct i915_perf *perf;
137 * @uncore: mmio access path
139 struct intel_uncore *uncore;
142 * @engine: Engine associated with this performance stream.
144 struct intel_engine_cs *engine;
147 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
148 * properties given when opening a stream, representing the contents
149 * of a single sample as read() by userspace.
154 * @sample_size: Considering the configured contents of a sample
155 * combined with the required header size, this is the total size
156 * of a single sample record.
161 * @ctx: %NULL if measuring system-wide across all contexts or a
162 * specific context that is being monitored.
164 struct i915_gem_context *ctx;
167 * @enabled: Whether the stream is currently enabled, considering
168 * whether the stream was opened in a disabled state and based
169 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
174 * @hold_preemption: Whether preemption is put on hold for command
175 * submissions done on the @ctx. This is useful for some drivers that
176 * cannot easily post process the OA buffer context to subtract delta
177 * of performance counters not associated with @ctx.
179 bool hold_preemption;
182 * @ops: The callbacks providing the implementation of this specific
183 * type of configured stream.
185 const struct i915_perf_stream_ops *ops;
188 * @oa_config: The OA configuration used by the stream.
190 struct i915_oa_config *oa_config;
193 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
194 * each time @oa_config changes.
196 struct llist_head oa_config_bos;
199 * @pinned_ctx: The OA context specific information.
201 struct intel_context *pinned_ctx;
204 * @specific_ctx_id: The id of the specific context.
209 * @specific_ctx_id_mask: The mask used for masking specific_ctx_id bits.
211 u32 specific_ctx_id_mask;
214 * @poll_check_timer: High resolution timer that will periodically
215 * check for data in the circular OA buffer for notifying userspace
216 * (e.g. during a read() or poll()).
218 struct hrtimer poll_check_timer;
221 * @poll_wq: The wait queue that hrtimer callback wakes when it
222 * sees data ready to read in the circular OA buffer.
224 wait_queue_head_t poll_wq;
227 * @pollin: Whether there is data available to read.
232 * @periodic: Whether periodic sampling is currently enabled.
237 * @period_exponent: The OA unit sampling frequency is derived from this.
242 * @oa_buffer: State of the OA buffer.
245 struct i915_vma *vma;
253 * @ptr_lock: Locks reads and writes to all head/tail state
255 * Consider: the head and tail pointer state needs to be read
256 * consistently from a hrtimer callback (atomic context) and
257 * read() fop (user context) with tail pointer updates happening
258 * in atomic context and head updates in user context and the
259 * (unlikely) possibility of read() errors needing to reset all
262 * Note: Contention/performance aren't currently a significant
263 * concern here considering the relatively low frequency of
264 * hrtimer callbacks (5ms period) and that reads typically only
265 * happen in response to a hrtimer event and likely complete
266 * before the next callback.
268 * Note: This lock is not held *while* reading and copying data
269 * to userspace so the value of head observed in hrtimer
270 * callbacks won't represent any partial consumption of data.
275 * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to
278 * Initial values of 0xffffffff are invalid and imply that an
279 * update is required (and should be ignored by an attempted
287 * @aged_tail_idx: Index for the aged tail ready to read() data up to.
289 unsigned int aged_tail_idx;
292 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
293 * was read; used to determine when it is old enough to trust.
298 * @head: Although we can always read back the head pointer register,
299 * we prefer to avoid trusting the HW state, just to avoid any
300 * risk that some hardware condition could somehow bump the
301 * head pointer unpredictably and cause us to forward the wrong
302 * OA buffer data to userspace.
308 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
311 struct i915_vma *noa_wait;
315 * struct i915_oa_ops - Gen specific implementation of an OA unit stream
319 * @is_valid_b_counter_reg: Validates register's address for
320 * programming boolean counters for a particular platform.
322 bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);
325 * @is_valid_mux_reg: Validates register's address for programming mux
326 * for a particular platform.
328 bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);
331 * @is_valid_flex_reg: Validates register's address for programming
332 * flex EU filtering for a particular platform.
334 bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);
337 * @enable_metric_set: Selects and applies any MUX configuration to set
338 * up the Boolean and Custom (B/C) counters that are part of the
339 * counter reports being sampled. May apply system constraints such as
340 * disabling EU clock gating as required.
342 struct i915_request *
343 (*enable_metric_set)(struct i915_perf_stream *stream);
346 * @disable_metric_set: Remove system constraints associated with using
349 void (*disable_metric_set)(struct i915_perf_stream *stream);
352 * @oa_enable: Enable periodic sampling
354 void (*oa_enable)(struct i915_perf_stream *stream);
357 * @oa_disable: Disable periodic sampling
359 void (*oa_disable)(struct i915_perf_stream *stream);
362 * @read: Copy data from the circular OA buffer into a given userspace
365 int (*read)(struct i915_perf_stream *stream,
371 * @oa_hw_tail_read: read the OA tail pointer register
373 * In particular this enables us to share all the fiddly code for
374 * handling the OA unit tail pointer race that affects multiple
377 u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
381 struct drm_i915_private *i915;
383 struct kobject *metrics_kobj;
386 * Lock associated with adding/modifying/removing OA configs
387 * in perf->metrics_idr.
389 struct mutex metrics_lock;
392 * List of dynamic configurations (struct i915_oa_config), you
393 * need to hold perf->metrics_lock to access it.
395 struct idr metrics_idr;
398 * Lock associated with anything below within this structure
399 * except exclusive_stream.
404 * The stream currently using the OA unit. If accessed
405 * outside a syscall associated with its file
408 struct i915_perf_stream *exclusive_stream;
411 * For rate limiting any notifications of spurious
414 struct ratelimit_state spurious_report_rs;
416 struct i915_oa_config test_config;
418 u32 gen7_latched_oastatus1;
419 u32 ctx_oactxctrl_offset;
420 u32 ctx_flexeu0_offset;
423 * The RPT_ID/reason field for Gen8+ includes a bit
424 * to determine if the CTX ID in the report is valid
425 * but the specific bit differs between Gen 8 and 9
427 u32 gen8_valid_ctx_bit;
429 struct i915_oa_ops ops;
430 const struct i915_oa_format *oa_formats;
432 atomic64_t noa_programming_delay;
435 #endif /* _I915_PERF_TYPES_H_ */