i915_perf_types.h revision 1.3
1/* SPDX-License-Identifier: MIT */
2/*
3 * Copyright © 2019 Intel Corporation
4 */
5
6#ifndef _I915_PERF_TYPES_H_
7#define _I915_PERF_TYPES_H_
8
9#include <linux/atomic.h>
10#include <linux/device.h>
11#include <linux/hrtimer.h>
12#include <linux/llist.h>
13#include <linux/poll.h>
14#include <linux/sysfs.h>
15#include <linux/types.h>
16#include <linux/uuid.h>
17#include <linux/wait.h>
18#include <uapi/drm/i915_drm.h>
19
20#include "gt/intel_sseu.h"
21#include "i915_reg.h"
22#include "intel_wakeref.h"
23
24struct drm_i915_private;
25struct file;
26struct i915_active;
27struct i915_gem_context;
28struct i915_perf;
29struct i915_vma;
30struct intel_context;
31struct intel_engine_cs;
32
/**
 * struct i915_oa_format - describes a single OA unit report format
 * @format: format identifier (one of the I915_OA_FORMAT_* uAPI values,
 *	judging by the I915_OA_FORMAT_MAX-sized mask below — confirm)
 * @size: size in bytes of one report in this format
 */
struct i915_oa_format {
	u32 format;
	int size;
};
37
/**
 * struct i915_oa_reg - a register address/value pair for OA programming
 * @addr: register to program
 * @value: value to write to @addr
 */
struct i915_oa_reg {
	i915_reg_t addr;
	u32 value;
};
42
/**
 * struct i915_oa_config - a user-registered OA metrics configuration
 * @perf: back-pointer to the owning &struct i915_perf
 * @uuid: NUL-terminated UUID string identifying this configuration
 * @id: id of this config in perf->metrics_idr
 * @mux_regs: NOA mux programming registers
 * @mux_regs_len: number of entries in @mux_regs
 * @b_counter_regs: boolean (B/C) counter programming registers
 * @b_counter_regs_len: number of entries in @b_counter_regs
 * @flex_regs: flex EU programming registers
 * @flex_regs_len: number of entries in @flex_regs
 * @sysfs_metric: sysfs attribute group exposing this config
 * @attrs: attribute array for @sysfs_metric (one attribute + NULL sentinel,
 *	given the size of 2)
 * @sysfs_metric_id: sysfs attribute exposing @id to userspace
 * @ref: reference count for this config
 * @rcu: defers freeing until an RCU grace period has elapsed
 */
struct i915_oa_config {
	struct i915_perf *perf;

	char uuid[UUID_STRING_LEN + 1];
	int id;

	const struct i915_oa_reg *mux_regs;
	u32 mux_regs_len;
	const struct i915_oa_reg *b_counter_regs;
	u32 b_counter_regs_len;
	const struct i915_oa_reg *flex_regs;
	u32 flex_regs_len;

	struct attribute_group sysfs_metric;
	struct attribute *attrs[2];
	struct device_attribute sysfs_metric_id;

	struct kref ref;
	struct rcu_head rcu;
};
63
64struct i915_perf_stream;
65
/**
 * struct i915_perf_stream_ops - the OPs to support a specific stream type
 */
struct i915_perf_stream_ops {
	/**
	 * @enable: Enables the collection of HW samples, either in response to
	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
	 * without `I915_PERF_FLAG_DISABLED`.
	 */
	void (*enable)(struct i915_perf_stream *stream);

	/**
	 * @disable: Disables the collection of HW samples, either in response
	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
	 * the stream.
	 */
	void (*disable)(struct i915_perf_stream *stream);

	/**
	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
	 * once there is something ready to read() for the stream
	 */
	/* NOTE(review): compiled out in this port ("notyet" guard) */
#ifdef notyet
	void (*poll_wait)(struct i915_perf_stream *stream,
			  struct file *file,
			  poll_table *wait);
#endif

	/**
	 * @wait_unlocked: For handling a blocking read, wait until there is
	 * something ready to read() for the stream. E.g. wait on the same
	 * wait queue that would be passed to poll_wait().
	 */
	int (*wait_unlocked)(struct i915_perf_stream *stream);

	/**
	 * @read: Copy buffered metrics as records to userspace
	 * **buf**: the userspace, destination buffer
	 * **count**: the number of bytes to copy, requested by userspace
	 * **offset**: zero at the start of the read, updated as the read
	 * proceeds, it represents how many bytes have been copied so far and
	 * the buffer offset for copying the next record.
	 *
	 * Copy as many buffered i915 perf samples and records for this stream
	 * to userspace as will fit in the given buffer.
	 *
	 * Only write complete records; returning -%ENOSPC if there isn't room
	 * for a complete record.
	 *
	 * Return any error condition that results in a short read such as
	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
	 * returning to userspace.
	 */
	int (*read)(struct i915_perf_stream *stream,
		    char __user *buf,
		    size_t count,
		    size_t *offset);

	/**
	 * @destroy: Cleanup any stream specific resources.
	 *
	 * The stream will always be disabled before this is called.
	 */
	void (*destroy)(struct i915_perf_stream *stream);
};
131
/**
 * struct i915_perf_stream - state for a single open stream FD
 */
struct i915_perf_stream {
	/**
	 * @perf: i915_perf backpointer
	 */
	struct i915_perf *perf;

	/**
	 * @uncore: mmio access path
	 */
	struct intel_uncore *uncore;

	/**
	 * @engine: Engine associated with this performance stream.
	 */
	struct intel_engine_cs *engine;

	/**
	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
	 * properties given when opening a stream, representing the contents
	 * of a single sample as read() by userspace.
	 */
	u32 sample_flags;

	/**
	 * @sample_size: Considering the configured contents of a sample
	 * combined with the required header size, this is the total size
	 * of a single sample record.
	 */
	int sample_size;

	/**
	 * @ctx: %NULL if measuring system-wide across all contexts or a
	 * specific context that is being monitored.
	 */
	struct i915_gem_context *ctx;

	/**
	 * @enabled: Whether the stream is currently enabled, considering
	 * whether the stream was opened in a disabled state and based
	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
	 */
	bool enabled;

	/**
	 * @hold_preemption: Whether preemption is put on hold for command
	 * submissions done on the @ctx. This is useful for some drivers that
	 * cannot easily post process the OA buffer context to subtract delta
	 * of performance counters not associated with @ctx.
	 */
	bool hold_preemption;

	/**
	 * @ops: The callbacks providing the implementation of this specific
	 * type of configured stream.
	 */
	const struct i915_perf_stream_ops *ops;

	/**
	 * @oa_config: The OA configuration used by the stream.
	 */
	struct i915_oa_config *oa_config;

	/**
	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
	 * each time @oa_config changes.
	 */
	struct llist_head oa_config_bos;

	/**
	 * @pinned_ctx: The OA context specific information.
	 */
	struct intel_context *pinned_ctx;

	/**
	 * @specific_ctx_id: The id of the specific context.
	 */
	u32 specific_ctx_id;

	/**
	 * @specific_ctx_id_mask: The mask used to mask @specific_ctx_id bits.
	 */
	u32 specific_ctx_id_mask;

	/**
	 * @poll_check_timer: High resolution timer that will periodically
	 * check for data in the circular OA buffer for notifying userspace
	 * (e.g. during a read() or poll()).
	 */
	struct hrtimer poll_check_timer;

	/**
	 * @poll_wq: The wait queue that hrtimer callback wakes when it
	 * sees data ready to read in the circular OA buffer.
	 */
	wait_queue_head_t poll_wq;

	/**
	 * @pollin: Whether there is data available to read.
	 */
	bool pollin;

	/**
	 * @periodic: Whether periodic sampling is currently enabled.
	 */
	bool periodic;

	/**
	 * @period_exponent: The OA unit sampling frequency is derived from this.
	 */
	int period_exponent;

	/**
	 * @oa_buffer: State of the OA buffer.
	 */
	struct {
		/* backing storage for the circular OA buffer */
		struct i915_vma *vma;
		/* CPU-accessible mapping of the OA buffer */
		u8 *vaddr;
		/* presumably the context id seen in the last report — confirm */
		u32 last_ctx_id;
		/* report format and its per-report size in bytes */
		int format;
		int format_size;
		/* NOTE(review): looks like log2 of the buffer size — confirm */
		int size_exponent;

		/**
		 * @ptr_lock: Locks reads and writes to all head/tail state
		 *
		 * Consider: the head and tail pointer state needs to be read
		 * consistently from a hrtimer callback (atomic context) and
		 * read() fop (user context) with tail pointer updates happening
		 * in atomic context and head updates in user context and the
		 * (unlikely) possibility of read() errors needing to reset all
		 * head/tail state.
		 *
		 * Note: Contention/performance aren't currently a significant
		 * concern here considering the relatively low frequency of
		 * hrtimer callbacks (5ms period) and that reads typically only
		 * happen in response to a hrtimer event and likely complete
		 * before the next callback.
		 *
		 * Note: This lock is not held *while* reading and copying data
		 * to userspace so the value of head observed in hrtimer
		 * callbacks won't represent any partial consumption of data.
		 */
		spinlock_t ptr_lock;

		/**
		 * @aging_tail: The last HW tail reported by HW. The data
		 * might not have made it to memory yet though.
		 */
		u32 aging_tail;

		/**
		 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
		 * was read; used to determine when it is old enough to trust.
		 */
		u64 aging_timestamp;

		/**
		 * @head: Although we can always read back the head pointer register,
		 * we prefer to avoid trusting the HW state, just to avoid any
		 * risk that some hardware condition could somehow bump the
		 * head pointer unpredictably and cause us to forward the wrong
		 * OA buffer data to userspace.
		 */
		u32 head;

		/**
		 * @tail: The last verified tail that can be read by userspace.
		 */
		u32 tail;
	} oa_buffer;

	/**
	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
	 * reprogrammed.
	 */
	struct i915_vma *noa_wait;

	/**
	 * @poll_oa_period: The period in nanoseconds at which the OA
	 * buffer should be checked for available data.
	 */
	u64 poll_oa_period;
};
318
/**
 * struct i915_oa_ops - Gen specific implementation of an OA unit stream
 */
struct i915_oa_ops {
	/**
	 * @is_valid_b_counter_reg: Validates register's address for
	 * programming boolean counters for a particular platform.
	 */
	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @is_valid_mux_reg: Validates register's address for programming mux
	 * for a particular platform.
	 */
	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @is_valid_flex_reg: Validates register's address for programming
	 * flex EU filtering for a particular platform.
	 */
	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @enable_metric_set: Selects and applies any MUX configuration to set
	 * up the Boolean and Custom (B/C) counters that are part of the
	 * counter reports being sampled. May apply system constraints such as
	 * disabling EU clock gating as required.
	 * Returns an int — presumably 0 on success / negative errno per
	 * kernel convention; confirm against implementations.
	 */
	int (*enable_metric_set)(struct i915_perf_stream *stream,
				 struct i915_active *active);

	/**
	 * @disable_metric_set: Remove system constraints associated with using
	 * the OA unit.
	 */
	void (*disable_metric_set)(struct i915_perf_stream *stream);

	/**
	 * @oa_enable: Enable periodic sampling
	 */
	void (*oa_enable)(struct i915_perf_stream *stream);

	/**
	 * @oa_disable: Disable periodic sampling
	 */
	void (*oa_disable)(struct i915_perf_stream *stream);

	/**
	 * @read: Copy data from the circular OA buffer into a given userspace
	 * buffer. Same contract as &i915_perf_stream_ops.read: @offset tracks
	 * bytes copied so far.
	 */
	int (*read)(struct i915_perf_stream *stream,
		    char __user *buf,
		    size_t count,
		    size_t *offset);

	/**
	 * @oa_hw_tail_read: read the OA tail pointer register
	 *
	 * In particular this enables us to share all the fiddly code for
	 * handling the OA unit tail pointer race that affects multiple
	 * generations.
	 */
	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
};
384
/* Top-level i915 perf (OA unit) state, one per device. */
struct i915_perf {
	/* backpointer to the owning device */
	struct drm_i915_private *i915;

	/* sysfs kobject under which metric configs are exposed */
	struct kobject *metrics_kobj;

	/*
	 * Lock associated with adding/modifying/removing OA configs
	 * in perf->metrics_idr.
	 *
	 * NOTE(review): struct rwlock where upstream Linux uses a mutex —
	 * presumably the OpenBSD compat substitute; confirm.
	 */
	struct rwlock metrics_lock;

	/*
	 * List of dynamic configurations (struct i915_oa_config), you
	 * need to hold perf->metrics_lock to access it.
	 */
	struct idr metrics_idr;

	/*
	 * Lock associated with anything below within this structure
	 * except exclusive_stream.
	 */
	struct rwlock lock;

	/*
	 * The stream currently using the OA unit. If accessed
	 * outside a syscall associated to its file
	 * descriptor.
	 *
	 * NOTE(review): the sentence above appears truncated — upstream
	 * continues "you need to hold <the appropriate device lock>";
	 * confirm which lock applies in this version.
	 */
	struct i915_perf_stream *exclusive_stream;

	/**
	 * @sseu: sseu configuration selected to run while perf is active,
	 * applies to all contexts.
	 */
	struct intel_sseu sseu;

	/**
	 * For rate limiting any notifications of spurious
	 * invalid OA reports
	 */
	/* NOTE(review): compiled out in this port ("notyet" guard) */
#ifdef notyet
	struct ratelimit_state spurious_report_rs;
#endif

	/**
	 * For rate limiting any notifications of tail pointer
	 * race.
	 */
	struct ratelimit_state tail_pointer_race;

	/* Gen7: latched OASTATUS1 bits — presumably accumulated across reads */
	u32 gen7_latched_oastatus1;
	/* offsets of OACTXCONTROL / first flex EU register in the context
	 * image — TODO confirm */
	u32 ctx_oactxctrl_offset;
	u32 ctx_flexeu0_offset;

	/**
	 * The RPT_ID/reason field for Gen8+ includes a bit
	 * to determine if the CTX ID in the report is valid
	 * but the specific bit differs between Gen 8 and 9
	 */
	u32 gen8_valid_ctx_bit;

	/* platform-specific OA callbacks and report format table */
	struct i915_oa_ops ops;
	const struct i915_oa_format *oa_formats;

	/**
	 * Use a format mask to store the supported formats
	 * for a platform.
	 */
#define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG)
	unsigned long format_mask[FORMAT_MASK_SIZE];

	/* tunable delay used around NOA reprogramming; units not visible
	 * here — presumably nanoseconds, confirm */
	atomic64_t noa_programming_delay;
};
458
459#endif /* _I915_PERF_TYPES_H_ */
460