i915_perf_types.h revision 1.7
1/* SPDX-License-Identifier: MIT */
2/*
3 * Copyright © 2019 Intel Corporation
4 */
5
6#ifndef _I915_PERF_TYPES_H_
7#define _I915_PERF_TYPES_H_
8
9#include <linux/atomic.h>
10#include <linux/device.h>
11#include <linux/hrtimer.h>
12#include <linux/llist.h>
13#include <linux/poll.h>
14#include <linux/sysfs.h>
15#include <linux/types.h>
16#include <linux/uuid.h>
17#include <linux/wait.h>
18#include <uapi/drm/i915_drm.h>
19
20#include "gt/intel_engine_types.h"
21#include "gt/intel_sseu.h"
22#include "i915_reg_defs.h"
23#include "intel_uncore.h"
24#include "intel_wakeref.h"
25
26struct drm_i915_private;
27struct file;
28struct i915_active;
29struct i915_gem_context;
30struct i915_perf;
31struct i915_vma;
32struct intel_context;
33struct intel_engine_cs;
34
/*
 * Identifiers for OA perf groups.
 *
 * PERF_GROUP_OAG and PERF_GROUP_OAM_SAMEDIA_0 are both 0, so PERF_GROUP_MAX
 * evaluates to 1.
 * NOTE(review): presumably the two names may alias because they are used on
 * different GTs - confirm against the users of these values.
 *
 * NOTE(review): PERF_GROUP_INVALID is U32_MAX, which presumably does not fit
 * in an int; if so this relies on the compiler accepting enumerators outside
 * the int range - confirm.
 */
35enum {
36	PERF_GROUP_OAG = 0,
37	PERF_GROUP_OAM_SAMEDIA_0 = 0,
38
39	PERF_GROUP_MAX,
40	PERF_GROUP_INVALID = U32_MAX,
41};
42
/*
 * Report header variant, stored per OA format in the header member of
 * struct i915_oa_format.
 * NOTE(review): the names suggest 32-bit versus 64-bit report headers -
 * confirm against the code that parses reports.
 */
43enum report_header {
44	HDR_32_BIT = 0,
45	HDR_64_BIT,
46};
47
/*
 * Register description for one OA unit; embedded by value in
 * struct i915_perf_group as its regs member.
 *
 * NOTE(review): base is presumably the MMIO base that the i915_reg_t members
 * are derived from, and oa_ctrl_counter_format_shift the bit position of the
 * counter-format field within the oa_ctrl register - confirm against the
 * code that fills this structure in.
 */
48struct i915_perf_regs {
49	u32 base;
50	i915_reg_t oa_head_ptr;
51	i915_reg_t oa_tail_ptr;
52	i915_reg_t oa_buffer;
53	i915_reg_t oa_ctx_ctrl;
54	i915_reg_t oa_ctrl;
55	i915_reg_t oa_debug;
56	i915_reg_t oa_status;
57	u32 oa_ctrl_counter_format_shift;
58};
59
/*
 * Type of OA unit (OAG or OAM); stored in the type member of
 * struct i915_perf_group.
 */
60enum oa_type {
61	TYPE_OAG,
62	TYPE_OAM,
63};
64
/*
 * Description of one OA report format; struct i915_perf.oa_formats and
 * struct i915_perf_stream.oa_buffer.format point at const instances.
 *
 * NOTE(review): size is presumably the report size in bytes, and type is
 * declared as int but presumably holds an enum oa_type value - confirm
 * against the format tables.
 */
65struct i915_oa_format {
66	u32 format;
67	int size;
68	int type;
69	enum report_header header;
70};
71
/*
 * A register address paired with a 32-bit value. struct i915_oa_config
 * refers to arrays of these (mux_regs, b_counter_regs, flex_regs).
 */
72struct i915_oa_reg {
73	i915_reg_t addr;
74	u32 value;
75};
76
/*
 * An OA configuration: three lists of register/value pairs (mux, B counter
 * and flex) together with sysfs and lifetime bookkeeping.
 *
 * uuid has room for UUID_STRING_LEN characters plus one extra byte
 * (NOTE(review): presumably the NUL terminator). Each *_regs_len member
 * accompanies the matching *_regs pointer (NOTE(review): presumably a count
 * of entries rather than bytes - confirm).
 *
 * NOTE(review): the kref and rcu_head members suggest the object is
 * reference counted and released after an RCU grace period - confirm
 * against the get/put helpers.
 */
77struct i915_oa_config {
78	struct i915_perf *perf;
79
80	char uuid[UUID_STRING_LEN + 1];
81	int id;
82
83	const struct i915_oa_reg *mux_regs;
84	u32 mux_regs_len;
85	const struct i915_oa_reg *b_counter_regs;
86	u32 b_counter_regs_len;
87	const struct i915_oa_reg *flex_regs;
88	u32 flex_regs_len;
89
90	struct attribute_group sysfs_metric;
91	struct attribute *attrs[2];
92	struct kobj_attribute sysfs_metric_id;
93
94	struct kref ref;
95	struct rcu_head rcu;
96};
97
98struct i915_perf_stream;
99
100/**
101 * struct i915_perf_stream_ops - the OPs to support a specific stream type
102 */
103struct i915_perf_stream_ops {
104	/**
105	 * @enable: Enables the collection of HW samples, either in response to
106	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
107	 * without `I915_PERF_FLAG_DISABLED`.
108	 */
109	void (*enable)(struct i915_perf_stream *stream);
110
111	/**
112	 * @disable: Disables the collection of HW samples, either in response
113	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
114	 * the stream.
115	 */
116	void (*disable)(struct i915_perf_stream *stream);
117
118	/**
119	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
120	 * once there is something ready to read() for the stream
	 *
	 * This member is only declared when `notyet` is defined, so the
	 * layout of this structure depends on that macro.
121	 */
122#ifdef notyet
123	void (*poll_wait)(struct i915_perf_stream *stream,
124			  struct file *file,
125			  poll_table *wait);
126#endif
127
128	/**
129	 * @wait_unlocked: For handling a blocking read, wait until there is
130	 * something ready to read() for the stream. E.g. wait on the same
131	 * wait queue that would be passed to poll_wait().
132	 */
133	int (*wait_unlocked)(struct i915_perf_stream *stream);
134
135	/**
136	 * @read: Copy buffered metrics as records to userspace
137	 * **buf**: the userspace, destination buffer
138	 * **count**: the number of bytes to copy, requested by userspace
139	 * **offset**: zero at the start of the read, updated as the read
140	 * proceeds, it represents how many bytes have been copied so far and
141	 * the buffer offset for copying the next record.
142	 *
143	 * Copy as many buffered i915 perf samples and records for this stream
144	 * to userspace as will fit in the given buffer.
145	 *
146	 * Only write complete records; returning -%ENOSPC if there isn't room
147	 * for a complete record.
148	 *
149	 * Return any error condition that results in a short read such as
150	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
151	 * returning to userspace.
152	 */
153	int (*read)(struct i915_perf_stream *stream,
154		    char __user *buf,
155		    size_t count,
156		    size_t *offset);
157
158	/**
159	 * @destroy: Cleanup any stream specific resources.
160	 *
161	 * The stream will always be disabled before this is called.
162	 */
163	void (*destroy)(struct i915_perf_stream *stream);
164};
165
166/**
167 * struct i915_perf_stream - state for a single open stream FD
168 */
169struct i915_perf_stream {
170	/**
171	 * @perf: i915_perf backpointer
172	 */
173	struct i915_perf *perf;
174
175	/**
176	 * @uncore: mmio access path
177	 */
178	struct intel_uncore *uncore;
179
180	/**
181	 * @engine: Engine associated with this performance stream.
182	 */
183	struct intel_engine_cs *engine;
184
185	/**
186	 * @lock: Lock associated with operations on stream
187	 */
188	struct rwlock lock;
189
190	/**
191	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
192	 * properties given when opening a stream, representing the contents
193	 * of a single sample as read() by userspace.
194	 */
195	u32 sample_flags;
196
197	/**
198	 * @sample_size: Considering the configured contents of a sample
199	 * combined with the required header size, this is the total size
200	 * of a single sample record.
201	 */
202	int sample_size;
203
204	/**
205	 * @ctx: %NULL if measuring system-wide across all contexts or a
206	 * specific context that is being monitored.
207	 */
208	struct i915_gem_context *ctx;
209
210	/**
211	 * @enabled: Whether the stream is currently enabled, considering
212	 * whether the stream was opened in a disabled state and based
213	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
214	 */
215	bool enabled;
216
217	/**
218	 * @hold_preemption: Whether preemption is put on hold for command
219	 * submissions done on the @ctx. This is useful for some drivers that
220	 * cannot easily post process the OA buffer context to subtract delta
221	 * of performance counters not associated with @ctx.
222	 */
223	bool hold_preemption;
224
225	/**
226	 * @ops: The callbacks providing the implementation of this specific
227	 * type of configured stream.
228	 */
229	const struct i915_perf_stream_ops *ops;
230
231	/**
232	 * @oa_config: The OA configuration used by the stream.
233	 */
234	struct i915_oa_config *oa_config;
235
236	/**
237	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
238	 * each time @oa_config changes.
239	 */
240	struct llist_head oa_config_bos;
241
242	/**
243	 * @pinned_ctx: The OA context specific information.
244	 */
245	struct intel_context *pinned_ctx;
246
247	/**
248	 * @specific_ctx_id: The id of the specific context.
249	 */
250	u32 specific_ctx_id;
251
252	/**
253	 * @specific_ctx_id_mask: The mask used to mask specific_ctx_id bits.
254	 */
255	u32 specific_ctx_id_mask;
256
257	/**
258	 * @poll_check_timer: High resolution timer that will periodically
259	 * check for data in the circular OA buffer for notifying userspace
260	 * (e.g. during a read() or poll()).
261	 */
262	struct hrtimer poll_check_timer;
263
264	/**
265	 * @poll_wq: The wait queue that hrtimer callback wakes when it
266	 * sees data ready to read in the circular OA buffer.
267	 */
268	wait_queue_head_t poll_wq;
269
270	/**
271	 * @pollin: Whether there is data available to read.
272	 */
273	bool pollin;
274
275	/**
276	 * @periodic: Whether periodic sampling is currently enabled.
277	 */
278	bool periodic;
279
280	/**
281	 * @period_exponent: The OA unit sampling frequency is derived from this.
282	 */
283	int period_exponent;
284
285	/**
286	 * @oa_buffer: State of the OA buffer.
287	 */
288	struct {
289		const struct i915_oa_format *format;
290		struct i915_vma *vma;
291		u8 *vaddr;
292		u32 last_ctx_id;
293		int size_exponent;
294
295		/**
296		 * @ptr_lock: Locks reads and writes to all head/tail state
297		 *
298		 * Consider: the head and tail pointer state needs to be read
299		 * consistently from a hrtimer callback (atomic context) and
300		 * read() fop (user context) with tail pointer updates happening
301		 * in atomic context and head updates in user context and the
302		 * (unlikely) possibility of read() errors needing to reset all
303		 * head/tail state.
304		 *
305		 * Note: Contention/performance aren't currently a significant
306		 * concern here considering the relatively low frequency of
307		 * hrtimer callbacks (5ms period) and that reads typically only
308		 * happen in response to a hrtimer event and likely complete
309		 * before the next callback.
310		 *
311		 * Note: This lock is not held *while* reading and copying data
312		 * to userspace so the value of head observed in hrtimer
313		 * callbacks won't represent any partial consumption of data.
314		 */
315		spinlock_t ptr_lock;
316
317		/**
318		 * @head: Although we can always read back the head pointer register,
319		 * we prefer to avoid trusting the HW state, just to avoid any
320		 * risk that some hardware condition could somehow bump the
321		 * head pointer unpredictably and cause us to forward the wrong
322		 * OA buffer data to userspace.
323		 */
324		u32 head;
325
326		/**
327		 * @tail: The last verified tail that can be read by userspace.
328		 */
329		u32 tail;
330	} oa_buffer;
331
332	/**
333	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
334	 * reprogrammed.
335	 */
336	struct i915_vma *noa_wait;
337
338	/**
339	 * @poll_oa_period: The period in nanoseconds at which the OA
340	 * buffer should be checked for available data.
341	 */
342	u64 poll_oa_period;
343
344	/**
345	 * @override_gucrc: GuC RC has been overridden for the perf stream,
346	 * and we need to restore the default configuration on release.
347	 */
348	bool override_gucrc;
349};
350
351/**
352 * struct i915_oa_ops - Gen specific implementation of an OA unit stream
 *
 * An instance is embedded by value in &struct i915_perf as its ops member.
353 */
354struct i915_oa_ops {
355	/**
356	 * @is_valid_b_counter_reg: Validates register's address for
357	 * programming boolean counters for a particular platform.
358	 */
359	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);
360
361	/**
362	 * @is_valid_mux_reg: Validates register's address for programming mux
363	 * for a particular platform.
364	 */
365	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);
366
367	/**
368	 * @is_valid_flex_reg: Validates register's address for programming
369	 * flex EU filtering for a particular platform.
370	 */
371	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);
372
373	/**
374	 * @enable_metric_set: Selects and applies any MUX configuration to set
375	 * up the Boolean and Custom (B/C) counters that are part of the
376	 * counter reports being sampled. May apply system constraints such as
377	 * disabling EU clock gating as required.
378	 */
379	int (*enable_metric_set)(struct i915_perf_stream *stream,
380				 struct i915_active *active);
381
382	/**
383	 * @disable_metric_set: Remove system constraints associated with using
384	 * the OA unit.
385	 */
386	void (*disable_metric_set)(struct i915_perf_stream *stream);
387
388	/**
389	 * @oa_enable: Enable periodic sampling
390	 */
391	void (*oa_enable)(struct i915_perf_stream *stream);
392
393	/**
394	 * @oa_disable: Disable periodic sampling
395	 */
396	void (*oa_disable)(struct i915_perf_stream *stream);
397
398	/**
399	 * @read: Copy data from the circular OA buffer into a given userspace
400	 * buffer.
401	 */
402	int (*read)(struct i915_perf_stream *stream,
403		    char __user *buf,
404		    size_t count,
405		    size_t *offset);
406
407	/**
408	 * @oa_hw_tail_read: read the OA tail pointer register
409	 *
410	 * In particular this enables us to share all the fiddly code for
411	 * handling the OA unit tail pointer race that affects multiple
412	 * generations.
413	 */
414	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
415};
416
/*
 * State for one OA unit. struct i915_perf_gt refers to these through its
 * group member, one for each OA buffer.
 */
417struct i915_perf_group {
418	/*
419	 * @exclusive_stream: The stream currently using the OA unit. This is
420	 * sometimes accessed outside a syscall associated to its file
421	 * descriptor.
422	 */
423	struct i915_perf_stream *exclusive_stream;
424
425	/*
426	 * @num_engines: The number of engines using this OA unit.
427	 */
428	u32 num_engines;
429
430	/*
431	 * @regs: OA buffer register group for programming the OA unit.
432	 */
433	struct i915_perf_regs regs;
434
435	/*
436	 * @type: Type of OA unit - OAM, OAG etc.
437	 */
438	enum oa_type type;
439};
440
/*
 * Per-GT perf state: the sseu configuration used while perf is active and
 * the OA groups belonging to the GT.
 */
441struct i915_perf_gt {
442	/*
443	 * Lock associated with anything below within this structure.
444	 */
445	struct rwlock lock;
446
447	/**
448	 * @sseu: sseu configuration selected to run while perf is active,
449	 * applies to all contexts.
450	 */
451	struct intel_sseu sseu;
452
453	/**
454	 * @num_perf_groups: number of perf groups per gt.
455	 */
456	u32 num_perf_groups;
457
458	/*
459	 * @group: list of OA groups - one for each OA buffer.
	 * NOTE(review): presumably an array of num_perf_groups entries -
	 * confirm against the allocation site.
460	 */
461	struct i915_perf_group *group;
462};
463
/*
 * Top-level perf state with a backpointer to struct drm_i915_private.
 * Holds the dynamic OA configs (metrics_idr, guarded by metrics_lock),
 * rate limiters for diagnostics, the OA callbacks (ops) and the table and
 * mask of supported OA formats.
 */
464struct i915_perf {
465	struct drm_i915_private *i915;
466
467	struct kobject *metrics_kobj;
468
469	/*
470	 * Lock associated with adding/modifying/removing OA configs
471	 * in perf->metrics_idr.
472	 */
473	struct rwlock metrics_lock;
474
475	/*
476	 * List of dynamic configurations (struct i915_oa_config), you
477	 * need to hold perf->metrics_lock to access it.
478	 */
479	struct idr metrics_idr;
480
481	/**
482	 * For rate limiting any notifications of spurious
483	 * invalid OA reports
484	 */
485	struct ratelimit_state spurious_report_rs;
486
487	/**
488	 * For rate limiting any notifications of tail pointer
489	 * race.
490	 */
491	struct ratelimit_state tail_pointer_race;
492
493	u32 gen7_latched_oastatus1;
494	u32 ctx_oactxctrl_offset;
495	u32 ctx_flexeu0_offset;
496
497	/**
498	 * The RPT_ID/reason field for Gen8+ includes a bit
499	 * to determine if the CTX ID in the report is valid
500	 * but the specific bit differs between Gen 8 and 9
501	 */
502	u32 gen8_valid_ctx_bit;
503
504	struct i915_oa_ops ops;
505	const struct i915_oa_format *oa_formats;
506
507	/**
508	 * Use a format mask to store the supported formats
509	 * for a platform.
	 *
	 * FORMAT_MASK_SIZE is the number of unsigned longs needed to hold
	 * I915_OA_FORMAT_MAX - 1 bits.
	 * NOTE(review): if format values are used directly as bit indices,
	 * confirm that the highest format value still fits in the mask.
510	 */
511#define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG)
512	unsigned long format_mask[FORMAT_MASK_SIZE];
513
514	atomic64_t noa_programming_delay;
515};
516
517#endif /* _I915_PERF_TYPES_H_ */
518