1/*	$NetBSD: i915_perf_types.h,v 1.6 2021/12/19 11:36:57 riastradh Exp $	*/
2
3/* SPDX-License-Identifier: MIT */
4/*
 * Copyright © 2019 Intel Corporation
6 */
7
8#ifndef _I915_PERF_TYPES_H_
9#define _I915_PERF_TYPES_H_
10
11#include <linux/atomic.h>
12#include <linux/device.h>
13#include <linux/hrtimer.h>
14#include <linux/llist.h>
15#include <linux/poll.h>
16#include <linux/sysfs.h>
17#include <linux/types.h>
18#include <linux/uuid.h>
19#include <linux/wait.h>
20
21#include "i915_reg.h"
22#include "intel_wakeref.h"
23
24struct drm_i915_private;
25struct file;
26struct i915_gem_context;
27struct i915_perf;
28struct i915_vma;
29struct intel_context;
30struct intel_engine_cs;
31
/**
 * struct i915_oa_format - one entry describing an OA report format
 *
 * Pairs a 32-bit format value with a size. struct i915_perf keeps a
 * pointer to a const array of these (oa_formats).
 *
 * NOTE(review): presumably @format is the hardware encoding of the report
 * format and @size is the size of one report in bytes -- confirm against
 * the users of this table.
 */
struct i915_oa_format {
	u32 format;
	int size;
};
36
/**
 * struct i915_oa_reg - a register address paired with a 32-bit value
 *
 * struct i915_oa_config refers to arrays of these for its mux, boolean
 * counter and flex register lists.
 *
 * NOTE(review): presumably @value is the value to be written to @addr when
 * the configuration is applied -- confirm against the code that consumes
 * these lists.
 */
struct i915_oa_reg {
	i915_reg_t addr;
	u32 value;
};
41
/**
 * struct i915_oa_config - an OA configuration (set of register lists)
 *
 * A configuration is identified by a UUID string and an integer id and
 * carries three register lists (mux, boolean counter, flex), each stored as
 * a pointer plus a length. The object embeds a kref and an rcu_head.
 *
 * NOTE(review): presumably @id is the key under which the config is stored
 * in perf->metrics_idr, and the *_len members count array elements rather
 * than bytes -- confirm against the code that fills them in.
 */
struct i915_oa_config {
	/* i915_perf backpointer. */
	struct i915_perf *perf;

	/* UUID in string form; sized for UUID_STRING_LEN characters plus one. */
	char uuid[UUID_STRING_LEN + 1];
	int id;

	/* Register lists; each pointer is paired with the length that follows. */
	const struct i915_oa_reg *mux_regs;
	u32 mux_regs_len;
	const struct i915_oa_reg *b_counter_regs;
	u32 b_counter_regs_len;
	const struct i915_oa_reg *flex_regs;
	u32 flex_regs_len;

	/* The sysfs members below are compiled out on NetBSD. */
#ifndef __NetBSD__		/* XXX sysfs */
	struct attribute_group sysfs_metric;
	struct attribute *attrs[2];
	struct device_attribute sysfs_metric_id;
#endif

	/*
	 * Reference count and RCU head.
	 * NOTE(review): presumably @rcu is used to defer freeing once @ref
	 * drops to zero -- confirm in the release path.
	 */
	struct kref ref;
	struct rcu_head rcu;
};
64
65struct i915_perf_stream;
66
/**
 * struct i915_perf_stream_ops - the OPs to support a specific stream type
 *
 * A table of callbacks; struct i915_perf_stream points at one of these
 * through its @ops member. On NetBSD the @poll_wait callback is compiled
 * out and @read takes a different parameter list (see below).
 */
struct i915_perf_stream_ops {
	/**
	 * @enable: Enables the collection of HW samples, either in response to
	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
	 * without `I915_PERF_FLAG_DISABLED`.
	 */
	void (*enable)(struct i915_perf_stream *stream);

	/**
	 * @disable: Disables the collection of HW samples, either in response
	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
	 * the stream.
	 */
	void (*disable)(struct i915_perf_stream *stream);

#ifndef __NetBSD__
	/**
	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
	 * once there is something ready to read() for the stream.
	 *
	 * Not available on NetBSD.
	 */
	void (*poll_wait)(struct i915_perf_stream *stream,
			  struct file *file,
			  poll_table *wait);
#endif

	/**
	 * @wait_unlocked: For handling a blocking read, wait until there is
	 * something ready to read() for the stream. E.g. wait on the same
	 * wait queue that would be passed to poll_wait().
	 */
	int (*wait_unlocked)(struct i915_perf_stream *stream);

	/**
	 * @read: Copy buffered metrics as records to userspace
	 * **buf**: the userspace, destination buffer
	 * **count**: the number of bytes to copy, requested by userspace
	 * **offset**: zero at the start of the read, updated as the read
	 * proceeds, it represents how many bytes have been copied so far and
	 * the buffer offset for copying the next record.
	 *
	 * Copy as many buffered i915 perf samples and records for this stream
	 * to userspace as will fit in the given buffer.
	 *
	 * Only write complete records; returning -%ENOSPC if there isn't room
	 * for a complete record.
	 *
	 * Return any error condition that results in a short read such as
	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
	 * returning to userspace.
	 *
	 * On NetBSD **buf** is a struct uio, and **count** and **offset** are
	 * dummy parameters (marked XXX below), so the description of those
	 * two parameters above applies to the non-NetBSD signature only.
	 */
#ifdef __NetBSD__
	int (*read)(struct i915_perf_stream *stream,
		    struct uio *buf,
		    kauth_cred_t count, /* XXX dummy */
		    int offset);	/* XXX dummy */
#else
	int (*read)(struct i915_perf_stream *stream,
		    char __user *buf,
		    size_t count,
		    size_t *offset);
#endif

	/**
	 * @destroy: Cleanup any stream specific resources.
	 *
	 * The stream will always be disabled before this is called.
	 */
	void (*destroy)(struct i915_perf_stream *stream);
};
139
/**
 * struct i915_perf_stream - state for a single open stream FD
 */
struct i915_perf_stream {
	/**
	 * @perf: i915_perf backpointer
	 */
	struct i915_perf *perf;

	/**
	 * @uncore: mmio access path
	 */
	struct intel_uncore *uncore;

	/**
	 * @engine: Engine associated with this performance stream.
	 */
	struct intel_engine_cs *engine;

	/**
	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
	 * properties given when opening a stream, representing the contents
	 * of a single sample as read() by userspace.
	 */
	u32 sample_flags;

	/**
	 * @sample_size: Considering the configured contents of a sample
	 * combined with the required header size, this is the total size
	 * of a single sample record.
	 */
	int sample_size;

	/**
	 * @ctx: %NULL if measuring system-wide across all contexts or a
	 * specific context that is being monitored.
	 */
	struct i915_gem_context *ctx;

	/**
	 * @enabled: Whether the stream is currently enabled, considering
	 * whether the stream was opened in a disabled state and based
	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
	 */
	bool enabled;

	/**
	 * @hold_preemption: Whether preemption is put on hold for command
	 * submissions done on the @ctx. This is useful for some drivers that
	 * cannot easily post process the OA buffer context to subtract delta
	 * of performance counters not associated with @ctx.
	 */
	bool hold_preemption;

	/**
	 * @ops: The callbacks providing the implementation of this specific
	 * type of configured stream.
	 */
	const struct i915_perf_stream_ops *ops;

	/**
	 * @oa_config: The OA configuration used by the stream.
	 */
	struct i915_oa_config *oa_config;

	/**
	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
	 * each time @oa_config changes.
	 */
	struct llist_head oa_config_bos;

	/**
	 * @pinned_ctx: The OA context specific information.
	 */
	struct intel_context *pinned_ctx;

	/**
	 * @specific_ctx_id: The id of the specific context.
	 */
	u32 specific_ctx_id;

	/**
	 * @specific_ctx_id_mask: The mask used for masking the
	 * @specific_ctx_id bits.
	 */
	u32 specific_ctx_id_mask;

	/**
	 * @poll_check_timer: High resolution timer that will periodically
	 * check for data in the circular OA buffer for notifying userspace
	 * (e.g. during a read() or poll()).
	 */
	struct hrtimer poll_check_timer;

	/**
	 * @poll_wq: The wait queue that hrtimer callback wakes when it
	 * sees data ready to read in the circular OA buffer.
	 *
	 * On NetBSD this is a drm_waitqueue_t and is accompanied by a
	 * struct selinfo, @poll_selq.
	 */
#ifdef __NetBSD__
	drm_waitqueue_t poll_wq;
	struct selinfo poll_selq;
#else
	wait_queue_head_t poll_wq;
#endif

	/**
	 * @pollin: Whether there is data available to read.
	 */
	bool pollin;

	/**
	 * @periodic: Whether periodic sampling is currently enabled.
	 */
	bool periodic;

	/**
	 * @period_exponent: The OA unit sampling frequency is derived from this.
	 */
	int period_exponent;

	/**
	 * @oa_buffer: State of the OA buffer.
	 *
	 * NOTE(review): the first six members (vma, vaddr, last_ctx_id,
	 * format, format_size, size_exponent) carry no documentation here;
	 * presumably vma/vaddr are the buffer object and its CPU mapping --
	 * confirm against the code that sets them up.
	 */
	struct {
		struct i915_vma *vma;
		u8 *vaddr;
		u32 last_ctx_id;
		int format;
		int format_size;
		int size_exponent;

		/**
		 * @ptr_lock: Locks reads and writes to all head/tail state
		 *
		 * Consider: the head and tail pointer state needs to be read
		 * consistently from a hrtimer callback (atomic context) and
		 * read() fop (user context) with tail pointer updates happening
		 * in atomic context and head updates in user context and the
		 * (unlikely) possibility of read() errors needing to reset all
		 * head/tail state.
		 *
		 * Note: Contention/performance aren't currently a significant
		 * concern here considering the relatively low frequency of
		 * hrtimer callbacks (5ms period) and that reads typically only
		 * happen in response to a hrtimer event and likely complete
		 * before the next callback.
		 *
		 * Note: This lock is not held *while* reading and copying data
		 * to userspace so the value of head observed in hrtimer
		 * callbacks won't represent any partial consumption of data.
		 */
		spinlock_t ptr_lock;

		/**
		 * @tails: One 'aging' tail pointer and one 'aged' tail pointer
		 * ready to be used for reading.
		 *
		 * Initial values of 0xffffffff are invalid and imply that an
		 * update is required (and should be ignored by an attempted
		 * read)
		 */
		struct {
			u32 offset;
		} tails[2];

		/**
		 * @aged_tail_idx: Index for the aged tail ready to read() data up to.
		 */
		unsigned int aged_tail_idx;

		/**
		 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
		 * was read; used to determine when it is old enough to trust.
		 */
		u64 aging_timestamp;

		/**
		 * @head: Although we can always read back the head pointer register,
		 * we prefer to avoid trusting the HW state, just to avoid any
		 * risk that some hardware condition could somehow bump the
		 * head pointer unpredictably and cause us to forward the wrong
		 * OA buffer data to userspace.
		 */
		u32 head;
	} oa_buffer;

	/**
	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
	 * reprogrammed.
	 */
	struct i915_vma *noa_wait;
};
331
/**
 * struct i915_oa_ops - Gen specific implementation of an OA unit stream
 *
 * A table of callbacks embedded by value in struct i915_perf (its ops
 * member). The @read callback has a NetBSD-specific parameter list.
 */
struct i915_oa_ops {
	/**
	 * @is_valid_b_counter_reg: Validates register's address for
	 * programming boolean counters for a particular platform.
	 */
	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @is_valid_mux_reg: Validates register's address for programming mux
	 * for a particular platform.
	 */
	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @is_valid_flex_reg: Validates register's address for programming
	 * flex EU filtering for a particular platform.
	 */
	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @enable_metric_set: Selects and applies any MUX configuration to set
	 * up the Boolean and Custom (B/C) counters that are part of the
	 * counter reports being sampled. May apply system constraints such as
	 * disabling EU clock gating as required.
	 */
	int (*enable_metric_set)(struct i915_perf_stream *stream);

	/**
	 * @disable_metric_set: Remove system constraints associated with using
	 * the OA unit.
	 */
	void (*disable_metric_set)(struct i915_perf_stream *stream);

	/**
	 * @oa_enable: Enable periodic sampling
	 */
	void (*oa_enable)(struct i915_perf_stream *stream);

	/**
	 * @oa_disable: Disable periodic sampling
	 */
	void (*oa_disable)(struct i915_perf_stream *stream);

	/**
	 * @read: Copy data from the circular OA buffer into a given userspace
	 * buffer.
	 *
	 * On NetBSD the destination is a struct uio, and the count and offset
	 * parameters are dummies (marked XXX below).
	 */
#ifdef __NetBSD__
	int (*read)(struct i915_perf_stream *stream,
		    struct uio *buf,
		    kauth_cred_t count, /* XXX dummy */
		    int offset);	/* XXX dummy */
#else
	int (*read)(struct i915_perf_stream *stream,
		    char __user *buf,
		    size_t count,
		    size_t *offset);
#endif

	/**
	 * @oa_hw_tail_read: Read the OA tail pointer register
	 *
	 * In particular this enables us to share all the fiddly code for
	 * handling the OA unit tail pointer race that affects multiple
	 * generations.
	 */
	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
};
403
/**
 * struct i915_perf - per-device i915 perf state
 *
 * Holds the backpointer to the owning drm_i915_private, the idr of OA
 * configurations with its lock, the stream currently using the OA unit,
 * and the generation-specific callbacks and format table.
 */
struct i915_perf {
	/**
	 * @i915: drm_i915_private backpointer
	 */
	struct drm_i915_private *i915;

	/*
	 * NOTE(review): presumably the kobject under which metric sets are
	 * exposed via sysfs -- confirm (the per-config sysfs members of
	 * struct i915_oa_config are compiled out on NetBSD).
	 */
	struct kobject *metrics_kobj;

	/**
	 * @metrics_lock: Lock associated with adding/modifying/removing OA
	 * configs in perf->metrics_idr.
	 */
	struct mutex metrics_lock;

	/**
	 * @metrics_idr: List of dynamic configurations
	 * (struct i915_oa_config), you need to hold perf->metrics_lock to
	 * access it.
	 */
	struct idr metrics_idr;

	/**
	 * @lock: Lock associated with anything below within this structure
	 * except exclusive_stream.
	 */
	struct mutex lock;

	/**
	 * @exclusive_stream: The stream currently using the OA unit.
	 *
	 * NOTE(review): the original comment continued with "If accessed
	 * outside a syscall associated to its file descriptor." and stopped
	 * there, so the rule for such accesses is not stated in this header.
	 * Per the comment on @lock, this member is not covered by
	 * perf->lock. Confirm the intended synchronization before relying
	 * on it.
	 */
	struct i915_perf_stream *exclusive_stream;

	/**
	 * @spurious_report_rs: For rate limiting any notifications of
	 * spurious invalid OA reports
	 */
	struct ratelimit_state spurious_report_rs;

	/*
	 * NOTE(review): an OA configuration embedded by value; presumably a
	 * built-in test configuration as the name suggests -- confirm.
	 */
	struct i915_oa_config test_config;

	u32 gen7_latched_oastatus1;
	u32 ctx_oactxctrl_offset;
	u32 ctx_flexeu0_offset;

	/**
	 * @gen8_valid_ctx_bit: The RPT_ID/reason field for Gen8+ includes a
	 * bit to determine if the CTX ID in the report is valid but the
	 * specific bit differs between Gen 8 and 9
	 */
	u32 gen8_valid_ctx_bit;

	/* Generation-specific callbacks and the table of OA report formats. */
	struct i915_oa_ops ops;
	const struct i915_oa_format *oa_formats;

	/*
	 * NOTE(review): presumably a delay applied around NOA programming
	 * (see i915_perf_stream.noa_wait); units are not stated here --
	 * confirm.
	 */
	atomic64_t noa_programming_delay;
};
458
459#endif /* _I915_PERF_TYPES_H_ */
460