/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */

#ifndef _I915_PERF_TYPES_H_
#define _I915_PERF_TYPES_H_

#include <linux/atomic.h>
#include <linux/device.h>
#include <linux/hrtimer.h>
#include <linux/llist.h>
#include <linux/poll.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/uuid.h>
#include <linux/wait.h>

#include "gt/intel_sseu.h"
#include "i915_reg.h"
#include "intel_wakeref.h"

struct drm_i915_private;
struct file;
struct i915_active;
struct i915_gem_context;
struct i915_perf;
struct i915_vma;
struct intel_context;
struct intel_engine_cs;

struct i915_oa_format {
	u32 format;
	int size;
};

struct i915_oa_reg {
	i915_reg_t addr;
	u32 value;
};

struct i915_oa_config {
	struct i915_perf *perf;

	char uuid[UUID_STRING_LEN + 1];
	int id;

	const struct i915_oa_reg *mux_regs;
	u32 mux_regs_len;
	const struct i915_oa_reg *b_counter_regs;
	u32 b_counter_regs_len;
	const struct i915_oa_reg *flex_regs;
	u32 flex_regs_len;

	struct attribute_group sysfs_metric;
	struct attribute *attrs[2];
	struct device_attribute sysfs_metric_id;

	struct kref ref;
	struct rcu_head rcu;
};
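
/*
 * A minimal sketch (an assumption for illustration, not a helper provided by
 * this header) of how the kref/rcu pair above supports lookups: a reader may
 * only take a reference while at least one other reference still keeps the
 * config alive, and the final put can then free it after an RCU grace period.
 */
static inline struct i915_oa_config *
i915_oa_config_tryget_sketch(struct i915_oa_config *oa_config)
{
	/* Succeeds only while oa_config->ref has not already dropped to zero. */
	return kref_get_unless_zero(&oa_config->ref) ? oa_config : NULL;
}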

struct i915_perf_stream;

/**
 * struct i915_perf_stream_ops - the OPs to support a specific stream type
 */
struct i915_perf_stream_ops {
	/**
	 * @enable: Enables the collection of HW samples, either in response to
	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when the stream is
	 * opened without `I915_PERF_FLAG_DISABLED`.
	 */
	void (*enable)(struct i915_perf_stream *stream);

	/**
	 * @disable: Disables the collection of HW samples, either in response
	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
	 * the stream.
	 */
	void (*disable)(struct i915_perf_stream *stream);

	/**
	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
	 * once there is something ready to read() for the stream
	 */
#ifdef notyet
	void (*poll_wait)(struct i915_perf_stream *stream,
			  struct file *file,
			  poll_table *wait);
#endif

	/**
	 * @wait_unlocked: For handling a blocking read, wait until there is
	 * something ready to read() for the stream. E.g. wait on the same
	 * wait queue that would be passed to poll_wait().
	 */
	int (*wait_unlocked)(struct i915_perf_stream *stream);

	/**
	 * @read: Copy buffered metrics as records to userspace
	 * **buf**: the userspace, destination buffer
	 * **count**: the number of bytes to copy, requested by userspace
	 * **offset**: zero at the start of the read, updated as the read
	 * proceeds, it represents how many bytes have been copied so far and
	 * the buffer offset for copying the next record.
	 *
	 * Copy as many buffered i915 perf samples and records for this stream
	 * to userspace as will fit in the given buffer.
	 *
	 * Only write complete records; return -%ENOSPC if there isn't room
	 * for a complete record.
	 *
	 * Return any error condition that results in a short read such as
	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
	 * returning to userspace. (See the illustrative sketch following this
	 * struct for the expected record/offset contract.)
	 */
	int (*read)(struct i915_perf_stream *stream,
		    char __user *buf,
		    size_t count,
		    size_t *offset);

	/**
	 * @destroy: Clean up any stream-specific resources.
	 *
	 * The stream will always be disabled before this is called.
	 */
	void (*destroy)(struct i915_perf_stream *stream);
};
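
/*
 * A minimal sketch (an assumption, not one of the driver's read
 * implementations) of the record/offset contract expected of the @read hook
 * above: a record is only copied out when it fits completely within the
 * remaining space, and the offset advances by whole records.
 */
static inline bool i915_perf_record_fits_sketch(size_t count, size_t offset,
						size_t record_size)
{
	/* A partial record must never be written; the hook returns -ENOSPC instead. */
	return offset + record_size <= count;
}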

/**
 * struct i915_perf_stream - state for a single open stream FD
 */
struct i915_perf_stream {
	/**
	 * @perf: i915_perf backpointer
	 */
	struct i915_perf *perf;

	/**
	 * @uncore: mmio access path
	 */
	struct intel_uncore *uncore;

	/**
	 * @engine: Engine associated with this performance stream.
	 */
	struct intel_engine_cs *engine;

	/**
	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
	 * properties given when opening a stream; they determine the contents
	 * of a single sample as read() by userspace.
	 */
	u32 sample_flags;

	/**
	 * @sample_size: Considering the configured contents of a sample
	 * combined with the required header size, this is the total size
	 * of a single sample record.
	 */
	int sample_size;

	/**
	 * @ctx: %NULL if measuring system-wide across all contexts, or the
	 * specific context that is being monitored.
	 */
	struct i915_gem_context *ctx;

	/**
	 * @enabled: Whether the stream is currently enabled, considering
	 * whether the stream was opened in a disabled state and based
	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
	 */
	bool enabled;

	/**
	 * @hold_preemption: Whether preemption is put on hold for command
	 * submissions done on the @ctx. This is useful for some drivers that
	 * cannot easily post-process the OA buffer context to subtract the
	 * delta of performance counters not associated with @ctx.
	 */
	bool hold_preemption;

	/**
	 * @ops: The callbacks providing the implementation of this specific
	 * type of configured stream.
	 */
	const struct i915_perf_stream_ops *ops;

	/**
	 * @oa_config: The OA configuration used by the stream.
	 */
	struct i915_oa_config *oa_config;

	/**
	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
	 * each time @oa_config changes.
	 */
	struct llist_head oa_config_bos;

	/**
	 * @pinned_ctx: The OA context specific information.
	 */
	struct intel_context *pinned_ctx;

	/**
	 * @specific_ctx_id: The id of the specific context.
	 */
	u32 specific_ctx_id;

	/**
	 * @specific_ctx_id_mask: The mask used to mask the specific_ctx_id bits.
	 */
	u32 specific_ctx_id_mask;

	/**
	 * @poll_check_timer: High resolution timer that will periodically
	 * check for data in the circular OA buffer for notifying userspace
	 * (e.g. during a read() or poll()).
	 */
	struct hrtimer poll_check_timer;

	/**
	 * @poll_wq: The wait queue that the hrtimer callback wakes when it
	 * sees data ready to read in the circular OA buffer.
	 */
	wait_queue_head_t poll_wq;

	/**
	 * @pollin: Whether there is data available to read.
	 */
	bool pollin;

	/**
	 * @periodic: Whether periodic sampling is currently enabled.
	 */
	bool periodic;

	/**
	 * @period_exponent: The OA unit sampling frequency is derived from this.
	 */
	int period_exponent;
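
	/*
	 * Note: as an illustration (the exact tick frequency is
	 * platform-specific), the OA unit emits one report every
	 * 2^(period_exponent + 1) timestamp ticks, so the sampling period is
	 * roughly (2 << period_exponent) / timestamp-frequency. For example,
	 * with an assumed 12.5MHz timestamp and an exponent of 5 that gives
	 * 64 ticks, i.e. a period of ~5.12us.
	 */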

	/**
	 * @oa_buffer: State of the OA buffer.
	 */
	struct {
		/* Backing storage, CPU mapping, last seen report ctx id and report format. */
		struct i915_vma *vma;
		u8 *vaddr;
		u32 last_ctx_id;
		int format;
		int format_size;
		int size_exponent;

		/**
		 * @ptr_lock: Locks reads and writes to all head/tail state
		 *
		 * Consider: the head and tail pointer state needs to be read
		 * consistently from a hrtimer callback (atomic context) and
		 * read() fop (user context) with tail pointer updates happening
		 * in atomic context and head updates in user context and the
		 * (unlikely) possibility of read() errors needing to reset all
		 * head/tail state.
		 *
		 * Note: Contention/performance aren't currently a significant
		 * concern here considering the relatively low frequency of
		 * hrtimer callbacks (5ms period) and that reads typically only
		 * happen in response to a hrtimer event and likely complete
		 * before the next callback.
		 *
		 * Note: This lock is not held *while* reading and copying data
		 * to userspace so the value of head observed in hrtimer
		 * callbacks won't represent any partial consumption of data.
		 */
		spinlock_t ptr_lock;

		/**
		 * @aging_tail: The last tail reported by the HW. The data
		 * might not have made it to memory yet though.
		 */
		u32 aging_tail;

		/**
		 * @aging_timestamp: A monotonic timestamp for when the current
		 * aging tail pointer was read; used to determine when it is
		 * old enough to trust.
		 */
		u64 aging_timestamp;

		/**
		 * @head: Although we can always read back the head pointer
		 * register, we prefer to avoid trusting the HW state, just to
		 * avoid any risk that some hardware condition could somehow
		 * bump the head pointer unpredictably and cause us to forward
		 * the wrong OA buffer data to userspace.
		 */
		u32 head;

		/**
		 * @tail: The last verified tail that can be read by userspace
		 * (see the illustrative sketch following this struct for how
		 * head/tail translate into available bytes).
		 */
		u32 tail;
	} oa_buffer;

	/**
	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
	 * reprogrammed.
	 */
	struct i915_vma *noa_wait;

	/**
	 * @poll_oa_period: The period in nanoseconds at which the OA
	 * buffer should be checked for available data.
	 */
	u64 poll_oa_period;
};
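
/*
 * A minimal sketch (an assumption, not a helper defined by the driver) of the
 * circular-buffer arithmetic behind the head/tail state above: with a
 * power-of-two sized OA buffer, the number of bytes ready for read() is the
 * distance from head to the verified tail, modulo the buffer size. Updates to
 * the real offsets are serialized by oa_buffer.ptr_lock.
 */
static inline u32 i915_oa_bytes_available_sketch(u32 tail, u32 head,
						 u32 buf_size)
{
	/* Both offsets wrap within the power-of-two sized buffer. */
	return (tail - head) & (buf_size - 1);
}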

/**
 * struct i915_oa_ops - Gen specific implementation of an OA unit stream
 */
struct i915_oa_ops {
	/**
	 * @is_valid_b_counter_reg: Validates a register's address for
	 * programming boolean counters for a particular platform.
	 */
	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @is_valid_mux_reg: Validates a register's address for programming
	 * the mux for a particular platform.
	 */
	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @is_valid_flex_reg: Validates a register's address for programming
	 * flex EU filtering for a particular platform.
	 */
	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @enable_metric_set: Selects and applies any MUX configuration to set
	 * up the Boolean and Custom (B/C) counters that are part of the
	 * counter reports being sampled. May apply system constraints such as
	 * disabling EU clock gating as required.
	 */
	int (*enable_metric_set)(struct i915_perf_stream *stream,
				 struct i915_active *active);

	/**
	 * @disable_metric_set: Remove system constraints associated with using
	 * the OA unit.
	 */
	void (*disable_metric_set)(struct i915_perf_stream *stream);

	/**
	 * @oa_enable: Enable periodic sampling
	 */
	void (*oa_enable)(struct i915_perf_stream *stream);

	/**
	 * @oa_disable: Disable periodic sampling
	 */
	void (*oa_disable)(struct i915_perf_stream *stream);

	/**
	 * @read: Copy data from the circular OA buffer into a given userspace
	 * buffer.
	 */
	int (*read)(struct i915_perf_stream *stream,
		    char __user *buf,
		    size_t count,
		    size_t *offset);

	/**
	 * @oa_hw_tail_read: read the OA tail pointer register
	 *
	 * In particular this enables us to share all the fiddly code for
	 * handling the OA unit tail pointer race that affects multiple
	 * generations (see the aging sketch following this struct).
	 */
	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
};
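
/*
 * A minimal sketch (an assumption, not a driver helper) of the aging scheme
 * hinted at by oa_buffer.aging_tail/aging_timestamp above: a tail freshly read
 * via @oa_hw_tail_read is only promoted to the user-visible tail once it has
 * been stable for some margin, giving the corresponding reports time to land
 * in memory. The margin value is platform/driver policy and is only
 * illustrative here.
 */
static inline bool i915_oa_tail_aged_sketch(u64 now_ns, u64 aging_timestamp_ns,
					    u64 margin_ns)
{
	/* Trust the aging tail only after margin_ns has elapsed since it was sampled. */
	return now_ns - aging_timestamp_ns >= margin_ns;
}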

struct i915_perf {
	struct drm_i915_private *i915;

	struct kobject *metrics_kobj;

	/*
	 * Lock associated with adding/modifying/removing OA configs
	 * in perf->metrics_idr.
	 */
	struct rwlock metrics_lock;

	/*
	 * List of dynamic configurations (struct i915_oa_config); you
	 * need to hold perf->metrics_lock to access it.
	 */
	struct idr metrics_idr;

	/*
	 * Lock associated with anything below within this structure
	 * except exclusive_stream.
	 */
	struct rwlock lock;

	/*
	 * The stream currently using the OA unit. Note that perf->lock above
	 * does not cover it, so accesses outside a syscall associated with
	 * its file descriptor need additional serialization.
	 */
	struct i915_perf_stream *exclusive_stream;

	/**
	 * @sseu: sseu configuration selected to run while perf is active,
	 * applies to all contexts.
	 */
	struct intel_sseu sseu;

	/**
	 * For rate limiting any notifications of spurious
	 * invalid OA reports
	 */
#ifdef notyet
	struct ratelimit_state spurious_report_rs;
#endif

	/**
	 * For rate limiting any notifications of tail pointer
	 * race.
	 */
	struct ratelimit_state tail_pointer_race;

	u32 gen7_latched_oastatus1;
	u32 ctx_oactxctrl_offset;
	u32 ctx_flexeu0_offset;

	/**
	 * The RPT_ID/reason field for Gen8+ includes a bit
	 * to determine if the CTX ID in the report is valid
	 * but the specific bit differs between Gen 8 and 9
	 */
	u32 gen8_valid_ctx_bit;

	struct i915_oa_ops ops;
	const struct i915_oa_format *oa_formats;

	atomic64_t noa_programming_delay;
};

#endif /* _I915_PERF_TYPES_H_ */