/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */

#ifndef _I915_PERF_TYPES_H_
#define _I915_PERF_TYPES_H_

#include <linux/atomic.h>
#include <linux/device.h>
#include <linux/hrtimer.h>
#include <linux/llist.h>
#include <linux/poll.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/uuid.h>
#include <linux/wait.h>

#include "i915_reg.h"
#include "intel_wakeref.h"

struct drm_i915_private;
struct file;
struct i915_gem_context;
struct i915_perf;
struct i915_vma;
struct intel_context;
struct intel_engine_cs;

struct i915_oa_format {
	u32 format;
	int size;
};

struct i915_oa_reg {
	i915_reg_t addr;
	u32 value;
};

struct i915_oa_config {
	struct i915_perf *perf;

	char uuid[UUID_STRING_LEN + 1];
	int id;

	const struct i915_oa_reg *mux_regs;
	u32 mux_regs_len;
	const struct i915_oa_reg *b_counter_regs;
	u32 b_counter_regs_len;
	const struct i915_oa_reg *flex_regs;
	u32 flex_regs_len;

	struct attribute_group sysfs_metric;
	struct attribute *attrs[2];
	struct device_attribute sysfs_metric_id;

	struct kref ref;
	struct rcu_head rcu;
};
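
/*
 * Hypothetical illustration (not taken from the driver) of how an OA
 * configuration's register lists are laid out: each list is an array of
 * (register, value) pairs with an accompanying length. The offsets and
 * values below are placeholders rather than a real metric set.
 */
#if 0
static const struct i915_oa_reg example_mux_regs[] = {
	{ _MMIO(0x9888), 0x14150000 },	/* placeholder NOA mux programming */
	{ _MMIO(0x9888), 0x00000000 },
};

static void example_fill_oa_config(struct i915_oa_config *config)
{
	config->mux_regs = example_mux_regs;
	config->mux_regs_len = ARRAY_SIZE(example_mux_regs);

	config->b_counter_regs = NULL;
	config->b_counter_regs_len = 0;

	config->flex_regs = NULL;
	config->flex_regs_len = 0;
}
#endif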

struct i915_perf_stream;

/**
 * struct i915_perf_stream_ops - the OPs to support a specific stream type
 */
struct i915_perf_stream_ops {
	/**
	 * @enable: Enables the collection of HW samples, either in response to
	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when the stream is
	 * opened without `I915_PERF_FLAG_DISABLED`.
	 */
	void (*enable)(struct i915_perf_stream *stream);

	/**
	 * @disable: Disables the collection of HW samples, either in response
	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
	 * the stream.
	 */
	void (*disable)(struct i915_perf_stream *stream);

	/**
	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
	 * once there is something ready to read() for the stream
	 */
#ifdef notyet
	void (*poll_wait)(struct i915_perf_stream *stream,
			  struct file *file,
			  poll_table *wait);
#endif

	/**
	 * @wait_unlocked: For handling a blocking read, wait until there is
	 * something ready to read() for the stream. E.g. wait on the same
	 * wait queue that would be passed to poll_wait().
	 */
	int (*wait_unlocked)(struct i915_perf_stream *stream);

	/**
	 * @read: Copy buffered metrics as records to userspace
	 * **buf**: the userspace destination buffer
	 * **count**: the number of bytes to copy, requested by userspace
	 * **offset**: zero at the start of the read, updated as the read
	 * proceeds; it represents how many bytes have been copied so far and
	 * the buffer offset for copying the next record.
	 *
	 * Copy as many buffered i915 perf samples and records for this stream
	 * to userspace as will fit in the given buffer.
	 *
	 * Only write complete records; return -%ENOSPC if there isn't room
	 * for a complete record.
	 *
	 * Return any error condition that results in a short read such as
	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
	 * returning to userspace. (An illustrative sketch of this contract
	 * follows the structure definition.)
	 */
	int (*read)(struct i915_perf_stream *stream,
		    char __user *buf,
		    size_t count,
		    size_t *offset);

	/**
	 * @destroy: Cleanup any stream specific resources.
	 *
	 * The stream will always be disabled before this is called.
	 */
	void (*destroy)(struct i915_perf_stream *stream);
};
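
/*
 * A minimal sketch (not part of the driver) of how a stream implementation
 * might honour the @read contract above: only whole records are copied,
 * *offset tracks how many bytes have been written so far, and -ENOSPC is
 * returned when the next record would not fit. The record layout and the
 * example_stream_next_record() helper are hypothetical.
 */
#if 0
static int example_stream_read(struct i915_perf_stream *stream,
			       char __user *buf, size_t count,
			       size_t *offset)
{
	const void *record;
	size_t record_len;

	while (example_stream_next_record(stream, &record, &record_len)) {
		if (*offset + record_len > count)
			return -ENOSPC;	/* never copy a partial record */

		if (copy_to_user(buf + *offset, record, record_len))
			return -EFAULT;

		*offset += record_len;
	}

	return 0;
}
#endif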

/**
 * struct i915_perf_stream - state for a single open stream FD
 */
struct i915_perf_stream {
	/**
	 * @perf: i915_perf backpointer
	 */
	struct i915_perf *perf;

	/**
	 * @uncore: mmio access path
	 */
	struct intel_uncore *uncore;

	/**
	 * @engine: Engine associated with this performance stream.
	 */
	struct intel_engine_cs *engine;

	/**
	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
	 * properties given when opening a stream, representing the contents
	 * of a single sample as read() by userspace.
	 */
	u32 sample_flags;

	/**
	 * @sample_size: Considering the configured contents of a sample
	 * combined with the required header size, this is the total size
	 * of a single sample record.
	 */
	int sample_size;

	/**
	 * @ctx: %NULL if measuring system-wide across all contexts or a
	 * specific context that is being monitored.
	 */
	struct i915_gem_context *ctx;

	/**
	 * @enabled: Whether the stream is currently enabled, considering
	 * whether the stream was opened in a disabled state and based
	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
	 */
	bool enabled;

	/**
	 * @hold_preemption: Whether preemption is put on hold for command
	 * submissions done on the @ctx. This is useful for some drivers that
	 * cannot easily post process the OA buffer context to subtract the
	 * delta of performance counters not associated with @ctx.
	 */
	bool hold_preemption;

	/**
	 * @ops: The callbacks providing the implementation of this specific
	 * type of configured stream.
	 */
	const struct i915_perf_stream_ops *ops;

	/**
	 * @oa_config: The OA configuration used by the stream.
	 */
	struct i915_oa_config *oa_config;

	/**
	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
	 * each time @oa_config changes.
	 */
	struct llist_head oa_config_bos;

	/**
	 * @pinned_ctx: The OA context specific information.
	 */
	struct intel_context *pinned_ctx;

	/**
	 * @specific_ctx_id: The id of the specific context.
	 */
	u32 specific_ctx_id;

	/**
	 * @specific_ctx_id_mask: The mask used to mask specific_ctx_id bits.
	 */
	u32 specific_ctx_id_mask;

	/**
	 * @poll_check_timer: High resolution timer that will periodically
	 * check for data in the circular OA buffer for notifying userspace
	 * (e.g. during a read() or poll()).
	 */
	struct hrtimer poll_check_timer;

	/**
	 * @poll_wq: The wait queue that the hrtimer callback wakes when it
	 * sees data ready to read in the circular OA buffer.
	 */
	wait_queue_head_t poll_wq;

	/**
	 * @pollin: Whether there is data available to read.
	 */
	bool pollin;

	/**
	 * @periodic: Whether periodic sampling is currently enabled.
	 */
	bool periodic;

	/**
	 * @period_exponent: The OA unit sampling frequency is derived from this.
	 */
	int period_exponent;
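
	/*
	 * Worked example (an assumption about how the exponent is typically
	 * interpreted, not stated in this header): the sampling period is
	 * roughly (2 << period_exponent) ticks of the command streamer
	 * timestamp clock, so with a 12 MHz timestamp clock and
	 * period_exponent == 6 a report is written about every
	 * 128 / 12 MHz ~= 10.7 us.
	 */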

	/**
	 * @oa_buffer: State of the OA buffer.
	 */
	struct {
		struct i915_vma *vma;
		u8 *vaddr;
		u32 last_ctx_id;
		int format;
		int format_size;
		int size_exponent;

		/**
		 * @ptr_lock: Locks reads and writes to all head/tail state
		 *
		 * Consider: the head and tail pointer state needs to be read
		 * consistently from a hrtimer callback (atomic context) and
		 * read() fop (user context) with tail pointer updates happening
		 * in atomic context and head updates in user context and the
		 * (unlikely) possibility of read() errors needing to reset all
		 * head/tail state.
		 *
		 * Note: Contention/performance aren't currently a significant
		 * concern here considering the relatively low frequency of
		 * hrtimer callbacks (5ms period) and that reads typically only
		 * happen in response to a hrtimer event and likely complete
		 * before the next callback.
		 *
		 * Note: This lock is not held *while* reading and copying data
		 * to userspace so the value of head observed in hrtimer
		 * callbacks won't represent any partial consumption of data.
		 */
		spinlock_t ptr_lock;

		/**
		 * @tails: One 'aging' tail pointer and one 'aged' tail pointer
		 * ready to be used for reading (see the illustrative sketch
		 * after this structure).
		 *
		 * Initial values of 0xffffffff are invalid and imply that an
		 * update is required (and should be ignored by an attempted
		 * read)
		 */
		struct {
			u32 offset;
		} tails[2];

		/**
		 * @aged_tail_idx: Index for the aged tail ready to read() data up to.
		 */
		unsigned int aged_tail_idx;

		/**
		 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
		 * was read; used to determine when it is old enough to trust.
		 */
		u64 aging_timestamp;

		/**
		 * @head: Although we can always read back the head pointer register,
		 * we prefer to avoid trusting the HW state, just to avoid any
		 * risk that some hardware condition could somehow bump the
		 * head pointer unpredictably and cause us to forward the wrong
		 * OA buffer data to userspace.
		 */
		u32 head;
	} oa_buffer;

	/**
	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
	 * reprogrammed.
	 */
	struct i915_vma *noa_wait;
};
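
/*
 * Rough sketch, under assumptions, of the two-slot tail "aging" scheme
 * described for @oa_buffer above: the hrtimer callback samples the HW tail
 * into the aging slot; once that value has been stable for long enough it is
 * promoted to the aged slot (by flipping aged_tail_idx), and read() only
 * consumes data up to the aged tail. EXAMPLE_OA_TAIL_AGE_NS and
 * example_age_oa_tail() are illustrative names, not the driver's actual
 * policy or API.
 */
#if 0
#define EXAMPLE_OA_TAIL_AGE_NS	100000 /* assume 100us is long enough to trust */

static void example_age_oa_tail(struct i915_perf_stream *stream, u32 hw_tail)
{
	unsigned int aged, aging;
	unsigned long flags;
	u64 now = ktime_get_ns();

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	aged = stream->oa_buffer.aged_tail_idx;
	aging = !aged;

	if (hw_tail != stream->oa_buffer.tails[aging].offset) {
		/* The HW tail moved: restart aging from the new value. */
		stream->oa_buffer.tails[aging].offset = hw_tail;
		stream->oa_buffer.aging_timestamp = now;
	} else if (now - stream->oa_buffer.aging_timestamp > EXAMPLE_OA_TAIL_AGE_NS) {
		/* Stable long enough: promote the aging tail to aged. */
		stream->oa_buffer.aged_tail_idx = aging;
	}

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
}
#endif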

/**
 * struct i915_oa_ops - Gen specific implementation of an OA unit stream
 */
struct i915_oa_ops {
	/**
	 * @is_valid_b_counter_reg: Validates register's address for
	 * programming boolean counters for a particular platform.
	 */
	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @is_valid_mux_reg: Validates register's address for programming mux
	 * for a particular platform (see the sketch after this structure).
	 */
	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @is_valid_flex_reg: Validates register's address for programming
	 * flex EU filtering for a particular platform.
	 */
	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @enable_metric_set: Selects and applies any MUX configuration to set
	 * up the Boolean and Custom (B/C) counters that are part of the
	 * counter reports being sampled. May apply system constraints such as
	 * disabling EU clock gating as required.
	 */
	struct i915_request *
		(*enable_metric_set)(struct i915_perf_stream *stream);

	/**
	 * @disable_metric_set: Remove system constraints associated with using
	 * the OA unit.
	 */
	void (*disable_metric_set)(struct i915_perf_stream *stream);

	/**
	 * @oa_enable: Enable periodic sampling
	 */
	void (*oa_enable)(struct i915_perf_stream *stream);

	/**
	 * @oa_disable: Disable periodic sampling
	 */
	void (*oa_disable)(struct i915_perf_stream *stream);

	/**
	 * @read: Copy data from the circular OA buffer into a given userspace
	 * buffer.
	 */
	int (*read)(struct i915_perf_stream *stream,
		    char __user *buf,
		    size_t count,
		    size_t *offset);

	/**
	 * @oa_hw_tail_read: read the OA tail pointer register
	 *
	 * In particular this enables us to share all the fiddly code for
	 * handling the OA unit tail pointer race that affects multiple
	 * generations.
	 */
	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
};
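
/*
 * A minimal sketch, under assumptions, of the kind of check a platform's
 * @is_valid_mux_reg hook performs: the register offset is compared against a
 * whitelist of ranges that are legal for user-supplied configurations. The
 * range below is a placeholder, not a real platform's MUX whitelist.
 */
#if 0
static bool example_is_valid_mux_reg(struct i915_perf *perf, u32 addr)
{
	/* Placeholder range standing in for a platform's MUX whitelist. */
	return addr >= 0x9800 && addr <= 0x9ff8;
}
#endif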

struct i915_perf {
	struct drm_i915_private *i915;

	struct kobject *metrics_kobj;

	/*
	 * Lock associated with adding/modifying/removing OA configs
	 * in perf->metrics_idr.
	 */
	struct rwlock metrics_lock;

	/*
	 * List of dynamic configurations (struct i915_oa_config); you
	 * need to hold perf->metrics_lock to access it (see the sketch
	 * after this structure).
	 */
	struct idr metrics_idr;

	/*
	 * Lock associated with anything below within this structure
	 * except exclusive_stream.
	 */
	struct rwlock lock;

	/*
	 * The stream currently using the OA unit. If accessed
	 * outside a syscall associated to its file
	 * descriptor, you need to hold
	 * dev_priv->drm.struct_mutex to access it.
	 */
	struct i915_perf_stream *exclusive_stream;

	/**
	 * For rate limiting any notifications of spurious
	 * invalid OA reports
	 */
#ifdef notyet
	struct ratelimit_state spurious_report_rs;
#endif

	struct i915_oa_config test_config;

	u32 gen7_latched_oastatus1;
	u32 ctx_oactxctrl_offset;
	u32 ctx_flexeu0_offset;

	/**
	 * The RPT_ID/reason field for Gen8+ includes a bit
	 * to determine if the CTX ID in the report is valid,
	 * but the specific bit differs between Gen 8 and 9
	 */
	u32 gen8_valid_ctx_bit;

	struct i915_oa_ops ops;
	const struct i915_oa_format *oa_formats;

	atomic64_t noa_programming_delay;
};
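
/*
 * Illustrative-only sketch (assumed names, not the driver's API) of how a
 * dynamic configuration could be looked up from @metrics_idr: the idr is
 * searched under @metrics_lock and a reference is taken on the config before
 * the lock is dropped, so the caller can keep using it safely. The
 * Linux-style mutex_lock()/mutex_unlock() calls are assumed to be provided
 * by the compat layer for the rwlock above.
 */
#if 0
static struct i915_oa_config *
example_get_oa_config(struct i915_perf *perf, int metrics_set)
{
	struct i915_oa_config *oa_config;

	mutex_lock(&perf->metrics_lock);
	oa_config = idr_find(&perf->metrics_idr, metrics_set);
	if (oa_config)
		kref_get(&oa_config->ref);
	mutex_unlock(&perf->metrics_lock);

	return oa_config;
}
#endif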

#endif /* _I915_PERF_TYPES_H_ */