/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */
5
6#ifndef _I915_PERF_TYPES_H_
7#define _I915_PERF_TYPES_H_
8
9#include <linux/atomic.h>
10#include <linux/device.h>
11#include <linux/hrtimer.h>
12#include <linux/llist.h>
13#include <linux/poll.h>
14#include <linux/sysfs.h>
15#include <linux/types.h>
16#include <linux/uuid.h>
17#include <linux/wait.h>
18#include <uapi/drm/i915_drm.h>
19
20#include "gt/intel_engine_types.h"
21#include "gt/intel_sseu.h"
22#include "i915_reg_defs.h"
23#include "intel_uncore.h"
24#include "intel_wakeref.h"
25
/*
 * Forward declarations: this header only ever uses pointers to these types,
 * so their complete definitions are not needed here.
 */
struct drm_i915_private;
struct file;
struct i915_active;
struct i915_gem_context;
struct i915_perf;
struct i915_vma;
struct intel_context;
struct intel_engine_cs;
34
35enum {
36	PERF_GROUP_OAG = 0,
37	PERF_GROUP_OAM_SAMEDIA_0 = 0,
38
39	PERF_GROUP_MAX,
40	PERF_GROUP_INVALID = U32_MAX,
41};
42
/**
 * enum report_header - header kind recorded for an OA format
 * @HDR_32_BIT: value 0.
 * @HDR_64_BIT: value 1.
 *
 * Stored in &i915_oa_format.header.
 * NOTE(review): the names suggest 32-bit versus 64-bit report headers -
 * confirm against the code that consumes this field.
 */
enum report_header {
	HDR_32_BIT = 0,
	HDR_64_BIT,
};
47
48struct i915_perf_regs {
49	u32 base;
50	i915_reg_t oa_head_ptr;
51	i915_reg_t oa_tail_ptr;
52	i915_reg_t oa_buffer;
53	i915_reg_t oa_ctx_ctrl;
54	i915_reg_t oa_ctrl;
55	i915_reg_t oa_debug;
56	i915_reg_t oa_status;
57	u32 oa_ctrl_counter_format_shift;
58};
59
/**
 * enum oa_type - kind of OA unit
 * @TYPE_OAG: OAG unit (value 0).
 * @TYPE_OAM: OAM unit (value 1).
 *
 * Stored in &i915_perf_group.type.
 */
enum oa_type {
	TYPE_OAG,
	TYPE_OAM,
};
64
65struct i915_oa_format {
66	u32 format;
67	int size;
68	int type;
69	enum report_header header;
70};
71
72struct i915_oa_reg {
73	i915_reg_t addr;
74	u32 value;
75};
76
77struct i915_oa_config {
78	struct i915_perf *perf;
79
80	char uuid[UUID_STRING_LEN + 1];
81	int id;
82
83	const struct i915_oa_reg *mux_regs;
84	u32 mux_regs_len;
85	const struct i915_oa_reg *b_counter_regs;
86	u32 b_counter_regs_len;
87	const struct i915_oa_reg *flex_regs;
88	u32 flex_regs_len;
89
90	struct attribute_group sysfs_metric;
91	struct attribute *attrs[2];
92	struct kobj_attribute sysfs_metric_id;
93
94	struct kref ref;
95	struct rcu_head rcu;
96};
97
98struct i915_perf_stream;
99
100/**
101 * struct i915_perf_stream_ops - the OPs to support a specific stream type
102 */
103struct i915_perf_stream_ops {
104	/**
105	 * @enable: Enables the collection of HW samples, either in response to
106	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
107	 * without `I915_PERF_FLAG_DISABLED`.
108	 */
109	void (*enable)(struct i915_perf_stream *stream);
110
111	/**
112	 * @disable: Disables the collection of HW samples, either in response
113	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
114	 * the stream.
115	 */
116	void (*disable)(struct i915_perf_stream *stream);
117
118	/**
119	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
120	 * once there is something ready to read() for the stream
121	 */
122	void (*poll_wait)(struct i915_perf_stream *stream,
123			  struct file *file,
124			  poll_table *wait);
125
126	/**
127	 * @wait_unlocked: For handling a blocking read, wait until there is
128	 * something to ready to read() for the stream. E.g. wait on the same
129	 * wait queue that would be passed to poll_wait().
130	 */
131	int (*wait_unlocked)(struct i915_perf_stream *stream);
132
133	/**
134	 * @read: Copy buffered metrics as records to userspace
135	 * **buf**: the userspace, destination buffer
136	 * **count**: the number of bytes to copy, requested by userspace
137	 * **offset**: zero at the start of the read, updated as the read
138	 * proceeds, it represents how many bytes have been copied so far and
139	 * the buffer offset for copying the next record.
140	 *
141	 * Copy as many buffered i915 perf samples and records for this stream
142	 * to userspace as will fit in the given buffer.
143	 *
144	 * Only write complete records; returning -%ENOSPC if there isn't room
145	 * for a complete record.
146	 *
147	 * Return any error condition that results in a short read such as
148	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
149	 * returning to userspace.
150	 */
151	int (*read)(struct i915_perf_stream *stream,
152		    char __user *buf,
153		    size_t count,
154		    size_t *offset);
155
156	/**
157	 * @destroy: Cleanup any stream specific resources.
158	 *
159	 * The stream will always be disabled before this is called.
160	 */
161	void (*destroy)(struct i915_perf_stream *stream);
162};
163
164/**
165 * struct i915_perf_stream - state for a single open stream FD
166 */
167struct i915_perf_stream {
168	/**
169	 * @perf: i915_perf backpointer
170	 */
171	struct i915_perf *perf;
172
173	/**
174	 * @uncore: mmio access path
175	 */
176	struct intel_uncore *uncore;
177
178	/**
179	 * @engine: Engine associated with this performance stream.
180	 */
181	struct intel_engine_cs *engine;
182
183	/**
184	 * @lock: Lock associated with operations on stream
185	 */
186	struct mutex lock;
187
188	/**
189	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
190	 * properties given when opening a stream, representing the contents
191	 * of a single sample as read() by userspace.
192	 */
193	u32 sample_flags;
194
195	/**
196	 * @sample_size: Considering the configured contents of a sample
197	 * combined with the required header size, this is the total size
198	 * of a single sample record.
199	 */
200	int sample_size;
201
202	/**
203	 * @ctx: %NULL if measuring system-wide across all contexts or a
204	 * specific context that is being monitored.
205	 */
206	struct i915_gem_context *ctx;
207
208	/**
209	 * @enabled: Whether the stream is currently enabled, considering
210	 * whether the stream was opened in a disabled state and based
211	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
212	 */
213	bool enabled;
214
215	/**
216	 * @hold_preemption: Whether preemption is put on hold for command
217	 * submissions done on the @ctx. This is useful for some drivers that
218	 * cannot easily post process the OA buffer context to subtract delta
219	 * of performance counters not associated with @ctx.
220	 */
221	bool hold_preemption;
222
223	/**
224	 * @ops: The callbacks providing the implementation of this specific
225	 * type of configured stream.
226	 */
227	const struct i915_perf_stream_ops *ops;
228
229	/**
230	 * @oa_config: The OA configuration used by the stream.
231	 */
232	struct i915_oa_config *oa_config;
233
234	/**
235	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
236	 * each time @oa_config changes.
237	 */
238	struct llist_head oa_config_bos;
239
240	/**
241	 * @pinned_ctx: The OA context specific information.
242	 */
243	struct intel_context *pinned_ctx;
244
245	/**
246	 * @specific_ctx_id: The id of the specific context.
247	 */
248	u32 specific_ctx_id;
249
250	/**
251	 * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits.
252	 */
253	u32 specific_ctx_id_mask;
254
255	/**
256	 * @poll_check_timer: High resolution timer that will periodically
257	 * check for data in the circular OA buffer for notifying userspace
258	 * (e.g. during a read() or poll()).
259	 */
260	struct hrtimer poll_check_timer;
261
262	/**
263	 * @poll_wq: The wait queue that hrtimer callback wakes when it
264	 * sees data ready to read in the circular OA buffer.
265	 */
266	wait_queue_head_t poll_wq;
267
268	/**
269	 * @pollin: Whether there is data available to read.
270	 */
271	bool pollin;
272
273	/**
274	 * @periodic: Whether periodic sampling is currently enabled.
275	 */
276	bool periodic;
277
278	/**
279	 * @period_exponent: The OA unit sampling frequency is derived from this.
280	 */
281	int period_exponent;
282
283	/**
284	 * @oa_buffer: State of the OA buffer.
285	 */
286	struct {
287		const struct i915_oa_format *format;
288		struct i915_vma *vma;
289		u8 *vaddr;
290		u32 last_ctx_id;
291
292		/**
293		 * @oa_buffer.ptr_lock: Locks reads and writes to all
294		 * head/tail state
295		 *
296		 * Consider: the head and tail pointer state needs to be read
297		 * consistently from a hrtimer callback (atomic context) and
298		 * read() fop (user context) with tail pointer updates happening
299		 * in atomic context and head updates in user context and the
300		 * (unlikely) possibility of read() errors needing to reset all
301		 * head/tail state.
302		 *
303		 * Note: Contention/performance aren't currently a significant
304		 * concern here considering the relatively low frequency of
305		 * hrtimer callbacks (5ms period) and that reads typically only
306		 * happen in response to a hrtimer event and likely complete
307		 * before the next callback.
308		 *
309		 * Note: This lock is not held *while* reading and copying data
310		 * to userspace so the value of head observed in htrimer
311		 * callbacks won't represent any partial consumption of data.
312		 */
313		spinlock_t ptr_lock;
314
315		/**
316		 * @oa_buffer.head: Although we can always read back
317		 * the head pointer register,
318		 * we prefer to avoid trusting the HW state, just to avoid any
319		 * risk that some hardware condition could * somehow bump the
320		 * head pointer unpredictably and cause us to forward the wrong
321		 * OA buffer data to userspace.
322		 */
323		u32 head;
324
325		/**
326		 * @oa_buffer.tail: The last verified tail that can be
327		 * read by userspace.
328		 */
329		u32 tail;
330	} oa_buffer;
331
332	/**
333	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
334	 * reprogrammed.
335	 */
336	struct i915_vma *noa_wait;
337
338	/**
339	 * @poll_oa_period: The period in nanoseconds at which the OA
340	 * buffer should be checked for available data.
341	 */
342	u64 poll_oa_period;
343};
344
345/**
346 * struct i915_oa_ops - Gen specific implementation of an OA unit stream
347 */
348struct i915_oa_ops {
349	/**
350	 * @is_valid_b_counter_reg: Validates register's address for
351	 * programming boolean counters for a particular platform.
352	 */
353	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);
354
355	/**
356	 * @is_valid_mux_reg: Validates register's address for programming mux
357	 * for a particular platform.
358	 */
359	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);
360
361	/**
362	 * @is_valid_flex_reg: Validates register's address for programming
363	 * flex EU filtering for a particular platform.
364	 */
365	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);
366
367	/**
368	 * @enable_metric_set: Selects and applies any MUX configuration to set
369	 * up the Boolean and Custom (B/C) counters that are part of the
370	 * counter reports being sampled. May apply system constraints such as
371	 * disabling EU clock gating as required.
372	 */
373	int (*enable_metric_set)(struct i915_perf_stream *stream,
374				 struct i915_active *active);
375
376	/**
377	 * @disable_metric_set: Remove system constraints associated with using
378	 * the OA unit.
379	 */
380	void (*disable_metric_set)(struct i915_perf_stream *stream);
381
382	/**
383	 * @oa_enable: Enable periodic sampling
384	 */
385	void (*oa_enable)(struct i915_perf_stream *stream);
386
387	/**
388	 * @oa_disable: Disable periodic sampling
389	 */
390	void (*oa_disable)(struct i915_perf_stream *stream);
391
392	/**
393	 * @read: Copy data from the circular OA buffer into a given userspace
394	 * buffer.
395	 */
396	int (*read)(struct i915_perf_stream *stream,
397		    char __user *buf,
398		    size_t count,
399		    size_t *offset);
400
401	/**
402	 * @oa_hw_tail_read: read the OA tail pointer register
403	 *
404	 * In particular this enables us to share all the fiddly code for
405	 * handling the OA unit tail pointer race that affects multiple
406	 * generations.
407	 */
408	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
409};
410
411struct i915_perf_group {
412	/*
413	 * @exclusive_stream: The stream currently using the OA unit. This is
414	 * sometimes accessed outside a syscall associated to its file
415	 * descriptor.
416	 */
417	struct i915_perf_stream *exclusive_stream;
418
419	/*
420	 * @num_engines: The number of engines using this OA unit.
421	 */
422	u32 num_engines;
423
424	/*
425	 * @regs: OA buffer register group for programming the OA unit.
426	 */
427	struct i915_perf_regs regs;
428
429	/*
430	 * @type: Type of OA unit - OAM, OAG etc.
431	 */
432	enum oa_type type;
433};
434
435struct i915_perf_gt {
436	/*
437	 * Lock associated with anything below within this structure.
438	 */
439	struct mutex lock;
440
441	/**
442	 * @sseu: sseu configuration selected to run while perf is active,
443	 * applies to all contexts.
444	 */
445	struct intel_sseu sseu;
446
447	/**
448	 * @num_perf_groups: number of perf groups per gt.
449	 */
450	u32 num_perf_groups;
451
452	/*
453	 * @group: list of OA groups - one for each OA buffer.
454	 */
455	struct i915_perf_group *group;
456};
457
458struct i915_perf {
459	struct drm_i915_private *i915;
460
461	struct kobject *metrics_kobj;
462
463	/*
464	 * Lock associated with adding/modifying/removing OA configs
465	 * in perf->metrics_idr.
466	 */
467	struct mutex metrics_lock;
468
469	/*
470	 * List of dynamic configurations (struct i915_oa_config), you
471	 * need to hold perf->metrics_lock to access it.
472	 */
473	struct idr metrics_idr;
474
475	/**
476	 * For rate limiting any notifications of spurious
477	 * invalid OA reports
478	 */
479	struct ratelimit_state spurious_report_rs;
480
481	/**
482	 * For rate limiting any notifications of tail pointer
483	 * race.
484	 */
485	struct ratelimit_state tail_pointer_race;
486
487	u32 gen7_latched_oastatus1;
488	u32 ctx_oactxctrl_offset;
489	u32 ctx_flexeu0_offset;
490
491	/**
492	 * The RPT_ID/reason field for Gen8+ includes a bit
493	 * to determine if the CTX ID in the report is valid
494	 * but the specific bit differs between Gen 8 and 9
495	 */
496	u32 gen8_valid_ctx_bit;
497
498	struct i915_oa_ops ops;
499	const struct i915_oa_format *oa_formats;
500
501	/**
502	 * Use a format mask to store the supported formats
503	 * for a platform.
504	 */
505#define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG)
506	unsigned long format_mask[FORMAT_MASK_SIZE];
507
508	atomic64_t noa_programming_delay;
509};
510
511#endif /* _I915_PERF_TYPES_H_ */
512