1/* $NetBSD: i915_perf_types.h,v 1.6 2021/12/19 11:36:57 riastradh Exp $ */ 2 3/* SPDX-License-Identifier: MIT */ 4/* 5 * Copyright �� 2019 Intel Corporation 6 */ 7 8#ifndef _I915_PERF_TYPES_H_ 9#define _I915_PERF_TYPES_H_ 10 11#include <linux/atomic.h> 12#include <linux/device.h> 13#include <linux/hrtimer.h> 14#include <linux/llist.h> 15#include <linux/poll.h> 16#include <linux/sysfs.h> 17#include <linux/types.h> 18#include <linux/uuid.h> 19#include <linux/wait.h> 20 21#include "i915_reg.h" 22#include "intel_wakeref.h" 23 24struct drm_i915_private; 25struct file; 26struct i915_gem_context; 27struct i915_perf; 28struct i915_vma; 29struct intel_context; 30struct intel_engine_cs; 31 32struct i915_oa_format { 33 u32 format; 34 int size; 35}; 36 37struct i915_oa_reg { 38 i915_reg_t addr; 39 u32 value; 40}; 41 42struct i915_oa_config { 43 struct i915_perf *perf; 44 45 char uuid[UUID_STRING_LEN + 1]; 46 int id; 47 48 const struct i915_oa_reg *mux_regs; 49 u32 mux_regs_len; 50 const struct i915_oa_reg *b_counter_regs; 51 u32 b_counter_regs_len; 52 const struct i915_oa_reg *flex_regs; 53 u32 flex_regs_len; 54 55#ifndef __NetBSD__ /* XXX sysfs */ 56 struct attribute_group sysfs_metric; 57 struct attribute *attrs[2]; 58 struct device_attribute sysfs_metric_id; 59#endif 60 61 struct kref ref; 62 struct rcu_head rcu; 63}; 64 65struct i915_perf_stream; 66 67/** 68 * struct i915_perf_stream_ops - the OPs to support a specific stream type 69 */ 70struct i915_perf_stream_ops { 71 /** 72 * @enable: Enables the collection of HW samples, either in response to 73 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened 74 * without `I915_PERF_FLAG_DISABLED`. 75 */ 76 void (*enable)(struct i915_perf_stream *stream); 77 78 /** 79 * @disable: Disables the collection of HW samples, either in response 80 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying 81 * the stream. 82 */ 83 void (*disable)(struct i915_perf_stream *stream); 84 85#ifndef __NetBSD__ 86 /** 87 * @poll_wait: Call poll_wait, passing a wait queue that will be woken 88 * once there is something ready to read() for the stream 89 */ 90 void (*poll_wait)(struct i915_perf_stream *stream, 91 struct file *file, 92 poll_table *wait); 93#endif 94 95 /** 96 * @wait_unlocked: For handling a blocking read, wait until there is 97 * something to ready to read() for the stream. E.g. wait on the same 98 * wait queue that would be passed to poll_wait(). 99 */ 100 int (*wait_unlocked)(struct i915_perf_stream *stream); 101 102 /** 103 * @read: Copy buffered metrics as records to userspace 104 * **buf**: the userspace, destination buffer 105 * **count**: the number of bytes to copy, requested by userspace 106 * **offset**: zero at the start of the read, updated as the read 107 * proceeds, it represents how many bytes have been copied so far and 108 * the buffer offset for copying the next record. 109 * 110 * Copy as many buffered i915 perf samples and records for this stream 111 * to userspace as will fit in the given buffer. 112 * 113 * Only write complete records; returning -%ENOSPC if there isn't room 114 * for a complete record. 115 * 116 * Return any error condition that results in a short read such as 117 * -%ENOSPC or -%EFAULT, even though these may be squashed before 118 * returning to userspace. 119 */ 120#ifdef __NetBSD__ 121 int (*read)(struct i915_perf_stream *stream, 122 struct uio *buf, 123 kauth_cred_t count, /* XXX dummy */ 124 int offset); /* XXX dummy */ 125#else 126 int (*read)(struct i915_perf_stream *stream, 127 char __user *buf, 128 size_t count, 129 size_t *offset); 130#endif 131 132 /** 133 * @destroy: Cleanup any stream specific resources. 134 * 135 * The stream will always be disabled before this is called. 136 */ 137 void (*destroy)(struct i915_perf_stream *stream); 138}; 139 140/** 141 * struct i915_perf_stream - state for a single open stream FD 142 */ 143struct i915_perf_stream { 144 /** 145 * @perf: i915_perf backpointer 146 */ 147 struct i915_perf *perf; 148 149 /** 150 * @uncore: mmio access path 151 */ 152 struct intel_uncore *uncore; 153 154 /** 155 * @engine: Engine associated with this performance stream. 156 */ 157 struct intel_engine_cs *engine; 158 159 /** 160 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` 161 * properties given when opening a stream, representing the contents 162 * of a single sample as read() by userspace. 163 */ 164 u32 sample_flags; 165 166 /** 167 * @sample_size: Considering the configured contents of a sample 168 * combined with the required header size, this is the total size 169 * of a single sample record. 170 */ 171 int sample_size; 172 173 /** 174 * @ctx: %NULL if measuring system-wide across all contexts or a 175 * specific context that is being monitored. 176 */ 177 struct i915_gem_context *ctx; 178 179 /** 180 * @enabled: Whether the stream is currently enabled, considering 181 * whether the stream was opened in a disabled state and based 182 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. 183 */ 184 bool enabled; 185 186 /** 187 * @hold_preemption: Whether preemption is put on hold for command 188 * submissions done on the @ctx. This is useful for some drivers that 189 * cannot easily post process the OA buffer context to subtract delta 190 * of performance counters not associated with @ctx. 191 */ 192 bool hold_preemption; 193 194 /** 195 * @ops: The callbacks providing the implementation of this specific 196 * type of configured stream. 197 */ 198 const struct i915_perf_stream_ops *ops; 199 200 /** 201 * @oa_config: The OA configuration used by the stream. 202 */ 203 struct i915_oa_config *oa_config; 204 205 /** 206 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily 207 * each time @oa_config changes. 208 */ 209 struct llist_head oa_config_bos; 210 211 /** 212 * @pinned_ctx: The OA context specific information. 213 */ 214 struct intel_context *pinned_ctx; 215 216 /** 217 * @specific_ctx_id: The id of the specific context. 218 */ 219 u32 specific_ctx_id; 220 221 /** 222 * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits. 223 */ 224 u32 specific_ctx_id_mask; 225 226 /** 227 * @poll_check_timer: High resolution timer that will periodically 228 * check for data in the circular OA buffer for notifying userspace 229 * (e.g. during a read() or poll()). 230 */ 231 struct hrtimer poll_check_timer; 232 233 /** 234 * @poll_wq: The wait queue that hrtimer callback wakes when it 235 * sees data ready to read in the circular OA buffer. 236 */ 237#ifdef __NetBSD__ 238 drm_waitqueue_t poll_wq; 239 struct selinfo poll_selq; 240#else 241 wait_queue_head_t poll_wq; 242#endif 243 244 /** 245 * @pollin: Whether there is data available to read. 246 */ 247 bool pollin; 248 249 /** 250 * @periodic: Whether periodic sampling is currently enabled. 251 */ 252 bool periodic; 253 254 /** 255 * @period_exponent: The OA unit sampling frequency is derived from this. 256 */ 257 int period_exponent; 258 259 /** 260 * @oa_buffer: State of the OA buffer. 261 */ 262 struct { 263 struct i915_vma *vma; 264 u8 *vaddr; 265 u32 last_ctx_id; 266 int format; 267 int format_size; 268 int size_exponent; 269 270 /** 271 * @ptr_lock: Locks reads and writes to all head/tail state 272 * 273 * Consider: the head and tail pointer state needs to be read 274 * consistently from a hrtimer callback (atomic context) and 275 * read() fop (user context) with tail pointer updates happening 276 * in atomic context and head updates in user context and the 277 * (unlikely) possibility of read() errors needing to reset all 278 * head/tail state. 279 * 280 * Note: Contention/performance aren't currently a significant 281 * concern here considering the relatively low frequency of 282 * hrtimer callbacks (5ms period) and that reads typically only 283 * happen in response to a hrtimer event and likely complete 284 * before the next callback. 285 * 286 * Note: This lock is not held *while* reading and copying data 287 * to userspace so the value of head observed in htrimer 288 * callbacks won't represent any partial consumption of data. 289 */ 290 spinlock_t ptr_lock; 291 292 /** 293 * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to 294 * used for reading. 295 * 296 * Initial values of 0xffffffff are invalid and imply that an 297 * update is required (and should be ignored by an attempted 298 * read) 299 */ 300 struct { 301 u32 offset; 302 } tails[2]; 303 304 /** 305 * @aged_tail_idx: Index for the aged tail ready to read() data up to. 306 */ 307 unsigned int aged_tail_idx; 308 309 /** 310 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer 311 * was read; used to determine when it is old enough to trust. 312 */ 313 u64 aging_timestamp; 314 315 /** 316 * @head: Although we can always read back the head pointer register, 317 * we prefer to avoid trusting the HW state, just to avoid any 318 * risk that some hardware condition could * somehow bump the 319 * head pointer unpredictably and cause us to forward the wrong 320 * OA buffer data to userspace. 321 */ 322 u32 head; 323 } oa_buffer; 324 325 /** 326 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be 327 * reprogrammed. 328 */ 329 struct i915_vma *noa_wait; 330}; 331 332/** 333 * struct i915_oa_ops - Gen specific implementation of an OA unit stream 334 */ 335struct i915_oa_ops { 336 /** 337 * @is_valid_b_counter_reg: Validates register's address for 338 * programming boolean counters for a particular platform. 339 */ 340 bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr); 341 342 /** 343 * @is_valid_mux_reg: Validates register's address for programming mux 344 * for a particular platform. 345 */ 346 bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr); 347 348 /** 349 * @is_valid_flex_reg: Validates register's address for programming 350 * flex EU filtering for a particular platform. 351 */ 352 bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr); 353 354 /** 355 * @enable_metric_set: Selects and applies any MUX configuration to set 356 * up the Boolean and Custom (B/C) counters that are part of the 357 * counter reports being sampled. May apply system constraints such as 358 * disabling EU clock gating as required. 359 */ 360 int (*enable_metric_set)(struct i915_perf_stream *stream); 361 362 /** 363 * @disable_metric_set: Remove system constraints associated with using 364 * the OA unit. 365 */ 366 void (*disable_metric_set)(struct i915_perf_stream *stream); 367 368 /** 369 * @oa_enable: Enable periodic sampling 370 */ 371 void (*oa_enable)(struct i915_perf_stream *stream); 372 373 /** 374 * @oa_disable: Disable periodic sampling 375 */ 376 void (*oa_disable)(struct i915_perf_stream *stream); 377 378 /** 379 * @read: Copy data from the circular OA buffer into a given userspace 380 * buffer. 381 */ 382#ifdef __NetBSD__ 383 int (*read)(struct i915_perf_stream *stream, 384 struct uio *buf, 385 kauth_cred_t count, /* XXX dummy */ 386 int offset); /* XXX dummy */ 387#else 388 int (*read)(struct i915_perf_stream *stream, 389 char __user *buf, 390 size_t count, 391 size_t *offset); 392#endif 393 394 /** 395 * @oa_hw_tail_read: read the OA tail pointer register 396 * 397 * In particular this enables us to share all the fiddly code for 398 * handling the OA unit tail pointer race that affects multiple 399 * generations. 400 */ 401 u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); 402}; 403 404struct i915_perf { 405 struct drm_i915_private *i915; 406 407 struct kobject *metrics_kobj; 408 409 /* 410 * Lock associated with adding/modifying/removing OA configs 411 * in perf->metrics_idr. 412 */ 413 struct mutex metrics_lock; 414 415 /* 416 * List of dynamic configurations (struct i915_oa_config), you 417 * need to hold perf->metrics_lock to access it. 418 */ 419 struct idr metrics_idr; 420 421 /* 422 * Lock associated with anything below within this structure 423 * except exclusive_stream. 424 */ 425 struct mutex lock; 426 427 /* 428 * The stream currently using the OA unit. If accessed 429 * outside a syscall associated to its file 430 * descriptor. 431 */ 432 struct i915_perf_stream *exclusive_stream; 433 434 /** 435 * For rate limiting any notifications of spurious 436 * invalid OA reports 437 */ 438 struct ratelimit_state spurious_report_rs; 439 440 struct i915_oa_config test_config; 441 442 u32 gen7_latched_oastatus1; 443 u32 ctx_oactxctrl_offset; 444 u32 ctx_flexeu0_offset; 445 446 /** 447 * The RPT_ID/reason field for Gen8+ includes a bit 448 * to determine if the CTX ID in the report is valid 449 * but the specific bit differs between Gen 8 and 9 450 */ 451 u32 gen8_valid_ctx_bit; 452 453 struct i915_oa_ops ops; 454 const struct i915_oa_format *oa_formats; 455 456 atomic64_t noa_programming_delay; 457}; 458 459#endif /* _I915_PERF_TYPES_H_ */ 460