i915_perf_types.h revision 1.2
1/* SPDX-License-Identifier: MIT */ 2/* 3 * Copyright �� 2019 Intel Corporation 4 */ 5 6#ifndef _I915_PERF_TYPES_H_ 7#define _I915_PERF_TYPES_H_ 8 9#include <linux/atomic.h> 10#include <linux/device.h> 11#include <linux/hrtimer.h> 12#include <linux/llist.h> 13#include <linux/poll.h> 14#include <linux/sysfs.h> 15#include <linux/types.h> 16#include <linux/uuid.h> 17#include <linux/wait.h> 18 19#include "gt/intel_sseu.h" 20#include "i915_reg.h" 21#include "intel_wakeref.h" 22 23struct drm_i915_private; 24struct file; 25struct i915_active; 26struct i915_gem_context; 27struct i915_perf; 28struct i915_vma; 29struct intel_context; 30struct intel_engine_cs; 31 32struct i915_oa_format { 33 u32 format; 34 int size; 35}; 36 37struct i915_oa_reg { 38 i915_reg_t addr; 39 u32 value; 40}; 41 42struct i915_oa_config { 43 struct i915_perf *perf; 44 45 char uuid[UUID_STRING_LEN + 1]; 46 int id; 47 48 const struct i915_oa_reg *mux_regs; 49 u32 mux_regs_len; 50 const struct i915_oa_reg *b_counter_regs; 51 u32 b_counter_regs_len; 52 const struct i915_oa_reg *flex_regs; 53 u32 flex_regs_len; 54 55 struct attribute_group sysfs_metric; 56 struct attribute *attrs[2]; 57 struct device_attribute sysfs_metric_id; 58 59 struct kref ref; 60 struct rcu_head rcu; 61}; 62 63struct i915_perf_stream; 64 65/** 66 * struct i915_perf_stream_ops - the OPs to support a specific stream type 67 */ 68struct i915_perf_stream_ops { 69 /** 70 * @enable: Enables the collection of HW samples, either in response to 71 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened 72 * without `I915_PERF_FLAG_DISABLED`. 73 */ 74 void (*enable)(struct i915_perf_stream *stream); 75 76 /** 77 * @disable: Disables the collection of HW samples, either in response 78 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying 79 * the stream. 80 */ 81 void (*disable)(struct i915_perf_stream *stream); 82 83 /** 84 * @poll_wait: Call poll_wait, passing a wait queue that will be woken 85 * once there is something ready to read() for the stream 86 */ 87#ifdef notyet 88 void (*poll_wait)(struct i915_perf_stream *stream, 89 struct file *file, 90 poll_table *wait); 91#endif 92 93 /** 94 * @wait_unlocked: For handling a blocking read, wait until there is 95 * something to ready to read() for the stream. E.g. wait on the same 96 * wait queue that would be passed to poll_wait(). 97 */ 98 int (*wait_unlocked)(struct i915_perf_stream *stream); 99 100 /** 101 * @read: Copy buffered metrics as records to userspace 102 * **buf**: the userspace, destination buffer 103 * **count**: the number of bytes to copy, requested by userspace 104 * **offset**: zero at the start of the read, updated as the read 105 * proceeds, it represents how many bytes have been copied so far and 106 * the buffer offset for copying the next record. 107 * 108 * Copy as many buffered i915 perf samples and records for this stream 109 * to userspace as will fit in the given buffer. 110 * 111 * Only write complete records; returning -%ENOSPC if there isn't room 112 * for a complete record. 113 * 114 * Return any error condition that results in a short read such as 115 * -%ENOSPC or -%EFAULT, even though these may be squashed before 116 * returning to userspace. 117 */ 118 int (*read)(struct i915_perf_stream *stream, 119 char __user *buf, 120 size_t count, 121 size_t *offset); 122 123 /** 124 * @destroy: Cleanup any stream specific resources. 125 * 126 * The stream will always be disabled before this is called. 127 */ 128 void (*destroy)(struct i915_perf_stream *stream); 129}; 130 131/** 132 * struct i915_perf_stream - state for a single open stream FD 133 */ 134struct i915_perf_stream { 135 /** 136 * @perf: i915_perf backpointer 137 */ 138 struct i915_perf *perf; 139 140 /** 141 * @uncore: mmio access path 142 */ 143 struct intel_uncore *uncore; 144 145 /** 146 * @engine: Engine associated with this performance stream. 147 */ 148 struct intel_engine_cs *engine; 149 150 /** 151 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` 152 * properties given when opening a stream, representing the contents 153 * of a single sample as read() by userspace. 154 */ 155 u32 sample_flags; 156 157 /** 158 * @sample_size: Considering the configured contents of a sample 159 * combined with the required header size, this is the total size 160 * of a single sample record. 161 */ 162 int sample_size; 163 164 /** 165 * @ctx: %NULL if measuring system-wide across all contexts or a 166 * specific context that is being monitored. 167 */ 168 struct i915_gem_context *ctx; 169 170 /** 171 * @enabled: Whether the stream is currently enabled, considering 172 * whether the stream was opened in a disabled state and based 173 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. 174 */ 175 bool enabled; 176 177 /** 178 * @hold_preemption: Whether preemption is put on hold for command 179 * submissions done on the @ctx. This is useful for some drivers that 180 * cannot easily post process the OA buffer context to subtract delta 181 * of performance counters not associated with @ctx. 182 */ 183 bool hold_preemption; 184 185 /** 186 * @ops: The callbacks providing the implementation of this specific 187 * type of configured stream. 188 */ 189 const struct i915_perf_stream_ops *ops; 190 191 /** 192 * @oa_config: The OA configuration used by the stream. 193 */ 194 struct i915_oa_config *oa_config; 195 196 /** 197 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily 198 * each time @oa_config changes. 199 */ 200 struct llist_head oa_config_bos; 201 202 /** 203 * @pinned_ctx: The OA context specific information. 204 */ 205 struct intel_context *pinned_ctx; 206 207 /** 208 * @specific_ctx_id: The id of the specific context. 209 */ 210 u32 specific_ctx_id; 211 212 /** 213 * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits. 214 */ 215 u32 specific_ctx_id_mask; 216 217 /** 218 * @poll_check_timer: High resolution timer that will periodically 219 * check for data in the circular OA buffer for notifying userspace 220 * (e.g. during a read() or poll()). 221 */ 222 struct hrtimer poll_check_timer; 223 224 /** 225 * @poll_wq: The wait queue that hrtimer callback wakes when it 226 * sees data ready to read in the circular OA buffer. 227 */ 228 wait_queue_head_t poll_wq; 229 230 /** 231 * @pollin: Whether there is data available to read. 232 */ 233 bool pollin; 234 235 /** 236 * @periodic: Whether periodic sampling is currently enabled. 237 */ 238 bool periodic; 239 240 /** 241 * @period_exponent: The OA unit sampling frequency is derived from this. 242 */ 243 int period_exponent; 244 245 /** 246 * @oa_buffer: State of the OA buffer. 247 */ 248 struct { 249 struct i915_vma *vma; 250 u8 *vaddr; 251 u32 last_ctx_id; 252 int format; 253 int format_size; 254 int size_exponent; 255 256 /** 257 * @ptr_lock: Locks reads and writes to all head/tail state 258 * 259 * Consider: the head and tail pointer state needs to be read 260 * consistently from a hrtimer callback (atomic context) and 261 * read() fop (user context) with tail pointer updates happening 262 * in atomic context and head updates in user context and the 263 * (unlikely) possibility of read() errors needing to reset all 264 * head/tail state. 265 * 266 * Note: Contention/performance aren't currently a significant 267 * concern here considering the relatively low frequency of 268 * hrtimer callbacks (5ms period) and that reads typically only 269 * happen in response to a hrtimer event and likely complete 270 * before the next callback. 271 * 272 * Note: This lock is not held *while* reading and copying data 273 * to userspace so the value of head observed in htrimer 274 * callbacks won't represent any partial consumption of data. 275 */ 276 spinlock_t ptr_lock; 277 278 /** 279 * @aging_tail: The last HW tail reported by HW. The data 280 * might not have made it to memory yet though. 281 */ 282 u32 aging_tail; 283 284 /** 285 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer 286 * was read; used to determine when it is old enough to trust. 287 */ 288 u64 aging_timestamp; 289 290 /** 291 * @head: Although we can always read back the head pointer register, 292 * we prefer to avoid trusting the HW state, just to avoid any 293 * risk that some hardware condition could * somehow bump the 294 * head pointer unpredictably and cause us to forward the wrong 295 * OA buffer data to userspace. 296 */ 297 u32 head; 298 299 /** 300 * @tail: The last verified tail that can be read by userspace. 301 */ 302 u32 tail; 303 } oa_buffer; 304 305 /** 306 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be 307 * reprogrammed. 308 */ 309 struct i915_vma *noa_wait; 310 311 /** 312 * @poll_oa_period: The period in nanoseconds at which the OA 313 * buffer should be checked for available data. 314 */ 315 u64 poll_oa_period; 316}; 317 318/** 319 * struct i915_oa_ops - Gen specific implementation of an OA unit stream 320 */ 321struct i915_oa_ops { 322 /** 323 * @is_valid_b_counter_reg: Validates register's address for 324 * programming boolean counters for a particular platform. 325 */ 326 bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr); 327 328 /** 329 * @is_valid_mux_reg: Validates register's address for programming mux 330 * for a particular platform. 331 */ 332 bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr); 333 334 /** 335 * @is_valid_flex_reg: Validates register's address for programming 336 * flex EU filtering for a particular platform. 337 */ 338 bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr); 339 340 /** 341 * @enable_metric_set: Selects and applies any MUX configuration to set 342 * up the Boolean and Custom (B/C) counters that are part of the 343 * counter reports being sampled. May apply system constraints such as 344 * disabling EU clock gating as required. 345 */ 346 int (*enable_metric_set)(struct i915_perf_stream *stream, 347 struct i915_active *active); 348 349 /** 350 * @disable_metric_set: Remove system constraints associated with using 351 * the OA unit. 352 */ 353 void (*disable_metric_set)(struct i915_perf_stream *stream); 354 355 /** 356 * @oa_enable: Enable periodic sampling 357 */ 358 void (*oa_enable)(struct i915_perf_stream *stream); 359 360 /** 361 * @oa_disable: Disable periodic sampling 362 */ 363 void (*oa_disable)(struct i915_perf_stream *stream); 364 365 /** 366 * @read: Copy data from the circular OA buffer into a given userspace 367 * buffer. 368 */ 369 int (*read)(struct i915_perf_stream *stream, 370 char __user *buf, 371 size_t count, 372 size_t *offset); 373 374 /** 375 * @oa_hw_tail_read: read the OA tail pointer register 376 * 377 * In particular this enables us to share all the fiddly code for 378 * handling the OA unit tail pointer race that affects multiple 379 * generations. 380 */ 381 u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); 382}; 383 384struct i915_perf { 385 struct drm_i915_private *i915; 386 387 struct kobject *metrics_kobj; 388 389 /* 390 * Lock associated with adding/modifying/removing OA configs 391 * in perf->metrics_idr. 392 */ 393 struct rwlock metrics_lock; 394 395 /* 396 * List of dynamic configurations (struct i915_oa_config), you 397 * need to hold perf->metrics_lock to access it. 398 */ 399 struct idr metrics_idr; 400 401 /* 402 * Lock associated with anything below within this structure 403 * except exclusive_stream. 404 */ 405 struct rwlock lock; 406 407 /* 408 * The stream currently using the OA unit. If accessed 409 * outside a syscall associated to its file 410 * descriptor. 411 */ 412 struct i915_perf_stream *exclusive_stream; 413 414 /** 415 * @sseu: sseu configuration selected to run while perf is active, 416 * applies to all contexts. 417 */ 418 struct intel_sseu sseu; 419 420 /** 421 * For rate limiting any notifications of spurious 422 * invalid OA reports 423 */ 424#ifdef notyet 425 struct ratelimit_state spurious_report_rs; 426#endif 427 428 /** 429 * For rate limiting any notifications of tail pointer 430 * race. 431 */ 432 struct ratelimit_state tail_pointer_race; 433 434 u32 gen7_latched_oastatus1; 435 u32 ctx_oactxctrl_offset; 436 u32 ctx_flexeu0_offset; 437 438 /** 439 * The RPT_ID/reason field for Gen8+ includes a bit 440 * to determine if the CTX ID in the report is valid 441 * but the specific bit differs between Gen 8 and 9 442 */ 443 u32 gen8_valid_ctx_bit; 444 445 struct i915_oa_ops ops; 446 const struct i915_oa_format *oa_formats; 447 448 atomic64_t noa_programming_delay; 449}; 450 451#endif /* _I915_PERF_TYPES_H_ */ 452