/* i915_perf_types.h revision 1.3 */
1/* SPDX-License-Identifier: MIT */ 2/* 3 * Copyright �� 2019 Intel Corporation 4 */ 5 6#ifndef _I915_PERF_TYPES_H_ 7#define _I915_PERF_TYPES_H_ 8 9#include <linux/atomic.h> 10#include <linux/device.h> 11#include <linux/hrtimer.h> 12#include <linux/llist.h> 13#include <linux/poll.h> 14#include <linux/sysfs.h> 15#include <linux/types.h> 16#include <linux/uuid.h> 17#include <linux/wait.h> 18#include <uapi/drm/i915_drm.h> 19 20#include "gt/intel_sseu.h" 21#include "i915_reg.h" 22#include "intel_wakeref.h" 23 24struct drm_i915_private; 25struct file; 26struct i915_active; 27struct i915_gem_context; 28struct i915_perf; 29struct i915_vma; 30struct intel_context; 31struct intel_engine_cs; 32 33struct i915_oa_format { 34 u32 format; 35 int size; 36}; 37 38struct i915_oa_reg { 39 i915_reg_t addr; 40 u32 value; 41}; 42 43struct i915_oa_config { 44 struct i915_perf *perf; 45 46 char uuid[UUID_STRING_LEN + 1]; 47 int id; 48 49 const struct i915_oa_reg *mux_regs; 50 u32 mux_regs_len; 51 const struct i915_oa_reg *b_counter_regs; 52 u32 b_counter_regs_len; 53 const struct i915_oa_reg *flex_regs; 54 u32 flex_regs_len; 55 56 struct attribute_group sysfs_metric; 57 struct attribute *attrs[2]; 58 struct device_attribute sysfs_metric_id; 59 60 struct kref ref; 61 struct rcu_head rcu; 62}; 63 64struct i915_perf_stream; 65 66/** 67 * struct i915_perf_stream_ops - the OPs to support a specific stream type 68 */ 69struct i915_perf_stream_ops { 70 /** 71 * @enable: Enables the collection of HW samples, either in response to 72 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened 73 * without `I915_PERF_FLAG_DISABLED`. 74 */ 75 void (*enable)(struct i915_perf_stream *stream); 76 77 /** 78 * @disable: Disables the collection of HW samples, either in response 79 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying 80 * the stream. 
81 */ 82 void (*disable)(struct i915_perf_stream *stream); 83 84 /** 85 * @poll_wait: Call poll_wait, passing a wait queue that will be woken 86 * once there is something ready to read() for the stream 87 */ 88#ifdef notyet 89 void (*poll_wait)(struct i915_perf_stream *stream, 90 struct file *file, 91 poll_table *wait); 92#endif 93 94 /** 95 * @wait_unlocked: For handling a blocking read, wait until there is 96 * something to ready to read() for the stream. E.g. wait on the same 97 * wait queue that would be passed to poll_wait(). 98 */ 99 int (*wait_unlocked)(struct i915_perf_stream *stream); 100 101 /** 102 * @read: Copy buffered metrics as records to userspace 103 * **buf**: the userspace, destination buffer 104 * **count**: the number of bytes to copy, requested by userspace 105 * **offset**: zero at the start of the read, updated as the read 106 * proceeds, it represents how many bytes have been copied so far and 107 * the buffer offset for copying the next record. 108 * 109 * Copy as many buffered i915 perf samples and records for this stream 110 * to userspace as will fit in the given buffer. 111 * 112 * Only write complete records; returning -%ENOSPC if there isn't room 113 * for a complete record. 114 * 115 * Return any error condition that results in a short read such as 116 * -%ENOSPC or -%EFAULT, even though these may be squashed before 117 * returning to userspace. 118 */ 119 int (*read)(struct i915_perf_stream *stream, 120 char __user *buf, 121 size_t count, 122 size_t *offset); 123 124 /** 125 * @destroy: Cleanup any stream specific resources. 126 * 127 * The stream will always be disabled before this is called. 
128 */ 129 void (*destroy)(struct i915_perf_stream *stream); 130}; 131 132/** 133 * struct i915_perf_stream - state for a single open stream FD 134 */ 135struct i915_perf_stream { 136 /** 137 * @perf: i915_perf backpointer 138 */ 139 struct i915_perf *perf; 140 141 /** 142 * @uncore: mmio access path 143 */ 144 struct intel_uncore *uncore; 145 146 /** 147 * @engine: Engine associated with this performance stream. 148 */ 149 struct intel_engine_cs *engine; 150 151 /** 152 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` 153 * properties given when opening a stream, representing the contents 154 * of a single sample as read() by userspace. 155 */ 156 u32 sample_flags; 157 158 /** 159 * @sample_size: Considering the configured contents of a sample 160 * combined with the required header size, this is the total size 161 * of a single sample record. 162 */ 163 int sample_size; 164 165 /** 166 * @ctx: %NULL if measuring system-wide across all contexts or a 167 * specific context that is being monitored. 168 */ 169 struct i915_gem_context *ctx; 170 171 /** 172 * @enabled: Whether the stream is currently enabled, considering 173 * whether the stream was opened in a disabled state and based 174 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. 175 */ 176 bool enabled; 177 178 /** 179 * @hold_preemption: Whether preemption is put on hold for command 180 * submissions done on the @ctx. This is useful for some drivers that 181 * cannot easily post process the OA buffer context to subtract delta 182 * of performance counters not associated with @ctx. 183 */ 184 bool hold_preemption; 185 186 /** 187 * @ops: The callbacks providing the implementation of this specific 188 * type of configured stream. 189 */ 190 const struct i915_perf_stream_ops *ops; 191 192 /** 193 * @oa_config: The OA configuration used by the stream. 
194 */ 195 struct i915_oa_config *oa_config; 196 197 /** 198 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily 199 * each time @oa_config changes. 200 */ 201 struct llist_head oa_config_bos; 202 203 /** 204 * @pinned_ctx: The OA context specific information. 205 */ 206 struct intel_context *pinned_ctx; 207 208 /** 209 * @specific_ctx_id: The id of the specific context. 210 */ 211 u32 specific_ctx_id; 212 213 /** 214 * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits. 215 */ 216 u32 specific_ctx_id_mask; 217 218 /** 219 * @poll_check_timer: High resolution timer that will periodically 220 * check for data in the circular OA buffer for notifying userspace 221 * (e.g. during a read() or poll()). 222 */ 223 struct hrtimer poll_check_timer; 224 225 /** 226 * @poll_wq: The wait queue that hrtimer callback wakes when it 227 * sees data ready to read in the circular OA buffer. 228 */ 229 wait_queue_head_t poll_wq; 230 231 /** 232 * @pollin: Whether there is data available to read. 233 */ 234 bool pollin; 235 236 /** 237 * @periodic: Whether periodic sampling is currently enabled. 238 */ 239 bool periodic; 240 241 /** 242 * @period_exponent: The OA unit sampling frequency is derived from this. 243 */ 244 int period_exponent; 245 246 /** 247 * @oa_buffer: State of the OA buffer. 248 */ 249 struct { 250 struct i915_vma *vma; 251 u8 *vaddr; 252 u32 last_ctx_id; 253 int format; 254 int format_size; 255 int size_exponent; 256 257 /** 258 * @ptr_lock: Locks reads and writes to all head/tail state 259 * 260 * Consider: the head and tail pointer state needs to be read 261 * consistently from a hrtimer callback (atomic context) and 262 * read() fop (user context) with tail pointer updates happening 263 * in atomic context and head updates in user context and the 264 * (unlikely) possibility of read() errors needing to reset all 265 * head/tail state. 
266 * 267 * Note: Contention/performance aren't currently a significant 268 * concern here considering the relatively low frequency of 269 * hrtimer callbacks (5ms period) and that reads typically only 270 * happen in response to a hrtimer event and likely complete 271 * before the next callback. 272 * 273 * Note: This lock is not held *while* reading and copying data 274 * to userspace so the value of head observed in htrimer 275 * callbacks won't represent any partial consumption of data. 276 */ 277 spinlock_t ptr_lock; 278 279 /** 280 * @aging_tail: The last HW tail reported by HW. The data 281 * might not have made it to memory yet though. 282 */ 283 u32 aging_tail; 284 285 /** 286 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer 287 * was read; used to determine when it is old enough to trust. 288 */ 289 u64 aging_timestamp; 290 291 /** 292 * @head: Although we can always read back the head pointer register, 293 * we prefer to avoid trusting the HW state, just to avoid any 294 * risk that some hardware condition could * somehow bump the 295 * head pointer unpredictably and cause us to forward the wrong 296 * OA buffer data to userspace. 297 */ 298 u32 head; 299 300 /** 301 * @tail: The last verified tail that can be read by userspace. 302 */ 303 u32 tail; 304 } oa_buffer; 305 306 /** 307 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be 308 * reprogrammed. 309 */ 310 struct i915_vma *noa_wait; 311 312 /** 313 * @poll_oa_period: The period in nanoseconds at which the OA 314 * buffer should be checked for available data. 315 */ 316 u64 poll_oa_period; 317}; 318 319/** 320 * struct i915_oa_ops - Gen specific implementation of an OA unit stream 321 */ 322struct i915_oa_ops { 323 /** 324 * @is_valid_b_counter_reg: Validates register's address for 325 * programming boolean counters for a particular platform. 
326 */ 327 bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr); 328 329 /** 330 * @is_valid_mux_reg: Validates register's address for programming mux 331 * for a particular platform. 332 */ 333 bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr); 334 335 /** 336 * @is_valid_flex_reg: Validates register's address for programming 337 * flex EU filtering for a particular platform. 338 */ 339 bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr); 340 341 /** 342 * @enable_metric_set: Selects and applies any MUX configuration to set 343 * up the Boolean and Custom (B/C) counters that are part of the 344 * counter reports being sampled. May apply system constraints such as 345 * disabling EU clock gating as required. 346 */ 347 int (*enable_metric_set)(struct i915_perf_stream *stream, 348 struct i915_active *active); 349 350 /** 351 * @disable_metric_set: Remove system constraints associated with using 352 * the OA unit. 353 */ 354 void (*disable_metric_set)(struct i915_perf_stream *stream); 355 356 /** 357 * @oa_enable: Enable periodic sampling 358 */ 359 void (*oa_enable)(struct i915_perf_stream *stream); 360 361 /** 362 * @oa_disable: Disable periodic sampling 363 */ 364 void (*oa_disable)(struct i915_perf_stream *stream); 365 366 /** 367 * @read: Copy data from the circular OA buffer into a given userspace 368 * buffer. 369 */ 370 int (*read)(struct i915_perf_stream *stream, 371 char __user *buf, 372 size_t count, 373 size_t *offset); 374 375 /** 376 * @oa_hw_tail_read: read the OA tail pointer register 377 * 378 * In particular this enables us to share all the fiddly code for 379 * handling the OA unit tail pointer race that affects multiple 380 * generations. 381 */ 382 u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); 383}; 384 385struct i915_perf { 386 struct drm_i915_private *i915; 387 388 struct kobject *metrics_kobj; 389 390 /* 391 * Lock associated with adding/modifying/removing OA configs 392 * in perf->metrics_idr. 
393 */ 394 struct rwlock metrics_lock; 395 396 /* 397 * List of dynamic configurations (struct i915_oa_config), you 398 * need to hold perf->metrics_lock to access it. 399 */ 400 struct idr metrics_idr; 401 402 /* 403 * Lock associated with anything below within this structure 404 * except exclusive_stream. 405 */ 406 struct rwlock lock; 407 408 /* 409 * The stream currently using the OA unit. If accessed 410 * outside a syscall associated to its file 411 * descriptor. 412 */ 413 struct i915_perf_stream *exclusive_stream; 414 415 /** 416 * @sseu: sseu configuration selected to run while perf is active, 417 * applies to all contexts. 418 */ 419 struct intel_sseu sseu; 420 421 /** 422 * For rate limiting any notifications of spurious 423 * invalid OA reports 424 */ 425#ifdef notyet 426 struct ratelimit_state spurious_report_rs; 427#endif 428 429 /** 430 * For rate limiting any notifications of tail pointer 431 * race. 432 */ 433 struct ratelimit_state tail_pointer_race; 434 435 u32 gen7_latched_oastatus1; 436 u32 ctx_oactxctrl_offset; 437 u32 ctx_flexeu0_offset; 438 439 /** 440 * The RPT_ID/reason field for Gen8+ includes a bit 441 * to determine if the CTX ID in the report is valid 442 * but the specific bit differs between Gen 8 and 9 443 */ 444 u32 gen8_valid_ctx_bit; 445 446 struct i915_oa_ops ops; 447 const struct i915_oa_format *oa_formats; 448 449 /** 450 * Use a format mask to store the supported formats 451 * for a platform. 452 */ 453#define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG) 454 unsigned long format_mask[FORMAT_MASK_SIZE]; 455 456 atomic64_t noa_programming_delay; 457}; 458 459#endif /* _I915_PERF_TYPES_H_ */ 460