1/* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright �� 2008-2018 Intel Corporation 5 */ 6 7#ifndef _I915_GPU_ERROR_H_ 8#define _I915_GPU_ERROR_H_ 9 10#include <linux/atomic.h> 11#include <linux/kref.h> 12#include <linux/ktime.h> 13#include <linux/sched.h> 14 15#include <drm/drm_mm.h> 16 17#include "display/intel_display_device.h" 18#include "display/intel_display_params.h" 19#include "gt/intel_engine.h" 20#include "gt/intel_engine_types.h" 21#include "gt/intel_gt_types.h" 22#include "gt/uc/intel_uc_fw.h" 23 24#include "intel_device_info.h" 25 26#include "i915_gem.h" 27#include "i915_gem_gtt.h" 28#include "i915_params.h" 29#include "i915_scheduler.h" 30 31struct drm_i915_private; 32struct i915_vma_compress; 33struct intel_engine_capture_vma; 34struct intel_overlay_error_state; 35 36struct i915_vma_coredump { 37 struct i915_vma_coredump *next; 38 39 char name[20]; 40 41 u64 gtt_offset; 42 u64 gtt_size; 43 u32 gtt_page_sizes; 44 45 int unused; 46 struct list_head page_list; 47}; 48 49struct i915_request_coredump { 50 unsigned long flags; 51 pid_t pid; 52 u32 context; 53 u32 seqno; 54 u32 head; 55 u32 tail; 56 struct i915_sched_attr sched_attr; 57}; 58 59struct __guc_capture_parsed_output; 60 61struct intel_engine_coredump { 62 const struct intel_engine_cs *engine; 63 64 bool hung; 65 bool simulated; 66 u32 reset_count; 67 68 /* position of active request inside the ring */ 69 u32 rq_head, rq_post, rq_tail; 70 71 /* Register state */ 72 u32 ccid; 73 u32 start; 74 u32 tail; 75 u32 head; 76 u32 ctl; 77 u32 mode; 78 u32 hws; 79 u32 ipeir; 80 u32 ipehr; 81 u32 esr; 82 u32 bbstate; 83 u32 instpm; 84 u32 instps; 85 u64 bbaddr; 86 u64 acthd; 87 u32 fault_reg; 88 u64 faddr; 89 u32 rc_psmi; /* sleep state */ 90 u32 nopid; 91 u32 excc; 92 u32 cmd_cctl; 93 u32 cscmdop; 94 u32 ctx_sr_ctl; 95 u32 dma_faddr_hi; 96 u32 dma_faddr_lo; 97 struct intel_instdone instdone; 98 99 /* GuC matched capture-lists info */ 100 struct intel_guc_state_capture *guc_capture; 101 struct __guc_capture_parsed_output *guc_capture_node; 102 103 struct i915_gem_context_coredump { 104 char comm[TASK_COMM_LEN]; 105 106 u64 total_runtime; 107 u64 avg_runtime; 108 109 pid_t pid; 110 int active; 111 int guilty; 112 struct i915_sched_attr sched_attr; 113 u32 hwsp_seqno; 114 } context; 115 116 struct i915_vma_coredump *vma; 117 118 struct i915_request_coredump execlist[EXECLIST_MAX_PORTS]; 119 unsigned int num_ports; 120 121 struct { 122 u32 gfx_mode; 123 union { 124 u64 pdp[4]; 125 u32 pp_dir_base; 126 }; 127 } vm_info; 128 129 struct intel_engine_coredump *next; 130}; 131 132struct intel_ctb_coredump { 133 u32 raw_head, head; 134 u32 raw_tail, tail; 135 u32 raw_status; 136 u32 desc_offset; 137 u32 cmds_offset; 138 u32 size; 139}; 140 141struct intel_gt_coredump { 142 const struct intel_gt *_gt; 143 bool awake; 144 bool simulated; 145 146 struct intel_gt_info info; 147 148 /* Generic register state */ 149 u32 eir; 150 u32 pgtbl_er; 151 u32 ier; 152 u32 gtier[6], ngtier; 153 u32 forcewake; 154 u32 error; /* gen6+ */ 155 u32 err_int; /* gen7 */ 156 u32 fault_data0; /* gen8, gen9 */ 157 u32 fault_data1; /* gen8, gen9 */ 158 u32 done_reg; 159 u32 gac_eco; 160 u32 gam_ecochk; 161 u32 gab_ctl; 162 u32 gfx_mode; 163 u32 gtt_cache; 164 u32 aux_err; /* gen12 */ 165 u32 gam_done; /* gen12 */ 166 u32 clock_frequency; 167 u32 clock_period_ns; 168 169 /* Display related */ 170 u32 derrmr; 171 u32 sfc_done[I915_MAX_SFC]; /* gen12 */ 172 173 u32 nfence; 174 u64 fence[I915_MAX_NUM_FENCES]; 175 176 struct intel_engine_coredump *engine; 177 178 struct intel_uc_coredump { 179 struct intel_uc_fw guc_fw; 180 struct intel_uc_fw huc_fw; 181 struct guc_info { 182 struct intel_ctb_coredump ctb[2]; 183 struct i915_vma_coredump *vma_ctb; 184 struct i915_vma_coredump *vma_log; 185 u32 timestamp; 186 u16 last_fence; 187 bool is_guc_capture; 188 } guc; 189 } *uc; 190 191 struct intel_gt_coredump *next; 192}; 193 194struct i915_gpu_coredump { 195 struct kref ref; 196 ktime_t time; 197 ktime_t boottime; 198 ktime_t uptime; 199 unsigned long capture; 200 201 struct drm_i915_private *i915; 202 203 struct intel_gt_coredump *gt; 204 205 char error_msg[128]; 206 bool simulated; 207 bool wakelock; 208 bool suspended; 209 int iommu; 210 u32 reset_count; 211 u32 suspend_count; 212 213 struct intel_device_info device_info; 214 struct intel_runtime_info runtime_info; 215 struct intel_display_device_info display_device_info; 216 struct intel_display_runtime_info display_runtime_info; 217 struct intel_driver_caps driver_caps; 218 struct i915_params params; 219 struct intel_display_params display_params; 220 221 struct intel_overlay_error_state *overlay; 222 223 struct scatterlist *sgl, *fit; 224}; 225 226struct i915_gpu_error { 227 /* For reset and error_state handling. */ 228 spinlock_t lock; 229 /* Protected by the above dev->gpu_error.lock. */ 230 struct i915_gpu_coredump *first_error; 231 232 atomic_t pending_fb_pin; 233 234 /** Number of times the device has been reset (global) */ 235 atomic_t reset_count; 236 237 /** Number of times an engine has been reset */ 238 atomic_t reset_engine_count[MAX_ENGINE_CLASS]; 239}; 240 241struct drm_i915_error_state_buf { 242 struct drm_i915_private *i915; 243 struct scatterlist *sgl, *cur, *end; 244 245 char *buf; 246 size_t bytes; 247 size_t size; 248 loff_t iter; 249 250 int err; 251}; 252 253static inline u32 i915_reset_count(struct i915_gpu_error *error) 254{ 255 return atomic_read(&error->reset_count); 256} 257 258static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, 259 const struct intel_engine_cs *engine) 260{ 261 return atomic_read(&error->reset_engine_count[engine->class]); 262} 263 264static inline void 265i915_increase_reset_engine_count(struct i915_gpu_error *error, 266 const struct intel_engine_cs *engine) 267{ 268 atomic_inc(&error->reset_engine_count[engine->class]); 269} 270 271#define CORE_DUMP_FLAG_NONE 0x0 272#define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0) 273 274#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) && IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) 275void intel_klog_error_capture(struct intel_gt *gt, 276 intel_engine_mask_t engine_mask); 277#else 278static inline void intel_klog_error_capture(struct intel_gt *gt, 279 intel_engine_mask_t engine_mask) 280{ 281} 282#endif 283 284#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) 285 286__printf(2, 3) 287void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); 288 289void i915_capture_error_state(struct intel_gt *gt, 290 intel_engine_mask_t engine_mask, u32 dump_flags); 291 292struct i915_gpu_coredump * 293i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp); 294 295struct intel_gt_coredump * 296intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags); 297 298struct intel_engine_coredump * 299intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags); 300 301struct intel_engine_capture_vma * 302intel_engine_coredump_add_request(struct intel_engine_coredump *ee, 303 struct i915_request *rq, 304 gfp_t gfp); 305 306void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, 307 struct intel_engine_capture_vma *capture, 308 struct i915_vma_compress *compress); 309 310struct i915_vma_compress * 311i915_vma_capture_prepare(struct intel_gt_coredump *gt); 312 313void i915_vma_capture_finish(struct intel_gt_coredump *gt, 314 struct i915_vma_compress *compress); 315 316void i915_error_state_store(struct i915_gpu_coredump *error); 317 318static inline struct i915_gpu_coredump * 319i915_gpu_coredump_get(struct i915_gpu_coredump *gpu) 320{ 321 kref_get(&gpu->ref); 322 return gpu; 323} 324 325ssize_t 326i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error, 327 char *buf, loff_t offset, size_t count); 328 329void __i915_gpu_coredump_free(struct kref *kref); 330static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) 331{ 332 if (gpu) 333 kref_put(&gpu->ref, __i915_gpu_coredump_free); 334} 335 336void i915_reset_error_state(struct drm_i915_private *i915); 337void i915_disable_error_state(struct drm_i915_private *i915, int err); 338 339void i915_gpu_error_debugfs_register(struct drm_i915_private *i915); 340void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915); 341void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915); 342 343#else 344 345__printf(2, 3) 346static inline void 347i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) 348{ 349} 350 351static inline void 352i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags) 353{ 354} 355 356static inline struct i915_gpu_coredump * 357i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) 358{ 359 return NULL; 360} 361 362static inline struct intel_gt_coredump * 363intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags) 364{ 365 return NULL; 366} 367 368static inline struct intel_engine_coredump * 369intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags) 370{ 371 return NULL; 372} 373 374static inline struct intel_engine_capture_vma * 375intel_engine_coredump_add_request(struct intel_engine_coredump *ee, 376 struct i915_request *rq, 377 gfp_t gfp) 378{ 379 return NULL; 380} 381 382static inline void 383intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, 384 struct intel_engine_capture_vma *capture, 385 struct i915_vma_compress *compress) 386{ 387} 388 389static inline struct i915_vma_compress * 390i915_vma_capture_prepare(struct intel_gt_coredump *gt) 391{ 392 return NULL; 393} 394 395static inline void 396i915_vma_capture_finish(struct intel_gt_coredump *gt, 397 struct i915_vma_compress *compress) 398{ 399} 400 401static inline void 402i915_error_state_store(struct i915_gpu_coredump *error) 403{ 404} 405 406static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) 407{ 408} 409 410static inline void i915_reset_error_state(struct drm_i915_private *i915) 411{ 412} 413 414static inline void i915_disable_error_state(struct drm_i915_private *i915, 415 int err) 416{ 417} 418 419static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915) 420{ 421} 422 423static inline void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915) 424{ 425} 426 427static inline void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915) 428{ 429} 430 431#endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */ 432 433#endif /* _I915_GPU_ERROR_H_ */ 434