1// SPDX-License-Identifier: MIT 2/* 3 * Copyright �� 2022 Intel Corporation 4 */ 5 6#include "xe_pm.h" 7 8#include <linux/pm_runtime.h> 9 10#include <drm/drm_managed.h> 11#include <drm/ttm/ttm_placement.h> 12 13#include "display/xe_display.h" 14#include "xe_bo.h" 15#include "xe_bo_evict.h" 16#include "xe_device.h" 17#include "xe_device_sysfs.h" 18#include "xe_ggtt.h" 19#include "xe_gt.h" 20#include "xe_guc.h" 21#include "xe_irq.h" 22#include "xe_pcode.h" 23#include "xe_wa.h" 24 25/** 26 * DOC: Xe Power Management 27 * 28 * Xe PM shall be guided by the simplicity. 29 * Use the simplest hook options whenever possible. 30 * Let's not reinvent the runtime_pm references and hooks. 31 * Shall have a clear separation of display and gt underneath this component. 32 * 33 * What's next: 34 * 35 * For now s2idle and s3 are only working in integrated devices. The next step 36 * is to iterate through all VRAM's BO backing them up into the system memory 37 * before allowing the system suspend. 38 * 39 * Also runtime_pm needs to be here from the beginning. 40 * 41 * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC 42 * and no wait boost. Frequency optimizations should come on a next stage. 43 */ 44 45/** 46 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle 47 * @xe: xe device instance 48 * 49 * Return: 0 on success 50 */ 51int xe_pm_suspend(struct xe_device *xe) 52{ 53 struct xe_gt *gt; 54 u8 id; 55 int err; 56 57 for_each_gt(gt, xe, id) 58 xe_gt_suspend_prepare(gt); 59 60 /* FIXME: Super racey... */ 61 err = xe_bo_evict_all(xe); 62 if (err) 63 return err; 64 65 xe_display_pm_suspend(xe); 66 67 for_each_gt(gt, xe, id) { 68 err = xe_gt_suspend(gt); 69 if (err) { 70 xe_display_pm_resume(xe); 71 return err; 72 } 73 } 74 75 xe_irq_suspend(xe); 76 77 xe_display_pm_suspend_late(xe); 78 79 return 0; 80} 81 82/** 83 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0 84 * @xe: xe device instance 85 * 86 * Return: 0 on success 87 */ 88int xe_pm_resume(struct xe_device *xe) 89{ 90 struct xe_tile *tile; 91 struct xe_gt *gt; 92 u8 id; 93 int err; 94 95 for_each_tile(tile, xe, id) 96 xe_wa_apply_tile_workarounds(tile); 97 98 for_each_gt(gt, xe, id) { 99 err = xe_pcode_init(gt); 100 if (err) 101 return err; 102 } 103 104 xe_display_pm_resume_early(xe); 105 106 /* 107 * This only restores pinned memory which is the memory required for the 108 * GT(s) to resume. 109 */ 110 err = xe_bo_restore_kernel(xe); 111 if (err) 112 return err; 113 114 xe_irq_resume(xe); 115 116 xe_display_pm_resume(xe); 117 118 for_each_gt(gt, xe, id) 119 xe_gt_resume(gt); 120 121 err = xe_bo_restore_user(xe); 122 if (err) 123 return err; 124 125 return 0; 126} 127 128static bool xe_pm_pci_d3cold_capable(struct xe_device *xe) 129{ 130 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 131 struct pci_dev *root_pdev; 132 133 root_pdev = pcie_find_root_port(pdev); 134 if (!root_pdev) 135 return false; 136 137 /* D3Cold requires PME capability */ 138 if (!pci_pme_capable(root_pdev, PCI_D3cold)) { 139 drm_dbg(&xe->drm, "d3cold: PME# not supported\n"); 140 return false; 141 } 142 143 /* D3Cold requires _PR3 power resource */ 144 if (!pci_pr3_present(root_pdev)) { 145 drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n"); 146 return false; 147 } 148 149 return true; 150} 151 152static void xe_pm_runtime_init(struct xe_device *xe) 153{ 154 struct device *dev = xe->drm.dev; 155 156 /* 157 * Disable the system suspend direct complete optimization. 158 * We need to ensure that the regular device suspend/resume functions 159 * are called since our runtime_pm cannot guarantee local memory 160 * eviction for d3cold. 161 * TODO: Check HDA audio dependencies claimed by i915, and then enforce 162 * this option to integrated graphics as well. 163 */ 164 if (IS_DGFX(xe)) 165 dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE); 166 167 pm_runtime_use_autosuspend(dev); 168 pm_runtime_set_autosuspend_delay(dev, 1000); 169 pm_runtime_set_active(dev); 170 pm_runtime_allow(dev); 171 pm_runtime_mark_last_busy(dev); 172 pm_runtime_put(dev); 173} 174 175void xe_pm_init_early(struct xe_device *xe) 176{ 177 INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list); 178 drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock); 179} 180 181void xe_pm_init(struct xe_device *xe) 182{ 183 /* For now suspend/resume is only allowed with GuC */ 184 if (!xe_device_uc_enabled(xe)) 185 return; 186 187 drmm_mutex_init(&xe->drm, &xe->d3cold.lock); 188 189 xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe); 190 191 if (xe->d3cold.capable) { 192 xe_device_sysfs_init(xe); 193 xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD); 194 } 195 196 xe_pm_runtime_init(xe); 197} 198 199void xe_pm_runtime_fini(struct xe_device *xe) 200{ 201 struct device *dev = xe->drm.dev; 202 203 pm_runtime_get_sync(dev); 204 pm_runtime_forbid(dev); 205} 206 207static void xe_pm_write_callback_task(struct xe_device *xe, 208 struct task_struct *task) 209{ 210 WRITE_ONCE(xe->pm_callback_task, task); 211 212 /* 213 * Just in case it's somehow possible for our writes to be reordered to 214 * the extent that something else re-uses the task written in 215 * pm_callback_task. For example after returning from the callback, but 216 * before the reordered write that resets pm_callback_task back to NULL. 217 */ 218 smp_mb(); /* pairs with xe_pm_read_callback_task */ 219} 220 221struct task_struct *xe_pm_read_callback_task(struct xe_device *xe) 222{ 223 smp_mb(); /* pairs with xe_pm_write_callback_task */ 224 225 return READ_ONCE(xe->pm_callback_task); 226} 227 228int xe_pm_runtime_suspend(struct xe_device *xe) 229{ 230 struct xe_bo *bo, *on; 231 struct xe_gt *gt; 232 u8 id; 233 int err = 0; 234 235 if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe)) 236 return -EBUSY; 237 238 /* Disable access_ongoing asserts and prevent recursive pm calls */ 239 xe_pm_write_callback_task(xe, current); 240 241 /* 242 * The actual xe_device_mem_access_put() is always async underneath, so 243 * exactly where that is called should makes no difference to us. However 244 * we still need to be very careful with the locks that this callback 245 * acquires and the locks that are acquired and held by any callers of 246 * xe_device_mem_access_get(). We already have the matching annotation 247 * on that side, but we also need it here. For example lockdep should be 248 * able to tell us if the following scenario is in theory possible: 249 * 250 * CPU0 | CPU1 (kworker) 251 * lock(A) | 252 * | xe_pm_runtime_suspend() 253 * | lock(A) 254 * xe_device_mem_access_get() | 255 * 256 * This will clearly deadlock since rpm core needs to wait for 257 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A) 258 * on CPU0 which prevents CPU1 making forward progress. With the 259 * annotation here and in xe_device_mem_access_get() lockdep will see 260 * the potential lock inversion and give us a nice splat. 261 */ 262 lock_map_acquire(&xe_device_mem_access_lockdep_map); 263 264 /* 265 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify 266 * also checks and delets bo entry from user fault list. 267 */ 268 mutex_lock(&xe->mem_access.vram_userfault.lock); 269 list_for_each_entry_safe(bo, on, 270 &xe->mem_access.vram_userfault.list, vram_userfault_link) 271 xe_bo_runtime_pm_release_mmap_offset(bo); 272 mutex_unlock(&xe->mem_access.vram_userfault.lock); 273 274 if (xe->d3cold.allowed) { 275 err = xe_bo_evict_all(xe); 276 if (err) 277 goto out; 278 } 279 280 for_each_gt(gt, xe, id) { 281 err = xe_gt_suspend(gt); 282 if (err) 283 goto out; 284 } 285 286 xe_irq_suspend(xe); 287out: 288 lock_map_release(&xe_device_mem_access_lockdep_map); 289 xe_pm_write_callback_task(xe, NULL); 290 return err; 291} 292 293int xe_pm_runtime_resume(struct xe_device *xe) 294{ 295 struct xe_gt *gt; 296 u8 id; 297 int err = 0; 298 299 /* Disable access_ongoing asserts and prevent recursive pm calls */ 300 xe_pm_write_callback_task(xe, current); 301 302 lock_map_acquire(&xe_device_mem_access_lockdep_map); 303 304 /* 305 * It can be possible that xe has allowed d3cold but other pcie devices 306 * in gfx card soc would have blocked d3cold, therefore card has not 307 * really lost power. Detecting primary Gt power is sufficient. 308 */ 309 gt = xe_device_get_gt(xe, 0); 310 xe->d3cold.power_lost = xe_guc_in_reset(>->uc.guc); 311 312 if (xe->d3cold.allowed && xe->d3cold.power_lost) { 313 for_each_gt(gt, xe, id) { 314 err = xe_pcode_init(gt); 315 if (err) 316 goto out; 317 } 318 319 /* 320 * This only restores pinned memory which is the memory 321 * required for the GT(s) to resume. 322 */ 323 err = xe_bo_restore_kernel(xe); 324 if (err) 325 goto out; 326 } 327 328 xe_irq_resume(xe); 329 330 for_each_gt(gt, xe, id) 331 xe_gt_resume(gt); 332 333 if (xe->d3cold.allowed && xe->d3cold.power_lost) { 334 err = xe_bo_restore_user(xe); 335 if (err) 336 goto out; 337 } 338out: 339 lock_map_release(&xe_device_mem_access_lockdep_map); 340 xe_pm_write_callback_task(xe, NULL); 341 return err; 342} 343 344int xe_pm_runtime_get(struct xe_device *xe) 345{ 346 return pm_runtime_get_sync(xe->drm.dev); 347} 348 349int xe_pm_runtime_put(struct xe_device *xe) 350{ 351 pm_runtime_mark_last_busy(xe->drm.dev); 352 return pm_runtime_put(xe->drm.dev); 353} 354 355int xe_pm_runtime_get_if_active(struct xe_device *xe) 356{ 357 return pm_runtime_get_if_active(xe->drm.dev); 358} 359 360void xe_pm_assert_unbounded_bridge(struct xe_device *xe) 361{ 362 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 363 struct pci_dev *bridge = pci_upstream_bridge(pdev); 364 365 if (!bridge) 366 return; 367 368 if (!bridge->driver) { 369 drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM support.\n"); 370 device_set_pm_not_required(&pdev->dev); 371 } 372} 373 374int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) 375{ 376 struct ttm_resource_manager *man; 377 u32 vram_total_mb = 0; 378 int i; 379 380 for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { 381 man = ttm_manager_type(&xe->ttm, i); 382 if (man) 383 vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024); 384 } 385 386 drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb); 387 388 if (threshold > vram_total_mb) 389 return -EINVAL; 390 391 mutex_lock(&xe->d3cold.lock); 392 xe->d3cold.vram_threshold = threshold; 393 mutex_unlock(&xe->d3cold.lock); 394 395 return 0; 396} 397 398void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) 399{ 400 struct ttm_resource_manager *man; 401 u32 total_vram_used_mb = 0; 402 u64 vram_used; 403 int i; 404 405 if (!xe->d3cold.capable) { 406 xe->d3cold.allowed = false; 407 return; 408 } 409 410 for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { 411 man = ttm_manager_type(&xe->ttm, i); 412 if (man) { 413 vram_used = ttm_resource_manager_usage(man); 414 total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024); 415 } 416 } 417 418 mutex_lock(&xe->d3cold.lock); 419 420 if (total_vram_used_mb < xe->d3cold.vram_threshold) 421 xe->d3cold.allowed = true; 422 else 423 xe->d3cold.allowed = false; 424 425 mutex_unlock(&xe->d3cold.lock); 426 427 drm_dbg(&xe->drm, 428 "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed)); 429} 430