/*	$NetBSD: radeon_fence.c,v 1.22 2021/12/19 11:52:38 riastradh Exp $	*/

/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Dave Airlie
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_fence.c,v 1.22 2021/12/19 11:52:38 riastradh Exp $");

#include <linux/atomic.h>
#include <linux/firmware.h>
#include <linux/kref.h>
#include <linux/sched/signal.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/wait.h>

#include <drm/drm_debugfs.h>
#include <drm/drm_device.h>
#include <drm/drm_file.h>

#include "radeon.h"
#include "radeon_reg.h"
#include "radeon_trace.h"

#include <linux/nbsd-namespace.h>

/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization.  When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.  Whether
 * we use a scratch register or memory location depends on the asic
 * and whether writeback is enabled.
 */

/**
 * radeon_fence_write - write a fence value
 *
 * @rdev: radeon_device pointer
 * @seq: sequence number to write
 * @ring: ring index the fence is associated with
 *
 * Writes a fence value to memory or a scratch register (all asics).
 */
static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		if (drv->cpu_addr) {
			*drv->cpu_addr = cpu_to_le32(seq);
		}
	} else {
		WREG32(drv->scratch_reg, seq);
	}
}

/**
 * radeon_fence_read - read a fence value
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Reads a fence value from memory or a scratch register (all asics).
 * Returns the value of the fence read from memory or register.
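 * When the fence value lives in memory but the CPU address has not been
 * set up yet, the last sequence number tracked in software is returned
 * instead.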
 */
static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	u32 seq = 0;

	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		if (drv->cpu_addr) {
			seq = le32_to_cpu(*drv->cpu_addr);
		} else {
			seq = lower_32_bits(atomic64_read(&drv->last_seq));
		}
	} else {
		seq = RREG32(drv->scratch_reg);
	}
	return seq;
}

/**
 * radeon_fence_schedule_check - schedule lockup check
 *
 * @rdev: radeon_device pointer
 * @ring: ring index we should work with
 *
 * Queues a delayed work item to check for lockups.
 */
static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
{
	/*
	 * Do not reset the timer here with mod_delayed_work,
	 * this can livelock in an interaction with TTM delayed destroy.
	 */
	queue_delayed_work(system_power_efficient_wq,
			   &rdev->fence_drv[ring].lockup_work,
			   RADEON_FENCE_JIFFIES_TIMEOUT);
}

/**
 * radeon_fence_emit - emit a fence on the requested ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 * @ring: ring index the fence is associated with
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int radeon_fence_emit(struct radeon_device *rdev,
		      struct radeon_fence **fence,
		      int ring)
{
	u64 seq;

	/* we are protected by the ring emission mutex */
	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
	if ((*fence) == NULL) {
		return -ENOMEM;
	}
	(*fence)->rdev = rdev;
	(*fence)->seq = seq = ++rdev->fence_drv[ring].sync_seq[ring];
	(*fence)->ring = ring;
	(*fence)->is_vm_update = false;
	dma_fence_init(&(*fence)->base, &radeon_fence_ops,
		       &rdev->fence_lock,
		       rdev->fence_context + ring,
		       seq);
	radeon_fence_ring_emit(rdev, ring, *fence);
	trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
	radeon_fence_schedule_check(rdev, ring);
	return 0;
}

/**
 * radeon_fence_check_signaled - callback from fence_queue
 *
 * This function is called with the fence_queue lock held, which is also
 * used for the fence locking itself, so unlocked variants are used for
 * fence_signal and remove_wait_queue.
 */
static int radeon_fence_check_signaled(struct radeon_fence *fence)
{
	u64 seq;

	BUG_ON(!spin_is_locked(&fence->rdev->fence_lock));

	/*
	 * We cannot use radeon_fence_process here because we're already
	 * in the waitqueue, in a call from wake_up_all.
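	 *
	 * Fences are put on the rdev->fence_check list (with a reference
	 * held) by radeon_fence_enable_signaling; once a fence is seen as
	 * signaled here, it is removed from that list and the reference is
	 * dropped.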
	 */
	seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
	if (seq >= fence->seq) {
		int ret = dma_fence_signal_locked(&fence->base);

		if (!ret)
			DMA_FENCE_TRACE(&fence->base, "signaled from irq context\n");
		else
			DMA_FENCE_TRACE(&fence->base, "was already signaled\n");

		radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
		TAILQ_REMOVE(&fence->rdev->fence_check, fence, fence_check);
		dma_fence_put(&fence->base);
	} else
		DMA_FENCE_TRACE(&fence->base, "pending\n");
	return 0;
}

void
radeon_fence_wakeup_locked(struct radeon_device *rdev)
{
	struct radeon_fence *fence, *next;

	BUG_ON(!spin_is_locked(&rdev->fence_lock));
	DRM_SPIN_WAKEUP_ALL(&rdev->fence_queue, &rdev->fence_lock);
	TAILQ_FOREACH_SAFE(fence, &rdev->fence_check, fence_check, next) {
		radeon_fence_check_signaled(fence);
	}
}

/**
 * radeon_fence_activity - check for fence activity
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and calculates the last
 * signaled fence value.  Returns true if activity occurred
 * on the ring, in which case the fence_queue should be woken up.
 */
static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	BUG_ON(!spin_is_locked(&rdev->fence_lock));

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen.  For it to happen, the current polling
	 * process needs to be interrupted by another process, and that
	 * other process needs to update last_seq between the atomic read
	 * and the xchg of the current process.
	 *
	 * Moreover, for this to turn into an infinite loop, new fences
	 * need to be signaled continuously, i.e. radeon_fence_read needs
	 * to return a different value each time for both the currently
	 * polling process and the other process that updates last_seq
	 * between the atomic read and xchg of the current process.  And
	 * the value the other process sets as last_seq must be higher
	 * than the seq value we just read, which means the current
	 * process must be interrupted after radeon_fence_read and before
	 * the atomic xchg.
	 *
	 * To be even safer we count the number of times we loop and bail
	 * out after 10 iterations, accepting that we might have
	 * temporarily set last_seq not to the true last seq but to an
	 * older one.
	 */
	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
	do {
		last_emitted = rdev->fence_drv[ring].sync_seq[ring];
		seq = radeon_fence_read(rdev, ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over we don't want to return without
		 * checking if a fence is signaled, as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We have looped too many times; leave with the
			 * possibility that we have marked an older fence
			 * seq as signaled than the real last seq signaled
			 * by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);

	if (seq < last_emitted)
		radeon_fence_schedule_check(rdev, ring);

	return wake;
}

/**
 * radeon_fence_check_lockup - check for hardware lockup
 *
 * @work: delayed work item
 *
 * Checks for fence activity and, if there is none, probes
 * the hardware to see if a lockup occurred.
 */
static void radeon_fence_check_lockup(struct work_struct *work)
{
	struct radeon_fence_driver *fence_drv;
	struct radeon_device *rdev;
	int ring;

	fence_drv = container_of(work, struct radeon_fence_driver,
				 lockup_work.work);
	rdev = fence_drv->rdev;
	ring = fence_drv - &rdev->fence_drv[0];

	spin_lock(&rdev->fence_lock);

	if (!down_read_trylock(&rdev->exclusive_lock)) {
		/* just reschedule the check if a reset is going on */
		radeon_fence_schedule_check(rdev, ring);
		spin_unlock(&rdev->fence_lock);
		return;
	}

	if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
		unsigned long irqflags;

		fence_drv->delayed_irq = false;
		spin_lock_irqsave(&rdev->irq.lock, irqflags);
		radeon_irq_set(rdev);
		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
	}

	if (radeon_fence_activity(rdev, ring))
		radeon_fence_wakeup_locked(rdev);

	else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {

		/* good news we believe it's a lockup */
		dev_warn(rdev->dev, "GPU lockup (current fence id "
			 "0x%016"PRIx64" last fence id 0x%016"PRIx64" on ring %d)\n",
			 (uint64_t)atomic64_read(&fence_drv->last_seq),
			 fence_drv->sync_seq[ring], ring);

		/* remember that we need a reset */
		rdev->needs_reset = true;
		radeon_fence_wakeup_locked(rdev);
	}
	up_read(&rdev->exclusive_lock);
	spin_unlock(&rdev->fence_lock);
}

/**
 * radeon_fence_process - process a fence
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
static void radeon_fence_process_locked(struct radeon_device *rdev, int ring)
{
	if (radeon_fence_activity(rdev, ring))
		radeon_fence_wakeup_locked(rdev);
}

void radeon_fence_process(struct radeon_device *rdev, int ring)
{

	spin_lock(&rdev->fence_lock);
	radeon_fence_process_locked(rdev, ring);
	spin_unlock(&rdev->fence_lock);
}

/**
 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence number
 * @ring: ring index the fence is associated with
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value).  Helper function for
 * radeon_fence_signaled().
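 * Caller must hold rdev->fence_lock.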
 */
static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
				      u64 seq, unsigned ring)
{
	BUG_ON(!spin_is_locked(&rdev->fence_lock));
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	/* poll new last sequence at least once */
	radeon_fence_process_locked(rdev, ring);
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	return false;
}

static bool radeon_fence_is_signaled(struct dma_fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;
	unsigned ring = fence->ring;
	u64 seq = fence->seq;

	BUG_ON(!spin_is_locked(&rdev->fence_lock));

	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}

	if (down_read_trylock(&rdev->exclusive_lock)) {
		radeon_fence_process_locked(rdev, ring);
		up_read(&rdev->exclusive_lock);

		if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
			return true;
		}
	}
	return false;
}

/**
 * radeon_fence_enable_signaling - enable signalling on fence
 * @fence: fence
 *
 * This function is called with the fence_queue lock held, and adds a
 * callback to fence_queue that checks if this fence is signaled, and if
 * so it signals the fence and removes itself.
 */
static bool radeon_fence_enable_signaling(struct dma_fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;

	BUG_ON(!spin_is_locked(&rdev->fence_lock));

	if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
		return false;

	if (down_read_trylock(&rdev->exclusive_lock)) {
		radeon_irq_kms_sw_irq_get(rdev, fence->ring);

		if (radeon_fence_activity(rdev, fence->ring))
			radeon_fence_wakeup_locked(rdev);

		/* did fence get signaled after we enabled the sw irq? */
		if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
			radeon_irq_kms_sw_irq_put(rdev, fence->ring);
			up_read(&rdev->exclusive_lock);
			return false;
		}

		up_read(&rdev->exclusive_lock);
	} else {
		/* we're probably in a lockup, let's not fiddle too much */
		if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
			rdev->fence_drv[fence->ring].delayed_irq = true;
		radeon_fence_schedule_check(rdev, fence->ring);
	}

	TAILQ_INSERT_TAIL(&rdev->fence_check, fence, fence_check);
	dma_fence_get(f);

	DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
	return true;
}

/**
 * radeon_fence_signaled - check if a fence has signaled
 *
 * @fence: radeon fence object
 *
 * Check if the requested fence has signaled (all asics).
 * Returns true if the fence has signaled or false if it has not.
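 * A NULL fence is treated as already signaled.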
 */
bool radeon_fence_signaled(struct radeon_fence *fence)
{
	if (!fence)
		return true;

	spin_lock(&fence->rdev->fence_lock);
	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
		int ret;

		ret = dma_fence_signal_locked(&fence->base);
		if (!ret)
			DMA_FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
		spin_unlock(&fence->rdev->fence_lock);
		return true;
	}
	spin_unlock(&fence->rdev->fence_lock);
	return false;
}

/**
 * radeon_fence_any_seq_signaled - check if any sequence number is signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence numbers
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if any has signaled (current value is >= requested value)
 * or false if none has.  Helper function for radeon_fence_wait_seq.
 */
static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
{
	unsigned i;

	BUG_ON(!spin_is_locked(&rdev->fence_lock));

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
			return true;
	}
	return false;
}

/**
 * radeon_fence_wait_seq_timeout - wait for specific sequence numbers
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptible sleep
 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics).  The sequence number array is indexed by ring id.
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait_*().
 * Returns the remaining time if the sequence number has passed, 0 when
 * the wait timed out, or an error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected.
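 * Caller must hold rdev->fence_lock.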
 */
static long radeon_fence_wait_seq_timeout_locked(struct radeon_device *rdev,
						 u64 *target_seq, bool intr,
						 long timeout)
{
	long r;
	int i;

	if (radeon_fence_any_seq_signaled(rdev, target_seq))
		return timeout;

	/* enable IRQs and tracing */
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i])
			continue;

		trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
		radeon_irq_kms_sw_irq_get(rdev, i);
	}

	if (intr)
		DRM_SPIN_TIMED_WAIT_UNTIL(r, &rdev->fence_queue,
		    &rdev->fence_lock, timeout,
		    (radeon_fence_any_seq_signaled(rdev, target_seq)
			|| rdev->needs_reset));
	else
		DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(r, &rdev->fence_queue,
		    &rdev->fence_lock, timeout,
		    (radeon_fence_any_seq_signaled(rdev, target_seq)
			|| rdev->needs_reset));

	if (rdev->needs_reset)
		r = -EDEADLK;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i])
			continue;

		radeon_irq_kms_sw_irq_put(rdev, i);
		trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
	}

	return r;
}

static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
					  u64 *target_seq, bool intr, long timo)
{
	long r;

	spin_lock(&rdev->fence_lock);
	r = radeon_fence_wait_seq_timeout_locked(rdev, target_seq, intr, timo);
	spin_unlock(&rdev->fence_lock);

	return r;
}

/**
 * radeon_fence_wait_timeout - wait for a fence to signal with timeout
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns the remaining time if the sequence number has passed, 0 when
 * the wait timed out, or an error for all other cases.
 */
long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeout)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;
	int r_sig;

	/*
	 * This function should not be called on !radeon fences.
	 * If this is the case, it would mean this function can
	 * also be called on radeon fences belonging to another card.
	 * exclusive_lock is not held in that case.
	 */
	if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
		return dma_fence_wait(&fence->base, intr);

	seq[fence->ring] = fence->seq;
	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, timeout);
	if (r <= 0) {
		return r;
	}

	r_sig = dma_fence_signal(&fence->base);
	if (!r_sig)
		DMA_FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
	return r;
}

/**
 * radeon_fence_wait - wait for a fence to signal
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
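 * This waits with MAX_SCHEDULE_TIMEOUT and discards the remaining time
 * reported by radeon_fence_wait_timeout().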
 */
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
	long r = radeon_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT);
	if (r > 0) {
		return 0;
	} else {
		return r;
	}
}

/**
 * radeon_fence_wait_any - wait for a fence to signal on any ring
 *
 * @rdev: radeon device pointer
 * @fences: radeon fence object(s)
 * @intr: use interruptible sleep
 *
 * Wait for any requested fence to signal (all asics).  The fence
 * array is indexed by ring id.  @intr selects whether to use
 * interruptible (true) or non-interruptible (false) sleep when
 * waiting for the fences.  Used by the suballocator.
 * Returns 0 if any fence has passed, error for all other cases.
 */
int radeon_fence_wait_any(struct radeon_device *rdev,
			  struct radeon_fence **fences,
			  bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS];
	unsigned i, num_rings = 0;
	long r;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		seq[i] = 0;

		if (!fences[i]) {
			continue;
		}

		seq[i] = fences[i]->seq;
		++num_rings;
	}

	/* nothing to wait for ? */
	if (num_rings == 0)
		return -ENOENT;

	r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}
	return 0;
}

/**
 * radeon_fence_wait_next - wait for the next fence to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
	if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
		/* nothing to wait for, last_seq is
		   already the last emitted fence */
		return -ENOENT;
	}
	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;
	return 0;
}

/**
 * radeon_fence_wait_empty - wait for all fences to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
	if (!seq[ring])
		return 0;

	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		if (r == -EDEADLK)
			return -EDEADLK;

		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
			ring, r);
	}
	return 0;
}

/**
 * radeon_fence_ref - take a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
	dma_fence_get(&fence->base);
	return fence;
}

/**
 * radeon_fence_unref - remove a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Remove a reference on a fence (all asics).
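 * The caller's pointer is cleared; a NULL *fence is tolerated.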
 */
void radeon_fence_unref(struct radeon_fence **fence)
{
	struct radeon_fence *tmp = *fence;

	*fence = NULL;
	if (tmp) {
		dma_fence_put(&tmp->base);
	}
}

/**
 * radeon_fence_count_emitted - get the count of emitted fences
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring.  Used by the
 * dynpm code to track ring activity.
 */
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
{
	uint64_t emitted;

	/* We are not protected by ring lock when reading the last sequence
	 * but it's ok to report slightly wrong fence count here.
	 */
	radeon_fence_process(rdev, ring);
	emitted = rdev->fence_drv[ring].sync_seq[ring]
		- atomic64_read(&rdev->fence_drv[ring].last_seq);
	/* to avoid a 32-bit wraparound */
	if (emitted > 0x10000000) {
		emitted = 0x10000000;
	}
	return (unsigned)emitted;
}

/**
 * radeon_fence_need_sync - do we need a semaphore
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Check if the fence needs to be synced against another ring
 * (all asics).  If so, we need to emit a semaphore.
 * Returns true if we need to sync with another ring, false if
 * not.
 */
bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *fdrv;

	if (!fence) {
		return false;
	}

	if (fence->ring == dst_ring) {
		return false;
	}

	/* we are protected by the ring mutex */
	fdrv = &fence->rdev->fence_drv[dst_ring];
	if (fence->seq <= fdrv->sync_seq[fence->ring]) {
		return false;
	}

	return true;
}

/**
 * radeon_fence_note_sync - record the sync point
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Note the sequence number at which point the fence will
 * be synced with the requested ring (all asics).
 */
void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *dst, *src;
	unsigned i;

	if (!fence) {
		return;
	}

	if (fence->ring == dst_ring) {
		return;
	}

	/* we are protected by the ring mutex */
	src = &fence->rdev->fence_drv[fence->ring];
	dst = &fence->rdev->fence_drv[dst_ring];
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (i == dst_ring) {
			continue;
		}
		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
	}
}

/**
 * radeon_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
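 * Depending on the asic, the fence value lives either in the writeback
 * page or in a scratch register; the UVD ring places it directly behind
 * the firmware image.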
 */
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
{
	uint64_t index;
	int r;

	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
	if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
		rdev->fence_drv[ring].scratch_reg = 0;
		if (ring != R600_RING_TYPE_UVD_INDEX) {
			index = R600_WB_EVENT_OFFSET + ring * 4;
			rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
			rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
							 index;

		} else {
			/* put fence directly behind firmware */
			index = ALIGN(rdev->uvd_fw->size, 8);
			rdev->fence_drv[ring].cpu_addr = (uint32_t *)((uint8_t *)rdev->uvd.cpu_addr + index);
			rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
		}

	} else {
		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
		if (r) {
			dev_err(rdev->dev, "fence failed to get scratch register\n");
			return r;
		}
		index = RADEON_WB_SCRATCH_OFFSET +
			rdev->fence_drv[ring].scratch_reg -
			rdev->scratch.reg_base;
		rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
		rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
	}
	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
	rdev->fence_drv[ring].initialized = true;
	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016"PRIx64" and cpu addr 0x%p\n",
		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
	return 0;
}

/**
 * radeon_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for radeon_fence_driver_init().
 */
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
{
	int i;

	rdev->fence_drv[ring].scratch_reg = -1;
	rdev->fence_drv[ring].cpu_addr = NULL;
	rdev->fence_drv[ring].gpu_addr = 0;
	for (i = 0; i < RADEON_NUM_RINGS; ++i)
		rdev->fence_drv[ring].sync_seq[i] = 0;
	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
	rdev->fence_drv[ring].initialized = false;
	INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
			  radeon_fence_check_lockup);
	rdev->fence_drv[ring].rdev = rdev;
}

/**
 * radeon_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * radeon_fence_driver_start_ring().
 * Returns 0 for success.
 */
int radeon_fence_driver_init(struct radeon_device *rdev)
{
	int ring;

	spin_lock_init(&rdev->fence_lock);
	DRM_INIT_WAITQUEUE(&rdev->fence_queue, "radfence");
	TAILQ_INIT(&rdev->fence_check);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		radeon_fence_driver_init_ring(rdev, ring);
	}
	if (radeon_debugfs_fence_init(rdev)) {
		dev_err(rdev->dev, "fence debugfs file creation failed\n");
	}
	return 0;
}

/**
 * radeon_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
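 * Waits for outstanding fences on each ring, forcing completion if that
 * fails, before canceling the lockup work, freeing the scratch registers,
 * and destroying the fence queue and lock.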
 */
void radeon_fence_driver_fini(struct radeon_device *rdev)
{
	int ring, r;

	mutex_lock(&rdev->ring_lock);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		if (!rdev->fence_drv[ring].initialized)
			continue;
		r = radeon_fence_wait_empty(rdev, ring);
		if (r) {
			/* no need to trigger GPU reset as we are unloading */
			radeon_fence_driver_force_completion(rdev, ring);
		}
		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
		spin_lock(&rdev->fence_lock);
		radeon_fence_wakeup_locked(rdev);
		spin_unlock(&rdev->fence_lock);
		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
		rdev->fence_drv[ring].initialized = false;
	}
	mutex_unlock(&rdev->ring_lock);

	BUG_ON(!TAILQ_EMPTY(&rdev->fence_check));
	DRM_DESTROY_WAITQUEUE(&rdev->fence_queue);
	spin_lock_destroy(&rdev->fence_lock);
}

/**
 * radeon_fence_driver_force_completion - force all fence waiters to complete
 *
 * @rdev: radeon device pointer
 * @ring: the ring to complete
 *
 * In case of GPU reset failure, make sure no process keeps waiting on a
 * fence that will never complete.
 */
void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
{
	if (rdev->fence_drv[ring].initialized) {
		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
	}
}


/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	int i, j;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!rdev->fence_drv[i].initialized)
			continue;

		radeon_fence_process(rdev, i);

		seq_printf(m, "--- ring %d ---\n", i);
		seq_printf(m, "Last signaled fence 0x%016llx\n",
			   (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
		seq_printf(m, "Last emitted 0x%016"PRIx64"\n",
			   rdev->fence_drv[i].sync_seq[i]);

		for (j = 0; j < RADEON_NUM_RINGS; ++j) {
			if (i != j && rdev->fence_drv[j].initialized)
				seq_printf(m, "Last sync to ring %d 0x%016"PRIx64"\n",
					   j, rdev->fence_drv[i].sync_seq[j]);
		}
	}
	return 0;
}

/**
 * radeon_debugfs_gpu_reset - manually trigger a gpu reset
 *
 * Manually trigger a gpu reset at the next fence wait.
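 * Reading the file reports whether a reset was already pending before
 * this trigger.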
 */
static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;

	down_read(&rdev->exclusive_lock);
	seq_printf(m, "%d\n", rdev->needs_reset);
	rdev->needs_reset = true;
	wake_up_all(&rdev->fence_queue);
	up_read(&rdev->exclusive_lock);

	return 0;
}

static struct drm_info_list radeon_debugfs_fence_list[] = {
	{"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
	{"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
};
#endif

int radeon_debugfs_fence_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
#else
	return 0;
#endif
}

static const char *radeon_fence_get_driver_name(struct dma_fence *fence)
{
	return "radeon";
}

static const char *radeon_fence_get_timeline_name(struct dma_fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	switch (fence->ring) {
	case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
	case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
	case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
	case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
	case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
	case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
	case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
	case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
	default: WARN_ON_ONCE(1); return "radeon.unk";
	}
}

static inline bool radeon_test_signaled(struct radeon_fence *fence)
{
	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
}

struct radeon_wait_cb {
	struct dma_fence_cb base;
};

static void
radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct radeon_fence *rfence = to_radeon_fence(fence);
	struct radeon_device *rdev = rfence->rdev;

	BUG_ON(!spin_is_locked(&rdev->fence_lock));
	DRM_SPIN_WAKEUP_ALL(&rdev->fence_queue, &rdev->fence_lock);
}

static signed long radeon_fence_default_wait(struct dma_fence *f, bool intr,
					     signed long t)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;
	struct radeon_wait_cb cb;
	int r;

	if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
		return t;

	spin_lock(&rdev->fence_lock);
	if (intr) {
		DRM_SPIN_TIMED_WAIT_UNTIL(r, &rdev->fence_queue,
		    &rdev->fence_lock, t,
		    radeon_test_signaled(fence));
	} else {
		DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(r, &rdev->fence_queue,
		    &rdev->fence_lock, t,
		    radeon_test_signaled(fence));
	}
	spin_unlock(&rdev->fence_lock);

	dma_fence_remove_callback(f, &cb.base);

	return r;
}

const struct dma_fence_ops radeon_fence_ops = {
	.get_driver_name = radeon_fence_get_driver_name,
	.get_timeline_name = radeon_fence_get_timeline_name,
	.enable_signaling = radeon_fence_enable_signaling,
	.signaled = radeon_fence_is_signaled,
	.wait = radeon_fence_default_wait,
	.release = NULL,
};