/*	$NetBSD: intel_timeline.c,v 1.5 2021/12/19 12:32:15 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016-2018 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_timeline.c,v 1.5 2021/12/19 12:32:15 riastradh Exp $");

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#include <linux/nbsd-namespace.h>

#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS

struct intel_timeline_hwsp {
	struct intel_gt *gt;
	struct intel_gt_timelines *gt_timelines;
	struct list_head free_link;
	struct i915_vma *vma;
	u64 free_bitmap;
};

static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

static struct i915_vma *
hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
{
	struct intel_gt_timelines *gt = &timeline->gt->timelines;
	struct intel_timeline_hwsp *hwsp;

	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);

	spin_lock_irq(&gt->hwsp_lock);

	/* hwsp_free_list only contains HWSP that have available cachelines */
	hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
					typeof(*hwsp), free_link);
	if (!hwsp) {
		struct i915_vma *vma;

		spin_unlock_irq(&gt->hwsp_lock);

		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
		if (!hwsp)
			return ERR_PTR(-ENOMEM);

		vma = __hwsp_alloc(timeline->gt);
		if (IS_ERR(vma)) {
			kfree(hwsp);
			return vma;
		}

		vma->private = hwsp;
		hwsp->gt = timeline->gt;
		hwsp->vma = vma;
		hwsp->free_bitmap = ~0ull;
		hwsp->gt_timelines = gt;

		spin_lock_irq(&gt->hwsp_lock);
		list_add(&hwsp->free_link, &gt->hwsp_free_list);
	}

	GEM_BUG_ON(!hwsp->free_bitmap);
	*cacheline = __ffs64(hwsp->free_bitmap);
	hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
	if (!hwsp->free_bitmap)
		list_del(&hwsp->free_link);

	spin_unlock_irq(&gt->hwsp_lock);

	GEM_BUG_ON(hwsp->vma->private != hwsp);
	return hwsp->vma;
}

static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
{
	struct intel_gt_timelines *gt = hwsp->gt_timelines;
	unsigned long flags;

	spin_lock_irqsave(&gt->hwsp_lock, flags);

	/* As a cacheline becomes available, publish the HWSP on the freelist */
	if (!hwsp->free_bitmap)
		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);

	GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
	hwsp->free_bitmap |= BIT_ULL(cacheline);

	/* And if no one is left using it, give the page back to the system */
	if (hwsp->free_bitmap == ~0ull) {
		i915_vma_put(hwsp->vma);
		list_del(&hwsp->free_link);
		kfree(hwsp);
	}

	spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}
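
/*
 * Each HWSP page is carved into CACHELINE_BYTES-sized slots, one seqno
 * slot per timeline, tracked by the u64 free_bitmap above; the
 * BUILD_BUG_ON in hwsp_alloc() guarantees the 64 bitmap bits cover the
 * whole page (an exact fit with a 4 KiB page). Claiming and releasing
 * a slot then reduces to bit operations, roughly:
 *
 *	cacheline = __ffs64(hwsp->free_bitmap);	  /* claim lowest free slot */
 *	hwsp->free_bitmap &= ~BIT_ULL(cacheline);
 *	...
 *	hwsp->free_bitmap |= BIT_ULL(cacheline);  /* release when idle */
 *
 * A bitmap of ~0ull means every slot is idle again, at which point
 * __idle_hwsp_free() returns the page to the system.
 */
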
static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(!i915_active_is_idle(&cl->active));

	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
	i915_vma_put(cl->hwsp->vma);
	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

	i915_active_fini(&cl->active);
	kfree_rcu(cl, rcu);
}

__i915_active_call
static void __cacheline_retire(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	i915_vma_unpin(cl->hwsp->vma);
	if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
		__idle_cacheline_free(cl);
}

static int __cacheline_active(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	__i915_vma_pin(cl->hwsp->vma);
	return 0;
}

static struct intel_timeline_cacheline *
cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
{
	struct intel_timeline_cacheline *cl;
	void *vaddr;

	GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));

	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
	if (!cl)
		return ERR_PTR(-ENOMEM);

	vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		kfree(cl);
		return ERR_CAST(vaddr);
	}

	i915_vma_get(hwsp->vma);
	cl->hwsp = hwsp;
	cl->vaddr = page_pack_bits(vaddr, cacheline);

	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);

	return cl;
}

static void cacheline_acquire(struct intel_timeline_cacheline *cl)
{
	if (cl)
		i915_active_acquire(&cl->active);
}

static void cacheline_release(struct intel_timeline_cacheline *cl)
{
	if (cl)
		i915_active_release(&cl->active);
}

static void cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
	cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);

	if (i915_active_is_idle(&cl->active))
		__idle_cacheline_free(cl);
}

int intel_timeline_init(struct intel_timeline *timeline,
			struct intel_gt *gt,
			struct i915_vma *hwsp)
{
	void *vaddr;

	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	timeline->has_initial_breadcrumb = !hwsp;
	timeline->hwsp_cacheline = NULL;

	if (!hwsp) {
		struct intel_timeline_cacheline *cl;
		unsigned int cacheline;

		hwsp = hwsp_alloc(timeline, &cacheline);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);

		cl = cacheline_alloc(hwsp->private, cacheline);
		if (IS_ERR(cl)) {
			__idle_hwsp_free(hwsp->private, cacheline);
			return PTR_ERR(cl);
		}

		timeline->hwsp_cacheline = cl;
		timeline->hwsp_offset = cacheline * CACHELINE_BYTES;

		vaddr = page_mask_bits(cl->vaddr);
	} else {
		timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;

		vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
		if (IS_ERR(vaddr))
			return PTR_ERR(vaddr);
	}

	timeline->hwsp_seqno =
		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);

	timeline->hwsp_ggtt = i915_vma_get(hwsp);
	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);

	return 0;
}
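
/*
 * A timeline is initialised in one of two modes, selected by the hwsp
 * argument to intel_timeline_init() above: given a caller-supplied
 * global HWSP vma, the seqno lives at the fixed I915_GEM_HWS_SEQNO_ADDR
 * offset of that page; given NULL, a private cacheline is suballocated
 * (and an initial breadcrumb emitted) so the seqno slot can later be
 * swapped out on wraparound. Sketched, a typical caller does:
 *
 *	tl = intel_timeline_create(gt, NULL);	/* suballocated HWSP slot */
 *	if (IS_ERR(tl))
 *		return PTR_ERR(tl);
 *	err = intel_timeline_pin(tl);		/* bind HWSP into the GGTT */
 *	...
 *	intel_timeline_unpin(tl);
 *	intel_timeline_put(tl);	/* kref_put() wrapper from intel_timeline.h */
 */
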
void intel_gt_init_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);

	spin_lock_init(&timelines->hwsp_lock);
	INIT_LIST_HEAD(&timelines->hwsp_free_list);
}

void intel_timeline_fini(struct intel_timeline *timeline)
{
	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	if (timeline->hwsp_cacheline)
		cacheline_free(timeline->hwsp_cacheline);
	else
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);

	mutex_destroy(&timeline->mutex);
}

struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

int intel_timeline_pin(struct intel_timeline *tl)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);

	cacheline_acquire(tl->hwsp_cacheline);
	if (atomic_fetch_inc(&tl->pin_count)) {
		cacheline_release(tl->hwsp_cacheline);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}

void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_request() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomic to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock(&timelines->lock);
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);
	spin_unlock(&timelines->lock);
}
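
/*
 * intel_timeline_enter() above and intel_timeline_exit() below are
 * paired: only the 0 -> 1 transition of tl->active_count inserts the
 * timeline into the global active_list, and only the final 1 -> 0
 * transition removes it. Nested enters on an already-active timeline
 * therefore take the atomic_add_unless() fast path and never touch
 * timelines->lock.
 */
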
void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock(&timelines->lock);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock(&timelines->lock);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}

static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static void timeline_rollback(struct intel_timeline *tl)
{
	tl->seqno -= 1 + tl->has_initial_breadcrumb;
}
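
/*
 * Note on the arithmetic above: a timeline with an initial breadcrumb
 * consumes two seqno values per request (one for the breadcrumb, one
 * for the final seqno write), so timeline_advance() steps by 2 and its
 * GEM_BUG_ON asserts the counter stays even. Starting from 0 the
 * sequence runs 2, 4, 6, ... until it wraps back to 0, which is the
 * condition intel_timeline_get_seqno() below treats as wraparound.
 */
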
static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
			   struct i915_request *rq,
			   u32 *seqno)
{
	struct intel_timeline_cacheline *cl;
	unsigned int cacheline;
	struct i915_vma *vma;
	void *vaddr;
	int err;

	/*
	 * If there is an outstanding GPU reference to this cacheline,
	 * such as it being sampled by a HW semaphore on another timeline,
	 * we cannot wraparound our seqno value (the HW semaphore does
	 * a strict greater-than-or-equals compare, not i915_seqno_passed).
	 * So if the cacheline is still busy, we must detach ourselves
	 * from it and leave it inflight alongside its users.
	 *
	 * However, if nobody is watching and we can guarantee that nobody
	 * will, we could simply reuse the same cacheline.
	 *
	 * if (i915_active_request_is_signaled(&tl->last_request) &&
	 *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
	 *	return 0;
	 *
	 * That seems unlikely for a busy timeline that needed to wrap in
	 * the first place, so just replace the cacheline.
	 */

	vma = hwsp_alloc(tl, &cacheline);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_rollback;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err) {
		__idle_hwsp_free(vma->private, cacheline);
		goto err_rollback;
	}

	cl = cacheline_alloc(vma->private, cacheline);
	if (IS_ERR(cl)) {
		err = PTR_ERR(cl);
		__idle_hwsp_free(vma->private, cacheline);
		goto err_unpin;
	}
	GEM_BUG_ON(cl->hwsp->vma != vma);

	/*
	 * Attach the old cacheline to the current request, so that we only
	 * free it after the current request is retired, which ensures that
	 * all writes into the cacheline from previous requests are complete.
	 */
	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
	if (err)
		goto err_cacheline;

	cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
	cacheline_free(tl->hwsp_cacheline);

	i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
	i915_vma_put(tl->hwsp_ggtt);

	tl->hwsp_ggtt = i915_vma_get(vma);

	vaddr = page_mask_bits(cl->vaddr);
	tl->hwsp_offset = cacheline * CACHELINE_BYTES;
	tl->hwsp_seqno =
		memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);

	tl->hwsp_offset += i915_ggtt_offset(vma);

	cacheline_acquire(cl);
	tl->hwsp_cacheline = cl;

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;

err_cacheline:
	cacheline_free(cl);
err_unpin:
	i915_vma_unpin(vma);
err_rollback:
	timeline_rollback(tl);
	return err;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->hwsp_cacheline))
		return __intel_timeline_get_seqno(tl, rq, seqno);

	return 0;
}

static int cacheline_ref(struct intel_timeline_cacheline *cl,
			 struct i915_request *rq)
{
	return i915_active_add_request(&cl->active, rq);
}

int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline_cacheline *cl;
	int err;

	GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));

	rcu_read_lock();
	cl = rcu_dereference(from->hwsp_cacheline);
	if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
		goto unlock; /* seqno wrapped and completed! */
	if (unlikely(i915_request_completed(from)))
		goto release;
	rcu_read_unlock();

	err = cacheline_ref(cl, to);
	if (err)
		goto out;

	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;

out:
	i915_active_release(&cl->active);
	return err;

release:
	i915_active_release(&cl->active);
unlock:
	rcu_read_unlock();
	return 1;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	cacheline_release(tl->hwsp_cacheline);

	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	intel_timeline_fini(timeline);
	kfree_rcu(timeline, rcu);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));

	spin_lock_destroy(&timelines->hwsp_lock);
	spin_lock_destroy(&timelines->lock);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif