// SPDX-License-Identifier: MIT
/*
 * Copyright © 2016-2018 Intel Corporation
 */

#include <drm/drm_cache.h> /* drm_clflush_virt_range() */

#include "gem/i915_gem_internal.h"

#include "i915_active.h"
#include "i915_drv.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define TIMELINE_SEQNO_BYTES 8

static struct i915_vma *hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

static void __timeline_retire(struct i915_active *active)
{
	struct intel_timeline *tl =
		container_of(active, typeof(*tl), active);

	i915_vma_unpin(tl->hwsp_ggtt);
	intel_timeline_put(tl);
}

static int __timeline_active(struct i915_active *active)
{
	struct intel_timeline *tl =
		container_of(active, typeof(*tl), active);

	__i915_vma_pin(tl->hwsp_ggtt);
	intel_timeline_get(tl);
	return 0;
}

I915_SELFTEST_EXPORT int
intel_timeline_pin_map(struct intel_timeline *timeline)
{
	struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj;
	u32 ofs = offset_in_page(timeline->hwsp_offset);
	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	timeline->hwsp_map = vaddr;
	timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
	drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);

	return 0;
}
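/*
 * A timeline tracks its breadcrumb seqno either in a slot of a HWSP page
 * supplied by the caller (e.g. an engine status page, at the given offset),
 * or, when no HWSP is provided, in a freshly allocated per-timeline page.
 * Only the latter is marked has_initial_breadcrumb, which is required for
 * HW semaphore waits (see intel_timeline_read_hwsp()).
 */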
static int intel_timeline_init(struct intel_timeline *timeline,
			       struct intel_gt *gt,
			       struct i915_vma *hwsp,
			       unsigned int offset)
{
	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	if (hwsp) {
		timeline->hwsp_offset = offset;
		timeline->hwsp_ggtt = i915_vma_get(hwsp);
	} else {
		timeline->has_initial_breadcrumb = true;
		hwsp = hwsp_alloc(gt);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);
		timeline->hwsp_ggtt = hwsp;
	}

	timeline->hwsp_map = NULL;
	timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset;

	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);

	i915_active_init(&timeline->active, __timeline_active,
			 __timeline_retire, 0);

	return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);
}

static void intel_timeline_fini(struct rcu_head *rcu)
{
	struct intel_timeline *timeline =
		container_of(rcu, struct intel_timeline, rcu);

	if (timeline->hwsp_map)
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);
	i915_active_fini(&timeline->active);

	/*
	 * A small race exists between intel_gt_retire_requests_timeout and
	 * intel_timeline_exit which could result in the syncmap not getting
	 * free'd. Rather than work too hard to seal this race, simply clean
	 * up the syncmap on fini.
	 */
	i915_syncmap_free(&timeline->sync);

	kfree(timeline);
}

struct intel_timeline *
__intel_timeline_create(struct intel_gt *gt,
			struct i915_vma *global_hwsp,
			unsigned int offset)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp, offset);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

struct intel_timeline *
intel_timeline_create_from_engine(struct intel_engine_cs *engine,
				  unsigned int offset)
{
	struct i915_vma *hwsp = engine->status_page.vma;
	struct intel_timeline *tl;

	tl = __intel_timeline_create(engine->gt, hwsp, offset);
	if (IS_ERR(tl))
		return tl;

	/* Borrow a nearby lock; we only create these timelines during init */
	mutex_lock(&hwsp->vm->mutex);
	list_add_tail(&tl->engine_link, &engine->status_page.timelines);
	mutex_unlock(&hwsp->vm->mutex);

	return tl;
}

void __intel_timeline_pin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	atomic_inc(&tl->pin_count);
}
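/*
 * The first pin maps the HWSP object and pins it into the GGTT, recording
 * the resulting GGTT offset of the seqno slot in tl->hwsp_offset; later
 * pins merely bump the pin count. A racing first pin is resolved by the
 * atomic_fetch_inc(): the loser drops the extra active and vma references
 * it just took.
 */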
int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	if (!tl->hwsp_map) {
		err = intel_timeline_pin_map(tl);
		if (err)
			return err;
	}

	err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);
	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
		 tl->fence_context, tl->hwsp_offset);

	i915_active_acquire(&tl->active);
	if (atomic_fetch_inc(&tl->pin_count)) {
		i915_active_release(&tl->active);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}

void intel_timeline_reset_seqno(const struct intel_timeline *tl)
{
	u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno;

	/* Must be pinned to be writable, and no requests in flight. */
	GEM_BUG_ON(!atomic_read(&tl->pin_count));

	memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
	WRITE_ONCE(*hwsp_seqno, tl->seqno);
	drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
}

void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_request() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomic to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock(&timelines->lock);
	if (!atomic_fetch_inc(&tl->active_count)) {
		/*
		 * The HWSP is volatile, and may have been lost while inactive,
		 * e.g. across suspend/resume. Be paranoid, and ensure that
		 * the HWSP value matches our seqno so we don't proclaim
		 * the next request as already complete.
		 */
		intel_timeline_reset_seqno(tl);
		list_add_tail(&tl->link, &timelines->active_list);
	}
	spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock(&timelines->lock);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock(&timelines->lock);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}
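/*
 * Seqno allocation. timeline_advance() hands out the next breadcrumb seqno,
 * advancing by two when the request also emits an initial breadcrumb. On u32
 * wraparound, a timeline that may be a HW semaphore target moves onto the
 * next TIMELINE_SEQNO_BYTES slot of its HWSP page (see
 * __intel_timeline_get_seqno() below) rather than reusing the current slot.
 */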
static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl, u32 *seqno)
{
	u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES);

	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5)))
		next_ofs = offset_in_page(next_ofs + BIT(5));

	tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs;
	tl->hwsp_seqno = tl->hwsp_map + next_ofs;
	intel_timeline_reset_seqno(tl);

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->has_initial_breadcrumb))
		return __intel_timeline_get_seqno(tl, seqno);

	return 0;
}

int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline *tl;
	int err;

	rcu_read_lock();
	tl = rcu_dereference(from->timeline);
	if (i915_request_signaled(from) ||
	    !i915_active_acquire_if_busy(&tl->active))
		tl = NULL;

	if (tl) {
		/* hwsp_offset may wraparound, so use from->hwsp_seqno */
		*hwsp = i915_ggtt_offset(tl->hwsp_ggtt) +
			offset_in_page(from->hwsp_seqno);
	}

	/* ensure we wait on the right request, if not, we completed */
	if (tl && __i915_request_is_complete(from)) {
		i915_active_release(&tl->active);
		tl = NULL;
	}
	rcu_read_unlock();

	if (!tl)
		return 1;

	/* Can't do semaphore waits on kernel context */
	if (!tl->has_initial_breadcrumb) {
		err = -EINVAL;
		goto out;
	}

	err = i915_active_add_request(&tl->active, to);

out:
	i915_active_release(&tl->active);
	return err;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	i915_active_release(&tl->active);
	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	call_rcu(&timeline->rcu, intel_timeline_fini);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
}

void intel_gt_show_timelines(struct intel_gt *gt,
			     struct drm_printer *m,
			     void (*show_request)(struct drm_printer *m,
						  const struct i915_request *rq,
						  const char *prefix,
						  int indent))
{
	struct intel_gt_timelines *timelines = &gt->timelines;
	struct intel_timeline *tl, *tn;
	LIST_HEAD(free);

	spin_lock(&timelines->lock);
	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
		unsigned long count, ready, inflight;
		struct i915_request *rq, *rn;
		struct dma_fence *fence;

		if (!mutex_trylock(&tl->mutex)) {
			drm_printf(m, "Timeline %llx: busy; skipping\n",
				   tl->fence_context);
			continue;
		}

		intel_timeline_get(tl);
		GEM_BUG_ON(!atomic_read(&tl->active_count));
		atomic_inc(&tl->active_count); /* pin the list element */
		spin_unlock(&timelines->lock);

		count = 0;
		ready = 0;
		inflight = 0;
		list_for_each_entry_safe(rq, rn, &tl->requests, link) {
			if (i915_request_completed(rq))
				continue;

			count++;
			if (i915_request_is_ready(rq))
				ready++;
			if (i915_request_is_active(rq))
				inflight++;
		}

		drm_printf(m, "Timeline %llx: { ", tl->fence_context);
		drm_printf(m, "count: %lu, ready: %lu, inflight: %lu",
			   count, ready, inflight);
		drm_printf(m, ", seqno: { current: %d, last: %d }",
			   *tl->hwsp_seqno, tl->seqno);
		fence = i915_active_fence_get(&tl->last_request);
		if (fence) {
			drm_printf(m, ", engine: %s",
				   to_request(fence)->engine->name);
			dma_fence_put(fence);
		}
		drm_printf(m, " }\n");

		if (show_request) {
			list_for_each_entry_safe(rq, rn, &tl->requests, link)
				show_request(m, rq, "", 2);
		}

		mutex_unlock(&tl->mutex);
		spin_lock(&timelines->lock);

		/* Resume list iteration after reacquiring spinlock */
		list_safe_reset_next(tl, tn, link);
		if (atomic_dec_and_test(&tl->active_count))
			list_del(&tl->link);

		/* Defer the final release to after the spinlock */
		if (refcount_dec_and_test(&tl->kref.refcount)) {
			GEM_BUG_ON(atomic_read(&tl->active_count));
			list_add(&tl->link, &free);
		}
	}
	spin_unlock(&timelines->lock);

	list_for_each_entry_safe(tl, tn, &free, link)
		__intel_timeline_free(&tl->kref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif