/*	$NetBSD: intel_engine_pm.c,v 1.5 2021/12/19 12:37:28 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_engine_pm.c,v 1.5 2021/12/19 12:37:28 riastradh Exp $");

#include "i915_drv.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"

static int __engine_unpark(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);
	struct intel_context *ce;
	void *map;

	ENGINE_TRACE(engine, "\n");

	intel_gt_pm_get(engine->gt);

	/* Pin the default state for fast resets from atomic context. */
	map = NULL;
	if (engine->default_state)
		map = i915_gem_object_pin_map(engine->default_state,
					      I915_MAP_WB);
	if (!IS_ERR_OR_NULL(map))
		engine->pinned_default_state = map;

	/* Discard stale context state from across idling */
	ce = engine->kernel_context;
	if (ce) {
		GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));

		/* First poison the image to verify we never fully trust it */
		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
			struct drm_i915_gem_object *obj = ce->state->obj;
			int type = i915_coherent_map_type(engine->i915);

			map = i915_gem_object_pin_map(obj, type);
			if (!IS_ERR(map)) {
				memset(map, CONTEXT_REDZONE, obj->base.size);
				i915_gem_object_flush_map(obj);
				i915_gem_object_unpin_map(obj);
			}
		}

		ce->ops->reset(ce);
	}

	if (engine->unpark)
		engine->unpark(engine);

	intel_engine_unpark_heartbeat(engine);
	return 0;
}

#if IS_ENABLED(CONFIG_LOCKDEP)

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	unsigned long flags;

	local_irq_save(flags);
	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);

	return flags;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
	mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
	local_irq_restore(flags);
}

#else

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	return 0;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
}

#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
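/*
 * Completion callback for the kernel-context switch request built in
 * switch_to_kernel_context() below: feed the engine's latency EWMA with
 * the microseconds elapsed between emitting the request
 * (rq->duration.emitted) and its fence being signaled.
 */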
static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_request *rq = to_request(fence);

	ewma__engine_latency_add(&rq->engine->latency,
				 ktime_us_delta(rq->fence.timestamp,
						rq->duration.emitted));
}

static void
__queue_and_release_pm(struct i915_request *rq,
		       struct intel_timeline *tl,
		       struct intel_engine_cs *engine)
{
	struct intel_gt_timelines *timelines = &engine->gt->timelines;

	ENGINE_TRACE(engine, "\n");

	/*
	 * We have to serialise all potential retirement paths with our
	 * submission, as we don't want to underflow either the
	 * engine->wakeref.counter or our timeline->active_count.
	 *
	 * Equally, we cannot allow a new submission to start until
	 * after we finish queueing, nor could we allow that submitter
	 * to retire us before we are ready!
	 */
	spin_lock(&timelines->lock);

	/* Let intel_gt_retire_requests() retire us (acquired under lock) */
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);

	/* Hand the request over to HW and so engine_retire() */
	__i915_request_queue(rq, NULL);

	/* Let new submissions commence (and maybe retire this timeline) */
	__intel_wakeref_defer_park(&engine->wakeref);

	spin_unlock(&timelines->lock);
}

static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	unsigned long flags;
	bool result = true;

	/* GPU is pointing to the void, as good as in the kernel context. */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	GEM_BUG_ON(!intel_context_is_barrier(ce));

	/* Already inside the kernel context, safe to power down. */
	if (engine->wakeref_serial == engine->serial)
		return true;

	/*
	 * Note, we do this without taking the timeline->mutex. We cannot
	 * as we may be called while retiring the kernel context and so
	 * already underneath the timeline->mutex. Instead we rely on the
	 * exclusive property of the __engine_park that prevents anyone
	 * else from creating a request on this engine. This also requires
	 * that the ring is empty and we avoid any waits while constructing
	 * the context, as they assume protection by the timeline->mutex.
	 * This should hold true as we can only park the engine after
	 * retiring the last request, thus all rings should be empty and
	 * all timelines idle.
	 *
	 * For unlocking, there are 2 other parties and the GPU who have a
	 * stake here.
	 *
	 * A new gpu user will be waiting on the engine-pm to start their
	 * engine_unpark. New waiters are predicated on engine->wakeref.count
	 * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
	 * engine->wakeref.
	 *
	 * The other party is intel_gt_retire_requests(), which is walking the
	 * list of active timelines looking for completions. Meanwhile as soon
	 * as we call __i915_request_queue(), the GPU may complete our request.
	 * Ergo, if we put ourselves on the timelines.active_list
	 * (see intel_timeline_enter()) before we increment the
	 * engine->wakeref.count, we may see the request completion and retire
	 * it causing an underflow of the engine->wakeref.
	 */
	flags = __timeline_mark_lock(ce);
	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);

	rq = __i915_request_create(ce, GFP_NOWAIT);
	if (IS_ERR(rq))
		/* Context switch failed, hope for the best! Maybe reset? */
		goto out_unlock;

	/* Check again on the next retirement. */
	engine->wakeref_serial = engine->serial + 1;
	i915_request_add_active_barriers(rq);

	/* Install ourselves as a preemption barrier */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
		/*
		 * Use an interrupt for precise measurement of duration,
		 * otherwise we rely on someone else retiring all the requests
		 * which may delay the signaling (i.e. we will likely wait
		 * until the background request retirement running every
		 * second or two).
		 */
		dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
		rq->duration.emitted = ktime_get();
	}

	/* Expose ourselves to the world */
	__queue_and_release_pm(rq, ce->timeline, engine);

	result = false;
out_unlock:
	__timeline_mark_unlock(ce, flags);
	return result;
}
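/*
 * Flush the engine's idle barriers: atomically claim the whole
 * barrier_tasks llist and invoke each i915_active_fence callback by hand,
 * passing ERR_PTR(-EAGAIN) in place of a signaled fence since no request
 * ever carried these barriers (e.g. after wedging).
 */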
static void call_idle_barriers(struct intel_engine_cs *engine)
{
	struct llist_node *node, *next;

	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
		struct i915_active_fence *fence =
			container_of(node, struct i915_active_fence, llist);

		fence->cb.func(ERR_PTR(-EAGAIN), &fence->cb);
	}
}

static int __engine_park(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);

	engine->saturated = 0;

	/*
	 * If one and only one request is completed between pm events,
	 * we know that we are inside the kernel context and it is
	 * safe to power down. (We are paranoid in case that runtime
	 * suspend causes corruption to the active context image, and
	 * want to avoid that impacting userspace.)
	 */
	if (!switch_to_kernel_context(engine))
		return -EBUSY;

	ENGINE_TRACE(engine, "\n");

	call_idle_barriers(engine); /* cleanup after wedging */

	intel_engine_park_heartbeat(engine);
	intel_engine_disarm_breadcrumbs(engine);
	intel_engine_pool_park(&engine->pool);

	/* Must be reset upon idling, or we may miss the busy wakeup. */
	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);

	if (engine->park)
		engine->park(engine);

	if (engine->pinned_default_state) {
		i915_gem_object_unpin_map(engine->default_state);
		engine->pinned_default_state = NULL;
	}

	engine->execlists.no_priolist = false;

	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
	intel_gt_pm_put_async(engine->gt);
	return 0;
}

static const struct intel_wakeref_ops wf_ops = {
	.get = __engine_unpark,
	.put = __engine_park,
};

void intel_engine_init__pm(struct intel_engine_cs *engine)
{
	struct intel_runtime_pm *rpm = engine->uncore->rpm;

	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
	intel_engine_init_heartbeat(engine);
}

void
intel_engine_fini__pm(struct intel_engine_cs *engine)
{

	intel_wakeref_fini(&engine->wakeref);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_pm.c"
#endif
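
/*
 * Usage sketch (illustrative only, not compiled): callers keep the engine
 * awake by bracketing work with the wakeref helpers from intel_engine_pm.h,
 * e.g.
 *
 *	intel_engine_pm_get(engine);	// first get runs __engine_unpark()
 *	rq = i915_request_create(engine->kernel_context);
 *	...
 *	intel_engine_pm_put(engine);	// last put runs __engine_park()
 *
 * The final put submits the switch to the kernel context above; returning
 * -EBUSY defers parking until that request is retired, at which point the
 * deferred wakeref is finally released.
 */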