/*	$NetBSD: intel_breadcrumbs.c,v 1.1 2021/12/18 20:15:32 riastradh Exp $	*/

/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
24 * 25 */ 26 27#include <sys/cdefs.h> 28__KERNEL_RCSID(0, "$NetBSD: intel_breadcrumbs.c,v 1.1 2021/12/18 20:15:32 riastradh Exp $"); 29 30#include <linux/kthread.h> 31#include <trace/events/dma_fence.h> 32#include <uapi/linux/sched/types.h> 33 34#include "i915_drv.h" 35#include "i915_trace.h" 36#include "intel_gt_pm.h" 37#include "intel_gt_requests.h" 38 39static void irq_enable(struct intel_engine_cs *engine) 40{ 41 if (!engine->irq_enable) 42 return; 43 44 /* Caller disables interrupts */ 45 spin_lock(&engine->gt->irq_lock); 46 engine->irq_enable(engine); 47 spin_unlock(&engine->gt->irq_lock); 48} 49 50static void irq_disable(struct intel_engine_cs *engine) 51{ 52 if (!engine->irq_disable) 53 return; 54 55 /* Caller disables interrupts */ 56 spin_lock(&engine->gt->irq_lock); 57 engine->irq_disable(engine); 58 spin_unlock(&engine->gt->irq_lock); 59} 60 61static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) 62{ 63 struct intel_engine_cs *engine = 64 container_of(b, struct intel_engine_cs, breadcrumbs); 65 66 lockdep_assert_held(&b->irq_lock); 67 68 GEM_BUG_ON(!b->irq_enabled); 69 if (!--b->irq_enabled) 70 irq_disable(engine); 71 72 b->irq_armed = false; 73 intel_gt_pm_put_async(engine->gt); 74} 75 76void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) 77{ 78 struct intel_breadcrumbs *b = &engine->breadcrumbs; 79 unsigned long flags; 80 81 if (!b->irq_armed) 82 return; 83 84 spin_lock_irqsave(&b->irq_lock, flags); 85 if (b->irq_armed) 86 __intel_breadcrumbs_disarm_irq(b); 87 spin_unlock_irqrestore(&b->irq_lock, flags); 88} 89 90static inline bool __request_completed(const struct i915_request *rq) 91{ 92 return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); 93} 94 95__maybe_unused static bool 96check_signal_order(struct intel_context *ce, struct i915_request *rq) 97{ 98 if (!list_is_last(&rq->signal_link, &ce->signals) && 99 i915_seqno_passed(rq->fence.seqno, 100 list_next_entry(rq, signal_link)->fence.seqno)) 101 return 
false; 102 103 if (!list_is_first(&rq->signal_link, &ce->signals) && 104 i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno, 105 rq->fence.seqno)) 106 return false; 107 108 return true; 109} 110 111static bool 112__dma_fence_signal(struct dma_fence *fence) 113{ 114 return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags); 115} 116 117static void 118__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp) 119{ 120 fence->timestamp = timestamp; 121 set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags); 122 trace_dma_fence_signaled(fence); 123} 124 125static void 126__dma_fence_signal__notify(struct dma_fence *fence, 127 const struct list_head *list) 128{ 129 struct dma_fence_cb *cur, *tmp; 130 131 lockdep_assert_held(fence->lock); 132 133 list_for_each_entry_safe(cur, tmp, list, node) { 134 INIT_LIST_HEAD(&cur->node); 135 cur->func(fence, cur); 136 } 137} 138 139static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) 140{ 141 struct intel_engine_cs *engine = 142 container_of(b, struct intel_engine_cs, breadcrumbs); 143 144 if (unlikely(intel_engine_is_virtual(engine))) 145 engine = intel_virtual_engine_get_sibling(engine, 0); 146 147 intel_engine_add_retire(engine, tl); 148} 149 150static void signal_irq_work(struct irq_work *work) 151{ 152 struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); 153 const ktime_t timestamp = ktime_get(); 154 struct intel_context *ce, *cn; 155 struct list_head *pos, *next; 156 LIST_HEAD(signal); 157 158 spin_lock(&b->irq_lock); 159 160 if (b->irq_armed && list_empty(&b->signalers)) 161 __intel_breadcrumbs_disarm_irq(b); 162 163 list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { 164 GEM_BUG_ON(list_empty(&ce->signals)); 165 166 list_for_each_safe(pos, next, &ce->signals) { 167 struct i915_request *rq = 168 list_entry(pos, typeof(*rq), signal_link); 169 170 GEM_BUG_ON(!check_signal_order(ce, rq)); 171 172 if (!__request_completed(rq)) 173 break; 
174 175 GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, 176 &rq->fence.flags)); 177 clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); 178 179 if (!__dma_fence_signal(&rq->fence)) 180 continue; 181 182 /* 183 * Queue for execution after dropping the signaling 184 * spinlock as the callback chain may end up adding 185 * more signalers to the same context or engine. 186 */ 187 i915_request_get(rq); 188 list_add_tail(&rq->signal_link, &signal); 189 } 190 191 /* 192 * We process the list deletion in bulk, only using a list_add 193 * (not list_move) above but keeping the status of 194 * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit. 195 */ 196 if (!list_is_first(pos, &ce->signals)) { 197 /* Advance the list to the first incomplete request */ 198 __list_del_many(&ce->signals, pos); 199 if (&ce->signals == pos) { /* now empty */ 200 list_del_init(&ce->signal_link); 201 add_retire(b, ce->timeline); 202 } 203 } 204 } 205 206 spin_unlock(&b->irq_lock); 207 208 list_for_each_safe(pos, next, &signal) { 209 struct i915_request *rq = 210 list_entry(pos, typeof(*rq), signal_link); 211 struct list_head cb_list; 212 213 spin_lock(&rq->lock); 214 list_replace(&rq->fence.cb_list, &cb_list); 215 __dma_fence_signal__timestamp(&rq->fence, timestamp); 216 __dma_fence_signal__notify(&rq->fence, &cb_list); 217 spin_unlock(&rq->lock); 218 219 i915_request_put(rq); 220 } 221} 222 223static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) 224{ 225 struct intel_engine_cs *engine = 226 container_of(b, struct intel_engine_cs, breadcrumbs); 227 228 lockdep_assert_held(&b->irq_lock); 229 if (b->irq_armed) 230 return true; 231 232 if (!intel_gt_pm_get_if_awake(engine->gt)) 233 return false; 234 235 /* 236 * The breadcrumb irq will be disarmed on the interrupt after the 237 * waiters are signaled. This gives us a single interrupt window in 238 * which we can add a new waiter and avoid the cost of re-enabling 239 * the irq. 
240 */ 241 b->irq_armed = true; 242 243 /* 244 * Since we are waiting on a request, the GPU should be busy 245 * and should have its own rpm reference. This is tracked 246 * by i915->gt.awake, we can forgo holding our own wakref 247 * for the interrupt as before i915->gt.awake is released (when 248 * the driver is idle) we disarm the breadcrumbs. 249 */ 250 251 if (!b->irq_enabled++) 252 irq_enable(engine); 253 254 return true; 255} 256 257void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) 258{ 259 struct intel_breadcrumbs *b = &engine->breadcrumbs; 260 261 spin_lock_init(&b->irq_lock); 262 INIT_LIST_HEAD(&b->signalers); 263 264 init_irq_work(&b->irq_work, signal_irq_work); 265} 266 267void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) 268{ 269 struct intel_breadcrumbs *b = &engine->breadcrumbs; 270 unsigned long flags; 271 272 spin_lock_irqsave(&b->irq_lock, flags); 273 274 if (b->irq_enabled) 275 irq_enable(engine); 276 else 277 irq_disable(engine); 278 279 spin_unlock_irqrestore(&b->irq_lock, flags); 280} 281 282void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) 283{ 284} 285 286bool i915_request_enable_breadcrumb(struct i915_request *rq) 287{ 288 lockdep_assert_held(&rq->lock); 289 290 if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { 291 struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; 292 struct intel_context *ce = rq->context; 293 struct list_head *pos; 294 295 spin_lock(&b->irq_lock); 296 GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); 297 298 if (!__intel_breadcrumbs_arm_irq(b)) 299 goto unlock; 300 301 /* 302 * We keep the seqno in retirement order, so we can break 303 * inside intel_engine_signal_breadcrumbs as soon as we've 304 * passed the last completed request (or seen a request that 305 * hasn't event started). 
We could walk the timeline->requests, 306 * but keeping a separate signalers_list has the advantage of 307 * hopefully being much smaller than the full list and so 308 * provides faster iteration and detection when there are no 309 * more interrupts required for this context. 310 * 311 * We typically expect to add new signalers in order, so we 312 * start looking for our insertion point from the tail of 313 * the list. 314 */ 315 list_for_each_prev(pos, &ce->signals) { 316 struct i915_request *it = 317 list_entry(pos, typeof(*it), signal_link); 318 319 if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno)) 320 break; 321 } 322 list_add(&rq->signal_link, pos); 323 if (pos == &ce->signals) /* catch transitions from empty list */ 324 list_move_tail(&ce->signal_link, &b->signalers); 325 GEM_BUG_ON(!check_signal_order(ce, rq)); 326 327 set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); 328unlock: 329 spin_unlock(&b->irq_lock); 330 } 331 332 return !__request_completed(rq); 333} 334 335void i915_request_cancel_breadcrumb(struct i915_request *rq) 336{ 337 struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; 338 339 lockdep_assert_held(&rq->lock); 340 341 /* 342 * We must wait for b->irq_lock so that we know the interrupt handler 343 * has released its reference to the intel_context and has completed 344 * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if 345 * required). 
346 */ 347 spin_lock(&b->irq_lock); 348 if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { 349 struct intel_context *ce = rq->context; 350 351 list_del(&rq->signal_link); 352 if (list_empty(&ce->signals)) 353 list_del_init(&ce->signal_link); 354 355 clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); 356 } 357 spin_unlock(&b->irq_lock); 358} 359 360void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, 361 struct drm_printer *p) 362{ 363 struct intel_breadcrumbs *b = &engine->breadcrumbs; 364 struct intel_context *ce; 365 struct i915_request *rq; 366 367 if (list_empty(&b->signalers)) 368 return; 369 370 drm_printf(p, "Signals:\n"); 371 372 spin_lock_irq(&b->irq_lock); 373 list_for_each_entry(ce, &b->signalers, signal_link) { 374 list_for_each_entry(rq, &ce->signals, signal_link) { 375 drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", 376 rq->fence.context, rq->fence.seqno, 377 i915_request_completed(rq) ? "!" : 378 i915_request_started(rq) ? "*" : 379 "", 380 jiffies_to_msecs(jiffies - rq->emitted_jiffies)); 381 } 382 } 383 spin_unlock_irq(&b->irq_lock); 384} 385