i915_scheduler.c revision 1.4
/*	$NetBSD: i915_scheduler.c,v 1.4 2021/12/19 11:37:41 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_scheduler.c,v 1.4 2021/12/19 11:37:41 riastradh Exp $");

#include <linux/mutex.h>

#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_request.h"
#include "i915_scheduler.h"

#include <linux/nbsd-namespace.h>

static struct i915_global_scheduler {
	struct i915_global base;
	struct kmem_cache *slab_dependencies;
	struct kmem_cache *slab_priorities;
} global;

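/*
 * Single global lock serializing priority adjustments and dependency
 * bookkeeping for all scheduler nodes; see __i915_schedule() and
 * i915_sched_node_fini().
 */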
#ifdef __NetBSD__
static spinlock_t schedule_lock;
spinlock_t *const i915_schedule_lock = &schedule_lock;
#else
static DEFINE_SPINLOCK(schedule_lock);
#endif

static const struct i915_request *
node_to_request(const struct i915_sched_node *node)
{
	return const_container_of(node, const struct i915_request, sched);
}

static inline bool node_started(const struct i915_sched_node *node)
{
	return i915_request_started(node_to_request(node));
}

static inline bool node_signaled(const struct i915_sched_node *node)
{
	return i915_request_completed(node_to_request(node));
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

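/*
 * Debug-only sanity check (CONFIG_DRM_I915_DEBUG_GEM): the execlists
 * queue must be sorted in strictly decreasing priority order and each
 * priolist's ->used bitmask must cover every non-empty request list.
 */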
static void assert_priolists(struct intel_engine_execlists * const execlists)
{
	struct rb_node *rb;
	long last_prio, i;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
		   rb_first(&execlists->queue.rb_root));

	last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;
	for (rb = rb_first_cached(&execlists->queue);
	     rb;
	     rb = rb_next2(&execlists->queue.rb_root, rb)) {
		const struct i915_priolist *p = to_priolist(rb);

		GEM_BUG_ON(p->priority >= last_prio);
		last_prio = p->priority;

		GEM_BUG_ON(!p->used);
		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
			if (list_empty(&p->requests[i]))
				continue;

			GEM_BUG_ON(!(p->used & BIT(i)));
		}
	}
}

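/*
 * Look up (or create) the list of requests for the given priority on
 * @engine, returning the list head into which a ready request of that
 * priority should be queued.  Must be called with engine->active.lock
 * held.  On allocation failure, priority scheduling is disabled and
 * everything falls back to the I915_PRIORITY_NORMAL list
 * (execlists->no_priolist).
 */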
struct list_head *
i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct i915_priolist *p;
	struct rb_node **parent, *rb;
	bool first = true;
	int idx, i;

	lockdep_assert_held(&engine->active.lock);
	assert_priolists(execlists);

	/* buckets sorted from highest [in slot 0] to lowest priority */
	idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
	prio >>= I915_USER_PRIORITY_SHIFT;
	if (unlikely(execlists->no_priolist))
		prio = I915_PRIORITY_NORMAL;

find_priolist:
#ifdef __NetBSD__
	/* XXX  */
	__USE(first);
	__USE(parent);
	__USE(rb);
	p = rb_tree_find_node(&execlists->queue.rb_root.rbr_tree, &prio);
	if (p)
		goto out;
#else
	/* most positive priority is scheduled first, equal priorities fifo */
	rb = NULL;
	parent = &execlists->queue.rb_root.rb_node;
	while (*parent) {
		rb = *parent;
		p = to_priolist(rb);
		if (prio > p->priority) {
			parent = &rb->rb_left;
		} else if (prio < p->priority) {
			parent = &rb->rb_right;
			first = false;
		} else {
			goto out;
		}
	}
#endif

	if (prio == I915_PRIORITY_NORMAL) {
		p = &execlists->default_priolist;
	} else {
		p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
		/* Convert an allocation failure to a priority bump */
		if (unlikely(!p)) {
			prio = I915_PRIORITY_NORMAL; /* recurses just once */

			/* To maintain ordering with all rendering, after an
			 * allocation failure we have to disable all scheduling.
			 * Requests will then be executed in fifo, and schedule
			 * will ensure that dependencies are emitted in fifo.
			 * There will still be some reordering with existing
			 * requests, so if userspace lied about their
			 * dependencies that reordering may be visible.
			 */
			execlists->no_priolist = true;
			goto find_priolist;
		}
	}

	p->priority = prio;
	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
		INIT_LIST_HEAD(&p->requests[i]);
#ifdef __NetBSD__
	struct i915_priolist *collision __diagused;
	collision = rb_tree_insert_node(&execlists->queue.rb_root.rbr_tree,
	    p);
	KASSERT(collision == p);
#else
	rb_link_node(&p->node, rb, parent);
	rb_insert_color_cached(&p->node, &execlists->queue, first);
#endif
	p->used = 0;

out:
	p->used |= BIT(idx);
	return &p->requests[idx];
}

void __i915_priolist_free(struct i915_priolist *p)
{
	kmem_cache_free(global.slab_priorities, p);
}

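/*
 * Cache of the priolist most recently looked up while walking the
 * dependency list in __i915_schedule(); cleared whenever
 * sched_lock_engine() has to switch to a different engine lock.
 */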
struct sched_cache {
	struct list_head *priolist;
};

static struct intel_engine_cs *
sched_lock_engine(const struct i915_sched_node *node,
		  struct intel_engine_cs *locked,
		  struct sched_cache *cache)
{
	const struct i915_request *rq = node_to_request(node);
	struct intel_engine_cs *engine;

	GEM_BUG_ON(!locked);

	/*
	 * Virtual engines complicate acquiring the engine timeline lock,
	 * as their rq->engine pointer is not stable until under that
	 * engine lock. The simple ploy we use is to take the lock then
	 * check that the rq still belongs to the newly locked engine.
	 */
	while (locked != (engine = READ_ONCE(rq->engine))) {
		spin_unlock(&locked->active.lock);
		memset(cache, 0, sizeof(*cache));
		spin_lock(&engine->active.lock);
		locked = engine;
	}

	GEM_BUG_ON(locked != engine);
	return locked;
}

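/*
 * Effective priority of a request as used for preemption decisions;
 * the __NO_PREEMPTION bit is always ORed in (see also the WAIT
 * handling in __i915_sched_node_add_dependency()).
 */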
static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority | __NO_PREEMPTION;
}

static inline bool need_preempt(int prio, int active)
{
	/*
	 * Allow preemption of low -> normal -> high, but we do
	 * not allow low priority tasks to preempt other low priority
	 * tasks under the impression that latency for low priority
	 * tasks does not matter (as much as background throughput),
	 * so kiss.
	 */
	return prio >= max(I915_PRIORITY_NORMAL, active);
}

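/*
 * After raising a request's priority, nudge the engine: update the
 * queue_priority_hint and, if the new priority should preempt the
 * request currently executing, schedule the submission tasklet.
 */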
static void kick_submission(struct intel_engine_cs *engine,
			    const struct i915_request *rq,
			    int prio)
{
	const struct i915_request *inflight;

	/*
	 * We only need to kick the tasklet once for the high priority
	 * new context we add into the queue.
	 */
	if (prio <= engine->execlists.queue_priority_hint)
		return;

	rcu_read_lock();

	/* Nothing currently active? We're overdue for a submission! */
	inflight = execlists_active(&engine->execlists);
	if (!inflight)
		goto unlock;

	/*
	 * If we are already the currently executing context, don't
	 * bother evaluating if we should preempt ourselves.
	 */
	if (inflight->context == rq->context)
		goto unlock;

	engine->execlists.queue_priority_hint = prio;
	if (need_preempt(prio, rq_prio(inflight)))
		tasklet_hi_schedule(&engine->execlists.tasklet);

unlock:
	rcu_read_unlock();
}

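/*
 * Raise @node and all of the nodes it depends upon (its signalers,
 * transitively) to at least @attr->priority.  Requests already sitting
 * in a priority queue are moved to the matching priolist, and each
 * affected engine is kicked in case preemption is now required.
 * Caller must hold schedule_lock.
 */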
static void __i915_schedule(struct i915_sched_node *node,
			    const struct i915_sched_attr *attr)
{
	struct intel_engine_cs *engine;
	struct i915_dependency *dep, *p;
	struct i915_dependency stack;
	const int prio = attr->priority;
	struct sched_cache cache;
	LIST_HEAD(dfs);

	/* Needed in order to use the temporary link inside i915_dependency */
	lockdep_assert_held(&schedule_lock);
	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);

	if (prio <= READ_ONCE(node->attr.priority))
		return;

	if (node_signaled(node))
		return;

	stack.signaler = node;
	list_add(&stack.dfs_link, &dfs);

	/*
	 * Recursively bump all dependent priorities to match the new request.
	 *
	 * A naive approach would be to use recursion:
	 * static void update_priorities(struct i915_sched_node *node, prio) {
	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
	 *		update_priorities(dep->signal, prio)
	 *	queue_request(node);
	 * }
	 * but that may have unlimited recursion depth and so runs a very
	 * real risk of overrunning the kernel stack. Instead, we build
	 * a flat list of all dependencies starting with the current request.
	 * As we walk the list of dependencies, we add all of its dependencies
	 * to the end of the list (this may include an already visited
	 * request) and continue to walk onwards onto the new dependencies. The
	 * end result is a topological list of requests in reverse order, the
	 * last element in the list is the request we must execute first.
	 */
	list_for_each_entry(dep, &dfs, dfs_link) {
		struct i915_sched_node *node = dep->signaler;

		/* If we are already flying, we know we have no signalers */
		if (node_started(node))
			continue;

		/*
		 * Within an engine, there can be no cycle, but we may
		 * refer to the same dependency chain multiple times
		 * (redundant dependencies are not eliminated) and across
		 * engines.
		 */
		list_for_each_entry(p, &node->signalers_list, signal_link) {
			GEM_BUG_ON(p == dep); /* no cycles! */

			if (node_signaled(p->signaler))
				continue;

			if (prio > READ_ONCE(p->signaler->attr.priority))
				list_move_tail(&p->dfs_link, &dfs);
		}
	}

	/*
	 * If we didn't need to bump any existing priorities, and we haven't
	 * yet submitted this request (i.e. there is no potential race with
	 * execlists_submit_request()), we can set our own priority and skip
	 * acquiring the engine locks.
	 */
	if (node->attr.priority == I915_PRIORITY_INVALID) {
		GEM_BUG_ON(!list_empty(&node->link));
		node->attr = *attr;

		if (stack.dfs_link.next == stack.dfs_link.prev)
			return;

		__list_del_entry(&stack.dfs_link);
	}

	memset(&cache, 0, sizeof(cache));
	engine = node_to_request(node)->engine;
	spin_lock(&engine->active.lock);

	/* Fifo and depth-first replacement ensure our deps execute before us */
	engine = sched_lock_engine(node, engine, &cache);
	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
		INIT_LIST_HEAD(&dep->dfs_link);

		node = dep->signaler;
		engine = sched_lock_engine(node, engine, &cache);
		lockdep_assert_held(&engine->active.lock);

		/* Recheck after acquiring the engine->timeline.lock */
		if (prio <= node->attr.priority || node_signaled(node))
			continue;

		GEM_BUG_ON(node_to_request(node)->engine != engine);

		node->attr.priority = prio;

		/*
		 * Once the request is ready, it will be placed into the
		 * priority lists and then onto the HW runlist. Before the
		 * request is ready, it does not contribute to our preemption
		 * decisions and we can safely ignore it, as it, and any
		 * preemption required, will be dealt with upon submission.
		 * See engine->submit_request()
		 */
		if (list_empty(&node->link))
			continue;

		if (i915_request_in_priority_queue(node_to_request(node))) {
			if (!cache.priolist)
				cache.priolist =
					i915_sched_lookup_priolist(engine,
								   prio);
			list_move_tail(&node->link, cache.priolist);
		}

		/* Defer (tasklet) submission until after all of our updates. */
		kick_submission(engine, node_to_request(node), prio);
	}

	spin_unlock(&engine->active.lock);
}

void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
	spin_lock_irq(&schedule_lock);
	__i915_schedule(&rq->sched, attr);
	spin_unlock_irq(&schedule_lock);
}

static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
{
	struct i915_sched_attr attr = node->attr;

	attr.priority |= bump;
	__i915_schedule(node, &attr);
}

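/*
 * OR a priority bump bit into the request's priority and propagate it
 * through __i915_schedule(), skipping the work if the bit is already
 * set.
 */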
void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
{
	unsigned long flags;

	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
	if (READ_ONCE(rq->sched.attr.priority) & bump)
		return;

	spin_lock_irqsave(&schedule_lock, flags);
	__bump_priority(&rq->sched, bump);
	spin_unlock_irqrestore(&schedule_lock, flags);
}

void i915_sched_node_init(struct i915_sched_node *node)
{
	INIT_LIST_HEAD(&node->signalers_list);
	INIT_LIST_HEAD(&node->waiters_list);
	INIT_LIST_HEAD(&node->link);

	i915_sched_node_reinit(node);
}

void i915_sched_node_reinit(struct i915_sched_node *node)
{
	node->attr.priority = I915_PRIORITY_INVALID;
	node->semaphores = 0;
	node->flags = 0;

	GEM_BUG_ON(!list_empty(&node->signalers_list));
	GEM_BUG_ON(!list_empty(&node->waiters_list));
	GEM_BUG_ON(!list_empty(&node->link));
}

static struct i915_dependency *
i915_dependency_alloc(void)
{
	return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
}

static void
i915_dependency_free(struct i915_dependency *dep)
{
	kmem_cache_free(global.slab_dependencies, dep);
}

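/*
 * Record that @node waits upon @signal.  Returns true if the
 * dependency was installed; returns false if @signal has already
 * completed, in which case ownership of @dep stays with the caller.
 */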
bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
				      struct i915_sched_node *signal,
				      struct i915_dependency *dep,
				      unsigned long flags)
{
	bool ret = false;

	spin_lock_irq(&schedule_lock);

	if (!node_signaled(signal)) {
		INIT_LIST_HEAD(&dep->dfs_link);
		dep->signaler = signal;
		dep->waiter = node;
		dep->flags = flags;

		/* Keep track of whether anyone on this chain has a semaphore */
		if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
		    !node_started(signal))
			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;

		/* All set, now publish. Beware the lockless walkers. */
		list_add(&dep->signal_link, &node->signalers_list);
		list_add_rcu(&dep->wait_link, &signal->waiters_list);

		/*
		 * As we do not allow WAIT to preempt inflight requests,
		 * once we have executed a request, along with triggering
		 * any execution callbacks, we must preserve its ordering
		 * within the non-preemptible FIFO.
		 */
		BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
		if (flags & I915_DEPENDENCY_EXTERNAL)
			__bump_priority(signal, __NO_PREEMPTION);

		ret = true;
	}

	spin_unlock_irq(&schedule_lock);

	return ret;
}

int i915_sched_node_add_dependency(struct i915_sched_node *node,
				   struct i915_sched_node *signal)
{
	struct i915_dependency *dep;

	dep = i915_dependency_alloc();
	if (!dep)
		return -ENOMEM;

	if (!__i915_sched_node_add_dependency(node, signal, dep,
					      I915_DEPENDENCY_EXTERNAL |
					      I915_DEPENDENCY_ALLOC))
		i915_dependency_free(dep);

	return 0;
}

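/*
 * Tear down every dependency link to and from @node, freeing any
 * i915_dependency that was allocated on its behalf.
 */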
void i915_sched_node_fini(struct i915_sched_node *node)
{
	struct i915_dependency *dep, *tmp;

	spin_lock_irq(&schedule_lock);

	/*
	 * Everyone we depended upon (the fences we wait to be signaled)
	 * should retire before us and remove themselves from our list.
	 * However, retirement is run independently on each timeline and
	 * so we may be called out-of-order.
	 */
	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del(&dep->wait_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(dep);
	}
	INIT_LIST_HEAD(&node->signalers_list);

	/* Remove ourselves from everyone who depends upon us */
	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
		GEM_BUG_ON(dep->signaler != node);
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del(&dep->signal_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(dep);
	}
	INIT_LIST_HEAD(&node->waiters_list);

	spin_unlock_irq(&schedule_lock);
}

static void i915_global_scheduler_shrink(void)
{
	kmem_cache_shrink(global.slab_dependencies);
	kmem_cache_shrink(global.slab_priorities);
}

static void i915_global_scheduler_exit(void)
{
	kmem_cache_destroy(global.slab_dependencies);
	kmem_cache_destroy(global.slab_priorities);
}

static struct i915_global_scheduler global = { {
	.shrink = i915_global_scheduler_shrink,
	.exit = i915_global_scheduler_exit,
} };

int __init i915_global_scheduler_init(void)
{
	global.slab_dependencies = KMEM_CACHE(i915_dependency,
					      SLAB_HWCACHE_ALIGN);
	if (!global.slab_dependencies)
		return -ENOMEM;

	global.slab_priorities = KMEM_CACHE(i915_priolist,
					    SLAB_HWCACHE_ALIGN);
	if (!global.slab_priorities)
		goto err_priorities;

	i915_global_register(&global.base);
	return 0;

err_priorities:
	/* slab_priorities failed to allocate; release the dependencies cache */
	kmem_cache_destroy(global.slab_dependencies);
	return -ENOMEM;
}