// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <linux/interval_tree_generic.h>
#include <linux/sched/mm.h>

#include "i915_sw_fence.h"
#include "i915_vma_resource.h"
#include "i915_drv.h"
#include "intel_memory_region.h"

#include "gt/intel_gtt.h"

static struct pool slab_vma_resources;

/**
 * DOC:
 * We use a per-vm interval tree to keep track of vma_resources
 * scheduled for unbind but not yet unbound. The tree is protected by
 * the vm mutex, and nodes are removed just after the unbind fence signals.
 * The removal takes the vm mutex from a kernel thread, which we need to
 * keep in mind so that we don't grab the mutex and try to wait for all
 * pending unbinds to complete, because that will temporarily block many
 * of the workqueue threads, and people will get angry.
 *
 * We should consider using a single ordered fence per VM instead, but that
 * requires ordering the unbinds and might introduce unnecessary waiting
 * for unrelated unbinds. The amount of code would probably be roughly the
 * same due to the simplicity of the interval tree interface.
 *
 * Another drawback of this interval tree is that the complexity of insertion
 * and removal of fences increases as O(log(pending_unbinds)) instead of
 * O(1) for a single fence without interval tree.
 */
#define VMA_RES_START(_node) ((_node)->start - (_node)->guard)
#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1)
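
/*
 * Worked example (illustrative values only): with the bounds above, a node
 * with start = 0x11000, node_size = 0x4000 and guard = 0x1000 is tracked
 * over [0x10000, 0x15fff], so interval queries also hit the guard pages on
 * either side of the bound range.
 */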
#ifdef __linux__
INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
		     u64, __subtree_last,
		     VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
#else
/*
 * Fallback for non-Linux builds: a plain cached rbtree keyed on node start,
 * with interval overlap checked by a linear scan in the iterators.
 */
static struct i915_vma_resource *
vma_res_itree_iter_first(struct rb_root_cached *root, uint64_t start,
    uint64_t last)
{
	struct i915_vma_resource *node;
	struct rb_node *rb;

	for (rb = rb_first_cached(root); rb; rb = rb_next(rb)) {
		node = rb_entry(rb, typeof(*node), rb);
		if (VMA_RES_LAST(node) >= start && VMA_RES_START(node) <= last)
			return node;
	}
	return NULL;
}

static struct i915_vma_resource *
vma_res_itree_iter_next(struct i915_vma_resource *node, uint64_t start,
    uint64_t last)
{
	struct rb_node *rb = &node->rb;

	for (rb = rb_next(rb); rb; rb = rb_next(rb)) {
		node = rb_entry(rb, typeof(*node), rb);
		if (VMA_RES_LAST(node) >= start && VMA_RES_START(node) <= last)
			return node;
	}
	return NULL;
}

static void
vma_res_itree_remove(struct i915_vma_resource *node,
    struct rb_root_cached *root)
{
	rb_erase_cached(&node->rb, root);
}

static void
vma_res_itree_insert(struct i915_vma_resource *node,
    struct rb_root_cached *root)
{
	struct rb_node **iter = &root->rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct i915_vma_resource *iter_node;

	while (*iter) {
		parent = *iter;
		iter_node = rb_entry(*iter, struct i915_vma_resource, rb);

		if (node->start < iter_node->start)
			iter = &(*iter)->rb_left;
		else
			iter = &(*iter)->rb_right;
	}

	rb_link_node(&node->rb, parent, iter);
	rb_insert_color_cached(&node->rb, root, false);
}
#endif
104
105/**
106 * i915_vma_resource_alloc - Allocate a vma resource
107 *
108 * Return: A pointer to a cleared struct i915_vma_resource or
109 * a -ENOMEM error pointer if allocation fails.
110 */
111struct i915_vma_resource *i915_vma_resource_alloc(void)
112{
113#ifdef __linux__
114	struct i915_vma_resource *vma_res =
115		kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL);
116#else
117	struct i915_vma_resource *vma_res =
118		pool_get(&slab_vma_resources, PR_WAITOK | PR_ZERO);
119#endif
120
121	return vma_res ? vma_res : ERR_PTR(-ENOMEM);
122}
123
124/**
125 * i915_vma_resource_free - Free a vma resource
126 * @vma_res: The vma resource to free.
127 */
128void i915_vma_resource_free(struct i915_vma_resource *vma_res)
129{
130#ifdef __linux__
131	if (vma_res)
132		kmem_cache_free(slab_vma_resources, vma_res);
133#else
134	if (vma_res)
135		pool_put(&slab_vma_resources, vma_res);
136#endif
137}
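
/*
 * Illustrative usage sketch (not taken from a real caller): allocation
 * returns an ERR_PTR() on failure rather than NULL, so a caller checks
 * with IS_ERR() before use and frees with i915_vma_resource_free():
 *
 *	struct i915_vma_resource *vma_res = i915_vma_resource_alloc();
 *
 *	if (IS_ERR(vma_res))
 *		return PTR_ERR(vma_res);
 *	...
 *	i915_vma_resource_free(vma_res);
 */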

/* Callbacks for the unbind dma-fence. */

static const char *get_driver_name(struct dma_fence *fence)
{
	return "vma unbind fence";
}

static const char *get_timeline_name(struct dma_fence *fence)
{
	return "unbound";
}

static void unbind_fence_free_rcu(struct rcu_head *head)
{
	struct i915_vma_resource *vma_res =
		container_of(head, typeof(*vma_res), unbind_fence.rcu);

	i915_vma_resource_free(vma_res);
}

static void unbind_fence_release(struct dma_fence *fence)
{
	struct i915_vma_resource *vma_res =
		container_of(fence, typeof(*vma_res), unbind_fence);

	i915_sw_fence_fini(&vma_res->chain);

	call_rcu(&fence->rcu, unbind_fence_free_rcu);
}

static const struct dma_fence_ops unbind_fence_ops = {
	.get_driver_name = get_driver_name,
	.get_timeline_name = get_timeline_name,
	.release = unbind_fence_release,
};

static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res)
{
	struct i915_address_space *vm;

	if (!refcount_dec_and_test(&vma_res->hold_count))
		return;

	dma_fence_signal(&vma_res->unbind_fence);

	vm = vma_res->vm;
	if (vma_res->wakeref)
		intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref);

	vma_res->vm = NULL;
	if (!RB_EMPTY_NODE(&vma_res->rb)) {
		mutex_lock(&vm->mutex);
		vma_res_itree_remove(vma_res, &vm->pending_unbind);
		mutex_unlock(&vm->mutex);
	}

	if (vma_res->bi.pages_rsgt)
		i915_refct_sgt_put(vma_res->bi.pages_rsgt);
}

/**
 * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind
 * fence.
 * @vma_res: The vma resource.
 * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold.
 *
 * The function may leave a dma_fence critical section.
 */
void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
			      bool lockdep_cookie)
{
	dma_fence_end_signalling(lockdep_cookie);

	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		unsigned long irq_flags;

		/* Inefficient open-coded might_lock_irqsave() */
		spin_lock_irqsave(&vma_res->lock, irq_flags);
		spin_unlock_irqrestore(&vma_res->lock, irq_flags);
	}

	__i915_vma_resource_unhold(vma_res);
}

/**
 * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence.
 * @vma_res: The vma resource.
 * @lockdep_cookie: Pointer to a bool serving as a lockdep cookie that should
 * be given as an argument to the pairing i915_vma_resource_unhold.
 *
 * If returning true, the function enters a dma_fence signalling critical
 * section if not in one already.
 *
 * Return: true if holding successful, false if not.
 */
bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
			    bool *lockdep_cookie)
{
	bool held = refcount_inc_not_zero(&vma_res->hold_count);

	if (held)
		*lockdep_cookie = dma_fence_begin_signalling();

	return held;
}
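
/*
 * Illustrative pairing sketch (hypothetical caller): the cookie returned
 * through @lockdep_cookie must be forwarded to i915_vma_resource_unhold():
 *
 *	bool lockdep_cookie;
 *
 *	if (i915_vma_resource_hold(vma_res, &lockdep_cookie)) {
 *		...the unbind fence is guaranteed not to signal here...
 *		i915_vma_resource_unhold(vma_res, lockdep_cookie);
 *	}
 */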

static void i915_vma_resource_unbind_work(struct work_struct *work)
{
	struct i915_vma_resource *vma_res =
		container_of(work, typeof(*vma_res), work);
	struct i915_address_space *vm = vma_res->vm;
	bool lockdep_cookie;

	lockdep_cookie = dma_fence_begin_signalling();
	if (likely(!vma_res->skip_pte_rewrite))
		vma_res->ops->unbind_vma(vm, vma_res);

	dma_fence_end_signalling(lockdep_cookie);
	__i915_vma_resource_unhold(vma_res);
	i915_vma_resource_put(vma_res);
}

static int
i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
			       enum i915_sw_fence_notify state)
{
	struct i915_vma_resource *vma_res =
		container_of(fence, typeof(*vma_res), chain);
	struct dma_fence *unbind_fence =
		&vma_res->unbind_fence;

	switch (state) {
	case FENCE_COMPLETE:
		dma_fence_get(unbind_fence);
		if (vma_res->immediate_unbind) {
			i915_vma_resource_unbind_work(&vma_res->work);
		} else {
			INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work);
			queue_work(system_unbound_wq, &vma_res->work);
		}
		break;
	case FENCE_FREE:
		i915_vma_resource_put(vma_res);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * i915_vma_resource_unbind - Unbind a vma resource
 * @vma_res: The vma resource to unbind.
 * @tlb: pointer to vma->obj->mm.tlb associated with the resource
 *	 to be stored at vma_res->tlb. When non-NULL, it will be used
 *	 to do TLB cache invalidation before freeing a VMA resource.
 *	 Used only for async unbind.
 *
 * At this point this function does little more than publish a fence that
 * signals immediately unless signaling is held back.
 *
 * Return: A refcounted pointer to a dma-fence that signals when unbinding is
 * complete.
 */
struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
					   u32 *tlb)
{
	struct i915_address_space *vm = vma_res->vm;

	vma_res->tlb = tlb;

	/* Reference for the sw fence */
	i915_vma_resource_get(vma_res);

	/* Caller must already have a wakeref in this case. */
	if (vma_res->needs_wakeref)
		vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm);

	if (atomic_read(&vma_res->chain.pending) <= 1) {
		RB_CLEAR_NODE(&vma_res->rb);
		vma_res->immediate_unbind = 1;
	} else {
		vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind);
	}

	i915_sw_fence_commit(&vma_res->chain);

	return &vma_res->unbind_fence;
}
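
/*
 * Illustrative caller sketch (hypothetical; per the DOC comment above, the
 * pending-unbind tree is manipulated under the vm mutex): the returned
 * fence is refcounted, so a caller waiting synchronously would do:
 *
 *	struct dma_fence *fence;
 *
 *	fence = i915_vma_resource_unbind(vma_res, NULL);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */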

/**
 * __i915_vma_resource_init - Initialize a vma resource.
 * @vma_res: The vma resource to initialize
 *
 * Initializes the private members of a vma resource.
 */
void __i915_vma_resource_init(struct i915_vma_resource *vma_res)
{
	mtx_init(&vma_res->lock, IPL_TTY);
	dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops,
		       &vma_res->lock, 0, 0);
	refcount_set(&vma_res->hold_count, 1);
	i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify);
}

static void
i915_vma_resource_color_adjust_range(struct i915_address_space *vm,
				     u64 *start,
				     u64 *end)
{
	if (i915_vm_has_cache_coloring(vm)) {
		if (*start)
			*start -= I915_GTT_PAGE_SIZE;
		*end += I915_GTT_PAGE_SIZE;
	}
}
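
/*
 * Worked example (illustrative values, assuming a 4K I915_GTT_PAGE_SIZE):
 * on a vm with cache coloring, a query over [0x10000, 0x13fff] is widened
 * to [0xf000, 0x14fff], so pending unbinds of the immediately neighbouring
 * pages, which may affect the color of this range, are considered too.
 */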

/**
 * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a
 * certain vm range.
 * @vm: The vm to look at.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptibly.
 *
 * The function needs to be called with the vm lock held.
 *
 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
 */
int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
				    u64 offset,
				    u64 size,
				    bool intr)
{
	struct i915_vma_resource *node;
	u64 last = offset + size - 1;

	lockdep_assert_held(&vm->mutex);
	might_sleep();

	i915_vma_resource_color_adjust_range(vm, &offset, &last);
	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
	while (node) {
		int ret = dma_fence_wait(&node->unbind_fence, intr);

		if (ret)
			return ret;

		node = vma_res_itree_iter_next(node, offset, last);
	}

	return 0;
}

/**
 * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm,
 * releasing the vm lock while waiting.
 * @vm: The vm to look at.
 *
 * The function must not be called with the vm lock held.
 * Typically this is called at vm destruction to finish any pending
 * unbind operations. The vm mutex is released while waiting to avoid
 * stalling kernel workqueues trying to grab the mutex.
 */
void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm)
{
	struct i915_vma_resource *node;
	struct dma_fence *fence;

	do {
		fence = NULL;
		mutex_lock(&vm->mutex);
		node = vma_res_itree_iter_first(&vm->pending_unbind, 0,
						U64_MAX);
		if (node)
			fence = dma_fence_get_rcu(&node->unbind_fence);
		mutex_unlock(&vm->mutex);

		if (fence) {
			/*
			 * The wait makes sure the node eventually removes
			 * itself from the tree.
			 */
			dma_fence_wait(fence, false);
			dma_fence_put(fence);
		}
	} while (node);
}

/**
 * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all
 * pending unbinds in a certain range of a vm.
 * @vm: The vm to look at.
 * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds.
 * @offset: The range start.
 * @size: The range size.
 * @intr: Whether to wait interruptibly.
 * @gfp: Allocation mode for memory allocations.
 *
 * The function makes @sw_fence await all pending unbinds in a certain
 * vm range before calling the complete notifier. To be able to await
 * each individual unbind, the function needs to allocate memory using
 * the @gfp allocation mode. If that fails, the function will instead
 * wait for the unbind fence to signal, using @intr to judge whether to
 * wait interruptibly or not. Note that @gfp should ideally be selected so
 * as to avoid any expensive memory allocation stalls and rather fail and
 * synchronize itself. For now the vm mutex is required when calling this
 * function, which means that @gfp can't call into direct reclaim. In reality
 * this means that during heavy memory pressure, we will sync in this
 * function.
 *
 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
 */
int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
				     struct i915_sw_fence *sw_fence,
				     u64 offset,
				     u64 size,
				     bool intr,
				     gfp_t gfp)
{
	struct i915_vma_resource *node;
	u64 last = offset + size - 1;

	lockdep_assert_held(&vm->mutex);
	might_alloc(gfp);
	might_sleep();

	i915_vma_resource_color_adjust_range(vm, &offset, &last);
	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
	while (node) {
		int ret;

		ret = i915_sw_fence_await_dma_fence(sw_fence,
						    &node->unbind_fence,
						    0, gfp);
		if (ret < 0) {
			ret = dma_fence_wait(&node->unbind_fence, intr);
			if (ret)
				return ret;
		}

		node = vma_res_itree_iter_next(node, offset, last);
	}

	return 0;
}
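
/*
 * Illustrative call sketch (hypothetical caller and values): since the vm
 * mutex is held across this function, a @gfp mode that fails fast rather
 * than entering direct reclaim is the natural choice, e.g.:
 *
 *	ret = i915_vma_resource_bind_dep_await(vm, &work->chain, offset,
 *					       size, true,
 *					       GFP_NOWAIT | __GFP_NOWARN);
 */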

void i915_vma_resource_module_exit(void)
{
#ifdef __linux__
	kmem_cache_destroy(slab_vma_resources);
#else
	pool_destroy(&slab_vma_resources);
#endif
}

int __init i915_vma_resource_module_init(void)
{
#ifdef __linux__
	slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN);
	if (!slab_vma_resources)
		return -ENOMEM;
#else
	pool_init(&slab_vma_resources, sizeof(struct i915_vma_resource),
	    0, IPL_NONE, 0, "svmar", NULL);
#endif

	return 0;
}