// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright(c) 2020 Cornelis Networks, Inc.
 * Copyright(c) 2016 - 2017 Intel Corporation.
 */

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree_generic.h>
#include <linux/sched/mm.h>

#include "mmu_rb.h"
#include "trace.h"

static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
static int mmu_notifier_range_start(struct mmu_notifier *,
		const struct mmu_notifier_range *);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
					   unsigned long, unsigned long);
static void release_immediate(struct kref *refcount);
static void handle_remove(struct work_struct *work);

static const struct mmu_notifier_ops mn_opts = {
	.invalidate_range_start = mmu_notifier_range_start,
};

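/*
 * Generate the static __mmu_int_rb_*() helpers for an interval tree keyed
 * by the page-aligned range [mmu_node_start(), mmu_node_last()] of each node.
 */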
INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
		     mmu_node_start, mmu_node_last, static, __mmu_int_rb);

static unsigned long mmu_node_start(struct mmu_rb_node *node)
{
	return node->addr & PAGE_MASK;
}

static unsigned long mmu_node_last(struct mmu_rb_node *node)
{
	return PAGE_ALIGN(node->addr + node->len) - 1;
}

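/*
 * Allocate a cache-line-aligned handler, register it as an MMU notifier on
 * the current mm, and return it through @handler.  @ops and @ops_arg supply
 * the caller's filter/evict/remove callbacks; deferred node removals run on
 * @wq.
 */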
int hfi1_mmu_rb_register(void *ops_arg,
			 struct mmu_rb_ops *ops,
			 struct workqueue_struct *wq,
			 struct mmu_rb_handler **handler)
{
	struct mmu_rb_handler *h;
	void *free_ptr;
	int ret;

	free_ptr = kzalloc(sizeof(*h) + cache_line_size() - 1, GFP_KERNEL);
	if (!free_ptr)
		return -ENOMEM;

	h = PTR_ALIGN(free_ptr, cache_line_size());
	h->root = RB_ROOT_CACHED;
	h->ops = ops;
	h->ops_arg = ops_arg;
	INIT_HLIST_NODE(&h->mn.hlist);
	spin_lock_init(&h->lock);
	h->mn.ops = &mn_opts;
	INIT_WORK(&h->del_work, handle_remove);
	INIT_LIST_HEAD(&h->del_list);
	INIT_LIST_HEAD(&h->lru_list);
	h->wq = wq;
	h->free_ptr = free_ptr;

	ret = mmu_notifier_register(&h->mn, current->mm);
	if (ret) {
		kfree(free_ptr);
		return ret;
	}

	*handler = h;
	return 0;
}

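/*
 * Tear down a handler: unregister the MMU notifier, flush any pending
 * deferred removals, then release every node still held in the tree.
 */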
void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
{
	struct mmu_rb_node *rbnode;
	struct rb_node *node;
	unsigned long flags;
	struct list_head del_list;

	/* Prevent freeing of mm until we are completely finished. */
	mmgrab(handler->mn.mm);

	/* Unregister first so we don't get any more notifications. */
	mmu_notifier_unregister(&handler->mn, handler->mn.mm);

	/*
	 * Make sure the wq delete handler is finished running.  It will not
	 * be triggered once the mmu notifiers are unregistered above.
	 */
	flush_work(&handler->del_work);

	INIT_LIST_HEAD(&del_list);

	spin_lock_irqsave(&handler->lock, flags);
	while ((node = rb_first_cached(&handler->root))) {
		rbnode = rb_entry(node, struct mmu_rb_node, node);
		rb_erase_cached(node, &handler->root);
		/* move from LRU list to delete list */
		list_move(&rbnode->list, &del_list);
	}
	spin_unlock_irqrestore(&handler->lock, flags);

	while (!list_empty(&del_list)) {
		rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
		list_del(&rbnode->list);
		kref_put(&rbnode->refcount, release_immediate);
	}

	/* Now the mm may be freed. */
	mmdrop(handler->mn.mm);

	kfree(handler->free_ptr);
}

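/*
 * Insert @mnode into the interval tree and add it to the tail of the LRU
 * list.  Returns -EPERM when called from an mm other than the registered
 * one and -EEXIST when the range already overlaps a cached node.
 */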
int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
		       struct mmu_rb_node *mnode)
{
	struct mmu_rb_node *node;
	unsigned long flags;
	int ret = 0;

	trace_hfi1_mmu_rb_insert(mnode);

	if (current->mm != handler->mn.mm)
		return -EPERM;

	spin_lock_irqsave(&handler->lock, flags);
	node = __mmu_rb_search(handler, mnode->addr, mnode->len);
	if (node) {
		ret = -EEXIST;
		goto unlock;
	}
	__mmu_int_rb_insert(mnode, &handler->root);
	list_add_tail(&mnode->list, &handler->lru_list);
	mnode->handler = handler;
unlock:
	spin_unlock_irqrestore(&handler->lock, flags);
	return ret;
}

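/*
 * Return the first cached node overlapping [addr, addr + len) and move it
 * to the tail of the LRU list so it is treated as most recently used.
 */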
/* Caller must hold handler lock */
struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
					  unsigned long addr, unsigned long len)
{
	struct mmu_rb_node *node;

	trace_hfi1_mmu_rb_search(addr, len);
	node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1);
	if (node)
		list_move_tail(&node->list, &handler->lru_list);
	return node;
}

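/*
 * Search the interval tree for a node overlapping [addr, addr + len),
 * honoring the handler's optional filter callback when one is provided.
 */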
/* Caller must hold handler lock */
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
					   unsigned long addr,
					   unsigned long len)
{
	struct mmu_rb_node *node = NULL;

	trace_hfi1_mmu_rb_search(addr, len);
	if (!handler->ops->filter) {
		node = __mmu_int_rb_iter_first(&handler->root, addr,
					       (addr + len) - 1);
	} else {
		for (node = __mmu_int_rb_iter_first(&handler->root, addr,
						    (addr + len) - 1);
		     node;
		     node = __mmu_int_rb_iter_next(node, addr,
						   (addr + len) - 1)) {
			if (handler->ops->filter(node, addr, len))
				return node;
		}
	}
	return node;
}

/*
 * Must NOT call while holding mnode->handler->lock.
 * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a
 * spinlock.
 */
static void release_immediate(struct kref *refcount)
{
	struct mmu_rb_node *mnode =
		container_of(refcount, struct mmu_rb_node, refcount);
	trace_hfi1_mmu_release_node(mnode);
	mnode->handler->ops->remove(mnode->handler->ops_arg, mnode);
}

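/*
 * kref_put() callback for paths that already hold handler->lock: defer the
 * remove callback by moving the node to del_list and queuing del_work.
 */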
/* Caller must hold mnode->handler->lock */
static void release_nolock(struct kref *refcount)
{
	struct mmu_rb_node *mnode =
		container_of(refcount, struct mmu_rb_node, refcount);
	list_move(&mnode->list, &mnode->handler->del_list);
	queue_work(mnode->handler->wq, &mnode->handler->del_work);
}

/*
 * struct mmu_rb_node->refcount kref_put() callback.
 * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues
 * handler->del_work on handler->wq.
 * Does not remove mmu_rb_node from handler->lru_list or handler->root.
 * Acquires mmu_rb_node->handler->lock; do not call while already holding
 * handler->lock.
 */
void hfi1_mmu_rb_release(struct kref *refcount)
{
	struct mmu_rb_node *mnode =
		container_of(refcount, struct mmu_rb_node, refcount);
	struct mmu_rb_handler *handler = mnode->handler;
	unsigned long flags;

	spin_lock_irqsave(&handler->lock, flags);
	list_move(&mnode->list, &mnode->handler->del_list);
	spin_unlock_irqrestore(&handler->lock, flags);
	queue_work(handler->wq, &handler->del_work);
}

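/*
 * Walk the LRU list and offer nodes to the caller's evict callback.  Nodes
 * with outstanding references are skipped; evicted nodes are removed from
 * the tree under the lock and released afterwards, outside the lock.
 */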
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
{
	struct mmu_rb_node *rbnode, *ptr;
	struct list_head del_list;
	unsigned long flags;
	bool stop = false;

	if (current->mm != handler->mn.mm)
		return;

	INIT_LIST_HEAD(&del_list);

	spin_lock_irqsave(&handler->lock, flags);
	list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) {
		/* refcount == 1 means the handler holds the only reference */
		if (kref_read(&rbnode->refcount) > 1)
			continue;

		if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
					&stop)) {
			__mmu_int_rb_remove(rbnode, &handler->root);
			/* move from LRU list to delete list */
			list_move(&rbnode->list, &del_list);
		}
		if (stop)
			break;
	}
	spin_unlock_irqrestore(&handler->lock, flags);

	list_for_each_entry_safe(rbnode, ptr, &del_list, list) {
		trace_hfi1_mmu_rb_evict(rbnode);
		kref_put(&rbnode->refcount, release_immediate);
	}
}

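/*
 * MMU notifier callback: the given virtual address range is being
 * invalidated, so remove every overlapping node from the tree and LRU list
 * and put its reference.  Runs under handler->lock, so the remove callbacks
 * are deferred to the workqueue via release_nolock().
 */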
static int mmu_notifier_range_start(struct mmu_notifier *mn,
		const struct mmu_notifier_range *range)
{
	struct mmu_rb_handler *handler =
		container_of(mn, struct mmu_rb_handler, mn);
	struct rb_root_cached *root = &handler->root;
	struct mmu_rb_node *node, *ptr = NULL;
	unsigned long flags;

	spin_lock_irqsave(&handler->lock, flags);
	for (node = __mmu_int_rb_iter_first(root, range->start, range->end - 1);
	     node; node = ptr) {
		/* Guard against node removal. */
		ptr = __mmu_int_rb_iter_next(node, range->start,
					     range->end - 1);
		trace_hfi1_mmu_mem_invalidate(node);
		/* Remove from rb tree and lru_list. */
		__mmu_int_rb_remove(node, root);
		list_del_init(&node->list);
		kref_put(&node->refcount, release_nolock);
	}
	spin_unlock_irqrestore(&handler->lock, flags);

	return 0;
}

/*
 * Work queue function to remove all nodes that have been queued up to
 * be removed.  The key feature is that mm->mmap_lock is not being held
 * and the remove callback can sleep while taking it, if needed.
 */
static void handle_remove(struct work_struct *work)
{
	struct mmu_rb_handler *handler = container_of(work,
						struct mmu_rb_handler,
						del_work);
	struct list_head del_list;
	unsigned long flags;
	struct mmu_rb_node *node;

	/* remove anything that is queued to get removed */
	spin_lock_irqsave(&handler->lock, flags);
	list_replace_init(&handler->del_list, &del_list);
	spin_unlock_irqrestore(&handler->lock, flags);

	while (!list_empty(&del_list)) {
		node = list_first_entry(&del_list, struct mmu_rb_node, list);
		list_del(&node->list);
		trace_hfi1_mmu_release_node(node);
		handler->ops->remove(handler->ops_arg, node);
	}
}