/*-
 * Copyright (c) 2016 Matthew Macy (mmacy@mattmacy.io)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/compat/linuxkpi/common/src/linux_rcu.c 328653 2018-02-01 13:01:44Z hselasky $");

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/kdb.h>

#include <ck_epoch.h>

#include <linux/rcupdate.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/compat.h>

/*
 * When CONFIG_NO_RCU_SKIP is defined, LinuxKPI RCU locks and asserts
 * will not be skipped during panic().
 */
#ifdef CONFIG_NO_RCU_SKIP
#define	RCU_SKIP(void) 0
#else
#define	RCU_SKIP(void)	unlikely(SCHEDULER_STOPPED() || kdb_active)
#endif

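/*
 * Deferred RCU callbacks are queued on the global "linux_epoch_head"
 * defined below and are dispatched from a taskqueue task, while each
 * CPU owns a "linux_epoch_record" combining its ck_epoch record with
 * the list of threads currently inside an RCU read-side section on
 * that CPU.
 */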
struct callback_head {
	STAILQ_ENTRY(callback_head) entry;
	rcu_callback_t func;
};

struct linux_epoch_head {
	STAILQ_HEAD(, callback_head) cb_head;
	struct mtx lock;
	struct task task;
} __aligned(CACHE_LINE_SIZE);

struct linux_epoch_record {
	ck_epoch_record_t epoch_record;
	TAILQ_HEAD(, task_struct) ts_head;
	int cpuid;
} __aligned(CACHE_LINE_SIZE);

/*
 * Verify that "struct rcu_head" is big enough to hold "struct
 * callback_head". This has been done to avoid having to add special
 * compile flags for including ck_epoch.h to all clients of the
 * LinuxKPI.
 */
CTASSERT(sizeof(struct rcu_head) == sizeof(struct callback_head));

/*
 * Verify that "epoch_record" is at the beginning of "struct
 * linux_epoch_record":
 */
CTASSERT(offsetof(struct linux_epoch_record, epoch_record) == 0);

static ck_epoch_t linux_epoch;
static struct linux_epoch_head linux_epoch_head;
static DPCPU_DEFINE(struct linux_epoch_record, linux_epoch_record);

static void linux_rcu_cleaner_func(void *, int);

static void
linux_rcu_runtime_init(void *arg __unused)
{
	struct linux_epoch_head *head;
	int i;

	ck_epoch_init(&linux_epoch);

	head = &linux_epoch_head;

	mtx_init(&head->lock, "LRCU-HEAD", NULL, MTX_DEF);
	TASK_INIT(&head->task, 0, linux_rcu_cleaner_func, NULL);
	STAILQ_INIT(&head->cb_head);

	CPU_FOREACH(i) {
		struct linux_epoch_record *record;

		record = &DPCPU_ID_GET(i, linux_epoch_record);

		record->cpuid = i;
		ck_epoch_register(&linux_epoch, &record->epoch_record, NULL);
		TAILQ_INIT(&record->ts_head);
	}
}
SYSINIT(linux_rcu_runtime, SI_SUB_CPU, SI_ORDER_ANY, linux_rcu_runtime_init, NULL);

static void
linux_rcu_runtime_uninit(void *arg __unused)
{
	struct linux_epoch_head *head;

	head = &linux_epoch_head;

	/* destroy head lock */
	mtx_destroy(&head->lock);
}
SYSUNINIT(linux_rcu_runtime, SI_SUB_LOCK, SI_ORDER_SECOND, linux_rcu_runtime_uninit, NULL);

static void
linux_rcu_cleaner_func(void *context __unused, int pending __unused)
{
	struct linux_epoch_head *head;
	struct callback_head *rcu;
	STAILQ_HEAD(, callback_head) tmp_head;

	linux_set_current(curthread);

	head = &linux_epoch_head;

	/* move the current callbacks onto a local queue */
	mtx_lock(&head->lock);
	STAILQ_INIT(&tmp_head);
	STAILQ_CONCAT(&tmp_head, &head->cb_head);
	mtx_unlock(&head->lock);

	/* wait for a full grace period */
	linux_synchronize_rcu();

	/* dispatch all callbacks, if any */
	while ((rcu = STAILQ_FIRST(&tmp_head)) != NULL) {
		uintptr_t offset;

		STAILQ_REMOVE_HEAD(&tmp_head, entry);

		offset = (uintptr_t)rcu->func;

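		/*
		 * Callback values below LINUX_KFREE_RCU_OFFSET_MAX are
		 * not function pointers but the offset of the RCU head
		 * within its enclosing allocation, as queued by the
		 * kfree_rcu() macro; such entries are simply freed.
		 */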
		if (offset < LINUX_KFREE_RCU_OFFSET_MAX)
			kfree((char *)rcu - offset);
		else
			rcu->func((struct rcu_head *)rcu);
	}
}

void
linux_rcu_read_lock(void)
{
	struct linux_epoch_record *record;
	struct task_struct *ts;

	if (RCU_SKIP())
		return;

	/*
	 * Pin the thread to the current CPU so that the unlock code
	 * gets the same per-CPU epoch record:
	 */
	sched_pin();

	record = &DPCPU_GET(linux_epoch_record);
	ts = current;

	/*
	 * Use a critical section to prevent recursion inside
	 * ck_epoch_begin(). Apart from that, this function supports
	 * recursion.
	 */
	critical_enter();
	ck_epoch_begin(&record->epoch_record, NULL);
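	/*
	 * Track the readers on each CPU so that
	 * linux_synchronize_rcu_cb() can find the threads blocking a
	 * grace period; only the outermost read section inserts the
	 * thread into the list.
	 */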
	ts->rcu_recurse++;
	if (ts->rcu_recurse == 1)
		TAILQ_INSERT_TAIL(&record->ts_head, ts, rcu_entry);
	critical_exit();
}

void
linux_rcu_read_unlock(void)
{
	struct linux_epoch_record *record;
	struct task_struct *ts;

	if (RCU_SKIP())
		return;

	record = &DPCPU_GET(linux_epoch_record);
	ts = current;

	/*
	 * Use a critical section to prevent recursion inside
	 * ck_epoch_end(). Apart from that, this function supports
	 * recursion.
	 */
	critical_enter();
	ck_epoch_end(&record->epoch_record, NULL);
	ts->rcu_recurse--;
	if (ts->rcu_recurse == 0)
		TAILQ_REMOVE(&record->ts_head, ts, rcu_entry);
	critical_exit();

	sched_unpin();
}

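/*
 * ck_epoch_synchronize_wait() invokes this callback while a grace
 * period cannot yet complete. If the blocking readers run on the
 * current CPU, the calling thread either briefly sleeps (when a reader
 * is itself sleeping) or lowers its priority and yields; otherwise it
 * binds itself to the blocking CPU at the highest priority so that the
 * readers there get to finish.
 */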
static void
linux_synchronize_rcu_cb(ck_epoch_t *epoch __unused, ck_epoch_record_t *epoch_record, void *arg __unused)
{
	struct linux_epoch_record *record =
	    container_of(epoch_record, struct linux_epoch_record, epoch_record);
	struct thread *td = curthread;
	struct task_struct *ts;

	/* check if blocked on the current CPU */
	if (record->cpuid == PCPU_GET(cpuid)) {
		bool is_sleeping = false;
		u_char prio = 0;

		/*
		 * Determine the lowest priority among the threads
		 * blocking synchronization on this CPU core, and
		 * whether any of them is sleeping. All the threads in
		 * the queue are CPU-pinned and cannot go anywhere
		 * while the current thread is locked.
		 */
		TAILQ_FOREACH(ts, &record->ts_head, rcu_entry) {
			if (ts->task_thread->td_priority > prio)
				prio = ts->task_thread->td_priority;
			is_sleeping |= (ts->task_thread->td_inhibitors != 0);
		}

		if (is_sleeping) {
			thread_unlock(td);
			pause("W", 1);
			thread_lock(td);
		} else {
			/* set new thread priority */
			sched_prio(td, prio);
			/* task switch */
			mi_switch(SW_VOL | SWT_RELINQUISH, NULL);

			/*
			 * Release the thread lock while yielding to
			 * allow other threads to acquire the lock
			 * pointed to by TDQ_LOCKPTR(td). Otherwise a
			 * deadlock-like situation might happen.
			 */
			thread_unlock(td);
			thread_lock(td);
		}
	} else {
		/*
		 * To avoid spinning, move execution to the other CPU
		 * which is blocking synchronization. Set the highest
		 * thread priority so that the code gets to run. The
		 * thread priority will be restored later.
		 */
		sched_prio(td, 0);
		sched_bind(td, record->cpuid);
	}
}

void
linux_synchronize_rcu(void)
{
	struct thread *td;
	int was_bound;
	int old_cpu;
	int old_pinned;
	u_char old_prio;

	if (RCU_SKIP())
		return;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "linux_synchronize_rcu() can sleep");

	td = curthread;

	DROP_GIANT();

	/*
	 * Synchronizing RCU might change the CPU core this function
	 * is running on. Save current values:
	 */
	thread_lock(td);

	old_cpu = PCPU_GET(cpuid);
	old_pinned = td->td_pinned;
	old_prio = td->td_priority;
	was_bound = sched_is_bound(td);
	sched_unbind(td);
	td->td_pinned = 0;
	sched_bind(td, old_cpu);

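	/*
	 * Wait for all active readers to drain. The wait callback
	 * above may temporarily change this thread's priority and CPU
	 * binding; both are restored further below.
	 */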
	ck_epoch_synchronize_wait(&linux_epoch,
	    &linux_synchronize_rcu_cb, NULL);

	/* restore CPU binding, if any */
	if (was_bound != 0) {
		sched_bind(td, old_cpu);
	} else {
		/* get thread back to initial CPU, if any */
		if (old_pinned != 0)
			sched_bind(td, old_cpu);
		sched_unbind(td);
	}
	/* restore pinned after bind */
	td->td_pinned = old_pinned;

	/* restore thread priority */
	sched_prio(td, old_prio);
	thread_unlock(td);

	PICKUP_GIANT();
}

void
linux_rcu_barrier(void)
{
	struct linux_epoch_head *head;

	linux_synchronize_rcu();

	head = &linux_epoch_head;

	/* wait for callbacks to complete */
	taskqueue_drain(taskqueue_fast, &head->task);
}

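/*
 * Queue an RCU callback. The callback is invoked from the
 * taskqueue_fast cleaner task above once a full grace period has
 * elapsed.
 */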
void
linux_call_rcu(struct rcu_head *context, rcu_callback_t func)
{
	struct callback_head *rcu = (struct callback_head *)context;
	struct linux_epoch_head *head = &linux_epoch_head;

	mtx_lock(&head->lock);
	rcu->func = func;
	STAILQ_INSERT_TAIL(&head->cb_head, rcu, entry);
	taskqueue_enqueue(taskqueue_fast, &head->task);
	mtx_unlock(&head->lock);
}

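/*
 * The SRCU entry points below are thin wrappers around the regular
 * LinuxKPI RCU primitives; "struct srcu_struct" carries no state of
 * its own in this implementation.
 */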
int
init_srcu_struct(struct srcu_struct *srcu)
{
	return (0);
}

void
cleanup_srcu_struct(struct srcu_struct *srcu)
{
}

int
srcu_read_lock(struct srcu_struct *srcu)
{
	linux_rcu_read_lock();
	return (0);
}

void
srcu_read_unlock(struct srcu_struct *srcu, int key __unused)
{
	linux_rcu_read_unlock();
}

void
synchronize_srcu(struct srcu_struct *srcu)
{
	linux_synchronize_rcu();
}

void
srcu_barrier(struct srcu_struct *srcu)
{
	linux_rcu_barrier();
}