/*-
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/kern/kern_rangelock.c 355690 2019-12-13 04:03:03Z kevans $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/systm.h>

#include <vm/uma.h>

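/*
 * A queued range lock request.  Each entry describes the byte range
 * [rl_q_start, rl_q_end) and whether the request is for read or for
 * write.  Granted requests stay on the same waiters list and are
 * marked with RL_LOCK_GRANTED in rl_q_flags.
 */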
struct rl_q_entry {
	TAILQ_ENTRY(rl_q_entry) rl_q_link;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
};

static uma_zone_t rl_entry_zone;

static void
rangelock_sys_init(void)
{

	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(void)
{

	return (uma_zalloc(rl_entry_zone, M_WAITOK));
}

void
rlqentry_free(struct rl_q_entry *rleq)
{

	uma_zfree(rl_entry_zone, rleq);
}

void
rangelock_init(struct rangelock *lock)
{

	TAILQ_INIT(&lock->rl_waiters);
	lock->rl_currdep = NULL;
}

void
rangelock_destroy(struct rangelock *lock)
{

	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
}
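
/*
 * Typical consumer lifecycle, as a rough sketch (the "struct foo"
 * container and its interlock below are hypothetical; the vnode code
 * is the usual in-tree consumer):
 *
 *	struct foo {
 *		struct mtx	 f_interlock;
 *		struct rangelock f_rl;
 *	};
 *
 *	rangelock_init(&f->f_rl);
 *	cookie = rangelock_wlock(&f->f_rl, off, off + len, &f->f_interlock);
 *	... exclusive access to bytes [off, off + len) ...
 *	rangelock_unlock(&f->f_rl, cookie, &f->f_interlock);
 *	rangelock_destroy(&f->f_rl);
 *
 * The interlock mutex protects the rangelock itself; it is acquired
 * and dropped internally by the functions below, so the caller must
 * not hold it around these calls.
 */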

/*
 * Two entries are compatible if their ranges do not overlap, or if
 * both entries are read requests.
 */
static int
ranges_overlap(const struct rl_q_entry *e1,
    const struct rl_q_entry *e2)
{

	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
		return (1);
	return (0);
}
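
/*
 * Illustration of the overlap test above (ranges are half-open,
 * [start, end)):
 *
 *	[0, 10) and [5, 15)  overlap:     0 < 15 && 10 > 5
 *	[0, 10) and [10, 20) do not:      0 < 20, but 10 > 10 is false
 */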

/*
 * Recalculate lock->rl_currdep after an unlock: walk the not yet
 * granted requests starting at rl_currdep and grant them, in queue
 * order, until a request is found that conflicts with an earlier
 * entry; that request becomes the new rl_currdep.
 */
static void
rangelock_calc_block(struct rangelock *lock)
{
	struct rl_q_entry *entry, *nextentry, *entry1;

	for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
		nextentry = TAILQ_NEXT(entry, rl_q_link);
		if (entry->rl_q_flags & RL_LOCK_READ) {
			/* Reads must not overlap with granted writes. */
			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
			    !(entry1->rl_q_flags & RL_LOCK_READ);
			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
				if (ranges_overlap(entry, entry1))
					goto out;
			}
		} else {
			/* A write must not overlap with any granted locks. */
			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
			    entry1 != entry;
			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
				if (ranges_overlap(entry, entry1))
					goto out;
			}

			/* Move grantable write locks to the front. */
			TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
			TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
		}

		/* Grant this lock. */
		entry->rl_q_flags |= RL_LOCK_GRANTED;
		wakeup(entry);
	}
out:
	lock->rl_currdep = entry;
}
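
/*
 * Example of the granting pass above (hypothetical queue contents):
 * with granted write [0, 10) at the head and pending requests
 * read [20, 30), read [5, 15), write [40, 50) queued behind it, the
 * pass grants read [20, 30), then stops at read [5, 15) because it
 * overlaps the granted write, leaving rl_currdep pointing at it.
 * Write [40, 50) stays queued even though it overlaps nothing, which
 * preserves FIFO fairness for the blocked reader.
 */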

static void
rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
    struct mtx *ilk)
{

	MPASS(lock != NULL && entry != NULL && ilk != NULL);
	mtx_assert(ilk, MA_OWNED);
	KASSERT(entry != lock->rl_currdep, ("stuck currdep"));

	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
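	/*
	 * Recycle the entry into the per-thread cache slot if it is
	 * empty, so that a later rangelock_enqueue() by this thread
	 * can avoid a UMA allocation; otherwise free it.
	 */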
	if (curthread->td_rlqe == NULL)
		curthread->td_rlqe = entry;
	else
		rlqentry_free(entry);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
{

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);

	mtx_lock(ilk);
	rangelock_unlock_locked(lock, cookie, ilk);
}

/*
 * Unlock a trailing sub-range of a granted lock: the held range is
 * shrunk from [start, old end) to [start, end).  Returns NULL if the
 * whole range was unlocked, otherwise returns the cookie for the
 * remaining range.
 */
void *
rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
    off_t end, struct mtx *ilk)
{
	struct rl_q_entry *entry;

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
	entry = cookie;
	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
	    ("Unlocking non-granted lock"));
	KASSERT(entry->rl_q_start == start, ("wrong start"));
	KASSERT(entry->rl_q_end >= end, ("wrong end"));

	mtx_lock(ilk);
	if (entry->rl_q_end == end) {
		rangelock_unlock_locked(lock, cookie, ilk);
		return (NULL);
	}
	entry->rl_q_end = end;
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
	return (cookie);
}
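
/*
 * Sketch of a partial unlock (the offsets are made up for
 * illustration): a writer that locked [0, 1M) can release the tail
 * and keep [0, 512K) locked:
 *
 *	cookie = rangelock_wlock(lock, 0, 1024 * 1024, ilk);
 *	...
 *	cookie = rangelock_unlock_range(lock, cookie, 0, 512 * 1024, ilk);
 */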

/*
 * Add the lock request to the queue of pending requests for the
 * rangelock and sleep until the request can be granted.
 */
static void *
rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
    struct mtx *ilk)
{
	struct rl_q_entry *entry;
	struct thread *td;

	MPASS(lock != NULL && ilk != NULL);

	td = curthread;
	if (td->td_rlqe != NULL) {
		entry = td->td_rlqe;
		td->td_rlqe = NULL;
	} else
		entry = rlqentry_alloc();
	MPASS(entry != NULL);
	entry->rl_q_flags = mode;
	entry->rl_q_start = start;
	entry->rl_q_end = end;

	mtx_lock(ilk);
	/*
	 * XXXKIB TODO. Check that a thread does not try to enqueue a
	 * lock that is incompatible with another request from the same
	 * thread.
	 */

	TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
	if (lock->rl_currdep == NULL)
		lock->rl_currdep = entry;
	rangelock_calc_block(lock);
	while (!(entry->rl_q_flags & RL_LOCK_GRANTED))
		msleep(entry, ilk, 0, "range", 0);
	mtx_unlock(ilk);
	return (entry);
}

void *
rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk));
}

void *
rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk));
}
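
/*
 * Note: in-tree consumers typically do not call these functions
 * directly but use the vn_rangelock_*() wrapper macros from
 * sys/vnode.h, which supply the vnode's rangelock (v_rl) and its
 * interlock, e.g. (sketch):
 *
 *	cookie = vn_rangelock_wlock(vp, start, end);
 *	...
 *	vn_rangelock_unlock(vp, cookie);
 */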

#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
	struct rl_q_entry *entry;
	int flags;

	MPASS(cookie != NULL);
	entry = cookie;
	flags = entry->rl_q_flags;
	switch (what) {
	case RCA_LOCKED:
		if ((flags & RL_LOCK_GRANTED) == 0)
			panic("rangelock not held @ %s:%d\n", file, line);
		break;
	case RCA_RLOCKED:
		if ((flags & (RL_LOCK_GRANTED | RL_LOCK_READ)) !=
		    (RL_LOCK_GRANTED | RL_LOCK_READ))
			panic("rangelock not rlocked @ %s:%d\n", file, line);
		break;
	case RCA_WLOCKED:
		if ((flags & (RL_LOCK_GRANTED | RL_LOCK_WRITE)) !=
		    (RL_LOCK_GRANTED | RL_LOCK_WRITE))
			panic("rangelock not wlocked @ %s:%d\n", file, line);
		break;
	default:
		panic("Unknown rangelock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif	/* INVARIANT_SUPPORT */