/*-
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/kern/kern_rangelock.c 355690 2019-12-13 04:03:03Z kevans $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/systm.h>

#include <vm/uma.h>
/*
 * One queued range-lock request.  Entries live on the rangelock's
 * rl_waiters queue from enqueue until unlock, whether granted or still
 * waiting.
 */
struct rl_q_entry {
	TAILQ_ENTRY(rl_q_entry) rl_q_link;	/* Link on rl_waiters. */
	off_t		rl_q_start, rl_q_end;	/* Locked byte range; end is exclusive (see ranges_overlap()). */
	int		rl_q_flags;		/* RL_LOCK_{READ,WRITE,GRANTED}. */
};

/* UMA zone backing rl_q_entry allocations. */
static uma_zone_t rl_entry_zone;
47236317Skib
/*
 * Create the rl_q_entry zone at boot, once lock primitives are up
 * (SI_SUB_LOCK).
 */
static void
rangelock_sys_init(void)
{

	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
56236317Skib
57236317Skibstatic struct rl_q_entry *
58236317Skibrlqentry_alloc(void)
59236317Skib{
60236317Skib
61236317Skib	return (uma_zalloc(rl_entry_zone, M_WAITOK));
62236317Skib}
63236317Skib
/*
 * Return a queue entry to the zone.  Exported because the per-thread
 * cached entry (td_rlqe) is freed from thread teardown code.
 */
void
rlqentry_free(struct rl_q_entry *rleq)
{

	uma_zfree(rl_entry_zone, rleq);
}
70236317Skib
71236317Skibvoid
72236317Skibrangelock_init(struct rangelock *lock)
73236317Skib{
74236317Skib
75236317Skib	TAILQ_INIT(&lock->rl_waiters);
76236317Skib	lock->rl_currdep = NULL;
77236317Skib}
78236317Skib
/*
 * Tear down a rangelock.  All requests must have been released by now;
 * a leftover waiter would sleep forever.
 */
void
rangelock_destroy(struct rangelock *lock)
{

	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
}
85236317Skib
/*
 * Return non-zero if the byte ranges of the two entries intersect.
 * Ranges are half-open [rl_q_start, rl_q_end), so ranges that merely
 * touch end-to-start do not overlap.  (Lock-mode compatibility is
 * decided by the caller, rangelock_calc_block(); this helper looks at
 * the ranges only.)
 */
static int
ranges_overlap(const struct rl_q_entry *e1,
    const struct rl_q_entry *e2)
{

	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
		return (1);
	return (0);
}
99236317Skib
/*
 * Recalculate the lock->rl_currdep after an unlock (or after a new
 * request was queued): walk the waiters starting at rl_currdep and
 * grant every request that does not conflict with already-granted
 * locks, waking each grantee.  Stops at the first request that must
 * keep waiting and records it in rl_currdep.
 *
 * Relies on the invariant that all granted write locks are kept at the
 * head of rl_waiters, ahead of every read entry; the read-case scan
 * below depends on it for termination and correctness.
 */
static void
rangelock_calc_block(struct rangelock *lock)
{
	struct rl_q_entry *entry, *nextentry, *entry1;

	for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
		nextentry = TAILQ_NEXT(entry, rl_q_link);
		if (entry->rl_q_flags & RL_LOCK_READ) {
			/* Reads must not overlap with granted writes. */
			/*
			 * Scan the granted writes at the queue head; the
			 * loop ends at the first read entry, which is at
			 * the latest 'entry' itself, so entry1 cannot run
			 * off the list.
			 */
			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
			    !(entry1->rl_q_flags & RL_LOCK_READ);
			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
				if (ranges_overlap(entry, entry1))
					goto out;
			}
		} else {
			/* Write must not overlap with any granted locks. */
			/* Everything queued before 'entry' is granted. */
			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
			    entry1 != entry;
			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
				if (ranges_overlap(entry, entry1))
					goto out;
			}

			/* Move grantable write locks to the front. */
			TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
			TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
		}

		/* Grant this lock. */
		entry->rl_q_flags |= RL_LOCK_GRANTED;
		wakeup(entry);
	}
out:
	lock->rl_currdep = entry;
}
139236317Skib
140236317Skibstatic void
141236317Skibrangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
142236317Skib    struct mtx *ilk)
143236317Skib{
144236317Skib
145236317Skib	MPASS(lock != NULL && entry != NULL && ilk != NULL);
146236317Skib	mtx_assert(ilk, MA_OWNED);
147236317Skib	KASSERT(entry != lock->rl_currdep, ("stuck currdep"));
148236317Skib
149236317Skib	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
150236317Skib	rangelock_calc_block(lock);
151236317Skib	mtx_unlock(ilk);
152236317Skib	if (curthread->td_rlqe == NULL)
153236317Skib		curthread->td_rlqe = entry;
154236317Skib	else
155236317Skib		rlqentry_free(entry);
156236317Skib}
157236317Skib
/*
 * Release a lock identified by the cookie returned from
 * rangelock_{r,w}lock().  Takes the interlock; it is dropped inside
 * rangelock_unlock_locked().
 */
void
rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
{

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);

	mtx_lock(ilk);
	rangelock_unlock_locked(lock, cookie, ilk);
}
167236317Skib
168236317Skib/*
169236317Skib * Unlock the sub-range of granted lock.
170236317Skib */
171236317Skibvoid *
172236317Skibrangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
173236317Skib    off_t end, struct mtx *ilk)
174236317Skib{
175236317Skib	struct rl_q_entry *entry;
176236317Skib
177236317Skib	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
178236317Skib	entry = cookie;
179236317Skib	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
180236317Skib	    ("Unlocking non-granted lock"));
181236317Skib	KASSERT(entry->rl_q_start == start, ("wrong start"));
182236317Skib	KASSERT(entry->rl_q_end >= end, ("wrong end"));
183236317Skib
184236317Skib	mtx_lock(ilk);
185236317Skib	if (entry->rl_q_end == end) {
186236317Skib		rangelock_unlock_locked(lock, cookie, ilk);
187236317Skib		return (NULL);
188236317Skib	}
189236317Skib	entry->rl_q_end = end;
190236317Skib	rangelock_calc_block(lock);
191236317Skib	mtx_unlock(ilk);
192236317Skib	return (cookie);
193236317Skib}
194236317Skib
195236317Skib/*
196236317Skib * Add the lock request to the queue of the pending requests for
197236317Skib * rangelock.  Sleep until the request can be granted.
198236317Skib */
199236317Skibstatic void *
200236317Skibrangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
201236317Skib    struct mtx *ilk)
202236317Skib{
203236317Skib	struct rl_q_entry *entry;
204236317Skib	struct thread *td;
205236317Skib
206236317Skib	MPASS(lock != NULL && ilk != NULL);
207236317Skib
208236317Skib	td = curthread;
209236317Skib	if (td->td_rlqe != NULL) {
210236317Skib		entry = td->td_rlqe;
211236317Skib		td->td_rlqe = NULL;
212236317Skib	} else
213236317Skib		entry = rlqentry_alloc();
214236317Skib	MPASS(entry != NULL);
215236317Skib	entry->rl_q_flags = mode;
216236317Skib	entry->rl_q_start = start;
217236317Skib	entry->rl_q_end = end;
218236317Skib
219236317Skib	mtx_lock(ilk);
220236317Skib	/*
221236317Skib	 * XXXKIB TODO. Check that a thread does not try to enqueue a
222236317Skib	 * lock that is incompatible with another request from the same
223236317Skib	 * thread.
224236317Skib	 */
225236317Skib
226236317Skib	TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
227236317Skib	if (lock->rl_currdep == NULL)
228236317Skib		lock->rl_currdep = entry;
229236317Skib	rangelock_calc_block(lock);
230236317Skib	while (!(entry->rl_q_flags & RL_LOCK_GRANTED))
231236317Skib		msleep(entry, ilk, 0, "range", 0);
232236317Skib	mtx_unlock(ilk);
233236317Skib	return (entry);
234236317Skib}
235236317Skib
/*
 * Acquire a read (shared) lock on the byte range [start, end); returns
 * the cookie to pass to rangelock_unlock().  May sleep.
 */
void *
rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk));
}
242236317Skib
/*
 * Acquire a write (exclusive) lock on the byte range [start, end);
 * returns the cookie to pass to rangelock_unlock().  May sleep.
 */
void *
rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk));
}
249355690Skevans
#ifdef INVARIANT_SUPPORT
/*
 * Back-end for the rangelock_cookie_assert() macro: panic unless the
 * cookie's flags satisfy the requested assertion (RCA_LOCKED,
 * RCA_RLOCKED, or RCA_WLOCKED).
 */
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
	struct rl_q_entry *entry;
	int flags;

	MPASS(cookie != NULL);
	entry = cookie;
	flags = entry->rl_q_flags;
	if (what == RCA_LOCKED) {
		/* Any granted lock, read or write. */
		if ((flags & RL_LOCK_GRANTED) == 0)
			panic("rangelock not held @ %s:%d\n", file, line);
	} else if (what == RCA_RLOCKED) {
		if ((flags & (RL_LOCK_GRANTED | RL_LOCK_READ)) !=
		    (RL_LOCK_GRANTED | RL_LOCK_READ))
			panic("rangelock not rlocked @ %s:%d\n", file, line);
	} else if (what == RCA_WLOCKED) {
		if ((flags & (RL_LOCK_GRANTED | RL_LOCK_WRITE)) !=
		    (RL_LOCK_GRANTED | RL_LOCK_WRITE))
			panic("rangelock not wlocked @ %s:%d\n", file, line);
	} else {
		panic("Unknown rangelock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif	/* INVARIANT_SUPPORT */
281