/*-
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/systm.h>

#include <vm/uma.h>

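/*
 * Byte-range locks.  A rangelock protects ranges [rl_q_start, rl_q_end)
 * of a backing object (typically a file).  Any number of read requests
 * may be granted for overlapping ranges, while a write request excludes
 * every other request that overlaps it.  Requests are queued on
 * rl_waiters and granted strictly in FIFO order, so a steady stream of
 * readers cannot starve a writer.
 *
 * The lock itself is protected by an external interlock mutex passed
 * into every operation; for vnodes this is normally the vnode
 * interlock, hidden behind the vn_rangelock_*() wrappers in
 * sys/vnode.h.  A sketch of a typical caller, assuming those wrappers'
 * conventions (&vp->v_rl as the lock, VI_MTX(vp) as the interlock):
 *
 *	void *cookie;
 *
 *	cookie = rangelock_wlock(&vp->v_rl, off, off + len, VI_MTX(vp));
 *	... access the byte range [off, off + len) ...
 *	rangelock_unlock(&vp->v_rl, cookie, VI_MTX(vp));
 */

/*
 * A queued lock request.  The entry records the requested byte range
 * [rl_q_start, rl_q_end) and the request flags: the lock type
 * (RL_LOCK_READ or RL_LOCK_WRITE) plus RL_LOCK_GRANTED once the
 * request has been granted.
 */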
struct rl_q_entry {
	TAILQ_ENTRY(rl_q_entry) rl_q_link;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
};

static uma_zone_t rl_entry_zone;

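/*
 * Create the UMA zone from which rl_q_entry structures are allocated.
 * Invoked once at boot by the SYSINIT() below.
 */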
static void
rangelock_sys_init(void)
{

	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

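/*
 * Allocate a queue entry.  The M_WAITOK allocation may sleep, so
 * callers must not hold the interlock across this call.
 */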
static struct rl_q_entry *
rlqentry_alloc(void)
{

	return (uma_zalloc(rl_entry_zone, M_WAITOK));
}

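/*
 * Return a queue entry to the zone.  This is also used to free the
 * per-thread cached entry (td_rlqe), which is why it is not static.
 */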
void
rlqentry_free(struct rl_q_entry *rleq)
{

	uma_zfree(rl_entry_zone, rleq);
}

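/*
 * Initialize a range lock: an empty waiters queue and nothing blocked.
 */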
void
rangelock_init(struct rangelock *lock)
{

	TAILQ_INIT(&lock->rl_waiters);
	lock->rl_currdep = NULL;
}

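/*
 * Destroy a range lock.  No granted or pending requests may remain on
 * the queue.
 */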
void
rangelock_destroy(struct rangelock *lock)
{

	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
}

/*
 * Check the supplied rl_q_entries for compatibility.  Returns true
 * (non-zero) if the entries conflict and may not be granted at the
 * same time, false (zero) if they are compatible.
 *
 * Two entries are compatible if their ranges do not overlap, or if
 * both entries are for read.
 */
static int
rangelock_incompatible(const struct rl_q_entry *e1,
    const struct rl_q_entry *e2)
{

	if ((e1->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ &&
	    (e2->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ)
		return (0);
	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
		return (1);
	return (0);
}

/*
 * Recalculate lock->rl_currdep after the queue of waiters has changed
 * (an unlock, a range trim, or a newly enqueued request).
 * rl_currdep points to the first waiter that conflicts with an entry
 * queued before it and therefore cannot be granted yet; every waiter
 * ahead of it is compatible with all of its predecessors.  Grant and
 * wake up the waiters in front of the new rl_currdep.
 */
static void
rangelock_calc_block(struct rangelock *lock)
{
	struct rl_q_entry *entry, *entry1, *whead;

	/* The head of the queue has no predecessors and is always grantable. */
	if (lock->rl_currdep == TAILQ_FIRST(&lock->rl_waiters) &&
	    lock->rl_currdep != NULL)
		lock->rl_currdep = TAILQ_NEXT(lock->rl_currdep, rl_q_link);
	/*
	 * Scan forward for the first waiter that conflicts with an
	 * entry queued before it; it becomes the new rl_currdep.
	 */
	for (entry = lock->rl_currdep; entry != NULL;
	     entry = TAILQ_NEXT(entry, rl_q_link)) {
		TAILQ_FOREACH(entry1, &lock->rl_waiters, rl_q_link) {
			if (rangelock_incompatible(entry, entry1))
				goto out;
			if (entry1 == entry)
				break;
		}
	}
out:
	lock->rl_currdep = entry;
	/* Grant and wake up every waiter queued before rl_currdep. */
	TAILQ_FOREACH(whead, &lock->rl_waiters, rl_q_link) {
		if (whead == lock->rl_currdep)
			break;
		if (!(whead->rl_q_flags & RL_LOCK_GRANTED)) {
			whead->rl_q_flags |= RL_LOCK_GRANTED;
			wakeup(whead);
		}
	}
}

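/*
 * Remove a granted entry from the waiters queue, recalculate
 * rl_currdep and wake up any newly grantable requests, then drop the
 * interlock.  The entry is cached in the current thread's td_rlqe
 * slot for reuse, or freed if the slot is already occupied.
 */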
static void
rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
    struct mtx *ilk)
{

	MPASS(lock != NULL && entry != NULL && ilk != NULL);
	mtx_assert(ilk, MA_OWNED);
	KASSERT(entry != lock->rl_currdep, ("stuck currdep"));

	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
	if (curthread->td_rlqe == NULL)
		curthread->td_rlqe = entry;
	else
		rlqentry_free(entry);
}

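/*
 * Release a granted range lock.  The cookie is the value returned by
 * rangelock_rlock() or rangelock_wlock().
 */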
void
rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
{

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);

	mtx_lock(ilk);
	rangelock_unlock_locked(lock, cookie, ilk);
}

/*
 * Unlock a sub-range of a granted lock.  The granted range must begin
 * at start, and end must not be past its current end.  If end equals
 * the current end, the whole lock is released and NULL is returned;
 * otherwise the granted range is trimmed to [start, end) and the
 * cookie for the remaining range is returned.
 */
void *
rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
    off_t end, struct mtx *ilk)
{
	struct rl_q_entry *entry;

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
	entry = cookie;
	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
	    ("Unlocking non-granted lock"));
	KASSERT(entry->rl_q_start == start, ("wrong start"));
	KASSERT(entry->rl_q_end >= end, ("wrong end"));

	mtx_lock(ilk);
	if (entry->rl_q_end == end) {
		rangelock_unlock_locked(lock, cookie, ilk);
		return (NULL);
	}
	entry->rl_q_end = end;
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
	return (cookie);
}

/*
 * Add the lock request to the queue of pending requests for the
 * rangelock, then sleep until the request can be granted.  The queue
 * entry is taken from the per-thread cache (td_rlqe) when available,
 * otherwise it is allocated from the zone.
 */
static void *
rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
    struct mtx *ilk)
{
	struct rl_q_entry *entry;
	struct thread *td;

	MPASS(lock != NULL && ilk != NULL);

	td = curthread;
	if (td->td_rlqe != NULL) {
		/* Reuse the entry cached by a previous unlock. */
		entry = td->td_rlqe;
		td->td_rlqe = NULL;
	} else
		entry = rlqentry_alloc();
	MPASS(entry != NULL);
	entry->rl_q_flags = mode;
	entry->rl_q_start = start;
	entry->rl_q_end = end;

	mtx_lock(ilk);
	/*
	 * XXXKIB TODO. Check that a thread does not try to enqueue a
	 * lock that is incompatible with another request from the same
	 * thread.
	 */

	TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
	if (lock->rl_currdep == NULL)
		lock->rl_currdep = entry;
	rangelock_calc_block(lock);
	/* Sleep until rangelock_calc_block() grants the request. */
	while (!(entry->rl_q_flags & RL_LOCK_GRANTED))
		msleep(entry, ilk, 0, "range", 0);
	mtx_unlock(ilk);
	return (entry);
}

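/*
 * Acquire a read lock on the byte range [start, end).  Returns a
 * cookie to pass to rangelock_unlock().
 */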
void *
rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk));
}

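/*
 * Acquire a write (exclusive) lock on the byte range [start, end).
 * Returns a cookie to pass to rangelock_unlock().
 */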
void *
rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk));
}