1236317Skib/*- 2236317Skib * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org> 3236317Skib * All rights reserved. 4236317Skib * 5236317Skib * Redistribution and use in source and binary forms, with or without 6236317Skib * modification, are permitted provided that the following conditions 7236317Skib * are met: 8236317Skib * 1. Redistributions of source code must retain the above copyright 9236317Skib * notice unmodified, this list of conditions, and the following 10236317Skib * disclaimer. 11236317Skib * 2. Redistributions in binary form must reproduce the above copyright 12236317Skib * notice, this list of conditions and the following disclaimer in the 13236317Skib * documentation and/or other materials provided with the distribution. 14236317Skib * 15236317Skib * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16236317Skib * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17236317Skib * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18236317Skib * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19236317Skib * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20236317Skib * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21236317Skib * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22236317Skib * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23236317Skib * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24236317Skib * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25236317Skib */ 26236317Skib 27236317Skib#include <sys/cdefs.h> 28236317Skib__FBSDID("$FreeBSD: stable/11/sys/kern/kern_rangelock.c 355690 2019-12-13 04:03:03Z kevans $"); 29236317Skib 30236317Skib#include <sys/param.h> 31236317Skib#include <sys/kernel.h> 32236317Skib#include <sys/lock.h> 33236317Skib#include <sys/mutex.h> 34236317Skib#include <sys/proc.h> 35236317Skib#include <sys/rangelock.h> 36236317Skib#include <sys/systm.h> 37236317Skib 38236317Skib#include <vm/uma.h> 39236317Skib 40236317Skibstruct rl_q_entry { 41236317Skib TAILQ_ENTRY(rl_q_entry) rl_q_link; 42236317Skib off_t rl_q_start, rl_q_end; 43236317Skib int rl_q_flags; 44236317Skib}; 45236317Skib 46236317Skibstatic uma_zone_t rl_entry_zone; 47236317Skib 48236317Skibstatic void 49236317Skibrangelock_sys_init(void) 50236317Skib{ 51236317Skib 52236317Skib rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry), 53236317Skib NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 54236317Skib} 55236317SkibSYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL); 56236317Skib 57236317Skibstatic struct rl_q_entry * 58236317Skibrlqentry_alloc(void) 59236317Skib{ 60236317Skib 61236317Skib return (uma_zalloc(rl_entry_zone, M_WAITOK)); 62236317Skib} 63236317Skib 64236317Skibvoid 65236317Skibrlqentry_free(struct rl_q_entry *rleq) 66236317Skib{ 67236317Skib 68236317Skib uma_zfree(rl_entry_zone, rleq); 69236317Skib} 70236317Skib 71236317Skibvoid 72236317Skibrangelock_init(struct rangelock *lock) 73236317Skib{ 74236317Skib 75236317Skib TAILQ_INIT(&lock->rl_waiters); 76236317Skib lock->rl_currdep = NULL; 77236317Skib} 78236317Skib 79236317Skibvoid 80236317Skibrangelock_destroy(struct rangelock *lock) 81236317Skib{ 82236317Skib 83236317Skib KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters")); 84236317Skib} 85236317Skib 86236317Skib/* 87236317Skib * Two entries are compatible if their ranges do not overlap, or both 88236317Skib * entries are for read. 89236317Skib */ 90236317Skibstatic int 91254380Scpercivaranges_overlap(const struct rl_q_entry *e1, 92236317Skib const struct rl_q_entry *e2) 93236317Skib{ 94236317Skib 95236317Skib if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start) 96236317Skib return (1); 97236317Skib return (0); 98236317Skib} 99236317Skib 100236317Skib/* 101236317Skib * Recalculate the lock->rl_currdep after an unlock. 102236317Skib */ 103236317Skibstatic void 104236317Skibrangelock_calc_block(struct rangelock *lock) 105236317Skib{ 106254380Scperciva struct rl_q_entry *entry, *nextentry, *entry1; 107236317Skib 108254380Scperciva for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) { 109254380Scperciva nextentry = TAILQ_NEXT(entry, rl_q_link); 110254380Scperciva if (entry->rl_q_flags & RL_LOCK_READ) { 111254380Scperciva /* Reads must not overlap with granted writes. */ 112254380Scperciva for (entry1 = TAILQ_FIRST(&lock->rl_waiters); 113254380Scperciva !(entry1->rl_q_flags & RL_LOCK_READ); 114254380Scperciva entry1 = TAILQ_NEXT(entry1, rl_q_link)) { 115254380Scperciva if (ranges_overlap(entry, entry1)) 116254380Scperciva goto out; 117254380Scperciva } 118254380Scperciva } else { 119254380Scperciva /* Write must not overlap with any granted locks. */ 120254380Scperciva for (entry1 = TAILQ_FIRST(&lock->rl_waiters); 121254380Scperciva entry1 != entry; 122254380Scperciva entry1 = TAILQ_NEXT(entry1, rl_q_link)) { 123254380Scperciva if (ranges_overlap(entry, entry1)) 124254380Scperciva goto out; 125254380Scperciva } 126254380Scperciva 127254380Scperciva /* Move grantable write locks to the front. */ 128254380Scperciva TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link); 129254380Scperciva TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link); 130236317Skib } 131254380Scperciva 132254380Scperciva /* Grant this lock. */ 133254380Scperciva entry->rl_q_flags |= RL_LOCK_GRANTED; 134254380Scperciva wakeup(entry); 135236317Skib } 136236317Skibout: 137236317Skib lock->rl_currdep = entry; 138236317Skib} 139236317Skib 140236317Skibstatic void 141236317Skibrangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry, 142236317Skib struct mtx *ilk) 143236317Skib{ 144236317Skib 145236317Skib MPASS(lock != NULL && entry != NULL && ilk != NULL); 146236317Skib mtx_assert(ilk, MA_OWNED); 147236317Skib KASSERT(entry != lock->rl_currdep, ("stuck currdep")); 148236317Skib 149236317Skib TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link); 150236317Skib rangelock_calc_block(lock); 151236317Skib mtx_unlock(ilk); 152236317Skib if (curthread->td_rlqe == NULL) 153236317Skib curthread->td_rlqe = entry; 154236317Skib else 155236317Skib rlqentry_free(entry); 156236317Skib} 157236317Skib 158236317Skibvoid 159236317Skibrangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk) 160236317Skib{ 161236317Skib 162236317Skib MPASS(lock != NULL && cookie != NULL && ilk != NULL); 163236317Skib 164236317Skib mtx_lock(ilk); 165236317Skib rangelock_unlock_locked(lock, cookie, ilk); 166236317Skib} 167236317Skib 168236317Skib/* 169236317Skib * Unlock the sub-range of granted lock. 170236317Skib */ 171236317Skibvoid * 172236317Skibrangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start, 173236317Skib off_t end, struct mtx *ilk) 174236317Skib{ 175236317Skib struct rl_q_entry *entry; 176236317Skib 177236317Skib MPASS(lock != NULL && cookie != NULL && ilk != NULL); 178236317Skib entry = cookie; 179236317Skib KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED, 180236317Skib ("Unlocking non-granted lock")); 181236317Skib KASSERT(entry->rl_q_start == start, ("wrong start")); 182236317Skib KASSERT(entry->rl_q_end >= end, ("wrong end")); 183236317Skib 184236317Skib mtx_lock(ilk); 185236317Skib if (entry->rl_q_end == end) { 186236317Skib rangelock_unlock_locked(lock, cookie, ilk); 187236317Skib return (NULL); 188236317Skib } 189236317Skib entry->rl_q_end = end; 190236317Skib rangelock_calc_block(lock); 191236317Skib mtx_unlock(ilk); 192236317Skib return (cookie); 193236317Skib} 194236317Skib 195236317Skib/* 196236317Skib * Add the lock request to the queue of the pending requests for 197236317Skib * rangelock. Sleep until the request can be granted. 198236317Skib */ 199236317Skibstatic void * 200236317Skibrangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode, 201236317Skib struct mtx *ilk) 202236317Skib{ 203236317Skib struct rl_q_entry *entry; 204236317Skib struct thread *td; 205236317Skib 206236317Skib MPASS(lock != NULL && ilk != NULL); 207236317Skib 208236317Skib td = curthread; 209236317Skib if (td->td_rlqe != NULL) { 210236317Skib entry = td->td_rlqe; 211236317Skib td->td_rlqe = NULL; 212236317Skib } else 213236317Skib entry = rlqentry_alloc(); 214236317Skib MPASS(entry != NULL); 215236317Skib entry->rl_q_flags = mode; 216236317Skib entry->rl_q_start = start; 217236317Skib entry->rl_q_end = end; 218236317Skib 219236317Skib mtx_lock(ilk); 220236317Skib /* 221236317Skib * XXXKIB TODO. Check that a thread does not try to enqueue a 222236317Skib * lock that is incompatible with another request from the same 223236317Skib * thread. 224236317Skib */ 225236317Skib 226236317Skib TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link); 227236317Skib if (lock->rl_currdep == NULL) 228236317Skib lock->rl_currdep = entry; 229236317Skib rangelock_calc_block(lock); 230236317Skib while (!(entry->rl_q_flags & RL_LOCK_GRANTED)) 231236317Skib msleep(entry, ilk, 0, "range", 0); 232236317Skib mtx_unlock(ilk); 233236317Skib return (entry); 234236317Skib} 235236317Skib 236236317Skibvoid * 237236317Skibrangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk) 238236317Skib{ 239236317Skib 240236317Skib return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk)); 241236317Skib} 242236317Skib 243236317Skibvoid * 244236317Skibrangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk) 245236317Skib{ 246236317Skib 247236317Skib return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk)); 248236317Skib} 249355690Skevans 250355690Skevans#ifdef INVARIANT_SUPPORT 251355690Skevansvoid 252355690Skevans_rangelock_cookie_assert(void *cookie, int what, const char *file, int line) 253355690Skevans{ 254355690Skevans struct rl_q_entry *entry; 255355690Skevans int flags; 256355690Skevans 257355690Skevans MPASS(cookie != NULL); 258355690Skevans entry = cookie; 259355690Skevans flags = entry->rl_q_flags; 260355690Skevans switch (what) { 261355690Skevans case RCA_LOCKED: 262355690Skevans if ((flags & RL_LOCK_GRANTED) == 0) 263355690Skevans panic("rangelock not held @ %s:%d\n", file, line); 264355690Skevans break; 265355690Skevans case RCA_RLOCKED: 266355690Skevans if ((flags & (RL_LOCK_GRANTED | RL_LOCK_READ)) != 267355690Skevans (RL_LOCK_GRANTED | RL_LOCK_READ)) 268355690Skevans panic("rangelock not rlocked @ %s:%d\n", file, line); 269355690Skevans break; 270355690Skevans case RCA_WLOCKED: 271355690Skevans if ((flags & (RL_LOCK_GRANTED | RL_LOCK_WRITE)) != 272355690Skevans (RL_LOCK_GRANTED | RL_LOCK_WRITE)) 273355690Skevans panic("rangelock not wlocked @ %s:%d\n", file, line); 274355690Skevans break; 275355690Skevans default: 276355690Skevans panic("Unknown rangelock assertion: %d @ %s:%d", what, file, 277355690Skevans line); 278355690Skevans } 279355690Skevans} 280355690Skevans#endif /* INVARIANT_SUPPORT */ 281