kern_umtx.c: r155276 → r157815 (at each change, the deleted r155276 lines appear first, followed by the added r157815 lines; the leading numbers are each revision's own line numbers)
1/*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
11 * disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 155276 2006-02-04 06:36:39Z davidxu $");
29__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 157815 2006-04-17 18:20:38Z jhb $");
30
31#include <sys/param.h>
32#include <sys/kernel.h>
33#include <sys/limits.h>
34#include <sys/lock.h>
35#include <sys/malloc.h>
36#include <sys/mutex.h>
37#include <sys/proc.h>
38#include <sys/sysent.h>
39#include <sys/systm.h>
40#include <sys/sysproto.h>
41#include <sys/eventhandler.h>
42#include <sys/thr.h>
43#include <sys/umtx.h>
44
45#include <vm/vm.h>
46#include <vm/vm_param.h>
47#include <vm/pmap.h>
48#include <vm/vm_map.h>
49#include <vm/vm_object.h>
50
51#define UMTX_PRIVATE 0
52#define UMTX_SHARED 1
53
54struct umtx_key {
55 int type;
56 union {
57 struct {
58 vm_object_t object;
59 long offset;
60 } shared;
61 struct {
62 struct umtx *umtx;
63 long pid;
64 } private;
65 struct {
66 void *ptr;
67 long word;
68 } both;
69 } info;
70};
71
72struct umtx_q {
73 LIST_ENTRY(umtx_q) uq_next; /* Linked list for the hash. */
74 struct umtx_key uq_key; /* Umtx key. */
75 struct thread *uq_thread; /* The thread waits on. */
76 LIST_ENTRY(umtx_q) uq_rqnext; /* Linked list for requeuing. */
77 vm_offset_t uq_addr; /* Umtx's virtual address. */
78};
79
80LIST_HEAD(umtx_head, umtx_q);
81struct umtxq_chain {
82 struct mtx uc_lock; /* Lock for this chain. */
83 struct umtx_head uc_queue; /* List of sleep queues. */
84#define UCF_BUSY 0x01
85#define UCF_WANT 0x02
86 int uc_flags;
87};
88
89#define GOLDEN_RATIO_PRIME 2654404609U
90#define UMTX_CHAINS 128
91#define UMTX_SHIFTS (__WORD_BIT - 7)
92
93static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
94static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
95
96static void umtxq_init_chains(void *);
97static int umtxq_hash(struct umtx_key *key);
98static struct mtx *umtxq_mtx(int chain);
99static void umtxq_lock(struct umtx_key *key);
100static void umtxq_unlock(struct umtx_key *key);
101static void umtxq_busy(struct umtx_key *key);
102static void umtxq_unbusy(struct umtx_key *key);
103static void umtxq_insert(struct umtx_q *uq);
104static void umtxq_remove(struct umtx_q *uq);
105static int umtxq_sleep(struct thread *td, struct umtx_key *key,
106 int prio, const char *wmesg, int timo);
107static int umtxq_count(struct umtx_key *key);
108static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
109static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
110static int umtx_key_get(struct thread *td, struct umtx *umtx,
111 struct umtx_key *key);
112static void umtx_key_release(struct umtx_key *key);
113
114SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL);
115
116struct umtx_q *
117umtxq_alloc(void)
118{
119 return (malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK));
120}
121
122void
123umtxq_free(struct umtx_q *uq)
124{
125 free(uq, M_UMTX);
126}
127
128static void
129umtxq_init_chains(void *arg __unused)
130{
131 int i;
132
133 for (i = 0; i < UMTX_CHAINS; ++i) {
134 mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
135 MTX_DEF | MTX_DUPOK);
136 LIST_INIT(&umtxq_chains[i].uc_queue);
137 umtxq_chains[i].uc_flags = 0;
138 }
139}
140
141static inline int
142umtxq_hash(struct umtx_key *key)
143{
144 unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word;
145 return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
146}
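
[Note: umtxq_hash() above is a multiplicative (Fibonacci) hash: the key's pointer and word are summed, the sum is multiplied by a 32-bit golden-ratio prime, and the top 7 bits select one of the 128 chains. A minimal standalone sketch of the same computation, with UMTX_SHIFTS expanded under the assumption that __WORD_BIT is 32, as in FreeBSD's <sys/limits.h>:

#include <stdio.h>
#include <stdint.h>

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(32 - 7)	/* __WORD_BIT - 7 */

/* Same chain selection as umtxq_hash() above. */
static unsigned
umtx_chain(const void *ptr, long word)
{
	unsigned n = (uintptr_t)ptr + word;

	return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
}

int
main(void)
{
	int lockword;

	/* A private key hashes the umtx address together with the pid. */
	printf("chain = %u\n", umtx_chain(&lockword, 100L));
	return (0);
}

Because the multiply is done in 32-bit unsigned arithmetic, the most-mixed bits end up at the top of the word, which is why the high 7 bits are kept rather than the low ones.]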
147
148static inline int
149umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
150{
151 return (k1->type == k2->type &&
152 k1->info.both.ptr == k2->info.both.ptr &&
153 k1->info.both.word == k2->info.both.word);
154}
155
156static inline struct mtx *
157umtxq_mtx(int chain)
158{
159 return (&umtxq_chains[chain].uc_lock);
160}
161
162static inline void
163umtxq_busy(struct umtx_key *key)
164{
165 int chain = umtxq_hash(key);
166
167 mtx_assert(umtxq_mtx(chain), MA_OWNED);
168 while (umtxq_chains[chain].uc_flags & UCF_BUSY) {
169 umtxq_chains[chain].uc_flags |= UCF_WANT;
170 msleep(&umtxq_chains[chain], umtxq_mtx(chain),
171 curthread->td_priority, "umtxq_busy", 0);
171 0, "umtxq_busy", 0);
172 }
173 umtxq_chains[chain].uc_flags |= UCF_BUSY;
174}
175
176static inline void
177umtxq_unbusy(struct umtx_key *key)
178{
179 int chain = umtxq_hash(key);
180
181 mtx_assert(umtxq_mtx(chain), MA_OWNED);
182 KASSERT(umtxq_chains[chain].uc_flags & UCF_BUSY, ("not busy"));
183 umtxq_chains[chain].uc_flags &= ~UCF_BUSY;
184 if (umtxq_chains[chain].uc_flags & UCF_WANT) {
185 umtxq_chains[chain].uc_flags &= ~UCF_WANT;
186 wakeup(&umtxq_chains[chain]);
187 }
188}
189
190static inline void
191umtxq_lock(struct umtx_key *key)
192{
193 int chain = umtxq_hash(key);
194 mtx_lock(umtxq_mtx(chain));
195}
196
197static inline void
198umtxq_unlock(struct umtx_key *key)
199{
200 int chain = umtxq_hash(key);
201 mtx_unlock(umtxq_mtx(chain));
202}
203
204/*
205 * Insert a thread onto the umtx queue.
206 */
207static inline void
208umtxq_insert(struct umtx_q *uq)
209{
210 struct umtx_head *head;
211 int chain = umtxq_hash(&uq->uq_key);
212
213 mtx_assert(umtxq_mtx(chain), MA_OWNED);
214 head = &umtxq_chains[chain].uc_queue;
215 LIST_INSERT_HEAD(head, uq, uq_next);
216 mtx_lock_spin(&sched_lock);
217 uq->uq_thread->td_flags |= TDF_UMTXQ;
218 mtx_unlock_spin(&sched_lock);
219}
220
221/*
222 * Remove thread from the umtx queue.
223 */
224static inline void
225umtxq_remove(struct umtx_q *uq)
226{
227 mtx_assert(umtxq_mtx(umtxq_hash(&uq->uq_key)), MA_OWNED);
228 if (uq->uq_thread->td_flags & TDF_UMTXQ) {
229 LIST_REMOVE(uq, uq_next);
230 /* turning off TDF_UMTXQ should be the last thing. */
231 mtx_lock_spin(&sched_lock);
232 uq->uq_thread->td_flags &= ~TDF_UMTXQ;
233 mtx_unlock_spin(&sched_lock);
234 }
235}
236
237static int
238umtxq_count(struct umtx_key *key)
239{
240 struct umtx_q *uq;
241 struct umtx_head *head;
242 int chain, count = 0;
243
244 chain = umtxq_hash(key);
245 mtx_assert(umtxq_mtx(chain), MA_OWNED);
246 head = &umtxq_chains[chain].uc_queue;
247 LIST_FOREACH(uq, head, uq_next) {
248 if (umtx_key_match(&uq->uq_key, key)) {
249 if (++count > 1)
250 break;
251 }
252 }
253 return (count);
254}
255
256static int
257umtxq_signal(struct umtx_key *key, int n_wake)
258{
259 struct umtx_q *uq, *next;
260 struct umtx_head *head;
261 struct thread *blocked = NULL;
262 int chain, ret;
263
264 ret = 0;
265 chain = umtxq_hash(key);
266 mtx_assert(umtxq_mtx(chain), MA_OWNED);
267 head = &umtxq_chains[chain].uc_queue;
268 for (uq = LIST_FIRST(head); uq; uq = next) {
269 next = LIST_NEXT(uq, uq_next);
270 if (umtx_key_match(&uq->uq_key, key)) {
271 blocked = uq->uq_thread;
272 umtxq_remove(uq);
273 wakeup(blocked);
274 if (++ret >= n_wake)
275 break;
276 }
277 }
278 return (ret);
279}
280
281static inline int
282umtxq_sleep(struct thread *td, struct umtx_key *key, int priority,
283 const char *wmesg, int timo)
284{
285 int chain = umtxq_hash(key);
286 int error = msleep(td, umtxq_mtx(chain), priority, wmesg, timo);
287 if (error == EWOULDBLOCK)
288 error = ETIMEDOUT;
289 return (error);
290}
291
292static int
293umtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key)
294{
295 vm_map_t map;
296 vm_map_entry_t entry;
297 vm_pindex_t pindex;
298 vm_prot_t prot;
299 boolean_t wired;
300
301 map = &td->td_proc->p_vmspace->vm_map;
302 if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE,
303 &entry, &key->info.shared.object, &pindex, &prot,
304 &wired) != KERN_SUCCESS) {
305 return EFAULT;
306 }
307
308 if (VM_INHERIT_SHARE == entry->inheritance) {
309 key->type = UMTX_SHARED;
310 key->info.shared.offset = entry->offset + entry->start -
311 (vm_offset_t)umtx;
312 vm_object_reference(key->info.shared.object);
313 } else {
314 key->type = UMTX_PRIVATE;
315 key->info.private.umtx = umtx;
316 key->info.private.pid = td->td_proc->p_pid;
317 }
318 vm_map_lookup_done(map, entry);
319 return (0);
320}
321
322static inline void
323umtx_key_release(struct umtx_key *key)
324{
325 if (key->type == UMTX_SHARED)
326 vm_object_deallocate(key->info.shared.object);
327}
328
329static inline int
330umtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq)
331{
332 int error;
333
334 if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0)
335 return (error);
336
337 uq->uq_addr = (vm_offset_t)umtx;
338 uq->uq_thread = td;
339 umtxq_lock(&uq->uq_key);
340 /* hmm, for condition variable, we don't need busy flag. */
341 umtxq_busy(&uq->uq_key);
342 umtxq_insert(uq);
343 umtxq_unbusy(&uq->uq_key);
344 umtxq_unlock(&uq->uq_key);
345 return (0);
346}
347
348static int
349_do_lock(struct thread *td, struct umtx *umtx, long id, int timo)
350{
351 struct umtx_q *uq;
352 intptr_t owner;
353 intptr_t old;
354 int error = 0;
355
356 uq = td->td_umtxq;
357 /*
358 * Care must be exercised when dealing with umtx structure. It
359 * can fault on any access.
360 */
361
362 for (;;) {
363 /*
364 * Try the uncontested case. This should be done in userland.
365 */
366 owner = casuptr((intptr_t *)&umtx->u_owner,
367 UMTX_UNOWNED, id);
368
369 /* The acquire succeeded. */
370 if (owner == UMTX_UNOWNED)
371 return (0);
372
373 /* The address was invalid. */
374 if (owner == -1)
375 return (EFAULT);
376
377 /* If no one owns it but it is contested try to acquire it. */
378 if (owner == UMTX_CONTESTED) {
379 owner = casuptr((intptr_t *)&umtx->u_owner,
380 UMTX_CONTESTED, id | UMTX_CONTESTED);
381
382 if (owner == UMTX_CONTESTED)
383 return (0);
384
385 /* The address was invalid. */
386 if (owner == -1)
387 return (EFAULT);
388
389 /* If this failed the lock has changed, restart. */
390 continue;
391 }
392
393 /*
394 * If we caught a signal, we have retried and now
395 * exit immediately.
396 */
397 if (error || (error = umtxq_queue_me(td, umtx, uq)) != 0)
398 return (error);
399
400 /*
401 * Set the contested bit so that a release in user space
402 * knows to use the system call for unlock. If this fails
403 * either some one else has acquired the lock or it has been
404 * released.
405 */
406 old = casuptr((intptr_t *)&umtx->u_owner, owner,
407 owner | UMTX_CONTESTED);
408
409 /* The address was invalid. */
410 if (old == -1) {
411 umtxq_lock(&uq->uq_key);
412 umtxq_busy(&uq->uq_key);
413 umtxq_remove(uq);
414 umtxq_unbusy(&uq->uq_key);
415 umtxq_unlock(&uq->uq_key);
416 umtx_key_release(&uq->uq_key);
417 return (EFAULT);
418 }
419
420 /*
421 * We set the contested bit, sleep. Otherwise the lock changed
422 * and we need to retry or we lost a race to the thread
423 * unlocking the umtx.
424 */
425 umtxq_lock(&uq->uq_key);
426 if (old == owner && (td->td_flags & TDF_UMTXQ)) {
427 error = umtxq_sleep(td, &uq->uq_key,
428 td->td_priority | PCATCH,
427 error = umtxq_sleep(td, &uq->uq_key, PCATCH,
429 "umtx", timo);
430 }
431 umtxq_busy(&uq->uq_key);
432 umtxq_remove(uq);
433 umtxq_unbusy(&uq->uq_key);
434 umtxq_unlock(&uq->uq_key);
435 umtx_key_release(&uq->uq_key);
436 }
437
438 return (0);
439}
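
[Note: as the comment in _do_lock() says, the uncontested acquire "should be done in userland": the owner word goes from UMTX_UNOWNED to the caller's thread id with a single compare-and-swap, and the kernel is entered only to set UMTX_CONTESTED and sleep. A hedged userland sketch of that fast path; it uses the GCC __sync builtin in place of FreeBSD's atomic_cmpset primitives and reaches _do_lock() through syscall(2), where a real thread library of this era would use the inline helpers shipped in <sys/umtx.h>:

#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/umtx.h>		/* struct umtx, UMTX_UNOWNED */
#include <stdint.h>
#include <unistd.h>

/*
 * Sketch: try the uncontested case in userland and fall into the
 * kernel (_do_lock() above) only on contention.  Returns 0 on
 * success, -1 with errno set otherwise (syscall convention).
 */
static int
umtx_lock_sketch(struct umtx *mtx, intptr_t tid)
{
	intptr_t *owner = (intptr_t *)&mtx->u_owner;

	/* Uncontested: UMTX_UNOWNED -> tid, no syscall needed. */
	if (__sync_val_compare_and_swap(owner, (intptr_t)UMTX_UNOWNED,
	    tid) == (intptr_t)UMTX_UNOWNED)
		return (0);

	/* Contended: _do_lock() queues us and sleeps until handoff. */
	return (syscall(SYS__umtx_lock, mtx));
}
]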
440
441static int
442do_lock(struct thread *td, struct umtx *umtx, long id,
443 struct timespec *timeout)
444{
445 struct timespec ts, ts2, ts3;
446 struct timeval tv;
447 int error;
448
449 if (timeout == NULL) {
450 error = _do_lock(td, umtx, id, 0);
451 } else {
452 getnanouptime(&ts);
453 timespecadd(&ts, timeout);
454 TIMESPEC_TO_TIMEVAL(&tv, timeout);
455 for (;;) {
456 error = _do_lock(td, umtx, id, tvtohz(&tv));
457 if (error != ETIMEDOUT)
458 break;
459 getnanouptime(&ts2);
460 if (timespeccmp(&ts2, &ts, >=)) {
461 error = ETIMEDOUT;
462 break;
463 }
464 ts3 = ts;
465 timespecsub(&ts3, &ts2);
466 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
467 }
468 }
469 /*
470 * This lets userland back off critical region if needed.
471 */
472 if (error == ERESTART)
473 error = EINTR;
474 return (error);
475}
476
477static int
478do_unlock(struct thread *td, struct umtx *umtx, long id)
479{
480 struct umtx_key key;
481 intptr_t owner;
482 intptr_t old;
483 int error;
484 int count;
485
486 /*
487 * Make sure we own this mtx.
488 *
489 * XXX Need a {fu,su}ptr this is not correct on arch where
490 * sizeof(intptr_t) != sizeof(long).
491 */
492 if ((owner = fuword(&umtx->u_owner)) == -1)
493 return (EFAULT);
494
495 if ((owner & ~UMTX_CONTESTED) != id)
496 return (EPERM);
497
498 /* We should only ever be in here for contested locks */
499 if ((owner & UMTX_CONTESTED) == 0)
500 return (EINVAL);
501
502 if ((error = umtx_key_get(td, umtx, &key)) != 0)
503 return (error);
504
505 umtxq_lock(&key);
506 umtxq_busy(&key);
507 count = umtxq_count(&key);
508 umtxq_unlock(&key);
509
510 /*
511 * When unlocking the umtx, it must be marked as unowned if
512 * there is zero or one thread only waiting for it.
513 * Otherwise, it must be marked as contested.
514 */
515 old = casuptr((intptr_t *)&umtx->u_owner, owner,
516 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
517 umtxq_lock(&key);
518 umtxq_signal(&key, 0);
519 umtxq_unbusy(&key);
520 umtxq_unlock(&key);
521 umtx_key_release(&key);
522 if (old == -1)
523 return (EFAULT);
524 if (old != owner)
525 return (EINVAL);
526 return (0);
527}
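
[Note: do_unlock() rejects locks whose UMTX_CONTESTED bit is clear (EINVAL), because an uncontested release is likewise expected to happen in userland: one compare-and-swap from the bare thread id back to UMTX_UNOWNED. Only when the contested bit is set does the release enter the kernel, which rewrites the owner word according to the waiter count and wakes one queued thread. The matching userland sketch, under the same assumptions as the locking sketch above:

#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/umtx.h>		/* struct umtx, UMTX_UNOWNED */
#include <stdint.h>
#include <unistd.h>

static int
umtx_unlock_sketch(struct umtx *mtx, intptr_t tid)
{
	intptr_t *owner = (intptr_t *)&mtx->u_owner;

	/* Uncontested: the owner word is exactly our tid, clear it. */
	if (__sync_val_compare_and_swap(owner, tid,
	    (intptr_t)UMTX_UNOWNED) == tid)
		return (0);

	/* UMTX_CONTESTED is set: let do_unlock() hand off and wake. */
	return (syscall(SYS__umtx_unlock, mtx));
}
]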
528
529static int
530do_wait(struct thread *td, struct umtx *umtx, long id, struct timespec *timeout)
531{
532 struct umtx_q *uq;
533 struct timespec ts, ts2, ts3;
534 struct timeval tv;
535 long tmp;
536 int error = 0;
537
538 uq = td->td_umtxq;
539 if ((error = umtxq_queue_me(td, umtx, uq)) != 0)
540 return (error);
541 tmp = fuword(&umtx->u_owner);
542 if (tmp != id) {
543 umtxq_lock(&uq->uq_key);
544 umtxq_remove(uq);
545 umtxq_unlock(&uq->uq_key);
546 } else if (timeout == NULL) {
547 umtxq_lock(&uq->uq_key);
548 if (td->td_flags & TDF_UMTXQ)
549 error = umtxq_sleep(td, &uq->uq_key,
550 td->td_priority | PCATCH, "ucond", 0);
549 PCATCH, "ucond", 0);
551 if (!(td->td_flags & TDF_UMTXQ))
552 error = 0;
553 else
554 umtxq_remove(uq);
555 umtxq_unlock(&uq->uq_key);
556 } else {
557 getnanouptime(&ts);
558 timespecadd(&ts, timeout);
559 TIMESPEC_TO_TIMEVAL(&tv, timeout);
560 for (;;) {
561 umtxq_lock(&uq->uq_key);
562 if (td->td_flags & TDF_UMTXQ) {
563 error = umtxq_sleep(td, &uq->uq_key,
564 td->td_priority | PCATCH,
562 error = umtxq_sleep(td, &uq->uq_key, PCATCH,
565 "ucond", tvtohz(&tv));
566 }
567 if (!(td->td_flags & TDF_UMTXQ)) {
568 umtxq_unlock(&uq->uq_key);
569 goto out;
570 }
571 umtxq_unlock(&uq->uq_key);
572 if (error != ETIMEDOUT)
573 break;
574 getnanouptime(&ts2);
575 if (timespeccmp(&ts2, &ts, >=)) {
576 error = ETIMEDOUT;
577 break;
578 }
579 ts3 = ts;
580 timespecsub(&ts3, &ts2);
581 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
582 }
583 umtxq_lock(&uq->uq_key);
584 umtxq_remove(uq);
585 umtxq_unlock(&uq->uq_key);
586 }
587out:
588 umtx_key_release(&uq->uq_key);
589 if (error == ERESTART)
590 error = EINTR;
591 return (error);
592}
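
[Note: do_wait() and kern_umtx_wake() form a futex-style pair: the waiter is queued first and only then re-reads the word with fuword(), so a value change between the user-space check and the sleep cannot lose a wakeup, and the wake side rouses at most the requested number of matching waiters. A hedged sketch of both sides through the _umtx_op syscall, with the argument order assumed from the _umtx_op_args usage above (umtx, op, id, uaddr, uaddr2); a NULL timeout means wait forever:

#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/umtx.h>		/* struct umtx, UMTX_OP_WAIT, UMTX_OP_WAKE */
#include <unistd.h>

/* Sleep while mtx->u_owner still equals val (do_wait() above). */
static int
umtx_wait_sketch(struct umtx *mtx, long val)
{
	return (syscall(SYS__umtx_op, mtx, UMTX_OP_WAIT, val,
	    (void *)NULL, (void *)NULL));
}

/* Wake up to nwake threads waiting on mtx (kern_umtx_wake() above). */
static int
umtx_wake_sketch(struct umtx *mtx, long nwake)
{
	return (syscall(SYS__umtx_op, mtx, UMTX_OP_WAKE, nwake,
	    (void *)NULL, (void *)NULL));
}
]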
593
594int
595kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
596{
597 struct umtx_key key;
598 int ret;
599
600 if ((ret = umtx_key_get(td, uaddr, &key)) != 0)
601 return (ret);
602 umtxq_lock(&key);
603 ret = umtxq_signal(&key, n_wake);
604 umtxq_unlock(&key);
605 umtx_key_release(&key);
606 return (0);
607}
608
609int
610_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
611 /* struct umtx *umtx */
612{
613 return _do_lock(td, uap->umtx, td->td_tid, 0);
614}
615
616int
617_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
618 /* struct umtx *umtx */
619{
620 return do_unlock(td, uap->umtx, td->td_tid);
621}
622
623int
624_umtx_op(struct thread *td, struct _umtx_op_args *uap)
625{
626 struct timespec timeout;
627 struct timespec *ts;
628 int error;
629
630 switch(uap->op) {
631 case UMTX_OP_LOCK:
632 /* Allow a null timespec (wait forever). */
633 if (uap->uaddr2 == NULL)
634 ts = NULL;
635 else {
636 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
637 if (error != 0)
638 break;
639 if (timeout.tv_nsec >= 1000000000 ||
640 timeout.tv_nsec < 0) {
641 error = EINVAL;
642 break;
643 }
644 ts = &timeout;
645 }
646 error = do_lock(td, uap->umtx, uap->id, ts);
647 break;
648 case UMTX_OP_UNLOCK:
649 error = do_unlock(td, uap->umtx, uap->id);
650 break;
651 case UMTX_OP_WAIT:
652 /* Allow a null timespec (wait forever). */
653 if (uap->uaddr2 == NULL)
654 ts = NULL;
655 else {
656 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
657 if (error != 0)
658 break;
659 if (timeout.tv_nsec >= 1000000000 ||
660 timeout.tv_nsec < 0) {
661 error = EINVAL;
662 break;
663 }
664 ts = &timeout;
665 }
666 error = do_wait(td, uap->umtx, uap->id, ts);
667 break;
668 case UMTX_OP_WAKE:
669 error = kern_umtx_wake(td, uap->umtx, uap->id);
670 break;
671 default:
672 error = EINVAL;
673 break;
674 }
675 return (error);
676}
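
[Note: for both UMTX_OP_LOCK and UMTX_OP_WAIT, _umtx_op() treats uaddr2 as an optional relative timespec: NULL waits forever, and a tv_nsec outside [0, 1000000000) fails with EINVAL. do_lock() and do_wait() convert the relative value to an absolute deadline on the monotonic uptime clock, so their internal retry loops never stretch the total wait. A short usage sketch under the same assumptions as the sketches above:

#include <errno.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/umtx.h>
#include <time.h>
#include <unistd.h>

/* Wait at most 500 ms for *mtx to move away from val. */
static int
umtx_timedwait_sketch(struct umtx *mtx, long val)
{
	/* Relative timeout; tv_nsec must stay within [0, 1e9). */
	struct timespec ts = { 0, 500000000 };

	if (syscall(SYS__umtx_op, mtx, UMTX_OP_WAIT, val,
	    (void *)NULL, &ts) == -1)
		return (errno);	/* ETIMEDOUT, EINTR, EFAULT, ... */
	return (0);
}
]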