kern_umtx.c (216463) → kern_umtx.c (216641)
1/*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
11 * disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 216463 2010-12-15 19:30:44Z mdf $");
29__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 216641 2010-12-22 05:01:52Z davidxu $");
30
31#include "opt_compat.h"
32#include <sys/param.h>
33#include <sys/kernel.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/malloc.h>
37#include <sys/mutex.h>
38#include <sys/priv.h>
39#include <sys/proc.h>
40#include <sys/sched.h>
41#include <sys/smp.h>
42#include <sys/sysctl.h>
43#include <sys/sysent.h>
44#include <sys/systm.h>
45#include <sys/sysproto.h>
46#include <sys/syscallsubr.h>
46#include <sys/eventhandler.h>
47#include <sys/umtx.h>
48
49#include <vm/vm.h>
50#include <vm/vm_param.h>
51#include <vm/pmap.h>
52#include <vm/vm_map.h>
53#include <vm/vm_object.h>
54
55#include <machine/cpu.h>
56
57#ifdef COMPAT_FREEBSD32
58#include <compat/freebsd32/freebsd32_proto.h>
59#endif
60
61enum {
62 TYPE_SIMPLE_WAIT,
63 TYPE_CV,
64 TYPE_SEM,
65 TYPE_SIMPLE_LOCK,
66 TYPE_NORMAL_UMUTEX,
67 TYPE_PI_UMUTEX,
68 TYPE_PP_UMUTEX,
69 TYPE_RWLOCK
70};
71
72#define _UMUTEX_TRY 1
73#define _UMUTEX_WAIT 2
74
75/* Key to represent a unique userland synchronization object */
76struct umtx_key {
77 int hash;
78 int type;
79 int shared;
80 union {
81 struct {
82 vm_object_t object;
83 uintptr_t offset;
84 } shared;
85 struct {
86 struct vmspace *vs;
87 uintptr_t addr;
88 } private;
89 struct {
90 void *a;
91 uintptr_t b;
92 } both;
93 } info;
94};
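
/*
 * Illustrative note (a sketch added for clarity, not in the original
 * source): the "both" arm of the union overlays the other two forms,
 * so the hash and match routines below can treat either form as an
 * opaque (pointer, integer) pair:
 *
 *	key.info.both.a;	// aliases info.shared.object / info.private.vs
 *	key.info.both.b;	// aliases info.shared.offset / info.private.addr
 */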
95
96/* Priority inheritance mutex info. */
97struct umtx_pi {
98 /* Owner thread */
99 struct thread *pi_owner;
100
101 /* Reference count */
102 int pi_refcount;
103
104 /* List entry to link umtx holding by thread */
105 TAILQ_ENTRY(umtx_pi) pi_link;
106
107 /* List entry in hash */
108 TAILQ_ENTRY(umtx_pi) pi_hashlink;
109
110 /* List for waiters */
111 TAILQ_HEAD(,umtx_q) pi_blocked;
112
113 /* Identify a userland lock object */
114 struct umtx_key pi_key;
115};
116
117/* A waiter on a userland synchronization object. */
118struct umtx_q {
119 /* Linked list for the hash. */
120 TAILQ_ENTRY(umtx_q) uq_link;
121
122 /* Umtx key. */
123 struct umtx_key uq_key;
124
125 /* Umtx flags. */
126 int uq_flags;
127#define UQF_UMTXQ 0x0001
128
129 /* The waiting thread. */
130 struct thread *uq_thread;
131
132 /*
133 * The PI mutex this thread is blocked on. Readers may hold
134 * either the chain lock or umtx_lock; writers must hold both
135 * the chain lock and umtx_lock.
136 */
137 struct umtx_pi *uq_pi_blocked;
138
139 /* On blocked list */
140 TAILQ_ENTRY(umtx_q) uq_lockq;
141
142 /* Contested PI mutexes owned by this thread */
143 TAILQ_HEAD(,umtx_pi) uq_pi_contested;
144
145 /* Inherited priority from PP mutex */
146 u_char uq_inherited_pri;
147
148 /* Spare queue ready to be reused */
149 struct umtxq_queue *uq_spare_queue;
150
151 /* The queue we are on */
152 struct umtxq_queue *uq_cur_queue;
153};
154
155TAILQ_HEAD(umtxq_head, umtx_q);
156
157/* Per-key wait-queue */
158struct umtxq_queue {
159 struct umtxq_head head;
160 struct umtx_key key;
161 LIST_ENTRY(umtxq_queue) link;
162 int length;
163};
164
165LIST_HEAD(umtxq_list, umtxq_queue);
166
167/* Userland lock object's wait-queue chain */
168struct umtxq_chain {
169 /* Lock for this chain. */
170 struct mtx uc_lock;
171
172 /* List of sleep queues. */
173 struct umtxq_list uc_queue[2];
174#define UMTX_SHARED_QUEUE 0
175#define UMTX_EXCLUSIVE_QUEUE 1
176
177 LIST_HEAD(, umtxq_queue) uc_spare_queue;
178
179 /* Busy flag */
180 char uc_busy;
181
182 /* Chain lock waiters */
183 int uc_waiters;
184
185 /* All PI in the list */
186 TAILQ_HEAD(,umtx_pi) uc_pi_list;
187
188};
189
190#define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
191#define UMTXQ_BUSY_ASSERT(uc) KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))
192
193/*
194 * Don't propagate time-sharing priority, for a security reason:
195 * a user can simply create a PI-mutex, let thread A lock it, and
196 * let another thread B block on it. Because B is sleeping, its
197 * priority will be boosted; this boosts A's priority via priority
198 * propagation too, and it will never be lowered even if A is using
199 * 100% CPU, which is unfair to other processes.
200 */
201
202#define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
203 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
204 PRI_MAX_TIMESHARE : (td)->td_user_pri)
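
/*
 * Illustrative sketch (not in the original source): UPRI() clamps any
 * time-sharing priority to the weakest time-share value, so only
 * real-time priorities are ever propagated as-is:
 *
 *	td->td_user_pri = PRI_MIN_TIMESHARE;	// hypothetical thread
 *	pri = UPRI(td);				// == PRI_MAX_TIMESHARE
 */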
205
206#define GOLDEN_RATIO_PRIME 2654404609U
207#define UMTX_CHAINS 128
208#define UMTX_SHIFTS (__WORD_BIT - 7)
209
210#define THREAD_SHARE 0
211#define PROCESS_SHARE 1
212#define AUTO_SHARE 2
213
214#define GET_SHARE(flags) \
215 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
216
217#define BUSY_SPINS 200
218
219static uma_zone_t umtx_pi_zone;
220static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
221static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
222static int umtx_pi_allocated;
223
224SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
225SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
226 &umtx_pi_allocated, 0, "Allocated umtx_pi");
227
228static void umtxq_sysinit(void *);
229static void umtxq_hash(struct umtx_key *key);
230static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
231static void umtxq_lock(struct umtx_key *key);
232static void umtxq_unlock(struct umtx_key *key);
233static void umtxq_busy(struct umtx_key *key);
234static void umtxq_unbusy(struct umtx_key *key);
235static void umtxq_insert_queue(struct umtx_q *uq, int q);
236static void umtxq_remove_queue(struct umtx_q *uq, int q);
237static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
238static int umtxq_count(struct umtx_key *key);
239static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
240static int umtx_key_get(void *addr, int type, int share,
241 struct umtx_key *key);
242static void umtx_key_release(struct umtx_key *key);
243static struct umtx_pi *umtx_pi_alloc(int);
244static void umtx_pi_free(struct umtx_pi *pi);
245static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
246static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
247static void umtx_thread_cleanup(struct thread *td);
248static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
249 struct image_params *imgp __unused);
250SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
251
252#define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
253#define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
254#define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
255
256static struct mtx umtx_lock;
257
258static void
259umtxq_sysinit(void *arg __unused)
260{
261 int i, j;
262
263 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
264 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
265 for (i = 0; i < 2; ++i) {
266 for (j = 0; j < UMTX_CHAINS; ++j) {
267 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
268 MTX_DEF | MTX_DUPOK);
269 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
270 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
271 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
272 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
273 umtxq_chains[i][j].uc_busy = 0;
274 umtxq_chains[i][j].uc_waiters = 0;
275 }
276 }
277 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
278 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
279 EVENTHANDLER_PRI_ANY);
280}
281
282struct umtx_q *
283umtxq_alloc(void)
284{
285 struct umtx_q *uq;
286
287 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
288 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
289 TAILQ_INIT(&uq->uq_spare_queue->head);
290 TAILQ_INIT(&uq->uq_pi_contested);
291 uq->uq_inherited_pri = PRI_MAX;
292 return (uq);
293}
294
295void
296umtxq_free(struct umtx_q *uq)
297{
298 MPASS(uq->uq_spare_queue != NULL);
299 free(uq->uq_spare_queue, M_UMTX);
300 free(uq, M_UMTX);
301}
302
303static inline void
304umtxq_hash(struct umtx_key *key)
305{
306 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
307 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
308}
309
310static inline int
311umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
312{
313 return (k1->type == k2->type &&
314 k1->info.both.a == k2->info.both.a &&
315 k1->info.both.b == k2->info.both.b);
316}
317
318static inline struct umtxq_chain *
319umtxq_getchain(struct umtx_key *key)
320{
321 if (key->type <= TYPE_SEM)
322 return (&umtxq_chains[1][key->hash]);
323 return (&umtxq_chains[0][key->hash]);
324}
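
/*
 * Illustrative note (not in the original source): wait/cv/sem keys
 * (type <= TYPE_SEM) hash into the second chain array and mutex or
 * rwlock keys into the first, so a condition variable and the mutex
 * guarding it never contend for the same chain lock:
 *
 *	key.type = TYPE_CV;			// <= TYPE_SEM
 *	uc = umtxq_getchain(&key);		// &umtxq_chains[1][key.hash]
 */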
325
326/*
327 * Lock a chain.
328 */
329static inline void
330umtxq_lock(struct umtx_key *key)
331{
332 struct umtxq_chain *uc;
333
334 uc = umtxq_getchain(key);
335 mtx_lock(&uc->uc_lock);
336}
337
338/*
339 * Unlock a chain.
340 */
341static inline void
342umtxq_unlock(struct umtx_key *key)
343{
344 struct umtxq_chain *uc;
345
346 uc = umtxq_getchain(key);
347 mtx_unlock(&uc->uc_lock);
348}
349
350/*
351 * Set the chain to busy state when the following operations
352 * may block (a kernel mutex cannot be used).
353 */
354static inline void
355umtxq_busy(struct umtx_key *key)
356{
357 struct umtxq_chain *uc;
358
359 uc = umtxq_getchain(key);
360 mtx_assert(&uc->uc_lock, MA_OWNED);
361 if (uc->uc_busy) {
362#ifdef SMP
363 if (smp_cpus > 1) {
364 int count = BUSY_SPINS;
365 if (count > 0) {
366 umtxq_unlock(key);
367 while (uc->uc_busy && --count > 0)
368 cpu_spinwait();
369 umtxq_lock(key);
370 }
371 }
372#endif
373 while (uc->uc_busy) {
374 uc->uc_waiters++;
375 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
376 uc->uc_waiters--;
377 }
378 }
379 uc->uc_busy = 1;
380}
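
/*
 * Illustrative sketch (not in the original source): the busy flag
 * serializes slow paths that must drop the chain mutex around an
 * access to user memory, which may fault. The pattern, as used by
 * do_unlock_umtx() below, is:
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);		// may sleep in "umtxqb"
 *	count = umtxq_count(&key);
 *	umtxq_unlock(&key);		// drop the mutex before faulting
 *	old = casuword(&umtx->u_owner, owner, newval);
 *	umtxq_lock(&key);
 *	umtxq_signal(&key, 1);
 *	umtxq_unbusy(&key);		// wakes a chain-lock waiter if any
 *	umtxq_unlock(&key);
 */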
381
382/*
383 * Unbusy a chain.
384 */
385static inline void
386umtxq_unbusy(struct umtx_key *key)
387{
388 struct umtxq_chain *uc;
389
390 uc = umtxq_getchain(key);
391 mtx_assert(&uc->uc_lock, MA_OWNED);
392 KASSERT(uc->uc_busy != 0, ("not busy"));
393 uc->uc_busy = 0;
394 if (uc->uc_waiters)
395 wakeup_one(uc);
396}
397
398static struct umtxq_queue *
399umtxq_queue_lookup(struct umtx_key *key, int q)
400{
401 struct umtxq_queue *uh;
402 struct umtxq_chain *uc;
403
404 uc = umtxq_getchain(key);
405 UMTXQ_LOCKED_ASSERT(uc);
406 LIST_FOREACH(uh, &uc->uc_queue[q], link) {
407 if (umtx_key_match(&uh->key, key))
408 return (uh);
409 }
410
411 return (NULL);
412}
413
414static inline void
415umtxq_insert_queue(struct umtx_q *uq, int q)
416{
417 struct umtxq_queue *uh;
418 struct umtxq_chain *uc;
419
420 uc = umtxq_getchain(&uq->uq_key);
421 UMTXQ_LOCKED_ASSERT(uc);
422 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
423 uh = umtxq_queue_lookup(&uq->uq_key, q);
424 if (uh != NULL) {
425 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
426 } else {
427 uh = uq->uq_spare_queue;
428 uh->key = uq->uq_key;
429 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
430 }
431 uq->uq_spare_queue = NULL;
432
433 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
434 uh->length++;
435 uq->uq_flags |= UQF_UMTXQ;
436 uq->uq_cur_queue = uh;
437 return;
438}
439
440static inline void
441umtxq_remove_queue(struct umtx_q *uq, int q)
442{
443 struct umtxq_chain *uc;
444 struct umtxq_queue *uh;
445
446 uc = umtxq_getchain(&uq->uq_key);
447 UMTXQ_LOCKED_ASSERT(uc);
448 if (uq->uq_flags & UQF_UMTXQ) {
449 uh = uq->uq_cur_queue;
450 TAILQ_REMOVE(&uh->head, uq, uq_link);
451 uh->length--;
452 uq->uq_flags &= ~UQF_UMTXQ;
453 if (TAILQ_EMPTY(&uh->head)) {
454 KASSERT(uh->length == 0,
455 ("inconsistent umtxq_queue length"));
456 LIST_REMOVE(uh, link);
457 } else {
458 uh = LIST_FIRST(&uc->uc_spare_queue);
459 KASSERT(uh != NULL, ("uc_spare_queue is empty"));
460 LIST_REMOVE(uh, link);
461 }
462 uq->uq_spare_queue = uh;
463 uq->uq_cur_queue = NULL;
464 }
465}
466
467/*
468 * Return the number of waiters on the shared queue.
469 */
470static int
471umtxq_count(struct umtx_key *key)
472{
473 struct umtxq_chain *uc;
474 struct umtxq_queue *uh;
475
476 uc = umtxq_getchain(key);
477 UMTXQ_LOCKED_ASSERT(uc);
478 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
479 if (uh != NULL)
480 return (uh->length);
481 return (0);
482}
483
484/*
485 * Return the number of PI waiters and store the first
486 * waiter in *first.
487 */
488static int
489umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
490{
491 struct umtxq_chain *uc;
492 struct umtxq_queue *uh;
493
494 *first = NULL;
495 uc = umtxq_getchain(key);
496 UMTXQ_LOCKED_ASSERT(uc);
497 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
498 if (uh != NULL) {
499 *first = TAILQ_FIRST(&uh->head);
500 return (uh->length);
501 }
502 return (0);
503}
504
505/*
506 * Wake up threads waiting on a userland object.
507 */
508
509static int
510umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
511{
512 struct umtxq_chain *uc;
513 struct umtxq_queue *uh;
514 struct umtx_q *uq;
515 int ret;
516
517 ret = 0;
518 uc = umtxq_getchain(key);
519 UMTXQ_LOCKED_ASSERT(uc);
520 uh = umtxq_queue_lookup(key, q);
521 if (uh != NULL) {
522 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
523 umtxq_remove_queue(uq, q);
524 wakeup(uq);
525 if (++ret >= n_wake)
526 return (ret);
527 }
528 }
529 return (ret);
530}
531
532
533/*
534 * Wake up specified thread.
535 */
536static inline void
537umtxq_signal_thread(struct umtx_q *uq)
538{
539 struct umtxq_chain *uc;
540
541 uc = umtxq_getchain(&uq->uq_key);
542 UMTXQ_LOCKED_ASSERT(uc);
543 umtxq_remove(uq);
544 wakeup(uq);
545}
546
547/*
548 * Put the thread into a sleep state; before sleeping, check
549 * whether the thread was already removed from the umtx queue.
550 */
551static inline int
552umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
553{
554 struct umtxq_chain *uc;
555 int error;
556
557 uc = umtxq_getchain(&uq->uq_key);
558 UMTXQ_LOCKED_ASSERT(uc);
559 if (!(uq->uq_flags & UQF_UMTXQ))
560 return (0);
561 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
562 if (error == EWOULDBLOCK)
563 error = ETIMEDOUT;
564 return (error);
565}
566
567/*
568 * Convert userspace address into unique logical address.
569 */
570static int
571umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
572{
573 struct thread *td = curthread;
574 vm_map_t map;
575 vm_map_entry_t entry;
576 vm_pindex_t pindex;
577 vm_prot_t prot;
578 boolean_t wired;
579
580 key->type = type;
581 if (share == THREAD_SHARE) {
582 key->shared = 0;
583 key->info.private.vs = td->td_proc->p_vmspace;
584 key->info.private.addr = (uintptr_t)addr;
585 } else {
586 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
587 map = &td->td_proc->p_vmspace->vm_map;
588 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
589 &entry, &key->info.shared.object, &pindex, &prot,
590 &wired) != KERN_SUCCESS) {
591 return (EFAULT);
592 }
593
594 if ((share == PROCESS_SHARE) ||
595 (share == AUTO_SHARE &&
596 VM_INHERIT_SHARE == entry->inheritance)) {
597 key->shared = 1;
598 key->info.shared.offset = entry->offset + entry->start -
599 (vm_offset_t)addr;
600 vm_object_reference(key->info.shared.object);
601 } else {
602 key->shared = 0;
603 key->info.private.vs = td->td_proc->p_vmspace;
604 key->info.private.addr = (uintptr_t)addr;
605 }
606 vm_map_lookup_done(map, entry);
607 }
608
609 umtxq_hash(key);
610 return (0);
611}
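
/*
 * Illustrative sketch (not in the original source; addr_a/addr_b are
 * hypothetical): with PROCESS_SHARE, two processes mapping the same
 * object at different addresses still derive matching keys, because
 * the key is (vm_object, offset) rather than the raw user address:
 *
 *	umtx_key_get(addr_a, TYPE_NORMAL_UMUTEX, PROCESS_SHARE, &k1);
 *	umtx_key_get(addr_b, TYPE_NORMAL_UMUTEX, PROCESS_SHARE, &k2);
 *	umtx_key_match(&k1, &k2);	// true for the same shared mutex
 */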
612
613/*
614 * Release key.
615 */
616static inline void
617umtx_key_release(struct umtx_key *key)
618{
619 if (key->shared)
620 vm_object_deallocate(key->info.shared.object);
621}
622
623/*
624 * Lock a umtx object.
625 */
626static int
627_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
628{
629 struct umtx_q *uq;
630 u_long owner;
631 u_long old;
632 int error = 0;
633
634 uq = td->td_umtxq;
635
636 /*
637 * Care must be exercised when dealing with the umtx structure. It
638 * can fault on any access.
639 */
640 for (;;) {
641 /*
642 * Try the uncontested case. This should be done in userland.
643 */
644 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
645
646 /* The acquire succeeded. */
647 if (owner == UMTX_UNOWNED)
648 return (0);
649
650 /* The address was invalid. */
651 if (owner == -1)
652 return (EFAULT);
653
654 /* If no one owns it but it is contested, try to acquire it. */
655 if (owner == UMTX_CONTESTED) {
656 owner = casuword(&umtx->u_owner,
657 UMTX_CONTESTED, id | UMTX_CONTESTED);
658
659 if (owner == UMTX_CONTESTED)
660 return (0);
661
662 /* The address was invalid. */
663 if (owner == -1)
664 return (EFAULT);
665
666 /* If this failed the lock has changed, restart. */
667 continue;
668 }
669
670 /*
671 * If we caught a signal, we have already retried once and
672 * now exit immediately.
673 */
674 if (error != 0)
675 return (error);
676
677 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
678 AUTO_SHARE, &uq->uq_key)) != 0)
679 return (error);
680
681 umtxq_lock(&uq->uq_key);
682 umtxq_busy(&uq->uq_key);
683 umtxq_insert(uq);
684 umtxq_unbusy(&uq->uq_key);
685 umtxq_unlock(&uq->uq_key);
686
687 /*
688 * Set the contested bit so that a release in user space
689 * knows to use the system call for unlock. If this fails,
690 * either someone else has acquired the lock or it has been
691 * released.
692 */
693 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
694
695 /* The address was invalid. */
696 if (old == -1) {
697 umtxq_lock(&uq->uq_key);
698 umtxq_remove(uq);
699 umtxq_unlock(&uq->uq_key);
700 umtx_key_release(&uq->uq_key);
701 return (EFAULT);
702 }
703
704 /*
705 * If we set the contested bit, sleep. Otherwise the lock changed
706 * and we need to retry or we lost a race to the thread
707 * unlocking the umtx.
708 */
709 umtxq_lock(&uq->uq_key);
710 if (old == owner)
711 error = umtxq_sleep(uq, "umtx", timo);
712 umtxq_remove(uq);
713 umtxq_unlock(&uq->uq_key);
714 umtx_key_release(&uq->uq_key);
715 }
716
717 return (0);
718}
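
/*
 * Illustrative sketch (not in the original source): the uncontested
 * case that "should be done in userland" pairs with the loop above
 * roughly as follows; treat the exact userland spelling of the
 * syscall wrapper as an assumption:
 *
 *	if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id))
 *		return (0);		// fast path, no syscall
 *	return (_umtx_lock(umtx));	// contested: enter the kernel
 */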
719
720/*
721 * Lock a umtx object.
722 */
723static int
724do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
725 struct timespec *timeout)
726{
727 struct timespec ts, ts2, ts3;
728 struct timeval tv;
729 int error;
730
731 if (timeout == NULL) {
732 error = _do_lock_umtx(td, umtx, id, 0);
733 /* Mutex locking is restarted if it is interrupted. */
734 if (error == EINTR)
735 error = ERESTART;
736 } else {
737 getnanouptime(&ts);
738 timespecadd(&ts, timeout);
739 TIMESPEC_TO_TIMEVAL(&tv, timeout);
740 for (;;) {
741 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
742 if (error != ETIMEDOUT)
743 break;
744 getnanouptime(&ts2);
745 if (timespeccmp(&ts2, &ts, >=)) {
746 error = ETIMEDOUT;
747 break;
748 }
749 ts3 = ts;
750 timespecsub(&ts3, &ts2);
751 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
752 }
753 /* Timed-locking is not restarted. */
754 if (error == ERESTART)
755 error = EINTR;
756 }
757 return (error);
758}
759
760/*
761 * Unlock a umtx object.
762 */
763static int
764do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
765{
766 struct umtx_key key;
767 u_long owner;
768 u_long old;
769 int error;
770 int count;
771
772 /*
773 * Make sure we own this mtx.
774 */
775 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
776 if (owner == -1)
777 return (EFAULT);
778
779 if ((owner & ~UMTX_CONTESTED) != id)
780 return (EPERM);
781
782 /* This should be done in userland */
783 if ((owner & UMTX_CONTESTED) == 0) {
784 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
785 if (old == -1)
786 return (EFAULT);
787 if (old == owner)
788 return (0);
789 owner = old;
790 }
791
792 /* We should only ever be in here for contested locks */
793 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
794 &key)) != 0)
795 return (error);
796
797 umtxq_lock(&key);
798 umtxq_busy(&key);
799 count = umtxq_count(&key);
800 umtxq_unlock(&key);
801
802 /*
803 * When unlocking the umtx, it must be marked as unowned if
804 * at most one thread is waiting for it.
805 * Otherwise, it must be marked as contested.
806 */
807 old = casuword(&umtx->u_owner, owner,
808 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
809 umtxq_lock(&key);
810 umtxq_signal(&key, 1);
811 umtxq_unbusy(&key);
812 umtxq_unlock(&key);
813 umtx_key_release(&key);
814 if (old == -1)
815 return (EFAULT);
816 if (old != owner)
817 return (EINVAL);
818 return (0);
819}
820
821#ifdef COMPAT_FREEBSD32
822
823/*
824 * Lock a umtx object.
825 */
826static int
827_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
828{
829 struct umtx_q *uq;
830 uint32_t owner;
831 uint32_t old;
832 int error = 0;
833
834 uq = td->td_umtxq;
835
836 /*
837 * Care must be exercised when dealing with the umtx structure. It
838 * can fault on any access.
839 */
840 for (;;) {
841 /*
842 * Try the uncontested case. This should be done in userland.
843 */
844 owner = casuword32(m, UMUTEX_UNOWNED, id);
845
846 /* The acquire succeeded. */
847 if (owner == UMUTEX_UNOWNED)
848 return (0);
849
850 /* The address was invalid. */
851 if (owner == -1)
852 return (EFAULT);
853
854 /* If no one owns it but it is contested, try to acquire it. */
855 if (owner == UMUTEX_CONTESTED) {
856 owner = casuword32(m,
857 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
858 if (owner == UMUTEX_CONTESTED)
859 return (0);
860
861 /* The address was invalid. */
862 if (owner == -1)
863 return (EFAULT);
864
865 /* If this failed the lock has changed, restart. */
866 continue;
867 }
868
869 /*
870 * If we caught a signal, we have already retried once and
871 * now exit immediately.
872 */
873 if (error != 0)
874 return (error);
875
876 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
877 AUTO_SHARE, &uq->uq_key)) != 0)
878 return (error);
879
880 umtxq_lock(&uq->uq_key);
881 umtxq_busy(&uq->uq_key);
882 umtxq_insert(uq);
883 umtxq_unbusy(&uq->uq_key);
884 umtxq_unlock(&uq->uq_key);
885
886 /*
887 * Set the contested bit so that a release in user space
888 * knows to use the system call for unlock. If this fails,
889 * either someone else has acquired the lock or it has been
890 * released.
891 */
892 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
893
894 /* The address was invalid. */
895 if (old == -1) {
896 umtxq_lock(&uq->uq_key);
897 umtxq_remove(uq);
898 umtxq_unlock(&uq->uq_key);
899 umtx_key_release(&uq->uq_key);
900 return (EFAULT);
901 }
902
903 /*
904 * If we set the contested bit, sleep. Otherwise the lock changed
905 * and we need to retry or we lost a race to the thread
906 * unlocking the umtx.
907 */
908 umtxq_lock(&uq->uq_key);
909 if (old == owner)
910 error = umtxq_sleep(uq, "umtx", timo);
911 umtxq_remove(uq);
912 umtxq_unlock(&uq->uq_key);
913 umtx_key_release(&uq->uq_key);
914 }
915
916 return (0);
917}
918
919/*
920 * Lock a umtx object.
921 */
922static int
923do_lock_umtx32(struct thread *td, void *m, uint32_t id,
924 struct timespec *timeout)
925{
926 struct timespec ts, ts2, ts3;
927 struct timeval tv;
928 int error;
929
930 if (timeout == NULL) {
931 error = _do_lock_umtx32(td, m, id, 0);
932 /* Mutex locking is restarted if it is interrupted. */
933 if (error == EINTR)
934 error = ERESTART;
935 } else {
936 getnanouptime(&ts);
937 timespecadd(&ts, timeout);
938 TIMESPEC_TO_TIMEVAL(&tv, timeout);
939 for (;;) {
940 error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
941 if (error != ETIMEDOUT)
942 break;
943 getnanouptime(&ts2);
944 if (timespeccmp(&ts2, &ts, >=)) {
945 error = ETIMEDOUT;
946 break;
947 }
948 ts3 = ts;
949 timespecsub(&ts3, &ts2);
950 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
951 }
952 /* Timed-locking is not restarted. */
953 if (error == ERESTART)
954 error = EINTR;
955 }
956 return (error);
957}
958
959/*
960 * Unlock a umtx object.
961 */
962static int
963do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
964{
965 struct umtx_key key;
966 uint32_t owner;
967 uint32_t old;
968 int error;
969 int count;
970
971 /*
972 * Make sure we own this mtx.
973 */
974 owner = fuword32(m);
975 if (owner == -1)
976 return (EFAULT);
977
978 if ((owner & ~UMUTEX_CONTESTED) != id)
979 return (EPERM);
980
981 /* This should be done in userland */
982 if ((owner & UMUTEX_CONTESTED) == 0) {
983 old = casuword32(m, owner, UMUTEX_UNOWNED);
984 if (old == -1)
985 return (EFAULT);
986 if (old == owner)
987 return (0);
988 owner = old;
989 }
990
991 /* We should only ever be in here for contested locks */
992 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
993 &key)) != 0)
994 return (error);
995
996 umtxq_lock(&key);
997 umtxq_busy(&key);
998 count = umtxq_count(&key);
999 umtxq_unlock(&key);
1000
1001 /*
1002 * When unlocking the umtx, it must be marked as unowned if
1003 * at most one thread is waiting for it.
1004 * Otherwise, it must be marked as contested.
1005 */
1006 old = casuword32(m, owner,
1007 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1008 umtxq_lock(&key);
1009 umtxq_signal(&key, 1);
1010 umtxq_unbusy(&key);
1011 umtxq_unlock(&key);
1012 umtx_key_release(&key);
1013 if (old == -1)
1014 return (EFAULT);
1015 if (old != owner)
1016 return (EINVAL);
1017 return (0);
1018}
1019#endif
1020
1021/*
1022 * Fetch and compare the value; sleep on the address if the value is unchanged.
1023 */
1024static int
1025do_wait(struct thread *td, void *addr, u_long id,
1026 struct timespec *timeout, int compat32, int is_private)
1027{
1028 struct umtx_q *uq;
1029 struct timespec ts, ts2, ts3;
1030 struct timeval tv;
1031 u_long tmp;
1032 int error = 0;
1033
1034 uq = td->td_umtxq;
1035 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
1036 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
1037 return (error);
1038
1039 umtxq_lock(&uq->uq_key);
1040 umtxq_insert(uq);
1041 umtxq_unlock(&uq->uq_key);
1042 if (compat32 == 0)
1043 tmp = fuword(addr);
1044 else
1045 tmp = (unsigned int)fuword32(addr);
1046 if (tmp != id) {
1047 umtxq_lock(&uq->uq_key);
1048 umtxq_remove(uq);
1049 umtxq_unlock(&uq->uq_key);
1050 } else if (timeout == NULL) {
1051 umtxq_lock(&uq->uq_key);
1052 error = umtxq_sleep(uq, "uwait", 0);
1053 umtxq_remove(uq);
1054 umtxq_unlock(&uq->uq_key);
1055 } else {
1056 getnanouptime(&ts);
1057 timespecadd(&ts, timeout);
1058 TIMESPEC_TO_TIMEVAL(&tv, timeout);
1059 umtxq_lock(&uq->uq_key);
1060 for (;;) {
1061 error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
1062 if (!(uq->uq_flags & UQF_UMTXQ)) {
1063 error = 0;
1064 break;
1065 }
1066 if (error != ETIMEDOUT)
1067 break;
1068 umtxq_unlock(&uq->uq_key);
1069 getnanouptime(&ts2);
1070 if (timespeccmp(&ts2, &ts, >=)) {
1071 error = ETIMEDOUT;
1072 umtxq_lock(&uq->uq_key);
1073 break;
1074 }
1075 ts3 = ts;
1076 timespecsub(&ts3, &ts2);
1077 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1078 umtxq_lock(&uq->uq_key);
1079 }
1080 umtxq_remove(uq);
1081 umtxq_unlock(&uq->uq_key);
1082 }
1083 umtx_key_release(&uq->uq_key);
1084 if (error == ERESTART)
1085 error = EINTR;
1086 return (error);
1087}
1088
1089/*
1090 * Wake up threads sleeping on the specified address.
1091 */
1092int
1093kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1094{
1095 struct umtx_key key;
1096 int ret;
1097
1098 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1099 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1100 return (ret);
1101 umtxq_lock(&key);
1102 ret = umtxq_signal(&key, n_wake);
1103 umtxq_unlock(&key);
1104 umtx_key_release(&key);
1105 return (0);
1106}
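
/*
 * Illustrative sketch (not in the original source): do_wait() and
 * kern_umtx_wake() form a futex-style compare-and-sleep primitive.
 * A hypothetical userland consumer, via _umtx_op(2):
 *
 *	while (atomic_load_acq_int(&flag) == 0)			// waiter
 *		_umtx_op(&flag, UMTX_OP_WAIT_UINT, 0, NULL, NULL);
 *
 *	atomic_store_rel_int(&flag, 1);				// waker
 *	_umtx_op(&flag, UMTX_OP_WAKE, INT_MAX, NULL, NULL);
 */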
1107
1108/*
1109 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
1110 */
1111static int
1112_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1113 int mode)
1114{
1115 struct umtx_q *uq;
1116 uint32_t owner, old, id;
1117 int error = 0;
1118
1119 id = td->td_tid;
1120 uq = td->td_umtxq;
1121
1122 /*
1123 * Care must be exercised when dealing with the umtx structure. It
1124 * can fault on any access.
1125 */
1126 for (;;) {
1127 owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
1128 if (mode == _UMUTEX_WAIT) {
1129 if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
1130 return (0);
1131 } else {
1132 /*
1133 * Try the uncontested case. This should be done in userland.
1134 */
1135 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1136
1137 /* The acquire succeeded. */
1138 if (owner == UMUTEX_UNOWNED)
1139 return (0);
1140
1141 /* The address was invalid. */
1142 if (owner == -1)
1143 return (EFAULT);
1144
1145 /* If no one owns it but it is contested, try to acquire it. */
1146 if (owner == UMUTEX_CONTESTED) {
1147 owner = casuword32(&m->m_owner,
1148 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1149
1150 if (owner == UMUTEX_CONTESTED)
1151 return (0);
1152
1153 /* The address was invalid. */
1154 if (owner == -1)
1155 return (EFAULT);
1156
1157 /* If this failed the lock has changed, restart. */
1158 continue;
1159 }
1160 }
1161
1162 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1163 (owner & ~UMUTEX_CONTESTED) == id)
1164 return (EDEADLK);
1165
1166 if (mode == _UMUTEX_TRY)
1167 return (EBUSY);
1168
1169 /*
1170 * If we caught a signal, we have already retried once and
1171 * now exit immediately.
1172 */
1173 if (error != 0)
1174 return (error);
1175
1176 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1177 GET_SHARE(flags), &uq->uq_key)) != 0)
1178 return (error);
1179
1180 umtxq_lock(&uq->uq_key);
1181 umtxq_busy(&uq->uq_key);
1182 umtxq_insert(uq);
1183 umtxq_unlock(&uq->uq_key);
1184
1185 /*
1186 * Set the contested bit so that a release in user space
1187 * knows to use the system call for unlock. If this fails,
1188 * either someone else has acquired the lock or it has been
1189 * released.
1190 */
1191 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1192
1193 /* The address was invalid. */
1194 if (old == -1) {
1195 umtxq_lock(&uq->uq_key);
1196 umtxq_remove(uq);
1197 umtxq_unbusy(&uq->uq_key);
1198 umtxq_unlock(&uq->uq_key);
1199 umtx_key_release(&uq->uq_key);
1200 return (EFAULT);
1201 }
1202
1203 /*
1204 * If we set the contested bit, sleep. Otherwise the lock changed
1205 * and we need to retry or we lost a race to the thread
1206 * unlocking the umtx.
1207 */
1208 umtxq_lock(&uq->uq_key);
1209 umtxq_unbusy(&uq->uq_key);
1210 if (old == owner)
1211 error = umtxq_sleep(uq, "umtxn", timo);
1212 umtxq_remove(uq);
1213 umtxq_unlock(&uq->uq_key);
1214 umtx_key_release(&uq->uq_key);
1215 }
1216
1217 return (0);
1218}
1219
1223/*
1224 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
1225 */
1226static int
1227do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1228{
1229 struct umtx_key key;
1230 uint32_t owner, old, id;
1231 int error;
1232 int count;
1233
1234 id = td->td_tid;
1235 /*
1236 * Make sure we own this mtx.
1237 */
1238 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1239 if (owner == -1)
1240 return (EFAULT);
1241
1242 if ((owner & ~UMUTEX_CONTESTED) != id)
1243 return (EPERM);
1244
1245 if ((owner & UMUTEX_CONTESTED) == 0) {
1246 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1247 if (old == -1)
1248 return (EFAULT);
1249 if (old == owner)
1250 return (0);
1251 owner = old;
1252 }
1253
1254 /* We should only ever be in here for contested locks */
1255 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1256 &key)) != 0)
1257 return (error);
1258
1259 umtxq_lock(&key);
1260 umtxq_busy(&key);
1261 count = umtxq_count(&key);
1262 umtxq_unlock(&key);
1263
1264 /*
1265 * When unlocking the umtx, it must be marked as unowned if
1266 * at most one thread is waiting for it.
1267 * Otherwise, it must be marked as contested.
1268 */
1269 old = casuword32(&m->m_owner, owner,
1270 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1271 umtxq_lock(&key);
1272 umtxq_signal(&key, 1);
1273 umtxq_unbusy(&key);
1274 umtxq_unlock(&key);
1275 umtx_key_release(&key);
1276 if (old == -1)
1277 return (EFAULT);
1278 if (old != owner)
1279 return (EINVAL);
1280 return (0);
1281}
1282
1283/*
1284 * Check if the mutex is available and wake up a waiter,
1285 * only for a simple mutex.
1286 */
1287static int
1288do_wake_umutex(struct thread *td, struct umutex *m)
1289{
1290 struct umtx_key key;
1291 uint32_t owner;
1292 uint32_t flags;
1293 int error;
1294 int count;
1295
1296 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1297 if (owner == -1)
1298 return (EFAULT);
1299
1300 if ((owner & ~UMUTEX_CONTESTED) != 0)
1301 return (0);
1302
1303 flags = fuword32(&m->m_flags);
1304
1305 /* We should only ever be in here for contested locks */
1306 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1307 &key)) != 0)
1308 return (error);
1309
1310 umtxq_lock(&key);
1311 umtxq_busy(&key);
1312 count = umtxq_count(&key);
1313 umtxq_unlock(&key);
1314
1315 if (count <= 1)
1316 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
1317
1318 umtxq_lock(&key);
1319 if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1320 umtxq_signal(&key, 1);
1321 umtxq_unbusy(&key);
1322 umtxq_unlock(&key);
1323 umtx_key_release(&key);
1324 return (0);
1325}
1326
1327static inline struct umtx_pi *
1328umtx_pi_alloc(int flags)
1329{
1330 struct umtx_pi *pi;
1331
1332 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1333 TAILQ_INIT(&pi->pi_blocked);
1334 atomic_add_int(&umtx_pi_allocated, 1);
1335 return (pi);
1336}
1337
1338static inline void
1339umtx_pi_free(struct umtx_pi *pi)
1340{
1341 uma_zfree(umtx_pi_zone, pi);
1342 atomic_add_int(&umtx_pi_allocated, -1);
1343}
1344
1345/*
1346 * Adjust the thread's position on the blocked list of its PI
1347 * mutex after its priority has been changed.
1348 */
1349static int
1350umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1351{
1352 struct umtx_q *uq, *uq1, *uq2;
1353 struct thread *td1;
1354
1355 mtx_assert(&umtx_lock, MA_OWNED);
1356 if (pi == NULL)
1357 return (0);
1358
1359 uq = td->td_umtxq;
1360
1361 /*
1362 * Check if the thread needs to be moved on the blocked chain.
1363 * It needs to be moved if either its priority is lower than
1364 * the previous thread or higher than the next thread.
1365 */
1366 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1367 uq2 = TAILQ_NEXT(uq, uq_lockq);
1368 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1369 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1370 /*
1371 * Remove thread from blocked chain and determine where
1372 * it should be moved to.
1373 */
1374 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1375 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1376 td1 = uq1->uq_thread;
1377 MPASS(td1->td_proc->p_magic == P_MAGIC);
1378 if (UPRI(td1) > UPRI(td))
1379 break;
1380 }
1381
1382 if (uq1 == NULL)
1383 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1384 else
1385 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1386 }
1387 return (1);
1388}
1389
1390/*
1391 * Propagate priority when a thread is blocked on a POSIX
1392 * PI mutex.
1393 */
1394static void
1395umtx_propagate_priority(struct thread *td)
1396{
1397 struct umtx_q *uq;
1398 struct umtx_pi *pi;
1399 int pri;
1400
1401 mtx_assert(&umtx_lock, MA_OWNED);
1402 pri = UPRI(td);
1403 uq = td->td_umtxq;
1404 pi = uq->uq_pi_blocked;
1405 if (pi == NULL)
1406 return;
1407
1408 for (;;) {
1409 td = pi->pi_owner;
1410 if (td == NULL || td == curthread)
1411 return;
1412
1413 MPASS(td->td_proc != NULL);
1414 MPASS(td->td_proc->p_magic == P_MAGIC);
1415
1416 thread_lock(td);
1417 if (td->td_lend_user_pri > pri)
1418 sched_lend_user_prio(td, pri);
1419 else {
1420 thread_unlock(td);
1421 break;
1422 }
1423 thread_unlock(td);
1424
1425 /*
1426 * Pick up the lock that td is blocked on.
1427 */
1428 uq = td->td_umtxq;
1429 pi = uq->uq_pi_blocked;
1430 /* Resort td on the list if needed. */
1431 if (!umtx_pi_adjust_thread(pi, td))
1432 break;
1433 }
1434}
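
/*
 * Illustrative example (not in the original source; the thread and
 * mutex names are hypothetical): propagation walks the chain of
 * ownership. If real-time thread C blocks on M2 owned by B, and B is
 * itself blocked on M1 owned by time-share thread A:
 *
 *	C --waits--> M2 --owned by--> B --waits--> M1 --owned by--> A
 *
 * the loop above first lends C's priority to B, then follows B's
 * uq_pi_blocked to M1 and lends the same priority to A, stopping when
 * an owner already runs at that priority or the chain ends.
 */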
1435
1436/*
1437 * Unpropagate priority for a PI mutex when a thread blocked on
1438 * it is interrupted by a signal or resumed by others.
1439 */
1440static void
1441umtx_unpropagate_priority(struct umtx_pi *pi)
1442{
1443 struct umtx_q *uq, *uq_owner;
1444 struct umtx_pi *pi2;
1445 int pri, oldpri;
1446
1447 mtx_assert(&umtx_lock, MA_OWNED);
1448
1449 while (pi != NULL && pi->pi_owner != NULL) {
1450 pri = PRI_MAX;
1451 uq_owner = pi->pi_owner->td_umtxq;
1452
1453 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1454 uq = TAILQ_FIRST(&pi2->pi_blocked);
1455 if (uq != NULL) {
1456 if (pri > UPRI(uq->uq_thread))
1457 pri = UPRI(uq->uq_thread);
1458 }
1459 }
1460
1461 if (pri > uq_owner->uq_inherited_pri)
1462 pri = uq_owner->uq_inherited_pri;
1463 thread_lock(pi->pi_owner);
1464 oldpri = pi->pi_owner->td_user_pri;
1465 sched_unlend_user_prio(pi->pi_owner, pri);
1466 thread_unlock(pi->pi_owner);
1467 if (uq_owner->uq_pi_blocked != NULL)
1468 umtx_pi_adjust_locked(pi->pi_owner, oldpri);
1469 pi = uq_owner->uq_pi_blocked;
1470 }
1471}
1472
1473/*
1474 * Insert a PI mutex into the owning thread's contested list.
1475 */
1476static void
1477umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1478{
1479 struct umtx_q *uq_owner;
1480
1481 uq_owner = owner->td_umtxq;
1482 mtx_assert(&umtx_lock, MA_OWNED);
1483 if (pi->pi_owner != NULL)
1484 panic("pi_owner != NULL");
1485 pi->pi_owner = owner;
1486 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1487}
1488
1489/*
1490 * Claim ownership of a PI mutex.
1491 */
1492static int
1493umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1494{
1495 struct umtx_q *uq, *uq_owner;
1496
1497 uq_owner = owner->td_umtxq;
1498 mtx_lock_spin(&umtx_lock);
1499 if (pi->pi_owner == owner) {
1500 mtx_unlock_spin(&umtx_lock);
1501 return (0);
1502 }
1503
1504 if (pi->pi_owner != NULL) {
1505 /*
1506 * userland may have already messed up the mutex, sigh.
1507 */
1508 mtx_unlock_spin(&umtx_lock);
1509 return (EPERM);
1510 }
1511 umtx_pi_setowner(pi, owner);
1512 uq = TAILQ_FIRST(&pi->pi_blocked);
1513 if (uq != NULL) {
1514 int pri;
1515
1516 pri = UPRI(uq->uq_thread);
1517 thread_lock(owner);
1518 if (pri < UPRI(owner))
1519 sched_lend_user_prio(owner, pri);
1520 thread_unlock(owner);
1521 }
1522 mtx_unlock_spin(&umtx_lock);
1523 return (0);
1524}
1525
1526static void
1527umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
1528{
1529 struct umtx_q *uq;
1530 struct umtx_pi *pi;
1531
1532 uq = td->td_umtxq;
1533 /*
1534 * Pick up the lock that td is blocked on.
1535 */
1536 pi = uq->uq_pi_blocked;
1537 MPASS(pi != NULL);
1538
1539 /* Resort the turnstile on the list. */
1540 if (!umtx_pi_adjust_thread(pi, td))
1541 return;
1542
1543 /*
1544 * If our priority was lowered and we are at the head of the
1545 * turnstile, then propagate our new priority up the chain.
1546 */
1547 if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
1548 umtx_propagate_priority(td);
1549}
1550
1551/*
1552 * Adjust a thread's position in the queue of the PI mutex it is
1553 * blocked on; this may trigger a new round of priority propagation.
1554 */
1555void
1556umtx_pi_adjust(struct thread *td, u_char oldpri)
1557{
1558 struct umtx_q *uq;
1559 struct umtx_pi *pi;
1560
1561 uq = td->td_umtxq;
1562 mtx_lock_spin(&umtx_lock);
1563 /*
1564 * Pick up the lock that td is blocked on.
1565 */
1566 pi = uq->uq_pi_blocked;
1567 if (pi != NULL)
1568 umtx_pi_adjust_locked(td, oldpri);
1569 mtx_unlock_spin(&umtx_lock);
1570}
1571
1572/*
1573 * Sleep on a PI mutex.
1574 */
1575static int
1576umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1577 uint32_t owner, const char *wmesg, int timo)
1578{
1579 struct umtxq_chain *uc;
1580 struct thread *td, *td1;
1581 struct umtx_q *uq1;
1582 int pri;
1583 int error = 0;
1584
1585 td = uq->uq_thread;
1586 KASSERT(td == curthread, ("inconsistent uq_thread"));
1587 uc = umtxq_getchain(&uq->uq_key);
1588 UMTXQ_LOCKED_ASSERT(uc);
1589 UMTXQ_BUSY_ASSERT(uc);
1590 umtxq_insert(uq);
1591 mtx_lock_spin(&umtx_lock);
1592 if (pi->pi_owner == NULL) {
1593 mtx_unlock_spin(&umtx_lock);
1594 /* XXX Only look up thread in current process. */
1595 td1 = tdfind(owner, curproc->p_pid);
1596 mtx_lock_spin(&umtx_lock);
1597 if (td1 != NULL) {
1598 if (pi->pi_owner == NULL)
1599 umtx_pi_setowner(pi, td1);
1600 PROC_UNLOCK(td1->td_proc);
1601 }
1602 }
1603
1604 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1605 pri = UPRI(uq1->uq_thread);
1606 if (pri > UPRI(td))
1607 break;
1608 }
1609
1610 if (uq1 != NULL)
1611 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1612 else
1613 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1614
1615 uq->uq_pi_blocked = pi;
1616 thread_lock(td);
1617 td->td_flags |= TDF_UPIBLOCKED;
1618 thread_unlock(td);
1619 umtx_propagate_priority(td);
1620 mtx_unlock_spin(&umtx_lock);
1621 umtxq_unbusy(&uq->uq_key);
1622
1623 if (uq->uq_flags & UQF_UMTXQ) {
1624 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1625 if (error == EWOULDBLOCK)
1626 error = ETIMEDOUT;
1627 if (uq->uq_flags & UQF_UMTXQ) {
1628 umtxq_remove(uq);
1629 }
1630 }
1631 mtx_lock_spin(&umtx_lock);
1632 uq->uq_pi_blocked = NULL;
1633 thread_lock(td);
1634 td->td_flags &= ~TDF_UPIBLOCKED;
1635 thread_unlock(td);
1636 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1637 umtx_unpropagate_priority(pi);
1638 mtx_unlock_spin(&umtx_lock);
1639 umtxq_unlock(&uq->uq_key);
1640
1641 return (error);
1642}
1643
1644/*
1645 * Add a reference to a PI mutex.
1646 */
1647static void
1648umtx_pi_ref(struct umtx_pi *pi)
1649{
1650 struct umtxq_chain *uc;
1651
1652 uc = umtxq_getchain(&pi->pi_key);
1653 UMTXQ_LOCKED_ASSERT(uc);
1654 pi->pi_refcount++;
1655}
1656
1657/*
1658 * Drop a reference to a PI mutex; if the count
1659 * drops to zero, its memory is freed.
1660 */
1661static void
1662umtx_pi_unref(struct umtx_pi *pi)
1663{
1664 struct umtxq_chain *uc;
1665
1666 uc = umtxq_getchain(&pi->pi_key);
1667 UMTXQ_LOCKED_ASSERT(uc);
1668 KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1669 if (--pi->pi_refcount == 0) {
1670 mtx_lock_spin(&umtx_lock);
1671 if (pi->pi_owner != NULL) {
1672 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1673 pi, pi_link);
1674 pi->pi_owner = NULL;
1675 }
1676 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1677 ("blocked queue not empty"));
1678 mtx_unlock_spin(&umtx_lock);
1679 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1680 umtx_pi_free(pi);
1681 }
1682}
1683
1684/*
1685 * Find a PI mutex in hash table.
1686 */
1687static struct umtx_pi *
1688umtx_pi_lookup(struct umtx_key *key)
1689{
1690 struct umtxq_chain *uc;
1691 struct umtx_pi *pi;
1692
1693 uc = umtxq_getchain(key);
1694 UMTXQ_LOCKED_ASSERT(uc);
1695
1696 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1697 if (umtx_key_match(&pi->pi_key, key)) {
1698 return (pi);
1699 }
1700 }
1701 return (NULL);
1702}
1703
1704/*
1705 * Insert a PI mutex into hash table.
1706 */
1707static inline void
1708umtx_pi_insert(struct umtx_pi *pi)
1709{
1710 struct umtxq_chain *uc;
1711
1712 uc = umtxq_getchain(&pi->pi_key);
1713 UMTXQ_LOCKED_ASSERT(uc);
1714 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1715}
1716
1717/*
1718 * Lock a PI mutex.
1719 */
1720static int
1721_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1722 int try)
1723{
1724 struct umtx_q *uq;
1725 struct umtx_pi *pi, *new_pi;
1726 uint32_t id, owner, old;
1727 int error;
1728
1729 id = td->td_tid;
1730 uq = td->td_umtxq;
1731
1732 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1733 &uq->uq_key)) != 0)
1734 return (error);
1735 umtxq_lock(&uq->uq_key);
1736 pi = umtx_pi_lookup(&uq->uq_key);
1737 if (pi == NULL) {
1738 new_pi = umtx_pi_alloc(M_NOWAIT);
1739 if (new_pi == NULL) {
1740 umtxq_unlock(&uq->uq_key);
1741 new_pi = umtx_pi_alloc(M_WAITOK);
1742 umtxq_lock(&uq->uq_key);
1743 pi = umtx_pi_lookup(&uq->uq_key);
1744 if (pi != NULL) {
1745 umtx_pi_free(new_pi);
1746 new_pi = NULL;
1747 }
1748 }
1749 if (new_pi != NULL) {
1750 new_pi->pi_key = uq->uq_key;
1751 umtx_pi_insert(new_pi);
1752 pi = new_pi;
1753 }
1754 }
1755 umtx_pi_ref(pi);
1756 umtxq_unlock(&uq->uq_key);
1757
1758 /*
1759 * Care must be exercised when dealing with the umtx structure. It
1760 * can fault on any access.
1761 */
1762 for (;;) {
1763 /*
1764 * Try the uncontested case. This should be done in userland.
1765 */
1766 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1767
1768 /* The acquire succeeded. */
1769 if (owner == UMUTEX_UNOWNED) {
1770 error = 0;
1771 break;
1772 }
1773
1774 /* The address was invalid. */
1775 if (owner == -1) {
1776 error = EFAULT;
1777 break;
1778 }
1779
1780 /* If no one owns it but it is contested, try to acquire it. */
1781 if (owner == UMUTEX_CONTESTED) {
1782 owner = casuword32(&m->m_owner,
1783 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1784
1785 if (owner == UMUTEX_CONTESTED) {
1786 umtxq_lock(&uq->uq_key);
1787 umtxq_busy(&uq->uq_key);
1788 error = umtx_pi_claim(pi, td);
1789 umtxq_unbusy(&uq->uq_key);
1790 umtxq_unlock(&uq->uq_key);
1791 break;
1792 }
1793
1794 /* The address was invalid. */
1795 if (owner == -1) {
1796 error = EFAULT;
1797 break;
1798 }
1799
1800 /* If this failed the lock has changed, restart. */
1801 continue;
1802 }
1803
1804 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1805 (owner & ~UMUTEX_CONTESTED) == id) {
1806 error = EDEADLK;
1807 break;
1808 }
1809
1810 if (try != 0) {
1811 error = EBUSY;
1812 break;
1813 }
1814
1815 /*
1816 * If we caught a signal, we have already retried once and
1817 * now exit immediately.
1818 */
1819 if (error != 0)
1820 break;
1821
1822 umtxq_lock(&uq->uq_key);
1823 umtxq_busy(&uq->uq_key);
1824 umtxq_unlock(&uq->uq_key);
1825
1826 /*
1827 * Set the contested bit so that a release in user space
1828 * knows to use the system call for unlock. If this fails,
1829 * either someone else has acquired the lock or it has been
1830 * released.
1831 */
1832 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1833
1834 /* The address was invalid. */
1835 if (old == -1) {
1836 umtxq_lock(&uq->uq_key);
1837 umtxq_unbusy(&uq->uq_key);
1838 umtxq_unlock(&uq->uq_key);
1839 error = EFAULT;
1840 break;
1841 }
1842
1843 umtxq_lock(&uq->uq_key);
1844 /*
1845 * If we set the contested bit, sleep. Otherwise the lock changed
1846 * and we need to retry or we lost a race to the thread
1847 * unlocking the umtx.
1848 */
1849 if (old == owner)
1850 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1851 "umtxpi", timo);
1852 else {
1853 umtxq_unbusy(&uq->uq_key);
1854 umtxq_unlock(&uq->uq_key);
1855 }
1856 }
1857
1858 umtxq_lock(&uq->uq_key);
1859 umtx_pi_unref(pi);
1860 umtxq_unlock(&uq->uq_key);
1861
1862 umtx_key_release(&uq->uq_key);
1863 return (error);
1864}
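
/*
 * Illustrative note (not in the original source): the lookup/allocate
 * dance at the top of _do_lock_pi() is the usual pattern for
 * allocating while a non-sleepable lock is held:
 *
 *	pi = umtx_pi_lookup(&key);		// under the chain lock
 *	if (pi == NULL &&
 *	    (new_pi = umtx_pi_alloc(M_NOWAIT)) == NULL) {
 *		umtxq_unlock(&key);
 *		new_pi = umtx_pi_alloc(M_WAITOK);	// may sleep
 *		umtxq_lock(&key);
 *		pi = umtx_pi_lookup(&key);	// recheck: lost a race?
 *	}
 */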
1865
1866/*
1867 * Unlock a PI mutex.
1868 */
1869static int
1870do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1871{
1872 struct umtx_key key;
1873 struct umtx_q *uq_first, *uq_first2, *uq_me;
1874 struct umtx_pi *pi, *pi2;
1875 uint32_t owner, old, id;
1876 int error;
1877 int count;
1878 int pri;
1879
1880 id = td->td_tid;
1881 /*
1882 * Make sure we own this mtx.
1883 */
1884 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1885 if (owner == -1)
1886 return (EFAULT);
1887
1888 if ((owner & ~UMUTEX_CONTESTED) != id)
1889 return (EPERM);
1890
1891 /* This should be done in userland */
1892 if ((owner & UMUTEX_CONTESTED) == 0) {
1893 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1894 if (old == -1)
1895 return (EFAULT);
1896 if (old == owner)
1897 return (0);
1898 owner = old;
1899 }
1900
1901 /* We should only ever be in here for contested locks */
1902 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1903 &key)) != 0)
1904 return (error);
1905
1906 umtxq_lock(&key);
1907 umtxq_busy(&key);
1908 count = umtxq_count_pi(&key, &uq_first);
1909 if (uq_first != NULL) {
1910 mtx_lock_spin(&umtx_lock);
1911 pi = uq_first->uq_pi_blocked;
1912 KASSERT(pi != NULL, ("pi == NULL?"));
1913 if (pi->pi_owner != curthread) {
1914 mtx_unlock_spin(&umtx_lock);
1915 umtxq_unbusy(&key);
1916 umtxq_unlock(&key);
1917 umtx_key_release(&key);
1918 /* userland messed up the mutex */
1919 return (EPERM);
1920 }
1921 uq_me = curthread->td_umtxq;
1922 pi->pi_owner = NULL;
1923 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1924 /* get highest priority thread which is still sleeping. */
1925 uq_first = TAILQ_FIRST(&pi->pi_blocked);
1926 while (uq_first != NULL &&
1927 (uq_first->uq_flags & UQF_UMTXQ) == 0) {
1928 uq_first = TAILQ_NEXT(uq_first, uq_lockq);
1929 }
1930 pri = PRI_MAX;
1931 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1932 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1933 if (uq_first2 != NULL) {
1934 if (pri > UPRI(uq_first2->uq_thread))
1935 pri = UPRI(uq_first2->uq_thread);
1936 }
1937 }
1938 thread_lock(curthread);
1939 sched_unlend_user_prio(curthread, pri);
1940 thread_unlock(curthread);
1941 mtx_unlock_spin(&umtx_lock);
1942 if (uq_first)
1943 umtxq_signal_thread(uq_first);
1944 }
1945 umtxq_unlock(&key);
1946
1947 /*
1948 * When unlocking the umtx, it must be marked as unowned if
1949 * at most one thread is waiting for it.
1950 * Otherwise, it must be marked as contested.
1951 */
1952 old = casuword32(&m->m_owner, owner,
1953 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1954
1955 umtxq_lock(&key);
1956 umtxq_unbusy(&key);
1957 umtxq_unlock(&key);
1958 umtx_key_release(&key);
1959 if (old == -1)
1960 return (EFAULT);
1961 if (old != owner)
1962 return (EINVAL);
1963 return (0);
1964}
1965
1966/*
1967 * Lock a PP mutex.
1968 */
1969static int
1970_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1971 int try)
1972{
1973 struct umtx_q *uq, *uq2;
1974 struct umtx_pi *pi;
1975 uint32_t ceiling;
1976 uint32_t owner, id;
1977 int error, pri, old_inherited_pri, su;
1978
1979 id = td->td_tid;
1980 uq = td->td_umtxq;
1981 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1982 &uq->uq_key)) != 0)
1983 return (error);
1984 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1985 for (;;) {
1986 old_inherited_pri = uq->uq_inherited_pri;
1987 umtxq_lock(&uq->uq_key);
1988 umtxq_busy(&uq->uq_key);
1989 umtxq_unlock(&uq->uq_key);
1990
1991 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1992 if (ceiling > RTP_PRIO_MAX) {
1993 error = EINVAL;
1994 goto out;
1995 }
1996
1997 mtx_lock_spin(&umtx_lock);
1998 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1999 mtx_unlock_spin(&umtx_lock);
2000 error = EINVAL;
2001 goto out;
2002 }
2003 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2004 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2005 thread_lock(td);
2006 if (uq->uq_inherited_pri < UPRI(td))
2007 sched_lend_user_prio(td, uq->uq_inherited_pri);
2008 thread_unlock(td);
2009 }
2010 mtx_unlock_spin(&umtx_lock);
2011
2012 owner = casuword32(&m->m_owner,
2013 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2014
2015 if (owner == UMUTEX_CONTESTED) {
2016 error = 0;
2017 break;
2018 }
2019
2020 /* The address was invalid. */
2021 if (owner == -1) {
2022 error = EFAULT;
2023 break;
2024 }
2025
2026 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
2027 (owner & ~UMUTEX_CONTESTED) == id) {
2028 error = EDEADLK;
2029 break;
2030 }
2031
2032 if (try != 0) {
2033 error = EBUSY;
2034 break;
2035 }
2036
2037 /*
2038 * If we caught a signal, we have already retried once and
2039 * now exit immediately.
2040 */
2041 if (error != 0)
2042 break;
2043
2044 umtxq_lock(&uq->uq_key);
2045 umtxq_insert(uq);
2046 umtxq_unbusy(&uq->uq_key);
2047 error = umtxq_sleep(uq, "umtxpp", timo);
2048 umtxq_remove(uq);
2049 umtxq_unlock(&uq->uq_key);
2050
2051 mtx_lock_spin(&umtx_lock);
2052 uq->uq_inherited_pri = old_inherited_pri;
2053 pri = PRI_MAX;
2054 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2055 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2056 if (uq2 != NULL) {
2057 if (pri > UPRI(uq2->uq_thread))
2058 pri = UPRI(uq2->uq_thread);
2059 }
2060 }
2061 if (pri > uq->uq_inherited_pri)
2062 pri = uq->uq_inherited_pri;
2063 thread_lock(td);
2064 sched_unlend_user_prio(td, pri);
2065 thread_unlock(td);
2066 mtx_unlock_spin(&umtx_lock);
2067 }
2068
2069 if (error != 0) {
2070 mtx_lock_spin(&umtx_lock);
2071 uq->uq_inherited_pri = old_inherited_pri;
2072 pri = PRI_MAX;
2073 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2074 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2075 if (uq2 != NULL) {
2076 if (pri > UPRI(uq2->uq_thread))
2077 pri = UPRI(uq2->uq_thread);
2078 }
2079 }
2080 if (pri > uq->uq_inherited_pri)
2081 pri = uq->uq_inherited_pri;
2082 thread_lock(td);
2083 sched_unlend_user_prio(td, pri);
2084 thread_unlock(td);
2085 mtx_unlock_spin(&umtx_lock);
2086 }
2087
2088out:
2089 umtxq_lock(&uq->uq_key);
2090 umtxq_unbusy(&uq->uq_key);
2091 umtxq_unlock(&uq->uq_key);
2092 umtx_key_release(&uq->uq_key);
2093 return (error);
2094}
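
/*
 * Illustrative note (derived from the code above; not in the original
 * source): m_ceilings[0] is an rtprio-style value in which larger
 * means stronger, and it maps onto the kernel's real-time range as
 *
 *	kern_pri = PRI_MIN_REALTIME + (RTP_PRIO_MAX - ceiling);
 *
 * so a ceiling of RTP_PRIO_MAX yields PRI_MIN_REALTIME, the strongest
 * real-time priority, and a ceiling of 0 yields the weakest.
 */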
2095
2096/*
2097 * Unlock a PP mutex.
2098 */
2099static int
2100do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2101{
2102 struct umtx_key key;
2103 struct umtx_q *uq, *uq2;
2104 struct umtx_pi *pi;
2105 uint32_t owner, id;
2106 uint32_t rceiling;
2107 int error, pri, new_inherited_pri, su;
2108
2109 id = td->td_tid;
2110 uq = td->td_umtxq;
2111 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2112
2113 /*
2114 * Make sure we own this mtx.
2115 */
2116 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2117 if (owner == -1)
2118 return (EFAULT);
2119
2120 if ((owner & ~UMUTEX_CONTESTED) != id)
2121 return (EPERM);
2122
2123 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2124 if (error != 0)
2125 return (error);
2126
2127 if (rceiling == -1)
2128 new_inherited_pri = PRI_MAX;
2129 else {
2130 rceiling = RTP_PRIO_MAX - rceiling;
2131 if (rceiling > RTP_PRIO_MAX)
2132 return (EINVAL);
2133 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2134 }
2135
2136 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2137 &key)) != 0)
2138 return (error);
2139 umtxq_lock(&key);
2140 umtxq_busy(&key);
2141 umtxq_unlock(&key);
2142 /*
2143 * For a priority-protected mutex, always set the unlocked state
2144 * to UMUTEX_CONTESTED so that userland always enters the kernel
2145 * to lock the mutex; this is necessary because the thread's
2146 * priority has to be adjusted for such a mutex.
2147 */
2148 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2149 UMUTEX_CONTESTED);
2150
2151 umtxq_lock(&key);
2152 if (error == 0)
2153 umtxq_signal(&key, 1);
2154 umtxq_unbusy(&key);
2155 umtxq_unlock(&key);
2156
2157 if (error == -1)
2158 error = EFAULT;
2159 else {
2160 mtx_lock_spin(&umtx_lock);
2161 if (su != 0)
2162 uq->uq_inherited_pri = new_inherited_pri;
2163 pri = PRI_MAX;
2164 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2165 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2166 if (uq2 != NULL) {
2167 if (pri > UPRI(uq2->uq_thread))
2168 pri = UPRI(uq2->uq_thread);
2169 }
2170 }
2171 if (pri > uq->uq_inherited_pri)
2172 pri = uq->uq_inherited_pri;
2173 thread_lock(td);
2174 sched_unlend_user_prio(td, pri);
2175 thread_unlock(td);
2176 mtx_unlock_spin(&umtx_lock);
2177 }
2178 umtx_key_release(&key);
2179 return (error);
2180}
2181
2182static int
2183do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2184 uint32_t *old_ceiling)
2185{
2186 struct umtx_q *uq;
2187 uint32_t save_ceiling;
2188 uint32_t owner, id;
2189 uint32_t flags;
2190 int error;
2191
2192 flags = fuword32(&m->m_flags);
2193 if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2194 return (EINVAL);
2195 if (ceiling > RTP_PRIO_MAX)
2196 return (EINVAL);
2197 id = td->td_tid;
2198 uq = td->td_umtxq;
2199 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2200 &uq->uq_key)) != 0)
2201 return (error);
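	/*
	 * To change the ceiling the mutex must be held: either
	 * acquire it here (the unlocked state of a PP mutex is
	 * UMUTEX_CONTESTED) or already be the owner.  All waiters
	 * are woken afterwards so they re-evaluate the new ceiling.
	 */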
2202 for (;;) {
2203 umtxq_lock(&uq->uq_key);
2204 umtxq_busy(&uq->uq_key);
2205 umtxq_unlock(&uq->uq_key);
2206
2207 save_ceiling = fuword32(&m->m_ceilings[0]);
2208
2209 owner = casuword32(&m->m_owner,
2210 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2211
2212 if (owner == UMUTEX_CONTESTED) {
2213 suword32(&m->m_ceilings[0], ceiling);
2214 suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2215 UMUTEX_CONTESTED);
2216 error = 0;
2217 break;
2218 }
2219
2220 /* The address was invalid. */
2221 if (owner == -1) {
2222 error = EFAULT;
2223 break;
2224 }
2225
2226 if ((owner & ~UMUTEX_CONTESTED) == id) {
2227 suword32(&m->m_ceilings[0], ceiling);
2228 error = 0;
2229 break;
2230 }
2231
2232 /*
2233 * If we caught a signal, we have retried and now
2234 * exit immediately.
2235 */
2236 if (error != 0)
2237 break;
2238
2239		/*
2240		 * The mutex is held by another thread: sleep and retry.
2241		 * A PP mutex is always unlocked through the kernel, so
2242		 * the unlocking thread will wake us up.
2243		 */
2244 umtxq_lock(&uq->uq_key);
2245 umtxq_insert(uq);
2246 umtxq_unbusy(&uq->uq_key);
2247 error = umtxq_sleep(uq, "umtxpp", 0);
2248 umtxq_remove(uq);
2249 umtxq_unlock(&uq->uq_key);
2250 }
2251 umtxq_lock(&uq->uq_key);
2252 if (error == 0)
2253 umtxq_signal(&uq->uq_key, INT_MAX);
2254 umtxq_unbusy(&uq->uq_key);
2255 umtxq_unlock(&uq->uq_key);
2256 umtx_key_release(&uq->uq_key);
2257 if (error == 0 && old_ceiling != NULL)
2258 suword32(old_ceiling, save_ceiling);
2259 return (error);
2260}
2261
2262static int
2263_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2264 int mode)
2265{
2266 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2267 case 0:
2268 return (_do_lock_normal(td, m, flags, timo, mode));
2269 case UMUTEX_PRIO_INHERIT:
2270 return (_do_lock_pi(td, m, flags, timo, mode));
2271 case UMUTEX_PRIO_PROTECT:
2272 return (_do_lock_pp(td, m, flags, timo, mode));
2273 }
2274 return (EINVAL);
2275}
2276
2277/*
2278 * Lock a userland POSIX mutex.
2279 */
2280static int
2281do_lock_umutex(struct thread *td, struct umutex *m,
2282 struct timespec *timeout, int mode)
2283{
2284 struct timespec ts, ts2, ts3;
2285 struct timeval tv;
2286 uint32_t flags;
2287 int error;
2288
2289 flags = fuword32(&m->m_flags);
2290 if (flags == -1)
2291 return (EFAULT);
2292
2293 if (timeout == NULL) {
2294 error = _do_lock_umutex(td, m, flags, 0, mode);
2295 /* Mutex locking is restarted if it is interrupted. */
2296 if (error == EINTR && mode != _UMUTEX_WAIT)
2297 error = ERESTART;
2298 } else {
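		/*
		 * Convert the relative timeout into an absolute
		 * deadline on the uptime clock; each retry recomputes
		 * the ticks remaining for tvtohz().
		 */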
2299 getnanouptime(&ts);
2300 timespecadd(&ts, timeout);
2301 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2302 for (;;) {
2303 error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
2304 if (error != ETIMEDOUT)
2305 break;
2306 getnanouptime(&ts2);
2307 if (timespeccmp(&ts2, &ts, >=)) {
2308 error = ETIMEDOUT;
2309 break;
2310 }
2311 ts3 = ts;
2312 timespecsub(&ts3, &ts2);
2313 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2314 }
2315 /* Timed-locking is not restarted. */
2316 if (error == ERESTART)
2317 error = EINTR;
2318 }
2319 return (error);
2320}
2321
2322/*
2323 * Unlock a userland POSIX mutex.
2324 */
2325static int
2326do_unlock_umutex(struct thread *td, struct umutex *m)
2327{
2328 uint32_t flags;
2329
2330 flags = fuword32(&m->m_flags);
2331 if (flags == -1)
2332 return (EFAULT);
2333
2334 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2335 case 0:
2336 return (do_unlock_normal(td, m, flags));
2337 case UMUTEX_PRIO_INHERIT:
2338 return (do_unlock_pi(td, m, flags));
2339 case UMUTEX_PRIO_PROTECT:
2340 return (do_unlock_pp(td, m, flags));
2341 }
2342
2343 return (EINVAL);
2344}
2345
2346static int
2347do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2348 struct timespec *timeout, u_long wflags)
2349{
2350 struct umtx_q *uq;
2351 struct timeval tv;
2352 struct timespec cts, ets, tts;
2353 uint32_t flags;
47#include <sys/eventhandler.h>
48#include <sys/umtx.h>
49
50#include <vm/vm.h>
51#include <vm/vm_param.h>
52#include <vm/pmap.h>
53#include <vm/vm_map.h>
54#include <vm/vm_object.h>
55
56#include <machine/cpu.h>
57
58#ifdef COMPAT_FREEBSD32
59#include <compat/freebsd32/freebsd32_proto.h>
60#endif
61
62enum {
63 TYPE_SIMPLE_WAIT,
64 TYPE_CV,
65 TYPE_SEM,
66 TYPE_SIMPLE_LOCK,
67 TYPE_NORMAL_UMUTEX,
68 TYPE_PI_UMUTEX,
69 TYPE_PP_UMUTEX,
70 TYPE_RWLOCK
71};
72
73#define _UMUTEX_TRY 1
74#define _UMUTEX_WAIT 2
75
76/* Key representing a unique userland synchronization object */
77struct umtx_key {
78 int hash;
79 int type;
80 int shared;
81 union {
82 struct {
83 vm_object_t object;
84 uintptr_t offset;
85 } shared;
86 struct {
87 struct vmspace *vs;
88 uintptr_t addr;
89 } private;
90 struct {
91 void *a;
92 uintptr_t b;
93 } both;
94 } info;
95};
96
97/* Priority inheritance mutex info. */
98struct umtx_pi {
99 /* Owner thread */
100 struct thread *pi_owner;
101
102 /* Reference count */
103 int pi_refcount;
104
105	/* List entry to link PI mutexes held by a thread */
106 TAILQ_ENTRY(umtx_pi) pi_link;
107
108 /* List entry in hash */
109 TAILQ_ENTRY(umtx_pi) pi_hashlink;
110
111 /* List for waiters */
112 TAILQ_HEAD(,umtx_q) pi_blocked;
113
114 /* Identify a userland lock object */
115 struct umtx_key pi_key;
116};
117
118/* A waiter on a userland synchronization object. */
119struct umtx_q {
120 /* Linked list for the hash. */
121 TAILQ_ENTRY(umtx_q) uq_link;
122
123 /* Umtx key. */
124 struct umtx_key uq_key;
125
126 /* Umtx flags. */
127 int uq_flags;
128#define UQF_UMTXQ 0x0001
129
130	/* The thread this queue entry belongs to. */
131 struct thread *uq_thread;
132
133	/*
134	 * Blocked on a PI mutex.  Readers may hold either the chain
135	 * lock or umtx_lock; writers must hold both the chain lock
136	 * and umtx_lock.
137	 */
138 struct umtx_pi *uq_pi_blocked;
139
140 /* On blocked list */
141 TAILQ_ENTRY(umtx_q) uq_lockq;
142
143	/* PI mutexes we own that other threads contend for */
144 TAILQ_HEAD(,umtx_pi) uq_pi_contested;
145
146 /* Inherited priority from PP mutex */
147 u_char uq_inherited_pri;
148
149 /* Spare queue ready to be reused */
150 struct umtxq_queue *uq_spare_queue;
151
152	/* The queue we are on */
153 struct umtxq_queue *uq_cur_queue;
154};
155
156TAILQ_HEAD(umtxq_head, umtx_q);
157
158/* Per-key wait-queue */
159struct umtxq_queue {
160 struct umtxq_head head;
161 struct umtx_key key;
162 LIST_ENTRY(umtxq_queue) link;
163 int length;
164};
165
166LIST_HEAD(umtxq_list, umtxq_queue);
167
168/* Userland lock object's wait-queue chain */
169struct umtxq_chain {
170 /* Lock for this chain. */
171 struct mtx uc_lock;
172
173 /* List of sleep queues. */
174 struct umtxq_list uc_queue[2];
175#define UMTX_SHARED_QUEUE 0
176#define UMTX_EXCLUSIVE_QUEUE 1
177
178 LIST_HEAD(, umtxq_queue) uc_spare_queue;
179
180 /* Busy flag */
181 char uc_busy;
182
183 /* Chain lock waiters */
184 int uc_waiters;
185
186	/* All PI mutexes hashed onto this chain */
187 TAILQ_HEAD(,umtx_pi) uc_pi_list;
188
189};
190
191#define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
192#define UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))
193
194/*
195 * Don't propagate time-sharing priority; there is a security reason.
196 * A user could simply create a PI mutex, let thread A lock it, and
197 * let another thread B block on it.  Because B is sleeping, its
198 * priority would be boosted; this would boost A's priority via
199 * priority propagation too, and A's priority would never be lowered
200 * even if it were using 100% CPU, which is unfair to other processes.
201 */
202
203#define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
204 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
205 PRI_MAX_TIMESHARE : (td)->td_user_pri)
206
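/*
 * UPRI() clamps every time-sharing user priority to PRI_MAX_TIMESHARE,
 * so blocking behind a time-sharing thread never boosts an owner above
 * that level; only real-time priorities are propagated as-is.
 */
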
207#define GOLDEN_RATIO_PRIME 2654404609U
208#define UMTX_CHAINS 128
209#define UMTX_SHIFTS (__WORD_BIT - 7)
210
211#define THREAD_SHARE 0
212#define PROCESS_SHARE 1
213#define AUTO_SHARE 2
214
215#define GET_SHARE(flags) \
216 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
217
218#define BUSY_SPINS 200
219
220static uma_zone_t umtx_pi_zone;
221static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
222static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
223static int umtx_pi_allocated;
224
225SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
226SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
227 &umtx_pi_allocated, 0, "Allocated umtx_pi");
228
229static void umtxq_sysinit(void *);
230static void umtxq_hash(struct umtx_key *key);
231static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
232static void umtxq_lock(struct umtx_key *key);
233static void umtxq_unlock(struct umtx_key *key);
234static void umtxq_busy(struct umtx_key *key);
235static void umtxq_unbusy(struct umtx_key *key);
236static void umtxq_insert_queue(struct umtx_q *uq, int q);
237static void umtxq_remove_queue(struct umtx_q *uq, int q);
238static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
239static int umtxq_count(struct umtx_key *key);
240static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
241static int umtx_key_get(void *addr, int type, int share,
242 struct umtx_key *key);
243static void umtx_key_release(struct umtx_key *key);
244static struct umtx_pi *umtx_pi_alloc(int);
245static void umtx_pi_free(struct umtx_pi *pi);
246static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
247static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
248static void umtx_thread_cleanup(struct thread *td);
249static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
250 struct image_params *imgp __unused);
251SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
252
253#define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
254#define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
255#define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
256
257static struct mtx umtx_lock;
258
259static void
260umtxq_sysinit(void *arg __unused)
261{
262 int i, j;
263
264 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
265 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
266 for (i = 0; i < 2; ++i) {
267 for (j = 0; j < UMTX_CHAINS; ++j) {
268 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
269 MTX_DEF | MTX_DUPOK);
270 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
271 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
272 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
273 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
274 umtxq_chains[i][j].uc_busy = 0;
275 umtxq_chains[i][j].uc_waiters = 0;
276 }
277 }
278 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
279 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
280 EVENTHANDLER_PRI_ANY);
281}
282
283struct umtx_q *
284umtxq_alloc(void)
285{
286 struct umtx_q *uq;
287
288 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
289 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
290 TAILQ_INIT(&uq->uq_spare_queue->head);
291 TAILQ_INIT(&uq->uq_pi_contested);
292 uq->uq_inherited_pri = PRI_MAX;
293 return (uq);
294}
295
296void
297umtxq_free(struct umtx_q *uq)
298{
299 MPASS(uq->uq_spare_queue != NULL);
300 free(uq->uq_spare_queue, M_UMTX);
301 free(uq, M_UMTX);
302}
303
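/*
 * Multiplicative hashing: mix the key bits with a golden-ratio-derived
 * prime and let the high-order bits of the product select one of the
 * UMTX_CHAINS buckets.
 */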
304static inline void
305umtxq_hash(struct umtx_key *key)
306{
307 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
308 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
309}
310
311static inline int
312umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
313{
314 return (k1->type == k2->type &&
315 k1->info.both.a == k2->info.both.a &&
316 k1->info.both.b == k2->info.both.b);
317}
318
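/*
 * Two chain arrays are kept: wait-style objects (the simple wait, cv
 * and sem types) hash into one array and lock-style objects into the
 * other, so the two kinds never contend on the same chain lock.
 */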
319static inline struct umtxq_chain *
320umtxq_getchain(struct umtx_key *key)
321{
322 if (key->type <= TYPE_SEM)
323 return (&umtxq_chains[1][key->hash]);
324 return (&umtxq_chains[0][key->hash]);
325}
326
327/*
328 * Lock a chain.
329 */
330static inline void
331umtxq_lock(struct umtx_key *key)
332{
333 struct umtxq_chain *uc;
334
335 uc = umtxq_getchain(key);
336 mtx_lock(&uc->uc_lock);
337}
338
339/*
340 * Unlock a chain.
341 */
342static inline void
343umtxq_unlock(struct umtx_key *key)
344{
345 struct umtxq_chain *uc;
346
347 uc = umtxq_getchain(key);
348 mtx_unlock(&uc->uc_lock);
349}
350
351/*
352 * Set the chain to the busy state when the following operation
353 * may block (a kernel mutex cannot be used).
354 */
355static inline void
356umtxq_busy(struct umtx_key *key)
357{
358 struct umtxq_chain *uc;
359
360 uc = umtxq_getchain(key);
361 mtx_assert(&uc->uc_lock, MA_OWNED);
362 if (uc->uc_busy) {
363#ifdef SMP
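		/*
		 * Before sleeping, spin for a while in the hope that
		 * the chain is unbusied quickly, avoiding a full
		 * sleep/wakeup round trip.
		 */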
364 if (smp_cpus > 1) {
365 int count = BUSY_SPINS;
366 if (count > 0) {
367 umtxq_unlock(key);
368 while (uc->uc_busy && --count > 0)
369 cpu_spinwait();
370 umtxq_lock(key);
371 }
372 }
373#endif
374 while (uc->uc_busy) {
375 uc->uc_waiters++;
376 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
377 uc->uc_waiters--;
378 }
379 }
380 uc->uc_busy = 1;
381}
382
383/*
384 * Unbusy a chain.
385 */
386static inline void
387umtxq_unbusy(struct umtx_key *key)
388{
389 struct umtxq_chain *uc;
390
391 uc = umtxq_getchain(key);
392 mtx_assert(&uc->uc_lock, MA_OWNED);
393 KASSERT(uc->uc_busy != 0, ("not busy"));
394 uc->uc_busy = 0;
395 if (uc->uc_waiters)
396 wakeup_one(uc);
397}
398
399static struct umtxq_queue *
400umtxq_queue_lookup(struct umtx_key *key, int q)
401{
402 struct umtxq_queue *uh;
403 struct umtxq_chain *uc;
404
405 uc = umtxq_getchain(key);
406 UMTXQ_LOCKED_ASSERT(uc);
407 LIST_FOREACH(uh, &uc->uc_queue[q], link) {
408 if (umtx_key_match(&uh->key, key))
409 return (uh);
410 }
411
412 return (NULL);
413}
414
415static inline void
416umtxq_insert_queue(struct umtx_q *uq, int q)
417{
418 struct umtxq_queue *uh;
419 struct umtxq_chain *uc;
420
421 uc = umtxq_getchain(&uq->uq_key);
422 UMTXQ_LOCKED_ASSERT(uc);
423 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
424 uh = umtxq_queue_lookup(&uq->uq_key, q);
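	/*
	 * Each umtx_q carries a spare queue structure.  If a queue
	 * for this key already exists, donate our spare to the
	 * chain's spare list; otherwise our spare becomes the queue
	 * for this key.
	 */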
425 if (uh != NULL) {
426 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
427 } else {
428 uh = uq->uq_spare_queue;
429 uh->key = uq->uq_key;
430 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
431 }
432 uq->uq_spare_queue = NULL;
433
434 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
435 uh->length++;
436 uq->uq_flags |= UQF_UMTXQ;
437 uq->uq_cur_queue = uh;
438 return;
439}
440
441static inline void
442umtxq_remove_queue(struct umtx_q *uq, int q)
443{
444 struct umtxq_chain *uc;
445 struct umtxq_queue *uh;
446
447 uc = umtxq_getchain(&uq->uq_key);
448 UMTXQ_LOCKED_ASSERT(uc);
449 if (uq->uq_flags & UQF_UMTXQ) {
450 uh = uq->uq_cur_queue;
451 TAILQ_REMOVE(&uh->head, uq, uq_link);
452 uh->length--;
453 uq->uq_flags &= ~UQF_UMTXQ;
454 if (TAILQ_EMPTY(&uh->head)) {
455 KASSERT(uh->length == 0,
456 ("inconsistent umtxq_queue length"));
457 LIST_REMOVE(uh, link);
458 } else {
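			/*
			 * Waiters remain: reclaim a donated spare from
			 * the chain to replace the one given up on
			 * insert.
			 */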
459 uh = LIST_FIRST(&uc->uc_spare_queue);
460 KASSERT(uh != NULL, ("uc_spare_queue is empty"));
461 LIST_REMOVE(uh, link);
462 }
463 uq->uq_spare_queue = uh;
464 uq->uq_cur_queue = NULL;
465 }
466}
467
468/*
469 * Return the number of waiters on the shared queue.
470 */
471static int
472umtxq_count(struct umtx_key *key)
473{
474 struct umtxq_chain *uc;
475 struct umtxq_queue *uh;
476
477 uc = umtxq_getchain(key);
478 UMTXQ_LOCKED_ASSERT(uc);
479 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
480 if (uh != NULL)
481 return (uh->length);
482 return (0);
483}
484
485/*
486 * Return the number of PI waiters and, via *first, the first
487 * waiter.
488 */
489static int
490umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
491{
492 struct umtxq_chain *uc;
493 struct umtxq_queue *uh;
494
495 *first = NULL;
496 uc = umtxq_getchain(key);
497 UMTXQ_LOCKED_ASSERT(uc);
498 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
499 if (uh != NULL) {
500 *first = TAILQ_FIRST(&uh->head);
501 return (uh->length);
502 }
503 return (0);
504}
505
506/*
507 * Wake up threads waiting on a userland object.
508 */
509
510static int
511umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
512{
513 struct umtxq_chain *uc;
514 struct umtxq_queue *uh;
515 struct umtx_q *uq;
516 int ret;
517
518 ret = 0;
519 uc = umtxq_getchain(key);
520 UMTXQ_LOCKED_ASSERT(uc);
521 uh = umtxq_queue_lookup(key, q);
522 if (uh != NULL) {
523 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
524 umtxq_remove_queue(uq, q);
525 wakeup(uq);
526 if (++ret >= n_wake)
527 return (ret);
528 }
529 }
530 return (ret);
531}
532
533
534/*
535 * Wake up specified thread.
536 */
537static inline void
538umtxq_signal_thread(struct umtx_q *uq)
539{
540 struct umtxq_chain *uc;
541
542 uc = umtxq_getchain(&uq->uq_key);
543 UMTXQ_LOCKED_ASSERT(uc);
544 umtxq_remove(uq);
545 wakeup(uq);
546}
547
548/*
549 * Put the thread into a sleep state; before sleeping, check
550 * whether the thread was already removed from the umtx queue.
551 */
552static inline int
553umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
554{
555 struct umtxq_chain *uc;
556 int error;
557
558 uc = umtxq_getchain(&uq->uq_key);
559 UMTXQ_LOCKED_ASSERT(uc);
560 if (!(uq->uq_flags & UQF_UMTXQ))
561 return (0);
562 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
563 if (error == EWOULDBLOCK)
564 error = ETIMEDOUT;
565 return (error);
566}
567
568/*
569 * Convert userspace address into unique logical address.
570 */
571static int
572umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
573{
574 struct thread *td = curthread;
575 vm_map_t map;
576 vm_map_entry_t entry;
577 vm_pindex_t pindex;
578 vm_prot_t prot;
579 boolean_t wired;
580
581 key->type = type;
582 if (share == THREAD_SHARE) {
583 key->shared = 0;
584 key->info.private.vs = td->td_proc->p_vmspace;
585 key->info.private.addr = (uintptr_t)addr;
586 } else {
587 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
588 map = &td->td_proc->p_vmspace->vm_map;
589 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
590 &entry, &key->info.shared.object, &pindex, &prot,
591 &wired) != KERN_SUCCESS) {
592 return EFAULT;
593 }
594
595 if ((share == PROCESS_SHARE) ||
596 (share == AUTO_SHARE &&
597 VM_INHERIT_SHARE == entry->inheritance)) {
598 key->shared = 1;
599 key->info.shared.offset = entry->offset + entry->start -
600 (vm_offset_t)addr;
601 vm_object_reference(key->info.shared.object);
602 } else {
603 key->shared = 0;
604 key->info.private.vs = td->td_proc->p_vmspace;
605 key->info.private.addr = (uintptr_t)addr;
606 }
607 vm_map_lookup_done(map, entry);
608 }
609
610 umtxq_hash(key);
611 return (0);
612}
613
614/*
615 * Release key.
616 */
617static inline void
618umtx_key_release(struct umtx_key *key)
619{
620 if (key->shared)
621 vm_object_deallocate(key->info.shared.object);
622}
623
624/*
625 * Lock a umtx object.
626 */
627static int
628_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
629{
630 struct umtx_q *uq;
631 u_long owner;
632 u_long old;
633 int error = 0;
634
635 uq = td->td_umtxq;
636
637 /*
638 * Care must be exercised when dealing with umtx structure. It
639 * can fault on any access.
640 */
641 for (;;) {
642 /*
643 * Try the uncontested case. This should be done in userland.
644 */
645 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
646
647 /* The acquire succeeded. */
648 if (owner == UMTX_UNOWNED)
649 return (0);
650
651 /* The address was invalid. */
652 if (owner == -1)
653 return (EFAULT);
654
655 /* If no one owns it but it is contested try to acquire it. */
656 if (owner == UMTX_CONTESTED) {
657 owner = casuword(&umtx->u_owner,
658 UMTX_CONTESTED, id | UMTX_CONTESTED);
659
660 if (owner == UMTX_CONTESTED)
661 return (0);
662
663 /* The address was invalid. */
664 if (owner == -1)
665 return (EFAULT);
666
667 /* If this failed the lock has changed, restart. */
668 continue;
669 }
670
671 /*
672 * If we caught a signal, we have retried and now
673 * exit immediately.
674 */
675 if (error != 0)
676 return (error);
677
678 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
679 AUTO_SHARE, &uq->uq_key)) != 0)
680 return (error);
681
682 umtxq_lock(&uq->uq_key);
683 umtxq_busy(&uq->uq_key);
684 umtxq_insert(uq);
685 umtxq_unbusy(&uq->uq_key);
686 umtxq_unlock(&uq->uq_key);
687
688 /*
689 * Set the contested bit so that a release in user space
690 * knows to use the system call for unlock. If this fails
691 * either some one else has acquired the lock or it has been
692 * released.
693 */
694 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
695
696 /* The address was invalid. */
697 if (old == -1) {
698 umtxq_lock(&uq->uq_key);
699 umtxq_remove(uq);
700 umtxq_unlock(&uq->uq_key);
701 umtx_key_release(&uq->uq_key);
702 return (EFAULT);
703 }
704
705		/*
706		 * If we set the contested bit, sleep.  Otherwise the lock
707		 * changed: either we need to retry, or we lost a race to
708		 * the thread unlocking the umtx.
709		 */
710 umtxq_lock(&uq->uq_key);
711 if (old == owner)
712 error = umtxq_sleep(uq, "umtx", timo);
713 umtxq_remove(uq);
714 umtxq_unlock(&uq->uq_key);
715 umtx_key_release(&uq->uq_key);
716 }
717
718 return (0);
719}
720
721/*
722 * Lock a umtx object.
723 */
724static int
725do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
726 struct timespec *timeout)
727{
728 struct timespec ts, ts2, ts3;
729 struct timeval tv;
730 int error;
731
732 if (timeout == NULL) {
733 error = _do_lock_umtx(td, umtx, id, 0);
734 /* Mutex locking is restarted if it is interrupted. */
735 if (error == EINTR)
736 error = ERESTART;
737 } else {
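		/*
		 * Convert the relative timeout into an absolute
		 * deadline on the uptime clock; each retry recomputes
		 * the ticks remaining for tvtohz().
		 */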
738 getnanouptime(&ts);
739 timespecadd(&ts, timeout);
740 TIMESPEC_TO_TIMEVAL(&tv, timeout);
741 for (;;) {
742 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
743 if (error != ETIMEDOUT)
744 break;
745 getnanouptime(&ts2);
746 if (timespeccmp(&ts2, &ts, >=)) {
747 error = ETIMEDOUT;
748 break;
749 }
750 ts3 = ts;
751 timespecsub(&ts3, &ts2);
752 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
753 }
754 /* Timed-locking is not restarted. */
755 if (error == ERESTART)
756 error = EINTR;
757 }
758 return (error);
759}
760
761/*
762 * Unlock a umtx object.
763 */
764static int
765do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
766{
767 struct umtx_key key;
768 u_long owner;
769 u_long old;
770 int error;
771 int count;
772
773 /*
774 * Make sure we own this mtx.
775 */
776 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
777 if (owner == -1)
778 return (EFAULT);
779
780 if ((owner & ~UMTX_CONTESTED) != id)
781 return (EPERM);
782
783 /* This should be done in userland */
784 if ((owner & UMTX_CONTESTED) == 0) {
785 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
786 if (old == -1)
787 return (EFAULT);
788 if (old == owner)
789 return (0);
790 owner = old;
791 }
792
793 /* We should only ever be in here for contested locks */
794 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
795 &key)) != 0)
796 return (error);
797
798 umtxq_lock(&key);
799 umtxq_busy(&key);
800 count = umtxq_count(&key);
801 umtxq_unlock(&key);
802
803	/*
804	 * When unlocking the umtx, it must be marked as unowned if
805	 * at most one thread is waiting for it.
806	 * Otherwise, it must be marked as contested.
807	 */
808 old = casuword(&umtx->u_owner, owner,
809 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
810 umtxq_lock(&key);
811 umtxq_signal(&key,1);
812 umtxq_unbusy(&key);
813 umtxq_unlock(&key);
814 umtx_key_release(&key);
815 if (old == -1)
816 return (EFAULT);
817 if (old != owner)
818 return (EINVAL);
819 return (0);
820}
821
822#ifdef COMPAT_FREEBSD32
823
824/*
825 * Lock a umtx object.
826 */
827static int
828_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
829{
830 struct umtx_q *uq;
831 uint32_t owner;
832 uint32_t old;
833 int error = 0;
834
835 uq = td->td_umtxq;
836
837 /*
838 * Care must be exercised when dealing with umtx structure. It
839 * can fault on any access.
840 */
841 for (;;) {
842 /*
843 * Try the uncontested case. This should be done in userland.
844 */
845 owner = casuword32(m, UMUTEX_UNOWNED, id);
846
847 /* The acquire succeeded. */
848 if (owner == UMUTEX_UNOWNED)
849 return (0);
850
851 /* The address was invalid. */
852 if (owner == -1)
853 return (EFAULT);
854
855 /* If no one owns it but it is contested try to acquire it. */
856 if (owner == UMUTEX_CONTESTED) {
857 owner = casuword32(m,
858 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
859 if (owner == UMUTEX_CONTESTED)
860 return (0);
861
862 /* The address was invalid. */
863 if (owner == -1)
864 return (EFAULT);
865
866 /* If this failed the lock has changed, restart. */
867 continue;
868 }
869
870 /*
871 * If we caught a signal, we have retried and now
872 * exit immediately.
873 */
874 if (error != 0)
875 return (error);
876
877 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
878 AUTO_SHARE, &uq->uq_key)) != 0)
879 return (error);
880
881 umtxq_lock(&uq->uq_key);
882 umtxq_busy(&uq->uq_key);
883 umtxq_insert(uq);
884 umtxq_unbusy(&uq->uq_key);
885 umtxq_unlock(&uq->uq_key);
886
887 /*
888 * Set the contested bit so that a release in user space
889 * knows to use the system call for unlock. If this fails
890 * either some one else has acquired the lock or it has been
891 * released.
892 */
893 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
894
895 /* The address was invalid. */
896 if (old == -1) {
897 umtxq_lock(&uq->uq_key);
898 umtxq_remove(uq);
899 umtxq_unlock(&uq->uq_key);
900 umtx_key_release(&uq->uq_key);
901 return (EFAULT);
902 }
903
904		/*
905		 * If we set the contested bit, sleep.  Otherwise the lock
906		 * changed: either we need to retry, or we lost a race to
907		 * the thread unlocking the umtx.
908		 */
909 umtxq_lock(&uq->uq_key);
910 if (old == owner)
911 error = umtxq_sleep(uq, "umtx", timo);
912 umtxq_remove(uq);
913 umtxq_unlock(&uq->uq_key);
914 umtx_key_release(&uq->uq_key);
915 }
916
917 return (0);
918}
919
920/*
921 * Lock a umtx object.
922 */
923static int
924do_lock_umtx32(struct thread *td, void *m, uint32_t id,
925 struct timespec *timeout)
926{
927 struct timespec ts, ts2, ts3;
928 struct timeval tv;
929 int error;
930
931 if (timeout == NULL) {
932 error = _do_lock_umtx32(td, m, id, 0);
933 /* Mutex locking is restarted if it is interrupted. */
934 if (error == EINTR)
935 error = ERESTART;
936 } else {
937 getnanouptime(&ts);
938 timespecadd(&ts, timeout);
939 TIMESPEC_TO_TIMEVAL(&tv, timeout);
940 for (;;) {
941 error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
942 if (error != ETIMEDOUT)
943 break;
944 getnanouptime(&ts2);
945 if (timespeccmp(&ts2, &ts, >=)) {
946 error = ETIMEDOUT;
947 break;
948 }
949 ts3 = ts;
950 timespecsub(&ts3, &ts2);
951 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
952 }
953 /* Timed-locking is not restarted. */
954 if (error == ERESTART)
955 error = EINTR;
956 }
957 return (error);
958}
959
960/*
961 * Unlock a umtx object.
962 */
963static int
964do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
965{
966 struct umtx_key key;
967 uint32_t owner;
968 uint32_t old;
969 int error;
970 int count;
971
972 /*
973 * Make sure we own this mtx.
974 */
975 owner = fuword32(m);
976 if (owner == -1)
977 return (EFAULT);
978
979 if ((owner & ~UMUTEX_CONTESTED) != id)
980 return (EPERM);
981
982 /* This should be done in userland */
983 if ((owner & UMUTEX_CONTESTED) == 0) {
984 old = casuword32(m, owner, UMUTEX_UNOWNED);
985 if (old == -1)
986 return (EFAULT);
987 if (old == owner)
988 return (0);
989 owner = old;
990 }
991
992 /* We should only ever be in here for contested locks */
993 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
994 &key)) != 0)
995 return (error);
996
997 umtxq_lock(&key);
998 umtxq_busy(&key);
999 count = umtxq_count(&key);
1000 umtxq_unlock(&key);
1001
1002	/*
1003	 * When unlocking the umtx, it must be marked as unowned if
1004	 * at most one thread is waiting for it.
1005	 * Otherwise, it must be marked as contested.
1006	 */
1007 old = casuword32(m, owner,
1008 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1009 umtxq_lock(&key);
1010 umtxq_signal(&key,1);
1011 umtxq_unbusy(&key);
1012 umtxq_unlock(&key);
1013 umtx_key_release(&key);
1014 if (old == -1)
1015 return (EFAULT);
1016 if (old != owner)
1017 return (EINVAL);
1018 return (0);
1019}
1020#endif
1021
1022/*
1023 * Fetch and compare value, sleep on the address if value is not changed.
1024 */
1025static int
1026do_wait(struct thread *td, void *addr, u_long id,
1027 struct timespec *timeout, int compat32, int is_private)
1028{
1029 struct umtx_q *uq;
1030 struct timespec ts, ts2, ts3;
1031 struct timeval tv;
1032 u_long tmp;
1033 int error = 0;
1034
1035 uq = td->td_umtxq;
1036 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
1037 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
1038 return (error);
1039
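	/*
	 * Insert ourselves into the queue before reading the value:
	 * a wakeup racing between the read and the sleep removes us
	 * from the queue, and umtxq_sleep() then returns immediately.
	 */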
1040 umtxq_lock(&uq->uq_key);
1041 umtxq_insert(uq);
1042 umtxq_unlock(&uq->uq_key);
1043 if (compat32 == 0)
1044 tmp = fuword(addr);
1045 else
1046 tmp = (unsigned int)fuword32(addr);
1047 if (tmp != id) {
1048 umtxq_lock(&uq->uq_key);
1049 umtxq_remove(uq);
1050 umtxq_unlock(&uq->uq_key);
1051 } else if (timeout == NULL) {
1052 umtxq_lock(&uq->uq_key);
1053 error = umtxq_sleep(uq, "uwait", 0);
1054 umtxq_remove(uq);
1055 umtxq_unlock(&uq->uq_key);
1056 } else {
1057 getnanouptime(&ts);
1058 timespecadd(&ts, timeout);
1059 TIMESPEC_TO_TIMEVAL(&tv, timeout);
1060 umtxq_lock(&uq->uq_key);
1061 for (;;) {
1062 error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
1063 if (!(uq->uq_flags & UQF_UMTXQ)) {
1064 error = 0;
1065 break;
1066 }
1067 if (error != ETIMEDOUT)
1068 break;
1069 umtxq_unlock(&uq->uq_key);
1070 getnanouptime(&ts2);
1071 if (timespeccmp(&ts2, &ts, >=)) {
1072 error = ETIMEDOUT;
1073 umtxq_lock(&uq->uq_key);
1074 break;
1075 }
1076 ts3 = ts;
1077 timespecsub(&ts3, &ts2);
1078 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1079 umtxq_lock(&uq->uq_key);
1080 }
1081 umtxq_remove(uq);
1082 umtxq_unlock(&uq->uq_key);
1083 }
1084 umtx_key_release(&uq->uq_key);
1085 if (error == ERESTART)
1086 error = EINTR;
1087 return (error);
1088}
1089
1090/*
1091 * Wake up threads sleeping on the specified address.
1092 */
1093int
1094kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1095{
1096 struct umtx_key key;
1097 int ret;
1098
1099 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1100 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1101 return (ret);
1102 umtxq_lock(&key);
1103 ret = umtxq_signal(&key, n_wake);
1104 umtxq_unlock(&key);
1105 umtx_key_release(&key);
1106 return (0);
1107}
1108
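/*
 * Illustrative userland pairing for the wait/wake primitives above.
 * This is only a sketch; it assumes the _umtx_op(2) system call and
 * the UMTX_OP_WAIT/UMTX_OP_WAKE operations declared in <sys/umtx.h>:
 *
 *	volatile u_long state = 0;
 *
 *	Waiter: sleep while 'state' still holds the expected value.
 *		while (state == 0)
 *			_umtx_op((void *)&state, UMTX_OP_WAIT, 0,
 *			    NULL, NULL);
 *
 *	Waker: publish the change, then wake one sleeping thread.
 *		state = 1;
 *		_umtx_op((void *)&state, UMTX_OP_WAKE, 1, NULL, NULL);
 */
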
1109/*
1110 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1111 */
1112static int
1113_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1114 int mode)
1115{
1116 struct umtx_q *uq;
1117 uint32_t owner, old, id;
1118 int error = 0;
1119
1120 id = td->td_tid;
1121 uq = td->td_umtxq;
1122
1123 /*
1124 * Care must be exercised when dealing with umtx structure. It
1125 * can fault on any access.
1126 */
1127 for (;;) {
1128 owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
1129 if (mode == _UMUTEX_WAIT) {
1130 if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
1131 return (0);
1132 } else {
1133 /*
1134 * Try the uncontested case. This should be done in userland.
1135 */
1136 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1137
1138 /* The acquire succeeded. */
1139 if (owner == UMUTEX_UNOWNED)
1140 return (0);
1141
1142 /* The address was invalid. */
1143 if (owner == -1)
1144 return (EFAULT);
1145
1146 /* If no one owns it but it is contested try to acquire it. */
1147 if (owner == UMUTEX_CONTESTED) {
1148 owner = casuword32(&m->m_owner,
1149 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1150
1151 if (owner == UMUTEX_CONTESTED)
1152 return (0);
1153
1154 /* The address was invalid. */
1155 if (owner == -1)
1156 return (EFAULT);
1157
1158 /* If this failed the lock has changed, restart. */
1159 continue;
1160 }
1161 }
1162
1163 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1164 (owner & ~UMUTEX_CONTESTED) == id)
1165 return (EDEADLK);
1166
1167 if (mode == _UMUTEX_TRY)
1168 return (EBUSY);
1169
1170 /*
1171 * If we caught a signal, we have retried and now
1172 * exit immediately.
1173 */
1174 if (error != 0)
1175 return (error);
1176
1177 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1178 GET_SHARE(flags), &uq->uq_key)) != 0)
1179 return (error);
1180
1181 umtxq_lock(&uq->uq_key);
1182 umtxq_busy(&uq->uq_key);
1183 umtxq_insert(uq);
1184 umtxq_unlock(&uq->uq_key);
1185
1186 /*
1187 * Set the contested bit so that a release in user space
1188 * knows to use the system call for unlock. If this fails
1189 * either some one else has acquired the lock or it has been
1190 * released.
1191 */
1192 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1193
1194 /* The address was invalid. */
1195 if (old == -1) {
1196 umtxq_lock(&uq->uq_key);
1197 umtxq_remove(uq);
1198 umtxq_unbusy(&uq->uq_key);
1199 umtxq_unlock(&uq->uq_key);
1200 umtx_key_release(&uq->uq_key);
1201 return (EFAULT);
1202 }
1203
1204		/*
1205		 * If we set the contested bit, sleep.  Otherwise the lock
1206		 * changed: either we need to retry, or we lost a race to
1207		 * the thread unlocking the umtx.
1208		 */
1209 umtxq_lock(&uq->uq_key);
1210 umtxq_unbusy(&uq->uq_key);
1211 if (old == owner)
1212 error = umtxq_sleep(uq, "umtxn", timo);
1213 umtxq_remove(uq);
1214 umtxq_unlock(&uq->uq_key);
1215 umtx_key_release(&uq->uq_key);
1216 }
1217
1218 return (0);
1219}
1220
1224/*
1225 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1226 */
1227static int
1228do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1229{
1230 struct umtx_key key;
1231 uint32_t owner, old, id;
1232 int error;
1233 int count;
1234
1235 id = td->td_tid;
1236 /*
1237 * Make sure we own this mtx.
1238 */
1239 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1240 if (owner == -1)
1241 return (EFAULT);
1242
1243 if ((owner & ~UMUTEX_CONTESTED) != id)
1244 return (EPERM);
1245
1246 if ((owner & UMUTEX_CONTESTED) == 0) {
1247 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1248 if (old == -1)
1249 return (EFAULT);
1250 if (old == owner)
1251 return (0);
1252 owner = old;
1253 }
1254
1255 /* We should only ever be in here for contested locks */
1256 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1257 &key)) != 0)
1258 return (error);
1259
1260 umtxq_lock(&key);
1261 umtxq_busy(&key);
1262 count = umtxq_count(&key);
1263 umtxq_unlock(&key);
1264
1265	/*
1266	 * When unlocking the umtx, it must be marked as unowned if
1267	 * at most one thread is waiting for it.
1268	 * Otherwise, it must be marked as contested.
1269	 */
1270 old = casuword32(&m->m_owner, owner,
1271 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1272 umtxq_lock(&key);
1273 umtxq_signal(&key,1);
1274 umtxq_unbusy(&key);
1275 umtxq_unlock(&key);
1276 umtx_key_release(&key);
1277 if (old == -1)
1278 return (EFAULT);
1279 if (old != owner)
1280 return (EINVAL);
1281 return (0);
1282}
1283
1284/*
1285 * Check if the mutex is available and wake up a waiter,
1286 * only for simple mutex.
1287 */
1288static int
1289do_wake_umutex(struct thread *td, struct umutex *m)
1290{
1291 struct umtx_key key;
1292 uint32_t owner;
1293 uint32_t flags;
1294 int error;
1295 int count;
1296
1297 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1298 if (owner == -1)
1299 return (EFAULT);
1300
1301 if ((owner & ~UMUTEX_CONTESTED) != 0)
1302 return (0);
1303
1304 flags = fuword32(&m->m_flags);
1305
1306 /* We should only ever be in here for contested locks */
1307 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1308 &key)) != 0)
1309 return (error);
1310
1311 umtxq_lock(&key);
1312 umtxq_busy(&key);
1313 count = umtxq_count(&key);
1314 umtxq_unlock(&key);
1315
1316 if (count <= 1)
1317 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
1318
1319 umtxq_lock(&key);
1320 if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1321 umtxq_signal(&key, 1);
1322 umtxq_unbusy(&key);
1323 umtxq_unlock(&key);
1324 umtx_key_release(&key);
1325 return (0);
1326}
1327
1328static inline struct umtx_pi *
1329umtx_pi_alloc(int flags)
1330{
1331 struct umtx_pi *pi;
1332
1333 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1334 TAILQ_INIT(&pi->pi_blocked);
1335 atomic_add_int(&umtx_pi_allocated, 1);
1336 return (pi);
1337}
1338
1339static inline void
1340umtx_pi_free(struct umtx_pi *pi)
1341{
1342 uma_zfree(umtx_pi_zone, pi);
1343 atomic_add_int(&umtx_pi_allocated, -1);
1344}
1345
1346/*
1347 * Adjust the thread's position on the PI mutex's blocked list
1348 * after its priority has been changed.
1349 */
1350static int
1351umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1352{
1353 struct umtx_q *uq, *uq1, *uq2;
1354 struct thread *td1;
1355
1356 mtx_assert(&umtx_lock, MA_OWNED);
1357 if (pi == NULL)
1358 return (0);
1359
1360 uq = td->td_umtxq;
1361
1362 /*
1363 * Check if the thread needs to be moved on the blocked chain.
1364 * It needs to be moved if either its priority is lower than
1365 * the previous thread or higher than the next thread.
1366 */
1367 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1368 uq2 = TAILQ_NEXT(uq, uq_lockq);
1369 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1370 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1371 /*
1372 * Remove thread from blocked chain and determine where
1373 * it should be moved to.
1374 */
1375 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1376 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1377 td1 = uq1->uq_thread;
1378 MPASS(td1->td_proc->p_magic == P_MAGIC);
1379 if (UPRI(td1) > UPRI(td))
1380 break;
1381 }
1382
1383 if (uq1 == NULL)
1384 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1385 else
1386 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1387 }
1388 return (1);
1389}
1390
1391/*
1392 * Propagate priority when a thread is blocked on POSIX
1393 * PI mutex.
1394 */
1395static void
1396umtx_propagate_priority(struct thread *td)
1397{
1398 struct umtx_q *uq;
1399 struct umtx_pi *pi;
1400 int pri;
1401
1402 mtx_assert(&umtx_lock, MA_OWNED);
1403 pri = UPRI(td);
1404 uq = td->td_umtxq;
1405 pi = uq->uq_pi_blocked;
1406 if (pi == NULL)
1407 return;
1408
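	/*
	 * Walk up the chain of PI-mutex owners, lending our priority
	 * to each owner with a worse lent priority and following the
	 * mutex that owner is itself blocked on; stop when an owner
	 * is not blocked or already runs at least as high as we do.
	 */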
1409 for (;;) {
1410 td = pi->pi_owner;
1411 if (td == NULL || td == curthread)
1412 return;
1413
1414 MPASS(td->td_proc != NULL);
1415 MPASS(td->td_proc->p_magic == P_MAGIC);
1416
1417 thread_lock(td);
1418 if (td->td_lend_user_pri > pri)
1419 sched_lend_user_prio(td, pri);
1420 else {
1421 thread_unlock(td);
1422 break;
1423 }
1424 thread_unlock(td);
1425
1426 /*
1427 * Pick up the lock that td is blocked on.
1428 */
1429 uq = td->td_umtxq;
1430 pi = uq->uq_pi_blocked;
1431 /* Resort td on the list if needed. */
1432 if (!umtx_pi_adjust_thread(pi, td))
1433 break;
1434 }
1435}
1436
1437/*
1438 * Unpropagate priority for a PI mutex when a thread blocked on
1439 * it is interrupted by signal or resumed by others.
1440 */
1441static void
1442umtx_unpropagate_priority(struct umtx_pi *pi)
1443{
1444 struct umtx_q *uq, *uq_owner;
1445 struct umtx_pi *pi2;
1446 int pri, oldpri;
1447
1448 mtx_assert(&umtx_lock, MA_OWNED);
1449
1450 while (pi != NULL && pi->pi_owner != NULL) {
1451 pri = PRI_MAX;
1452 uq_owner = pi->pi_owner->td_umtxq;
1453
1454 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1455 uq = TAILQ_FIRST(&pi2->pi_blocked);
1456 if (uq != NULL) {
1457 if (pri > UPRI(uq->uq_thread))
1458 pri = UPRI(uq->uq_thread);
1459 }
1460 }
1461
1462 if (pri > uq_owner->uq_inherited_pri)
1463 pri = uq_owner->uq_inherited_pri;
1464 thread_lock(pi->pi_owner);
1465 oldpri = pi->pi_owner->td_user_pri;
1466 sched_unlend_user_prio(pi->pi_owner, pri);
1467 thread_unlock(pi->pi_owner);
1468 if (uq_owner->uq_pi_blocked != NULL)
1469 umtx_pi_adjust_locked(pi->pi_owner, oldpri);
1470 pi = uq_owner->uq_pi_blocked;
1471 }
1472}
1473
1474/*
1475 * Insert a PI mutex into owned list.
1476 */
1477static void
1478umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1479{
1480 struct umtx_q *uq_owner;
1481
1482 uq_owner = owner->td_umtxq;
1483 mtx_assert(&umtx_lock, MA_OWNED);
1484 if (pi->pi_owner != NULL)
1485		panic("pi_owner != NULL");
1486 pi->pi_owner = owner;
1487 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1488}
1489
1490/*
1491 * Claim ownership of a PI mutex.
1492 */
1493static int
1494umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1495{
1496 struct umtx_q *uq, *uq_owner;
1497
1498 uq_owner = owner->td_umtxq;
1499 mtx_lock_spin(&umtx_lock);
1500 if (pi->pi_owner == owner) {
1501 mtx_unlock_spin(&umtx_lock);
1502 return (0);
1503 }
1504
1505 if (pi->pi_owner != NULL) {
1506		/*
1507		 * Userland may have already messed up the mutex; sigh.
1508		 */
1509 mtx_unlock_spin(&umtx_lock);
1510 return (EPERM);
1511 }
1512 umtx_pi_setowner(pi, owner);
1513 uq = TAILQ_FIRST(&pi->pi_blocked);
1514 if (uq != NULL) {
1515 int pri;
1516
1517 pri = UPRI(uq->uq_thread);
1518 thread_lock(owner);
1519 if (pri < UPRI(owner))
1520 sched_lend_user_prio(owner, pri);
1521 thread_unlock(owner);
1522 }
1523 mtx_unlock_spin(&umtx_lock);
1524 return (0);
1525}
1526
1527static void
1528umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
1529{
1530 struct umtx_q *uq;
1531 struct umtx_pi *pi;
1532
1533 uq = td->td_umtxq;
1534 /*
1535 * Pick up the lock that td is blocked on.
1536 */
1537 pi = uq->uq_pi_blocked;
1538 MPASS(pi != NULL);
1539
1540 /* Resort the turnstile on the list. */
1541 if (!umtx_pi_adjust_thread(pi, td))
1542 return;
1543
1544 /*
1545 * If our priority was lowered and we are at the head of the
1546 * turnstile, then propagate our new priority up the chain.
1547 */
1548 if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
1549 umtx_propagate_priority(td);
1550}
1551
1552/*
1553 * Adjust a thread's position on the blocked list of its PI mutex;
1554 * this may trigger a new round of priority propagation.
1555 */
1556void
1557umtx_pi_adjust(struct thread *td, u_char oldpri)
1558{
1559 struct umtx_q *uq;
1560 struct umtx_pi *pi;
1561
1562 uq = td->td_umtxq;
1563 mtx_lock_spin(&umtx_lock);
1564 /*
1565 * Pick up the lock that td is blocked on.
1566 */
1567 pi = uq->uq_pi_blocked;
1568 if (pi != NULL)
1569 umtx_pi_adjust_locked(td, oldpri);
1570 mtx_unlock_spin(&umtx_lock);
1571}
1572
1573/*
1574 * Sleep on a PI mutex.
1575 */
1576static int
1577umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1578 uint32_t owner, const char *wmesg, int timo)
1579{
1580 struct umtxq_chain *uc;
1581 struct thread *td, *td1;
1582 struct umtx_q *uq1;
1583 int pri;
1584 int error = 0;
1585
1586 td = uq->uq_thread;
1587 KASSERT(td == curthread, ("inconsistent uq_thread"));
1588 uc = umtxq_getchain(&uq->uq_key);
1589 UMTXQ_LOCKED_ASSERT(uc);
1590 UMTXQ_BUSY_ASSERT(uc);
1591 umtxq_insert(uq);
1592 mtx_lock_spin(&umtx_lock);
1593 if (pi->pi_owner == NULL) {
1594 mtx_unlock_spin(&umtx_lock);
1595 /* XXX Only look up thread in current process. */
1596 td1 = tdfind(owner, curproc->p_pid);
1597 mtx_lock_spin(&umtx_lock);
1598 if (td1 != NULL) {
1599 if (pi->pi_owner == NULL)
1600 umtx_pi_setowner(pi, td1);
1601 PROC_UNLOCK(td1->td_proc);
1602 }
1603 }
1604
1605 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1606 pri = UPRI(uq1->uq_thread);
1607 if (pri > UPRI(td))
1608 break;
1609 }
1610
1611 if (uq1 != NULL)
1612 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1613 else
1614 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1615
1616 uq->uq_pi_blocked = pi;
1617 thread_lock(td);
1618 td->td_flags |= TDF_UPIBLOCKED;
1619 thread_unlock(td);
1620 umtx_propagate_priority(td);
1621 mtx_unlock_spin(&umtx_lock);
1622 umtxq_unbusy(&uq->uq_key);
1623
1624 if (uq->uq_flags & UQF_UMTXQ) {
1625 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1626 if (error == EWOULDBLOCK)
1627 error = ETIMEDOUT;
1628 if (uq->uq_flags & UQF_UMTXQ) {
1629 umtxq_remove(uq);
1630 }
1631 }
1632 mtx_lock_spin(&umtx_lock);
1633 uq->uq_pi_blocked = NULL;
1634 thread_lock(td);
1635 td->td_flags &= ~TDF_UPIBLOCKED;
1636 thread_unlock(td);
1637 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1638 umtx_unpropagate_priority(pi);
1639 mtx_unlock_spin(&umtx_lock);
1640 umtxq_unlock(&uq->uq_key);
1641
1642 return (error);
1643}
1644
1645/*
1646 * Add reference count for a PI mutex.
1647 */
1648static void
1649umtx_pi_ref(struct umtx_pi *pi)
1650{
1651 struct umtxq_chain *uc;
1652
1653 uc = umtxq_getchain(&pi->pi_key);
1654 UMTXQ_LOCKED_ASSERT(uc);
1655 pi->pi_refcount++;
1656}
1657
1658/*
1659 * Decrease the reference count of a PI mutex; when the count
1660 * drops to zero, its memory is freed.
1661 */
1662static void
1663umtx_pi_unref(struct umtx_pi *pi)
1664{
1665 struct umtxq_chain *uc;
1666
1667 uc = umtxq_getchain(&pi->pi_key);
1668 UMTXQ_LOCKED_ASSERT(uc);
1669 KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1670 if (--pi->pi_refcount == 0) {
1671 mtx_lock_spin(&umtx_lock);
1672 if (pi->pi_owner != NULL) {
1673 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1674 pi, pi_link);
1675 pi->pi_owner = NULL;
1676 }
1677 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1678 ("blocked queue not empty"));
1679 mtx_unlock_spin(&umtx_lock);
1680 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1681 umtx_pi_free(pi);
1682 }
1683}
1684
1685/*
1686 * Find a PI mutex in hash table.
1687 */
1688static struct umtx_pi *
1689umtx_pi_lookup(struct umtx_key *key)
1690{
1691 struct umtxq_chain *uc;
1692 struct umtx_pi *pi;
1693
1694 uc = umtxq_getchain(key);
1695 UMTXQ_LOCKED_ASSERT(uc);
1696
1697 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1698 if (umtx_key_match(&pi->pi_key, key)) {
1699 return (pi);
1700 }
1701 }
1702 return (NULL);
1703}
1704
1705/*
1706 * Insert a PI mutex into hash table.
1707 */
1708static inline void
1709umtx_pi_insert(struct umtx_pi *pi)
1710{
1711 struct umtxq_chain *uc;
1712
1713 uc = umtxq_getchain(&pi->pi_key);
1714 UMTXQ_LOCKED_ASSERT(uc);
1715 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1716}
1717
1718/*
1719 * Lock a PI mutex.
1720 */
1721static int
1722_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1723 int try)
1724{
1725 struct umtx_q *uq;
1726 struct umtx_pi *pi, *new_pi;
1727 uint32_t id, owner, old;
1728 int error;
1729
1730 id = td->td_tid;
1731 uq = td->td_umtxq;
1732
1733 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1734 &uq->uq_key)) != 0)
1735 return (error);
1736 umtxq_lock(&uq->uq_key);
1737 pi = umtx_pi_lookup(&uq->uq_key);
1738 if (pi == NULL) {
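		/*
		 * Try a cheap M_NOWAIT allocation under the chain
		 * lock first; if that fails, drop the lock, allocate
		 * with M_WAITOK and re-check for a racing insert.
		 */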
1739 new_pi = umtx_pi_alloc(M_NOWAIT);
1740 if (new_pi == NULL) {
1741 umtxq_unlock(&uq->uq_key);
1742 new_pi = umtx_pi_alloc(M_WAITOK);
1743 umtxq_lock(&uq->uq_key);
1744 pi = umtx_pi_lookup(&uq->uq_key);
1745 if (pi != NULL) {
1746 umtx_pi_free(new_pi);
1747 new_pi = NULL;
1748 }
1749 }
1750 if (new_pi != NULL) {
1751 new_pi->pi_key = uq->uq_key;
1752 umtx_pi_insert(new_pi);
1753 pi = new_pi;
1754 }
1755 }
1756 umtx_pi_ref(pi);
1757 umtxq_unlock(&uq->uq_key);
1758
1759 /*
1760 * Care must be exercised when dealing with umtx structure. It
1761 * can fault on any access.
1762 */
1763 for (;;) {
1764 /*
1765 * Try the uncontested case. This should be done in userland.
1766 */
1767 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1768
1769 /* The acquire succeeded. */
1770 if (owner == UMUTEX_UNOWNED) {
1771 error = 0;
1772 break;
1773 }
1774
1775 /* The address was invalid. */
1776 if (owner == -1) {
1777 error = EFAULT;
1778 break;
1779 }
1780
1781 /* If no one owns it but it is contested try to acquire it. */
1782 if (owner == UMUTEX_CONTESTED) {
1783 owner = casuword32(&m->m_owner,
1784 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1785
1786 if (owner == UMUTEX_CONTESTED) {
1787 umtxq_lock(&uq->uq_key);
1788 umtxq_busy(&uq->uq_key);
1789 error = umtx_pi_claim(pi, td);
1790 umtxq_unbusy(&uq->uq_key);
1791 umtxq_unlock(&uq->uq_key);
1792 break;
1793 }
1794
1795 /* The address was invalid. */
1796 if (owner == -1) {
1797 error = EFAULT;
1798 break;
1799 }
1800
1801 /* If this failed the lock has changed, restart. */
1802 continue;
1803 }
1804
1805 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1806 (owner & ~UMUTEX_CONTESTED) == id) {
1807 error = EDEADLK;
1808 break;
1809 }
1810
1811 if (try != 0) {
1812 error = EBUSY;
1813 break;
1814 }
1815
1816 /*
1817 * If we caught a signal, we have retried and now
1818 * exit immediately.
1819 */
1820 if (error != 0)
1821 break;
1822
1823 umtxq_lock(&uq->uq_key);
1824 umtxq_busy(&uq->uq_key);
1825 umtxq_unlock(&uq->uq_key);
1826
1827 /*
1828 * Set the contested bit so that a release in user space
1829 * knows to use the system call for unlock. If this fails
1830 * either some one else has acquired the lock or it has been
1831 * released.
1832 */
1833 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1834
1835 /* The address was invalid. */
1836 if (old == -1) {
1837 umtxq_lock(&uq->uq_key);
1838 umtxq_unbusy(&uq->uq_key);
1839 umtxq_unlock(&uq->uq_key);
1840 error = EFAULT;
1841 break;
1842 }
1843
1844 umtxq_lock(&uq->uq_key);
1845		/*
1846		 * If we set the contested bit, sleep.  Otherwise the lock
1847		 * changed: either we need to retry, or we lost a race to
1848		 * the thread unlocking the umtx.
1849		 */
1850 if (old == owner)
1851 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1852 "umtxpi", timo);
1853 else {
1854 umtxq_unbusy(&uq->uq_key);
1855 umtxq_unlock(&uq->uq_key);
1856 }
1857 }
1858
1859 umtxq_lock(&uq->uq_key);
1860 umtx_pi_unref(pi);
1861 umtxq_unlock(&uq->uq_key);
1862
1863 umtx_key_release(&uq->uq_key);
1864 return (error);
1865}
1866
1867/*
1868 * Unlock a PI mutex.
1869 */
1870static int
1871do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1872{
1873 struct umtx_key key;
1874 struct umtx_q *uq_first, *uq_first2, *uq_me;
1875 struct umtx_pi *pi, *pi2;
1876 uint32_t owner, old, id;
1877 int error;
1878 int count;
1879 int pri;
1880
1881 id = td->td_tid;
1882 /*
1883 * Make sure we own this mtx.
1884 */
1885 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1886 if (owner == -1)
1887 return (EFAULT);
1888
1889 if ((owner & ~UMUTEX_CONTESTED) != id)
1890 return (EPERM);
1891
1892 /* This should be done in userland */
1893 if ((owner & UMUTEX_CONTESTED) == 0) {
1894 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1895 if (old == -1)
1896 return (EFAULT);
1897 if (old == owner)
1898 return (0);
1899 owner = old;
1900 }
1901
1902 /* We should only ever be in here for contested locks */
1903 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1904 &key)) != 0)
1905 return (error);
1906
1907 umtxq_lock(&key);
1908 umtxq_busy(&key);
1909 count = umtxq_count_pi(&key, &uq_first);
1910 if (uq_first != NULL) {
1911 mtx_lock_spin(&umtx_lock);
1912 pi = uq_first->uq_pi_blocked;
1913 KASSERT(pi != NULL, ("pi == NULL?"));
1914 if (pi->pi_owner != curthread) {
1915 mtx_unlock_spin(&umtx_lock);
1916 umtxq_unbusy(&key);
1917 umtxq_unlock(&key);
1918 umtx_key_release(&key);
1919			/* userland messed up the mutex */
1920 return (EPERM);
1921 }
1922 uq_me = curthread->td_umtxq;
1923 pi->pi_owner = NULL;
1924 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1925 /* get highest priority thread which is still sleeping. */
1926 uq_first = TAILQ_FIRST(&pi->pi_blocked);
1927 while (uq_first != NULL &&
1928 (uq_first->uq_flags & UQF_UMTXQ) == 0) {
1929 uq_first = TAILQ_NEXT(uq_first, uq_lockq);
1930 }
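		/*
		 * Recompute our priority: the best priority among the
		 * first waiters of the PI mutexes we still own, or
		 * PRI_MAX if none remain.
		 */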
1931 pri = PRI_MAX;
1932 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1933 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1934 if (uq_first2 != NULL) {
1935 if (pri > UPRI(uq_first2->uq_thread))
1936 pri = UPRI(uq_first2->uq_thread);
1937 }
1938 }
1939 thread_lock(curthread);
1940 sched_unlend_user_prio(curthread, pri);
1941 thread_unlock(curthread);
1942 mtx_unlock_spin(&umtx_lock);
1943 if (uq_first)
1944 umtxq_signal_thread(uq_first);
1945 }
1946 umtxq_unlock(&key);
1947
1948	/*
1949	 * When unlocking the umtx, it must be marked as unowned if
1950	 * at most one thread is waiting for it.
1951	 * Otherwise, it must be marked as contested.
1952	 */
1953 old = casuword32(&m->m_owner, owner,
1954 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1955
1956 umtxq_lock(&key);
1957 umtxq_unbusy(&key);
1958 umtxq_unlock(&key);
1959 umtx_key_release(&key);
1960 if (old == -1)
1961 return (EFAULT);
1962 if (old != owner)
1963 return (EINVAL);
1964 return (0);
1965}
1966
1967/*
1968 * Lock a PP mutex.
1969 */
1970static int
1971_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1972 int try)
1973{
1974 struct umtx_q *uq, *uq2;
1975 struct umtx_pi *pi;
1976 uint32_t ceiling;
1977 uint32_t owner, id;
1978 int error, pri, old_inherited_pri, su;
1979
1980 id = td->td_tid;
1981 uq = td->td_umtxq;
1982 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1983 &uq->uq_key)) != 0)
1984 return (error);
1985 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1986 for (;;) {
1987 old_inherited_pri = uq->uq_inherited_pri;
1988 umtxq_lock(&uq->uq_key);
1989 umtxq_busy(&uq->uq_key);
1990 umtxq_unlock(&uq->uq_key);
1991
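		/*
		 * Map the userland ceiling onto the kernel real-time
		 * priority scale.  The unsigned comparison also
		 * catches a faulting fuword32(), which returns -1.
		 */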
1992 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1993 if (ceiling > RTP_PRIO_MAX) {
1994 error = EINVAL;
1995 goto out;
1996 }
1997
1998 mtx_lock_spin(&umtx_lock);
1999 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
2000 mtx_unlock_spin(&umtx_lock);
2001 error = EINVAL;
2002 goto out;
2003 }
2004 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2005 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2006 thread_lock(td);
2007 if (uq->uq_inherited_pri < UPRI(td))
2008 sched_lend_user_prio(td, uq->uq_inherited_pri);
2009 thread_unlock(td);
2010 }
2011 mtx_unlock_spin(&umtx_lock);
2012
2013 owner = casuword32(&m->m_owner,
2014 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2015
2016 if (owner == UMUTEX_CONTESTED) {
2017 error = 0;
2018 break;
2019 }
2020
2021 /* The address was invalid. */
2022 if (owner == -1) {
2023 error = EFAULT;
2024 break;
2025 }
2026
2027 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
2028 (owner & ~UMUTEX_CONTESTED) == id) {
2029 error = EDEADLK;
2030 break;
2031 }
2032
2033 if (try != 0) {
2034 error = EBUSY;
2035 break;
2036 }
2037
2038 /*
2039 * If we caught a signal, we have retried and now
2040 * exit immediately.
2041 */
2042 if (error != 0)
2043 break;
2044
2045 umtxq_lock(&uq->uq_key);
2046 umtxq_insert(uq);
2047 umtxq_unbusy(&uq->uq_key);
2048 error = umtxq_sleep(uq, "umtxpp", timo);
2049 umtxq_remove(uq);
2050 umtxq_unlock(&uq->uq_key);
2051
2052 mtx_lock_spin(&umtx_lock);
2053 uq->uq_inherited_pri = old_inherited_pri;
2054 pri = PRI_MAX;
2055 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2056 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2057 if (uq2 != NULL) {
2058 if (pri > UPRI(uq2->uq_thread))
2059 pri = UPRI(uq2->uq_thread);
2060 }
2061 }
2062 if (pri > uq->uq_inherited_pri)
2063 pri = uq->uq_inherited_pri;
2064 thread_lock(td);
2065 sched_unlend_user_prio(td, pri);
2066 thread_unlock(td);
2067 mtx_unlock_spin(&umtx_lock);
2068 }
2069
2070 if (error != 0) {
2071 mtx_lock_spin(&umtx_lock);
2072 uq->uq_inherited_pri = old_inherited_pri;
2073 pri = PRI_MAX;
2074 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2075 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2076 if (uq2 != NULL) {
2077 if (pri > UPRI(uq2->uq_thread))
2078 pri = UPRI(uq2->uq_thread);
2079 }
2080 }
2081 if (pri > uq->uq_inherited_pri)
2082 pri = uq->uq_inherited_pri;
2083 thread_lock(td);
2084 sched_unlend_user_prio(td, pri);
2085 thread_unlock(td);
2086 mtx_unlock_spin(&umtx_lock);
2087 }
2088
2089out:
2090 umtxq_lock(&uq->uq_key);
2091 umtxq_unbusy(&uq->uq_key);
2092 umtxq_unlock(&uq->uq_key);
2093 umtx_key_release(&uq->uq_key);
2094 return (error);
2095}
2096
2097/*
2098 * Unlock a PP mutex.
2099 */
2100static int
2101do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2102{
2103 struct umtx_key key;
2104 struct umtx_q *uq, *uq2;
2105 struct umtx_pi *pi;
2106 uint32_t owner, id;
2107 uint32_t rceiling;
2108 int error, pri, new_inherited_pri, su;
2109
2110 id = td->td_tid;
2111 uq = td->td_umtxq;
2112 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2113
2114 /*
2115 * Make sure we own this mtx.
2116 */
2117 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2118 if (owner == -1)
2119 return (EFAULT);
2120
2121 if ((owner & ~UMUTEX_CONTESTED) != id)
2122 return (EPERM);
2123
2124 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2125 if (error != 0)
2126 return (error);
2127
2128 if (rceiling == -1)
2129 new_inherited_pri = PRI_MAX;
2130 else {
2131 rceiling = RTP_PRIO_MAX - rceiling;
2132 if (rceiling > RTP_PRIO_MAX)
2133 return (EINVAL);
2134 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2135 }
2136
2137 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2138 &key)) != 0)
2139 return (error);
2140 umtxq_lock(&key);
2141 umtxq_busy(&key);
2142 umtxq_unlock(&key);
2143 /*
2144 	 * For a priority-protected mutex, always set the unlocked state
2145 	 * to UMUTEX_CONTESTED, so that userland always enters the kernel
2146 	 * to lock the mutex; this is necessary because thread priority
2147 	 * has to be adjusted for such a mutex.
2148 */
2149 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2150 UMUTEX_CONTESTED);
2151
2152 umtxq_lock(&key);
2153 if (error == 0)
2154 umtxq_signal(&key, 1);
2155 umtxq_unbusy(&key);
2156 umtxq_unlock(&key);
2157
2158 if (error == -1)
2159 error = EFAULT;
2160 else {
2161 mtx_lock_spin(&umtx_lock);
2162 if (su != 0)
2163 uq->uq_inherited_pri = new_inherited_pri;
2164 pri = PRI_MAX;
2165 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2166 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2167 if (uq2 != NULL) {
2168 if (pri > UPRI(uq2->uq_thread))
2169 pri = UPRI(uq2->uq_thread);
2170 }
2171 }
2172 if (pri > uq->uq_inherited_pri)
2173 pri = uq->uq_inherited_pri;
2174 thread_lock(td);
2175 sched_unlend_user_prio(td, pri);
2176 thread_unlock(td);
2177 mtx_unlock_spin(&umtx_lock);
2178 }
2179 umtx_key_release(&key);
2180 return (error);
2181}
2182
2183static int
2184do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2185 uint32_t *old_ceiling)
2186{
2187 struct umtx_q *uq;
2188 uint32_t save_ceiling;
2189 uint32_t owner, id;
2190 uint32_t flags;
2191 int error;
2192
2193 flags = fuword32(&m->m_flags);
2194 if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2195 return (EINVAL);
2196 if (ceiling > RTP_PRIO_MAX)
2197 return (EINVAL);
2198 id = td->td_tid;
2199 uq = td->td_umtxq;
2200 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2201 &uq->uq_key)) != 0)
2202 return (error);
2203 for (;;) {
2204 umtxq_lock(&uq->uq_key);
2205 umtxq_busy(&uq->uq_key);
2206 umtxq_unlock(&uq->uq_key);
2207
2208 save_ceiling = fuword32(&m->m_ceilings[0]);
2209
2210 owner = casuword32(&m->m_owner,
2211 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2212
2213 if (owner == UMUTEX_CONTESTED) {
2214 suword32(&m->m_ceilings[0], ceiling);
2215 suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2216 UMUTEX_CONTESTED);
2217 error = 0;
2218 break;
2219 }
2220
2221 /* The address was invalid. */
2222 if (owner == -1) {
2223 error = EFAULT;
2224 break;
2225 }
2226
2227 if ((owner & ~UMUTEX_CONTESTED) == id) {
2228 suword32(&m->m_ceilings[0], ceiling);
2229 error = 0;
2230 break;
2231 }
2232
2233 /*
2234 		 * If we caught a signal during the earlier sleep, we have
2235 		 * already retried once; exit immediately now.
2236 */
2237 if (error != 0)
2238 break;
2239
2240 /*
2241 	 * We set the contested bit, so sleep.  Otherwise the lock changed
2242 	 * and we need to retry, or we lost a race to the thread that
2243 	 * unlocked the umtx.
2244 */
2245 umtxq_lock(&uq->uq_key);
2246 umtxq_insert(uq);
2247 umtxq_unbusy(&uq->uq_key);
2248 error = umtxq_sleep(uq, "umtxpp", 0);
2249 umtxq_remove(uq);
2250 umtxq_unlock(&uq->uq_key);
2251 }
2252 umtxq_lock(&uq->uq_key);
2253 if (error == 0)
2254 umtxq_signal(&uq->uq_key, INT_MAX);
2255 umtxq_unbusy(&uq->uq_key);
2256 umtxq_unlock(&uq->uq_key);
2257 umtx_key_release(&uq->uq_key);
2258 if (error == 0 && old_ceiling != NULL)
2259 suword32(old_ceiling, save_ceiling);
2260 return (error);
2261}
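
/*
 * Illustrative sketch (not kernel code) of how userland reaches
 * do_set_ceiling() through the _umtx_op(2) entry point; the mutex
 * variable below is an assumption for the example.
 *
 *	struct umutex mtx;	// initialized with UMUTEX_PRIO_PROTECT set
 *	uint32_t old;
 *
 *	// Install ceiling 20 and fetch the previous value; dispatched
 *	// to __umtx_op_set_ceiling() further below.
 *	_umtx_op(&mtx, UMTX_OP_SET_CEILING, 20, &old, NULL);
 */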
2262
2263static int
2264_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2265 int mode)
2266{
2267 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2268 case 0:
2269 return (_do_lock_normal(td, m, flags, timo, mode));
2270 case UMUTEX_PRIO_INHERIT:
2271 return (_do_lock_pi(td, m, flags, timo, mode));
2272 case UMUTEX_PRIO_PROTECT:
2273 return (_do_lock_pp(td, m, flags, timo, mode));
2274 }
2275 return (EINVAL);
2276}
2277
2278/*
2279 * Lock a userland POSIX mutex.
2280 */
2281static int
2282do_lock_umutex(struct thread *td, struct umutex *m,
2283 struct timespec *timeout, int mode)
2284{
2285 struct timespec ts, ts2, ts3;
2286 struct timeval tv;
2287 uint32_t flags;
2288 int error;
2289
2290 flags = fuword32(&m->m_flags);
2291 if (flags == -1)
2292 return (EFAULT);
2293
2294 if (timeout == NULL) {
2295 error = _do_lock_umutex(td, m, flags, 0, mode);
2296 /* Mutex locking is restarted if it is interrupted. */
2297 if (error == EINTR && mode != _UMUTEX_WAIT)
2298 error = ERESTART;
2299 } else {
2300 getnanouptime(&ts);
2301 timespecadd(&ts, timeout);
2302 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2303 for (;;) {
2304 error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
2305 if (error != ETIMEDOUT)
2306 break;
2307 getnanouptime(&ts2);
2308 if (timespeccmp(&ts2, &ts, >=)) {
2309 error = ETIMEDOUT;
2310 break;
2311 }
2312 ts3 = ts;
2313 timespecsub(&ts3, &ts2);
2314 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2315 }
2316 /* Timed-locking is not restarted. */
2317 if (error == ERESTART)
2318 error = EINTR;
2319 }
2320 return (error);
2321}
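
/*
 * Worked example of the timeout slicing above, assuming hz == 1000:
 * for a 2.5 second relative timeout, ts becomes "now + 2.5s" and the
 * first _do_lock_umutex() call sleeps for roughly tvtohz(2.5s) ticks.
 * If that sleep returns ETIMEDOUT before the deadline (tick counts
 * are coarse), the loop re-reads the uptime, recomputes the remainder
 * ts3 = ts - ts2 and sleeps again, so ETIMEDOUT reaches the caller
 * only once the absolute deadline has truly passed.
 */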
2322
2323/*
2324 * Unlock a userland POSIX mutex.
2325 */
2326static int
2327do_unlock_umutex(struct thread *td, struct umutex *m)
2328{
2329 uint32_t flags;
2330
2331 flags = fuword32(&m->m_flags);
2332 if (flags == -1)
2333 return (EFAULT);
2334
2335 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2336 case 0:
2337 return (do_unlock_normal(td, m, flags));
2338 case UMUTEX_PRIO_INHERIT:
2339 return (do_unlock_pi(td, m, flags));
2340 case UMUTEX_PRIO_PROTECT:
2341 return (do_unlock_pp(td, m, flags));
2342 }
2343
2344 return (EINVAL);
2345}
2346
2347static int
2348do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2349 struct timespec *timeout, u_long wflags)
2350{
2351 struct umtx_q *uq;
2352 struct timeval tv;
2353 struct timespec cts, ets, tts;
2354 uint32_t flags;
2355 uint32_t clockid;
2356 int error;
2357
2358 uq = td->td_umtxq;
2359 flags = fuword32(&cv->c_flags);
2360 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2361 if (error != 0)
2362 return (error);
2363
2364 if ((wflags & CVWAIT_CLOCKID) != 0) {
2365 clockid = fuword32(&cv->c_clockid);
2366 if (clockid < CLOCK_REALTIME ||
2367 clockid >= CLOCK_THREAD_CPUTIME_ID) {
2368 			/* Only predefined hardware clock ids are supported. */
2369 return (EINVAL);
2370 }
2371 } else {
2372 clockid = CLOCK_REALTIME;
2373 }
2374
2375 umtxq_lock(&uq->uq_key);
2376 umtxq_busy(&uq->uq_key);
2377 umtxq_insert(uq);
2378 umtxq_unlock(&uq->uq_key);
2379
2380 /*
2381 	 * Set c_has_waiters to 1 before releasing the user mutex; avoid
2382 	 * dirtying the cache line when the flag is already set.
2383 */
2384 if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
2385 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2386
2387 umtxq_lock(&uq->uq_key);
2388 umtxq_unbusy(&uq->uq_key);
2389 umtxq_unlock(&uq->uq_key);
2390
2391 error = do_unlock_umutex(td, m);
2392
2393 umtxq_lock(&uq->uq_key);
2394 if (error == 0) {
2395 if (timeout == NULL) {
2396 error = umtxq_sleep(uq, "ucond", 0);
2397 } else {
2398 if ((wflags & CVWAIT_ABSTIME) == 0) {
2399 kern_clock_gettime(td, clockid, &ets);
2400 timespecadd(&ets, timeout);
2401 tts = *timeout;
2402 } else { /* absolute time */
2403 ets = *timeout;
2404 tts = *timeout;
2405 kern_clock_gettime(td, clockid, &cts);
2406 timespecsub(&tts, &cts);
2407 }
2408 TIMESPEC_TO_TIMEVAL(&tv, &tts);
2409 for (;;) {
2410 error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2411 if (error != ETIMEDOUT)
2412 break;
2413 kern_clock_gettime(td, clockid, &cts);
2414 if (timespeccmp(&cts, &ets, >=)) {
2415 error = ETIMEDOUT;
2416 break;
2417 }
2418 tts = ets;
2419 timespecsub(&tts, &cts);
2420 TIMESPEC_TO_TIMEVAL(&tv, &tts);
2421 }
2422 }
2423 }
2424
2425 if ((uq->uq_flags & UQF_UMTXQ) == 0)
2426 error = 0;
2427 else {
2428 /*
2429 		 * This must be a timeout, an interruption by a signal, or a
2430 		 * spurious wakeup; clear the c_has_waiters flag when
2431 		 * necessary.
2432 */
2433 umtxq_busy(&uq->uq_key);
2434 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2435 int oldlen = uq->uq_cur_queue->length;
2436 umtxq_remove(uq);
2437 if (oldlen == 1) {
2438 umtxq_unlock(&uq->uq_key);
2439 suword32(
2440 __DEVOLATILE(uint32_t *,
2441 &cv->c_has_waiters), 0);
2442 umtxq_lock(&uq->uq_key);
2443 }
2444 }
2445 umtxq_unbusy(&uq->uq_key);
2446 if (error == ERESTART)
2447 error = EINTR;
2448 }
2449
2450 umtxq_unlock(&uq->uq_key);
2451 umtx_key_release(&uq->uq_key);
2452 return (error);
2453}
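
/*
 * Illustrative sketch (not kernel code): a userland absolute-time wait
 * that exercises the CVWAIT_CLOCKID/CVWAIT_ABSTIME paths added above.
 * Flag and field names come from <sys/umtx.h>; the initialization
 * details are assumptions for the example and error handling is
 * elided.
 *
 *	struct ucond cv = { .c_clockid = CLOCK_MONOTONIC };
 *	struct umutex mtx;	// currently owned by this thread
 *	struct timespec deadline;
 *
 *	clock_gettime(CLOCK_MONOTONIC, &deadline);
 *	deadline.tv_sec += 5;	// give up five seconds from now
 *	_umtx_op(&cv, UMTX_OP_CV_WAIT,
 *	    CVWAIT_ABSTIME | CVWAIT_CLOCKID, &mtx, &deadline);
 */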
2454
2455/*
2456 * Signal a userland condition variable.
2457 */
2458static int
2459do_cv_signal(struct thread *td, struct ucond *cv)
2460{
2461 struct umtx_key key;
2462 int error, cnt, nwake;
2463 uint32_t flags;
2464
2465 flags = fuword32(&cv->c_flags);
2466 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2467 return (error);
2468 umtxq_lock(&key);
2469 umtxq_busy(&key);
2470 cnt = umtxq_count(&key);
2471 nwake = umtxq_signal(&key, 1);
2472 if (cnt <= nwake) {
2473 umtxq_unlock(&key);
2474 error = suword32(
2475 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2476 umtxq_lock(&key);
2477 }
2478 umtxq_unbusy(&key);
2479 umtxq_unlock(&key);
2480 umtx_key_release(&key);
2481 return (error);
2482}
2483
2484static int
2485do_cv_broadcast(struct thread *td, struct ucond *cv)
2486{
2487 struct umtx_key key;
2488 int error;
2489 uint32_t flags;
2490
2491 flags = fuword32(&cv->c_flags);
2492 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2493 return (error);
2494
2495 umtxq_lock(&key);
2496 umtxq_busy(&key);
2497 umtxq_signal(&key, INT_MAX);
2498 umtxq_unlock(&key);
2499
2500 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2501
2502 umtxq_lock(&key);
2503 umtxq_unbusy(&key);
2504 umtxq_unlock(&key);
2505
2506 umtx_key_release(&key);
2507 return (error);
2508}
2509
2510static int
2511do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
2512{
2513 struct umtx_q *uq;
2514 uint32_t flags, wrflags;
2515 int32_t state, oldstate;
2516 int32_t blocked_readers;
2517 int error;
2518
2519 uq = td->td_umtxq;
2520 flags = fuword32(&rwlock->rw_flags);
2521 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2522 if (error != 0)
2523 return (error);
2524
2525 wrflags = URWLOCK_WRITE_OWNER;
2526 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2527 wrflags |= URWLOCK_WRITE_WAITERS;
2528
2529 for (;;) {
2530 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2531 /* try to lock it */
2532 while (!(state & wrflags)) {
2533 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2534 umtx_key_release(&uq->uq_key);
2535 return (EAGAIN);
2536 }
2537 oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2538 if (oldstate == state) {
2539 umtx_key_release(&uq->uq_key);
2540 return (0);
2541 }
2542 state = oldstate;
2543 }
2544
2545 if (error)
2546 break;
2547
2548 /* grab monitor lock */
2549 umtxq_lock(&uq->uq_key);
2550 umtxq_busy(&uq->uq_key);
2551 umtxq_unlock(&uq->uq_key);
2552
2553 /*
2554 * re-read the state, in case it changed between the try-lock above
2555 * and the check below
2556 */
2557 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2558
2559 /* set read contention bit */
2560 while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2561 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2562 if (oldstate == state)
2563 goto sleep;
2564 state = oldstate;
2565 }
2566
2567 		/* state changed while we were setting flags; restart */
2568 if (!(state & wrflags)) {
2569 umtxq_lock(&uq->uq_key);
2570 umtxq_unbusy(&uq->uq_key);
2571 umtxq_unlock(&uq->uq_key);
2572 continue;
2573 }
2574
2575sleep:
2576 		/* contention bit is set; increase the read-waiter count before sleeping */
2577 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2578 suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2579
2580 while (state & wrflags) {
2581 umtxq_lock(&uq->uq_key);
2582 umtxq_insert(uq);
2583 umtxq_unbusy(&uq->uq_key);
2584
2585 error = umtxq_sleep(uq, "urdlck", timo);
2586
2587 umtxq_busy(&uq->uq_key);
2588 umtxq_remove(uq);
2589 umtxq_unlock(&uq->uq_key);
2590 if (error)
2591 break;
2592 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2593 }
2594
2595 		/* decrease the read-waiter count and possibly clear the read contention bit */
2596 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2597 suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
2598 if (blocked_readers == 1) {
2599 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2600 for (;;) {
2601 oldstate = casuword32(&rwlock->rw_state, state,
2602 state & ~URWLOCK_READ_WAITERS);
2603 if (oldstate == state)
2604 break;
2605 state = oldstate;
2606 }
2607 }
2608
2609 umtxq_lock(&uq->uq_key);
2610 umtxq_unbusy(&uq->uq_key);
2611 umtxq_unlock(&uq->uq_key);
2612 }
2613 umtx_key_release(&uq->uq_key);
2614 return (error);
2615}
2616
2617static int
2618do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2619{
2620 struct timespec ts, ts2, ts3;
2621 struct timeval tv;
2622 int error;
2623
2624 getnanouptime(&ts);
2625 timespecadd(&ts, timeout);
2626 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2627 for (;;) {
2628 error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2629 if (error != ETIMEDOUT)
2630 break;
2631 getnanouptime(&ts2);
2632 if (timespeccmp(&ts2, &ts, >=)) {
2633 error = ETIMEDOUT;
2634 break;
2635 }
2636 ts3 = ts;
2637 timespecsub(&ts3, &ts2);
2638 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2639 }
2640 if (error == ERESTART)
2641 error = EINTR;
2642 return (error);
2643}
2644
2645static int
2646do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
2647{
2648 struct umtx_q *uq;
2649 uint32_t flags;
2650 int32_t state, oldstate;
2651 int32_t blocked_writers;
2652 int32_t blocked_readers;
2653 int error;
2654
2655 uq = td->td_umtxq;
2656 flags = fuword32(&rwlock->rw_flags);
2657 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2658 if (error != 0)
2659 return (error);
2660
2661 blocked_readers = 0;
2662 for (;;) {
2663 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2664 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2665 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
2666 if (oldstate == state) {
2667 umtx_key_release(&uq->uq_key);
2668 return (0);
2669 }
2670 state = oldstate;
2671 }
2672
2673 if (error) {
2674 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
2675 blocked_readers != 0) {
2676 umtxq_lock(&uq->uq_key);
2677 umtxq_busy(&uq->uq_key);
2678 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
2679 umtxq_unbusy(&uq->uq_key);
2680 umtxq_unlock(&uq->uq_key);
2681 }
2682
2683 break;
2684 }
2685
2686 /* grab monitor lock */
2687 umtxq_lock(&uq->uq_key);
2688 umtxq_busy(&uq->uq_key);
2689 umtxq_unlock(&uq->uq_key);
2690
2691 /*
2692 * re-read the state, in case it changed between the try-lock above
2693 * and the check below
2694 */
2695 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2696
2697 while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
2698 (state & URWLOCK_WRITE_WAITERS) == 0) {
2699 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
2700 if (oldstate == state)
2701 goto sleep;
2702 state = oldstate;
2703 }
2704
2705 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2706 umtxq_lock(&uq->uq_key);
2707 umtxq_unbusy(&uq->uq_key);
2708 umtxq_unlock(&uq->uq_key);
2709 continue;
2710 }
2711sleep:
2712 blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2713 suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
2714
2715 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
2716 umtxq_lock(&uq->uq_key);
2717 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2718 umtxq_unbusy(&uq->uq_key);
2719
2720 error = umtxq_sleep(uq, "uwrlck", timo);
2721
2722 umtxq_busy(&uq->uq_key);
2723 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2724 umtxq_unlock(&uq->uq_key);
2725 if (error)
2726 break;
2727 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2728 }
2729
2730 blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2731 suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
2732 if (blocked_writers == 1) {
2733 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2734 for (;;) {
2735 oldstate = casuword32(&rwlock->rw_state, state,
2736 state & ~URWLOCK_WRITE_WAITERS);
2737 if (oldstate == state)
2738 break;
2739 state = oldstate;
2740 }
2741 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2742 } else
2743 blocked_readers = 0;
2744
2745 umtxq_lock(&uq->uq_key);
2746 umtxq_unbusy(&uq->uq_key);
2747 umtxq_unlock(&uq->uq_key);
2748 }
2749
2750 umtx_key_release(&uq->uq_key);
2751 return (error);
2752}
2753
2754static int
2755do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2756{
2757 struct timespec ts, ts2, ts3;
2758 struct timeval tv;
2759 int error;
2760
2761 getnanouptime(&ts);
2762 timespecadd(&ts, timeout);
2763 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2764 for (;;) {
2765 error = do_rw_wrlock(td, obj, tvtohz(&tv));
2766 if (error != ETIMEDOUT)
2767 break;
2768 getnanouptime(&ts2);
2769 if (timespeccmp(&ts2, &ts, >=)) {
2770 error = ETIMEDOUT;
2771 break;
2772 }
2773 ts3 = ts;
2774 timespecsub(&ts3, &ts2);
2775 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2776 }
2777 if (error == ERESTART)
2778 error = EINTR;
2779 return (error);
2780}
2781
2782static int
2783do_rw_unlock(struct thread *td, struct urwlock *rwlock)
2784{
2785 struct umtx_q *uq;
2786 uint32_t flags;
2787 int32_t state, oldstate;
2788 int error, q, count;
2789
2790 uq = td->td_umtxq;
2791 flags = fuword32(&rwlock->rw_flags);
2792 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2793 if (error != 0)
2794 return (error);
2795
2796 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2797 if (state & URWLOCK_WRITE_OWNER) {
2798 for (;;) {
2799 oldstate = casuword32(&rwlock->rw_state, state,
2800 state & ~URWLOCK_WRITE_OWNER);
2801 if (oldstate != state) {
2802 state = oldstate;
2803 if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2804 error = EPERM;
2805 goto out;
2806 }
2807 } else
2808 break;
2809 }
2810 } else if (URWLOCK_READER_COUNT(state) != 0) {
2811 for (;;) {
2812 oldstate = casuword32(&rwlock->rw_state, state,
2813 state - 1);
2814 if (oldstate != state) {
2815 state = oldstate;
2816 if (URWLOCK_READER_COUNT(oldstate) == 0) {
2817 error = EPERM;
2818 goto out;
2819 }
2820 }
2821 else
2822 break;
2823 }
2824 } else {
2825 error = EPERM;
2826 goto out;
2827 }
2828
2829 count = 0;
2830
2831 if (!(flags & URWLOCK_PREFER_READER)) {
2832 if (state & URWLOCK_WRITE_WAITERS) {
2833 count = 1;
2834 q = UMTX_EXCLUSIVE_QUEUE;
2835 } else if (state & URWLOCK_READ_WAITERS) {
2836 count = INT_MAX;
2837 q = UMTX_SHARED_QUEUE;
2838 }
2839 } else {
2840 if (state & URWLOCK_READ_WAITERS) {
2841 count = INT_MAX;
2842 q = UMTX_SHARED_QUEUE;
2843 } else if (state & URWLOCK_WRITE_WAITERS) {
2844 count = 1;
2845 q = UMTX_EXCLUSIVE_QUEUE;
2846 }
2847 }
2848
2849 if (count) {
2850 umtxq_lock(&uq->uq_key);
2851 umtxq_busy(&uq->uq_key);
2852 umtxq_signal_queue(&uq->uq_key, count, q);
2853 umtxq_unbusy(&uq->uq_key);
2854 umtxq_unlock(&uq->uq_key);
2855 }
2856out:
2857 umtx_key_release(&uq->uq_key);
2858 return (error);
2859}
2860
2861static int
2862do_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout)
2863{
2864 struct umtx_q *uq;
2865 struct timeval tv;
2866 struct timespec cts, ets, tts;
2867 uint32_t flags, count;
2868 int error;
2869
2870 uq = td->td_umtxq;
2871 flags = fuword32(&sem->_flags);
2872 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
2873 if (error != 0)
2874 return (error);
2875 umtxq_lock(&uq->uq_key);
2876 umtxq_busy(&uq->uq_key);
2877 umtxq_insert(uq);
2878 umtxq_unlock(&uq->uq_key);
2879
2880 if (fuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters)) == 0)
2881 casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
2882
2883 count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
2884 if (count != 0) {
2885 umtxq_lock(&uq->uq_key);
2886 umtxq_unbusy(&uq->uq_key);
2887 umtxq_remove(uq);
2888 umtxq_unlock(&uq->uq_key);
2889 umtx_key_release(&uq->uq_key);
2890 return (0);
2891 }
2892
2893 umtxq_lock(&uq->uq_key);
2894 umtxq_unbusy(&uq->uq_key);
2895 umtxq_unlock(&uq->uq_key);
2896
2897 umtxq_lock(&uq->uq_key);
2898 if (timeout == NULL) {
2899 error = umtxq_sleep(uq, "usem", 0);
2900 } else {
2901 getnanouptime(&ets);
2902 timespecadd(&ets, timeout);
2903 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2904 for (;;) {
2905 error = umtxq_sleep(uq, "usem", tvtohz(&tv));
2906 if (error != ETIMEDOUT)
2907 break;
2908 getnanouptime(&cts);
2909 if (timespeccmp(&cts, &ets, >=)) {
2910 error = ETIMEDOUT;
2911 break;
2912 }
2913 tts = ets;
2914 timespecsub(&tts, &cts);
2915 TIMESPEC_TO_TIMEVAL(&tv, &tts);
2916 }
2917 }
2918
2919 if ((uq->uq_flags & UQF_UMTXQ) == 0)
2920 error = 0;
2921 else {
2922 umtxq_remove(uq);
2923 if (error == ERESTART)
2924 error = EINTR;
2925 }
2926 umtxq_unlock(&uq->uq_key);
2927 umtx_key_release(&uq->uq_key);
2928 return (error);
2929}
2930
2931/*
2932  * Wake up a waiter on a userland semaphore.
2933 */
2934static int
2935do_sem_wake(struct thread *td, struct _usem *sem)
2936{
2937 struct umtx_key key;
2938 int error, cnt, nwake;
2939 uint32_t flags;
2940
2941 flags = fuword32(&sem->_flags);
2942 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
2943 return (error);
2944 umtxq_lock(&key);
2945 umtxq_busy(&key);
2946 cnt = umtxq_count(&key);
2947 nwake = umtxq_signal(&key, 1);
2948 if (cnt <= nwake) {
2949 umtxq_unlock(&key);
2950 error = suword32(
2951 __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
2952 umtxq_lock(&key);
2953 }
2954 umtxq_unbusy(&key);
2955 umtxq_unlock(&key);
2956 umtx_key_release(&key);
2957 return (error);
2958}
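
/*
 * Illustrative sketch (not kernel code): the userland post operation
 * that pairs with do_sem_wait()/do_sem_wake() above.  Userland bumps
 * _count with a release atomic and only enters the kernel when
 * _has_waiters suggests somebody may be sleeping; the exact libc
 * protocol is an assumption here.
 *
 *	atomic_add_rel_32(&sem->_count, 1);
 *	if (sem->_has_waiters)
 *		_umtx_op(sem, UMTX_OP_SEM_WAKE, 0, NULL, NULL);
 */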
2959
2960int
2961_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2962 /* struct umtx *umtx */
2963{
2964 return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2965}
2966
2967int
2968_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2969 /* struct umtx *umtx */
2970{
2971 return do_unlock_umtx(td, uap->umtx, td->td_tid);
2972}
2973
2974static int
2975__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2976{
2977 struct timespec *ts, timeout;
2978 int error;
2979
2980 /* Allow a null timespec (wait forever). */
2981 if (uap->uaddr2 == NULL)
2982 ts = NULL;
2983 else {
2984 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2985 if (error != 0)
2986 return (error);
2987 if (timeout.tv_nsec >= 1000000000 ||
2988 timeout.tv_nsec < 0) {
2989 return (EINVAL);
2990 }
2991 ts = &timeout;
2992 }
2993 return (do_lock_umtx(td, uap->obj, uap->val, ts));
2994}
2995
2996static int
2997__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2998{
2999 return (do_unlock_umtx(td, uap->obj, uap->val));
3000}
3001
3002static int
3003__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
3004{
3005 struct timespec *ts, timeout;
3006 int error;
3007
3008 if (uap->uaddr2 == NULL)
3009 ts = NULL;
3010 else {
3011 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
3012 if (error != 0)
3013 return (error);
3014 if (timeout.tv_nsec >= 1000000000 ||
3015 timeout.tv_nsec < 0)
3016 return (EINVAL);
3017 ts = &timeout;
3018 }
3019 return do_wait(td, uap->obj, uap->val, ts, 0, 0);
3020}
3021
3022static int
3023__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
3024{
3025 struct timespec *ts, timeout;
3026 int error;
3027
3028 if (uap->uaddr2 == NULL)
3029 ts = NULL;
3030 else {
3031 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
3032 if (error != 0)
3033 return (error);
3034 if (timeout.tv_nsec >= 1000000000 ||
3035 timeout.tv_nsec < 0)
3036 return (EINVAL);
3037 ts = &timeout;
3038 }
3039 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3040}
3041
3042static int
3043__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3044{
3045 struct timespec *ts, timeout;
3046 int error;
3047
3048 if (uap->uaddr2 == NULL)
3049 ts = NULL;
3050 else {
3051 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
3052 if (error != 0)
3053 return (error);
3054 if (timeout.tv_nsec >= 1000000000 ||
3055 timeout.tv_nsec < 0)
3056 return (EINVAL);
3057 ts = &timeout;
3058 }
3059 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3060}
3061
3062static int
3063__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
3064{
3065 return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3066}
3067
3068#define BATCH_SIZE 128
3069static int
3070__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
3071{
3072 int count = uap->val;
3073 void *uaddrs[BATCH_SIZE];
3074 char **upp = (char **)uap->obj;
3075 int tocopy;
3076 int error = 0;
3077 int i, pos = 0;
3078
3079 while (count > 0) {
3080 tocopy = count;
3081 if (tocopy > BATCH_SIZE)
3082 tocopy = BATCH_SIZE;
3083 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
3084 if (error != 0)
3085 break;
3086 for (i = 0; i < tocopy; ++i)
3087 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3088 count -= tocopy;
3089 pos += tocopy;
3090 }
3091 return (error);
3092}
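
/*
 * Illustrative sketch (not kernel code): the userland side of the
 * batched wakeup above.  A single UMTX_OP_NWAKE_PRIVATE call wakes
 * every waiter on each address in the array, replacing one
 * UMTX_OP_WAKE_PRIVATE syscall per address; the names below are
 * assumptions for the example.
 *
 *	void *addrs[3] = { &word_a, &word_b, &word_c };
 *
 *	_umtx_op(addrs, UMTX_OP_NWAKE_PRIVATE, 3, NULL, NULL);
 */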
3093
3094static int
3095__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3096{
3097 return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3098}
3099
3100static int
3101__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3102{
3103 struct timespec *ts, timeout;
3104 int error;
3105
3106 /* Allow a null timespec (wait forever). */
3107 if (uap->uaddr2 == NULL)
3108 ts = NULL;
3109 else {
3110 error = copyin(uap->uaddr2, &timeout,
3111 sizeof(timeout));
3112 if (error != 0)
3113 return (error);
3114 if (timeout.tv_nsec >= 1000000000 ||
3115 timeout.tv_nsec < 0) {
3116 return (EINVAL);
3117 }
3118 ts = &timeout;
3119 }
3120 return do_lock_umutex(td, uap->obj, ts, 0);
3121}
3122
3123static int
3124__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3125{
3126 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
3127}
3128
3129static int
3130__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3131{
3132 struct timespec *ts, timeout;
3133 int error;
3134
3135 /* Allow a null timespec (wait forever). */
3136 if (uap->uaddr2 == NULL)
3137 ts = NULL;
3138 else {
3139 error = copyin(uap->uaddr2, &timeout,
3140 sizeof(timeout));
3141 if (error != 0)
3142 return (error);
3143 if (timeout.tv_nsec >= 1000000000 ||
3144 timeout.tv_nsec < 0) {
3145 return (EINVAL);
3146 }
3147 ts = &timeout;
3148 }
3149 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3150}
3151
3152static int
3153__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3154{
3155 return do_wake_umutex(td, uap->obj);
3156}
3157
3158static int
3159__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3160{
3161 return do_unlock_umutex(td, uap->obj);
3162}
3163
3164static int
3165__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3166{
3167 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
3168}
3169
3170static int
3171__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3172{
3173 struct timespec *ts, timeout;
3174 int error;
3175
3176 /* Allow a null timespec (wait forever). */
3177 if (uap->uaddr2 == NULL)
3178 ts = NULL;
3179 else {
3180 error = copyin(uap->uaddr2, &timeout,
3181 sizeof(timeout));
3182 if (error != 0)
3183 return (error);
3184 if (timeout.tv_nsec >= 1000000000 ||
3185 timeout.tv_nsec < 0) {
3186 return (EINVAL);
3187 }
3188 ts = &timeout;
3189 }
3190 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3191}
3192
3193static int
3194__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3195{
3196 return do_cv_signal(td, uap->obj);
3197}
3198
3199static int
3200__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3201{
3202 return do_cv_broadcast(td, uap->obj);
3203}
3204
3205static int
3206__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3207{
3208 struct timespec timeout;
3209 int error;
3210
3211 /* Allow a null timespec (wait forever). */
3212 if (uap->uaddr2 == NULL) {
3213 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3214 } else {
3215 error = copyin(uap->uaddr2, &timeout,
3216 sizeof(timeout));
3217 if (error != 0)
3218 return (error);
3219 if (timeout.tv_nsec >= 1000000000 ||
3220 timeout.tv_nsec < 0) {
3221 return (EINVAL);
3222 }
3223 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3224 }
3225 return (error);
3226}
3227
3228static int
3229__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3230{
3231 struct timespec timeout;
3232 int error;
3233
3234 /* Allow a null timespec (wait forever). */
3235 if (uap->uaddr2 == NULL) {
3236 error = do_rw_wrlock(td, uap->obj, 0);
3237 } else {
3238 error = copyin(uap->uaddr2, &timeout,
3239 sizeof(timeout));
3240 if (error != 0)
3241 return (error);
3242 if (timeout.tv_nsec >= 1000000000 ||
3243 timeout.tv_nsec < 0) {
3244 return (EINVAL);
3245 }
3246
3247 error = do_rw_wrlock2(td, uap->obj, &timeout);
3248 }
3249 return (error);
3250}
3251
3252static int
3253__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3254{
3255 return do_rw_unlock(td, uap->obj);
3256}
3257
3258static int
3259__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3260{
3261 struct timespec *ts, timeout;
3262 int error;
3263
3264 /* Allow a null timespec (wait forever). */
3265 if (uap->uaddr2 == NULL)
3266 ts = NULL;
3267 else {
3268 error = copyin(uap->uaddr2, &timeout,
3269 sizeof(timeout));
3270 if (error != 0)
3271 return (error);
3272 if (timeout.tv_nsec >= 1000000000 ||
3273 timeout.tv_nsec < 0) {
3274 return (EINVAL);
3275 }
3276 ts = &timeout;
3277 }
3278 return (do_sem_wait(td, uap->obj, ts));
3279}
3280
3281static int
3282__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3283{
3284 return do_sem_wake(td, uap->obj);
3285}
3286
3287typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3288
3289static _umtx_op_func op_table[] = {
3290 __umtx_op_lock_umtx, /* UMTX_OP_LOCK */
3291 __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */
3292 __umtx_op_wait, /* UMTX_OP_WAIT */
3293 __umtx_op_wake, /* UMTX_OP_WAKE */
3294 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */
3295 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */
3296 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */
3297 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */
3298 	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
3299 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */
3300 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */
3301 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */
3302 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */
3303 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */
3304 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */
3305 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */
3306 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */
3307 __umtx_op_wait_umutex, /* UMTX_OP_UMUTEX_WAIT */
3308 __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */
3309 __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */
3310 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */
3311 __umtx_op_nwake_private /* UMTX_OP_NWAKE_PRIVATE */
3249};
3250
3251int
3252_umtx_op(struct thread *td, struct _umtx_op_args *uap)
3253{
3254 if ((unsigned)uap->op < UMTX_OP_MAX)
3255 return (*op_table[uap->op])(td, uap);
3256 return (EINVAL);
3257}
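
/*
 * Illustrative sketch (not kernel code): how userland reaches the
 * op_table dispatch above.  The wrapper prototype matches the one
 * declared in <sys/umtx.h>; the futex word below is an assumption for
 * the example.
 *
 *	static unsigned int word = 1;
 *
 *	// Sleep while word still holds 1, with no timeout; dispatched
 *	// to __umtx_op_wait_uint_private() above.
 *	_umtx_op(&word, UMTX_OP_WAIT_UINT_PRIVATE, 1, NULL, NULL);
 */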
3258
3259#ifdef COMPAT_FREEBSD32
3260int
3261freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3262 /* struct umtx *umtx */
3263{
3264 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3265}
3266
3267int
3268freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3269 /* struct umtx *umtx */
3270{
3271 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3272}
3273
3274struct timespec32 {
3275 uint32_t tv_sec;
3276 uint32_t tv_nsec;
3277};
3278
3279static inline int
3280copyin_timeout32(void *addr, struct timespec *tsp)
3281{
3282 struct timespec32 ts32;
3283 int error;
3284
3285 error = copyin(addr, &ts32, sizeof(struct timespec32));
3286 if (error == 0) {
3287 tsp->tv_sec = ts32.tv_sec;
3288 tsp->tv_nsec = ts32.tv_nsec;
3289 }
3290 return (error);
3291}
3292
3293static int
3294__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3295{
3296 struct timespec *ts, timeout;
3297 int error;
3298
3299 /* Allow a null timespec (wait forever). */
3300 if (uap->uaddr2 == NULL)
3301 ts = NULL;
3302 else {
3303 error = copyin_timeout32(uap->uaddr2, &timeout);
3304 if (error != 0)
3305 return (error);
3306 if (timeout.tv_nsec >= 1000000000 ||
3307 timeout.tv_nsec < 0) {
3308 return (EINVAL);
3309 }
3310 ts = &timeout;
3311 }
3312 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3313}
3314
3315static int
3316__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3317{
3318 return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3319}
3320
3321static int
3322__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3323{
3324 struct timespec *ts, timeout;
3325 int error;
3326
3327 if (uap->uaddr2 == NULL)
3328 ts = NULL;
3329 else {
3330 error = copyin_timeout32(uap->uaddr2, &timeout);
3331 if (error != 0)
3332 return (error);
3333 if (timeout.tv_nsec >= 1000000000 ||
3334 timeout.tv_nsec < 0)
3335 return (EINVAL);
3336 ts = &timeout;
3337 }
3338 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3339}
3340
3341static int
3342__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3343{
3344 struct timespec *ts, timeout;
3345 int error;
3346
3347 /* Allow a null timespec (wait forever). */
3348 if (uap->uaddr2 == NULL)
3349 ts = NULL;
3350 else {
3351 error = copyin_timeout32(uap->uaddr2, &timeout);
3352 if (error != 0)
3353 return (error);
3354 if (timeout.tv_nsec >= 1000000000 ||
3355 timeout.tv_nsec < 0)
3356 return (EINVAL);
3357 ts = &timeout;
3358 }
3359 return do_lock_umutex(td, uap->obj, ts, 0);
3360}
3361
3362static int
3363__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3364{
3365 struct timespec *ts, timeout;
3366 int error;
3367
3368 /* Allow a null timespec (wait forever). */
3369 if (uap->uaddr2 == NULL)
3370 ts = NULL;
3371 else {
3372 error = copyin_timeout32(uap->uaddr2, &timeout);
3373 if (error != 0)
3374 return (error);
3375 if (timeout.tv_nsec >= 1000000000 ||
3376 timeout.tv_nsec < 0)
3377 return (EINVAL);
3378 ts = &timeout;
3379 }
3380 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3381}
3382
3383static int
3384__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3385{
3386 struct timespec *ts, timeout;
3387 int error;
3388
3389 /* Allow a null timespec (wait forever). */
3390 if (uap->uaddr2 == NULL)
3391 ts = NULL;
3392 else {
3393 error = copyin_timeout32(uap->uaddr2, &timeout);
3394 if (error != 0)
3395 return (error);
3396 if (timeout.tv_nsec >= 1000000000 ||
3397 timeout.tv_nsec < 0)
3398 return (EINVAL);
3399 ts = &timeout;
3400 }
3401 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3402}
3403
3404static int
3405__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3406{
3407 struct timespec timeout;
3408 int error;
3409
3410 /* Allow a null timespec (wait forever). */
3411 if (uap->uaddr2 == NULL) {
3412 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3413 } else {
3414 error = copyin_timeout32(uap->uaddr2, &timeout);
3415 if (error != 0)
3416 return (error);
3417 if (timeout.tv_nsec >= 1000000000 ||
3418 timeout.tv_nsec < 0) {
3419 return (EINVAL);
3420 }
3421 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3422 }
3423 return (error);
3424}
3425
3426static int
3427__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3428{
3429 struct timespec timeout;
3430 int error;
3431
3432 /* Allow a null timespec (wait forever). */
3433 if (uap->uaddr2 == NULL) {
3434 error = do_rw_wrlock(td, uap->obj, 0);
3435 } else {
3436 error = copyin_timeout32(uap->uaddr2, &timeout);
3437 if (error != 0)
3438 return (error);
3439 if (timeout.tv_nsec >= 1000000000 ||
3440 timeout.tv_nsec < 0) {
3441 return (EINVAL);
3442 }
3443
3444 error = do_rw_wrlock2(td, uap->obj, &timeout);
3445 }
3446 return (error);
3447}
3448
3449static int
3450__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3451{
3452 struct timespec *ts, timeout;
3453 int error;
3454
3455 if (uap->uaddr2 == NULL)
3456 ts = NULL;
3457 else {
3458 error = copyin_timeout32(uap->uaddr2, &timeout);
3459 if (error != 0)
3460 return (error);
3461 if (timeout.tv_nsec >= 1000000000 ||
3462 timeout.tv_nsec < 0)
3463 return (EINVAL);
3464 ts = &timeout;
3465 }
3466 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3467}
3468
3469static int
3470__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3471{
3472 struct timespec *ts, timeout;
3473 int error;
3474
3475 /* Allow a null timespec (wait forever). */
3476 if (uap->uaddr2 == NULL)
3477 ts = NULL;
3478 else {
3479 error = copyin_timeout32(uap->uaddr2, &timeout);
3480 if (error != 0)
3481 return (error);
3482 if (timeout.tv_nsec >= 1000000000 ||
3483 timeout.tv_nsec < 0)
3484 return (EINVAL);
3485 ts = &timeout;
3486 }
3487 return (do_sem_wait(td, uap->obj, ts));
3488}
3489
3312};
3313
3314int
3315_umtx_op(struct thread *td, struct _umtx_op_args *uap)
3316{
3317 if ((unsigned)uap->op < UMTX_OP_MAX)
3318 return (*op_table[uap->op])(td, uap);
3319 return (EINVAL);
3320}
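
/*
 * Editor's sketch (not in the original file): the futex-style pattern the
 * WAIT/WAKE table entries implement, seen from userland.  Assumes the
 * _umtx_op(2) wrapper from <sys/umtx.h>.  UMTX_OP_WAIT_UINT_PRIVATE puts
 * the caller to sleep only while the word still equals 'val', so a store
 * racing with the syscall is never missed.
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>
#include <machine/atomic.h>
#include <limits.h>

static u_int flag;

static void
wait_for_flag(void)
{
	while (atomic_load_acq_int(&flag) == 0)
		(void)_umtx_op(&flag, UMTX_OP_WAIT_UINT_PRIVATE, 0,
		    NULL, NULL);	/* NULL timespec: wait forever */
}

static void
set_flag(void)
{
	atomic_store_rel_int(&flag, 1);
	/* val = INT_MAX wakes every thread queued on the word. */
	(void)_umtx_op(&flag, UMTX_OP_WAKE_PRIVATE, INT_MAX, NULL, NULL);
}
#endif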
3321
3322#ifdef COMPAT_FREEBSD32
3323int
3324freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3325 /* struct umtx *umtx */
3326{
3327 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3328}
3329
3330int
3331freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3332 /* struct umtx *umtx */
3333{
3334 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3335}
3336
3337struct timespec32 {
3338 uint32_t tv_sec;
3339 uint32_t tv_nsec;
3340};
3341
3342static inline int
3343copyin_timeout32(void *addr, struct timespec *tsp)
3344{
3345 struct timespec32 ts32;
3346 int error;
3347
3348 error = copyin(addr, &ts32, sizeof(struct timespec32));
3349 if (error == 0) {
3350 tsp->tv_sec = ts32.tv_sec;
3351 tsp->tv_nsec = ts32.tv_nsec;
3352 }
3353 return (error);
3354}
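
/*
 * Editor's note: every compat32 handler below repeats the same tv_nsec
 * range check after copyin_timeout32().  A hypothetical helper (sketch
 * only, not part of this file) could fold the validation into the copy;
 * the tv_nsec < 0 arm is kept for symmetry with the handlers even though
 * a value widened from uint32_t can never be negative.
 */
#if 0
static inline int
copyin_timeout32_checked(void *addr, struct timespec *tsp)
{
	int error;

	error = copyin_timeout32(addr, tsp);
	if (error == 0 &&
	    (tsp->tv_nsec >= 1000000000 || tsp->tv_nsec < 0))
		error = EINVAL;
	return (error);
}
#endif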
3355
3356static int
3357__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3358{
3359 struct timespec *ts, timeout;
3360 int error;
3361
3362 /* Allow a null timespec (wait forever). */
3363 if (uap->uaddr2 == NULL)
3364 ts = NULL;
3365 else {
3366 error = copyin_timeout32(uap->uaddr2, &timeout);
3367 if (error != 0)
3368 return (error);
3369 if (timeout.tv_nsec >= 1000000000 ||
3370 timeout.tv_nsec < 0) {
3371 return (EINVAL);
3372 }
3373 ts = &timeout;
3374 }
3375 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3376}
3377
3378static int
3379__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3380{
3381 return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3382}
3383
3384static int
3385__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3386{
3387 struct timespec *ts, timeout;
3388 int error;
3389
3390 if (uap->uaddr2 == NULL)
3391 ts = NULL;
3392 else {
3393 error = copyin_timeout32(uap->uaddr2, &timeout);
3394 if (error != 0)
3395 return (error);
3396 if (timeout.tv_nsec >= 1000000000 ||
3397 timeout.tv_nsec < 0)
3398 return (EINVAL);
3399 ts = &timeout;
3400 }
3401	return (do_wait(td, uap->obj, uap->val, ts, 1, 0));
3402}
3403
3404static int
3405__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3406{
3407 struct timespec *ts, timeout;
3408 int error;
3409
3410 /* Allow a null timespec (wait forever). */
3411 if (uap->uaddr2 == NULL)
3412 ts = NULL;
3413 else {
3414 error = copyin_timeout32(uap->uaddr2, &timeout);
3415 if (error != 0)
3416 return (error);
3417 if (timeout.tv_nsec >= 1000000000 ||
3418 timeout.tv_nsec < 0)
3419 return (EINVAL);
3420 ts = &timeout;
3421 }
3422	return (do_lock_umutex(td, uap->obj, ts, 0));
3423}
3424
3425static int
3426__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3427{
3428 struct timespec *ts, timeout;
3429 int error;
3430
3431 /* Allow a null timespec (wait forever). */
3432 if (uap->uaddr2 == NULL)
3433 ts = NULL;
3434 else {
3435 error = copyin_timeout32(uap->uaddr2, &timeout);
3436 if (error != 0)
3437 return (error);
3438 if (timeout.tv_nsec >= 1000000000 ||
3439 timeout.tv_nsec < 0)
3440 return (EINVAL);
3441 ts = &timeout;
3442 }
3443	return (do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT));
3444}
3445
3446static int
3447__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3448{
3449 struct timespec *ts, timeout;
3450 int error;
3451
3452 /* Allow a null timespec (wait forever). */
3453 if (uap->uaddr2 == NULL)
3454 ts = NULL;
3455 else {
3456 error = copyin_timeout32(uap->uaddr2, &timeout);
3457 if (error != 0)
3458 return (error);
3459 if (timeout.tv_nsec >= 1000000000 ||
3460 timeout.tv_nsec < 0)
3461 return (EINVAL);
3462 ts = &timeout;
3463 }
3464 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3465}
3466
3467static int
3468__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3469{
3470 struct timespec timeout;
3471 int error;
3472
3473 /* Allow a null timespec (wait forever). */
3474 if (uap->uaddr2 == NULL) {
3475 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3476 } else {
3477 error = copyin_timeout32(uap->uaddr2, &timeout);
3478 if (error != 0)
3479 return (error);
3480 if (timeout.tv_nsec >= 1000000000 ||
3481 timeout.tv_nsec < 0) {
3482 return (EINVAL);
3483 }
3484 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3485 }
3486 return (error);
3487}
3488
3489static int
3490__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3491{
3492 struct timespec timeout;
3493 int error;
3494
3495 /* Allow a null timespec (wait forever). */
3496 if (uap->uaddr2 == NULL) {
3497 error = do_rw_wrlock(td, uap->obj, 0);
3498 } else {
3499 error = copyin_timeout32(uap->uaddr2, &timeout);
3500 if (error != 0)
3501 return (error);
3502 if (timeout.tv_nsec >= 1000000000 ||
3503 timeout.tv_nsec < 0) {
3504 return (EINVAL);
3505 }
3506
3507 error = do_rw_wrlock2(td, uap->obj, &timeout);
3508 }
3509 return (error);
3510}
3511
3512static int
3513__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3514{
3515 struct timespec *ts, timeout;
3516 int error;
3517
3518 if (uap->uaddr2 == NULL)
3519 ts = NULL;
3520 else {
3521 error = copyin_timeout32(uap->uaddr2, &timeout);
3522 if (error != 0)
3523 return (error);
3524 if (timeout.tv_nsec >= 1000000000 ||
3525 timeout.tv_nsec < 0)
3526 return (EINVAL);
3527 ts = &timeout;
3528 }
3529	return (do_wait(td, uap->obj, uap->val, ts, 1, 1));
3530}
3531
3532static int
3533__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3534{
3535 struct timespec *ts, timeout;
3536 int error;
3537
3538 /* Allow a null timespec (wait forever). */
3539 if (uap->uaddr2 == NULL)
3540 ts = NULL;
3541 else {
3542 error = copyin_timeout32(uap->uaddr2, &timeout);
3543 if (error != 0)
3544 return (error);
3545 if (timeout.tv_nsec >= 1000000000 ||
3546 timeout.tv_nsec < 0)
3547 return (EINVAL);
3548 ts = &timeout;
3549 }
3550 return (do_sem_wait(td, uap->obj, ts));
3551}
3552
3553static int
3554__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3555{
3556 int count = uap->val;
3557 uint32_t uaddrs[BATCH_SIZE];
3558	uint32_t *upp = (uint32_t *)uap->obj;	/* user array of 32-bit addresses */
3559 int tocopy;
3560 int error = 0;
3561 int i, pos = 0;
3562
3563 while (count > 0) {
3564 tocopy = count;
3565 if (tocopy > BATCH_SIZE)
3566 tocopy = BATCH_SIZE;
3567		error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
3568 if (error != 0)
3569 break;
3570 for (i = 0; i < tocopy; ++i)
3571 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3572 INT_MAX, 1);
3573 count -= tocopy;
3574 pos += tocopy;
3575 }
3576 return (error);
3577}
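
/*
 * Editor's sketch (not in the original file): userland use of the new
 * UMTX_OP_NWAKE_PRIVATE op.  Assuming the native handler mirrors the
 * 32-bit one above, 'obj' carries an array of lock addresses and 'val'
 * the element count, so one syscall replaces n separate wakes (useful
 * for batching deferred wakeups, e.g. in a broadcast).
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>

static void
wake_many(void **addrs, int n)
{
	/* Wakes all waiters on each of the n words in one trip. */
	(void)_umtx_op(addrs, UMTX_OP_NWAKE_PRIVATE, n, NULL, NULL);
}
#endif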
3578
3579static _umtx_op_func op_table_compat32[] = {
3580 __umtx_op_lock_umtx_compat32, /* UMTX_OP_LOCK */
3581 __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */
3582 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */
3583 __umtx_op_wake, /* UMTX_OP_WAKE */
3584	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3585	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
3586 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */
3587 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */
3588	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
3589 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */
3590 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */
3591 __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */
3592 __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */
3593 __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */
3594 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */
3595 __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */
3596 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */
3597 __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3598 __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */
3599 __umtx_op_sem_wait_compat32, /* UMTX_OP_SEM_WAIT */
3600 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */
3601 __umtx_op_nwake_private32 /* UMTX_OP_NWAKE_PRIVATE */
3602};
3603
3604int
3605freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3606{
3607 if ((unsigned)uap->op < UMTX_OP_MAX)
3608 return (*op_table_compat32[uap->op])(td,
3609 (struct _umtx_op_args *)uap);
3610 return (EINVAL);
3611}
3612#endif
3613
3614void
3615umtx_thread_init(struct thread *td)
3616{
3617 td->td_umtxq = umtxq_alloc();
3618 td->td_umtxq->uq_thread = td;
3619}
3620
3621void
3622umtx_thread_fini(struct thread *td)
3623{
3624 umtxq_free(td->td_umtxq);
3625}
3626
3627/*
3628 * Called when a new thread is created, e.g. by fork().
3629 */
3630void
3631umtx_thread_alloc(struct thread *td)
3632{
3633 struct umtx_q *uq;
3634
3635 uq = td->td_umtxq;
3636 uq->uq_inherited_pri = PRI_MAX;
3637
3638 KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3639 KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3640 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3641 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3642}
3643
3644/*
3645 * exec() hook.
3646 */
3647static void
3648umtx_exec_hook(void *arg __unused, struct proc *p __unused,
3649 struct image_params *imgp __unused)
3650{
3651 umtx_thread_cleanup(curthread);
3652}
3653
3654/*
3655 * thread_exit() hook.
3656 */
3657void
3658umtx_thread_exit(struct thread *td)
3659{
3660 umtx_thread_cleanup(td);
3661}
3662
3663/*
3664 * Clean up umtx data.
3665 */
3666static void
3667umtx_thread_cleanup(struct thread *td)
3668{
3669 struct umtx_q *uq;
3670 struct umtx_pi *pi;
3671
3672 if ((uq = td->td_umtxq) == NULL)
3673 return;
3674
3675 mtx_lock_spin(&umtx_lock);
3676 uq->uq_inherited_pri = PRI_MAX;
3677 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3678 pi->pi_owner = NULL;
3679 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3680 }
3681 mtx_unlock_spin(&umtx_lock);
3682 thread_lock(td);
3683 sched_unlend_user_prio(td, PRI_MAX);
3684 thread_unlock(td);
3685}