kern_umtx.c revision 139013
159415Sobrien/*
259243Sobrien * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
359243Sobrien * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
459243Sobrien * All rights reserved.
559243Sobrien *
659243Sobrien * Redistribution and use in source and binary forms, with or without
759243Sobrien * modification, are permitted provided that the following conditions
859243Sobrien * are met:
959243Sobrien * 1. Redistributions of source code must retain the above copyright
1059243Sobrien *    notice unmodified, this list of conditions, and the following
1159243Sobrien *    disclaimer.
1259243Sobrien * 2. Redistributions in binary form must reproduce the above copyright
1359243Sobrien *    notice, this list of conditions and the following disclaimer in the
1459243Sobrien *    documentation and/or other materials provided with the distribution.
1559243Sobrien *
1659243Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1759243Sobrien * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1859243Sobrien * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
1959243Sobrien * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2059243Sobrien * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2159243Sobrien * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2259243Sobrien * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2359243Sobrien * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2459243Sobrien * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2559243Sobrien * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2659243Sobrien */
2759243Sobrien
2859243Sobrien#include <sys/cdefs.h>
2959243Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 139013 2004-12-18 12:52:44Z davidxu $");
3059243Sobrien
3159243Sobrien#include <sys/param.h>
3259243Sobrien#include <sys/kernel.h>
3359243Sobrien#include <sys/limits.h>
3459243Sobrien#include <sys/lock.h>
3559243Sobrien#include <sys/malloc.h>
3659243Sobrien#include <sys/mutex.h>
3759243Sobrien#include <sys/proc.h>
3859243Sobrien#include <sys/sysent.h>
3959415Sobrien#include <sys/systm.h>
4059243Sobrien#include <sys/sysproto.h>
4159243Sobrien#include <sys/eventhandler.h>
4259243Sobrien#include <sys/thr.h>
4359243Sobrien#include <sys/umtx.h>
4459243Sobrien
4559243Sobrien#include <vm/vm.h>
4659243Sobrien#include <vm/vm_param.h>
4759243Sobrien#include <vm/pmap.h>
4859243Sobrien#include <vm/vm_map.h>
4959243Sobrien#include <vm/vm_object.h>
5059243Sobrien
/* Values stored in umtx_key.type. */
#define UMTX_PRIVATE	0	/* Key is (user address, pid). */
#define UMTX_SHARED	1	/* Key is (backing VM object, offset). */

/*
 * Key policy selector tested by umtx_key_get(): when defined (and
 * UMTX_DYNAMIC_SHARED is not), only mappings whose inheritance is
 * VM_INHERIT_SHARE get a shared key; everything else gets a private key.
 */
#define UMTX_STATIC_SHARED
5559243Sobrien
5659243Sobrienstruct umtx_key {
5759243Sobrien	int	type;
5859243Sobrien	union {
5959243Sobrien		struct {
6059243Sobrien			vm_object_t	object;
6159243Sobrien			long		offset;
6259243Sobrien		} shared;
6359243Sobrien		struct {
6459243Sobrien			struct umtx	*umtx;
6559243Sobrien			long		pid;
6659243Sobrien		} private;
6759243Sobrien		struct {
6859243Sobrien			void		*ptr;
6959243Sobrien			long		word;
7059243Sobrien		} both;
7159243Sobrien	} info;
7259243Sobrien};
7359243Sobrien
7459243Sobrienstruct umtx_q {
7559243Sobrien	LIST_ENTRY(umtx_q)	uq_next;	/* Linked list for the hash. */
7659243Sobrien	struct umtx_key		uq_key;		/* Umtx key. */
7759243Sobrien	struct thread		*uq_thread;	/* The thread waits on */
7859243Sobrien	LIST_ENTRY(umtx_q)	uq_rqnext;	/* Linked list for requeuing. */
7959243Sobrien	vm_offset_t		uq_addr;	/* Umtx's virtual address. */
8059243Sobrien};
8159243Sobrien
8259243SobrienLIST_HEAD(umtx_head, umtx_q);
8359243Sobrienstruct umtxq_chain {
8459243Sobrien	struct mtx		uc_lock;	/* Lock for this chain. */
8559243Sobrien	struct umtx_head	uc_queue;	/* List of sleep queues. */
8659243Sobrien};
8759243Sobrien
8859243Sobrien#define	GOLDEN_RATIO_PRIME	2654404609U
8959243Sobrien#define	UMTX_CHAINS		128
9059243Sobrien#define	UMTX_SHIFTS		(__WORD_BIT - 7)
9159243Sobrien
9259243Sobrienstatic struct umtxq_chain umtxq_chains[UMTX_CHAINS];
9359243Sobrienstatic MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
9459243Sobrien
9559243Sobrien#define	UMTX_CONTESTED	LONG_MIN
9659243Sobrien
9759243Sobrienstatic void umtxq_init_chains(void *);
9859243Sobrienstatic int umtxq_hash(struct umtx_key *key);
9959243Sobrienstatic struct mtx *umtxq_mtx(int chain);
10059243Sobrienstatic void umtxq_lock(struct umtx_key *key);
10159243Sobrienstatic void umtxq_unlock(struct umtx_key *key);
10259243Sobrienstatic void umtxq_insert(struct umtx_q *uq);
10359243Sobrienstatic void umtxq_remove(struct umtx_q *uq);
10459243Sobrienstatic int umtxq_sleep(struct thread *td, struct umtx_key *key,
10559243Sobrien	int prio, const char *wmesg, int timo);
10659243Sobrienstatic int  umtxq_count(struct umtx_key *key);
10759243Sobrienstatic void umtxq_signal(struct umtx_key *key);
10859243Sobrienstatic void umtxq_broadcast(struct umtx_key *key);
10959243Sobrien#ifdef UMTX_DYNAMIC_SHARED
11059243Sobrienstatic void fork_handler(void *arg, struct proc *p1, struct proc *p2,
11159243Sobrien	int flags);
11259243Sobrien#endif
11359243Sobrienstatic int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
11459243Sobrienstatic int umtx_key_get(struct thread *td, struct umtx *umtx,
11559243Sobrien	struct umtx_key *key);
11659243Sobrienstatic void umtx_key_release(struct umtx_key *key);
11759243Sobrien
11859243SobrienSYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL);
11959243Sobrien
12059243Sobrienstatic void
12159243Sobrienumtxq_init_chains(void *arg __unused)
12259243Sobrien{
12359243Sobrien	int i;
12459243Sobrien
12559243Sobrien	for (i = 0; i < UMTX_CHAINS; ++i) {
12659243Sobrien		mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
12759243Sobrien			 MTX_DEF | MTX_DUPOK);
12859243Sobrien		LIST_INIT(&umtxq_chains[i].uc_queue);
12959243Sobrien	}
13059243Sobrien#ifdef UMTX_DYNAMIC_SHARED
13159243Sobrien	EVENTHANDLER_REGISTER(process_fork, fork_handler, 0, 10000);
13259243Sobrien#endif
13359243Sobrien}
13459243Sobrien
13559243Sobrienstatic inline int
13659243Sobrienumtxq_hash(struct umtx_key *key)
13759243Sobrien{
13859243Sobrien	unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word;
13959243Sobrien	return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
14059243Sobrien}
14159243Sobrien
14259243Sobrienstatic inline int
14359243Sobrienumtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
14459243Sobrien{
14559243Sobrien	return (k1->type == k2->type &&
14659243Sobrien		k1->info.both.ptr == k2->info.both.ptr &&
14759243Sobrien	        k1->info.both.word == k2->info.both.word);
14859243Sobrien}
14959243Sobrien
15059243Sobrienstatic inline struct mtx *
15159243Sobrienumtxq_mtx(int chain)
15259243Sobrien{
15359243Sobrien	return (&umtxq_chains[chain].uc_lock);
15459243Sobrien}
15559243Sobrien
/* Acquire the mutex of the hash chain that 'key' maps to. */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct mtx *chain_lock = umtxq_mtx(umtxq_hash(key));

	mtx_lock(chain_lock);
}
16259243Sobrien
/* Release the mutex of the hash chain that 'key' maps to. */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct mtx *chain_lock = umtxq_mtx(umtxq_hash(key));

	mtx_unlock(chain_lock);
}
16959243Sobrien
17059243Sobrien/*
17159243Sobrien * Insert a thread onto the umtx queue.
17259243Sobrien */
17359243Sobrienstatic inline void
17459243Sobrienumtxq_insert(struct umtx_q *uq)
17559243Sobrien{
17659243Sobrien	struct umtx_head *head;
17759243Sobrien	int chain = umtxq_hash(&uq->uq_key);
17859243Sobrien
17959243Sobrien	head = &umtxq_chains[chain].uc_queue;
18059243Sobrien	LIST_INSERT_HEAD(head, uq, uq_next);
18159243Sobrien	uq->uq_thread->td_umtxq = uq;
18259243Sobrien	mtx_lock_spin(&sched_lock);
18359243Sobrien	uq->uq_thread->td_flags |= TDF_UMTXQ;
18459415Sobrien	mtx_unlock_spin(&sched_lock);
18559243Sobrien}
18659243Sobrien
18759243Sobrien/*
18859243Sobrien * Remove thread from the umtx queue.
18959243Sobrien */
19059243Sobrienstatic inline void
19159243Sobrienumtxq_remove(struct umtx_q *uq)
19259243Sobrien{
19359243Sobrien	if (uq->uq_thread->td_flags & TDF_UMTXQ) {
19459243Sobrien		LIST_REMOVE(uq, uq_next);
19559243Sobrien		uq->uq_thread->td_umtxq = NULL;
19659243Sobrien		/* turning off TDF_UMTXQ should be the last thing. */
19759243Sobrien		mtx_lock_spin(&sched_lock);
19859243Sobrien		uq->uq_thread->td_flags &= ~TDF_UMTXQ;
19959243Sobrien		mtx_unlock_spin(&sched_lock);
20059243Sobrien	}
20159243Sobrien}
20259243Sobrien
20359243Sobrienstatic int
20459243Sobrienumtxq_count(struct umtx_key *key)
20559243Sobrien{
20659243Sobrien	struct umtx_q *uq;
20759243Sobrien	struct umtx_head *head;
20859243Sobrien	int chain, count = 0;
20959243Sobrien
21059243Sobrien	chain = umtxq_hash(key);
21159243Sobrien	umtxq_lock(key);
21259243Sobrien	head = &umtxq_chains[chain].uc_queue;
21359415Sobrien	LIST_FOREACH(uq, head, uq_next) {
21459415Sobrien		if (umtx_key_match(&uq->uq_key, key)) {
21559415Sobrien			if (++count > 1)
21659415Sobrien				break;
21759415Sobrien		}
21859415Sobrien	}
21959243Sobrien	umtxq_unlock(key);
22059243Sobrien	return (count);
22159243Sobrien}
22259243Sobrien
22359243Sobrienstatic void
22459243Sobrienumtxq_signal(struct umtx_key *key)
22559243Sobrien{
22659243Sobrien	struct umtx_q *uq;
22759243Sobrien	struct umtx_head *head;
22859243Sobrien	struct thread *blocked = NULL;
22959243Sobrien	int chain;
23059243Sobrien
23159243Sobrien	chain = umtxq_hash(key);
23259243Sobrien	umtxq_lock(key);
23359243Sobrien	head = &umtxq_chains[chain].uc_queue;
23459243Sobrien	LIST_FOREACH(uq, head, uq_next) {
23559243Sobrien		if (umtx_key_match(&uq->uq_key, key)) {
23659243Sobrien			blocked = uq->uq_thread;
23759243Sobrien			umtxq_remove(uq);
23859243Sobrien			break;
23959243Sobrien		}
24059243Sobrien	}
24159243Sobrien	umtxq_unlock(key);
24259243Sobrien	if (blocked != NULL)
24359243Sobrien		wakeup(blocked);
24459243Sobrien}
24559243Sobrien
24659243Sobrienstatic void
24759243Sobrienumtxq_broadcast(struct umtx_key *key)
24859243Sobrien{
24959243Sobrien	struct umtx_q *uq, *next;
25059243Sobrien	struct umtx_head *head;
25159243Sobrien	struct thread *blocked;
25259243Sobrien	int chain;
25359243Sobrien
25459243Sobrien	chain = umtxq_hash(key);
25559243Sobrien	umtxq_lock(key);
25659415Sobrien	head = &umtxq_chains[chain].uc_queue;
25759243Sobrien	for (uq = LIST_FIRST(head); uq != NULL; uq = next) {
25859415Sobrien		next = LIST_NEXT(uq, uq_next);
25959415Sobrien		if (umtx_key_match(&uq->uq_key, key)) {
26059243Sobrien			blocked = uq->uq_thread;
26159243Sobrien			umtxq_remove(uq);
26259243Sobrien			wakeup(blocked);
26359243Sobrien		}
26459243Sobrien		uq = next;
26559243Sobrien	}
26659243Sobrien	umtxq_unlock(key);
26759243Sobrien}
26859243Sobrien
/*
 * Sleep on channel 'td' with the chain mutex of 'key' as the interlock.
 * 'priority', 'wmesg' and 'timo' are handed to msleep() unchanged and its
 * return value is passed back untranslated.
 *
 * Callers hold the chain lock on entry.  _do_lock() passes PDROP and
 * re-takes the lock itself afterwards; do_unlock_and_wait() does not pass
 * PDROP and unlocks after the call returns.
 */
static inline int
umtxq_sleep(struct thread *td, struct umtx_key *key, int priority,
	    const char *wmesg, int timo)
{
	struct mtx *chain_lock = umtxq_mtx(umtxq_hash(key));

	return (msleep(td, chain_lock, priority, wmesg, timo));
}
27959243Sobrien
28059243Sobrienstatic int
28159243Sobrienumtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key)
28259243Sobrien{
28359243Sobrien#if defined(UMTX_DYNAMIC_SHARED) || defined(UMTX_STATIC_SHARED)
28459243Sobrien	vm_map_t map;
28559243Sobrien	vm_map_entry_t entry;
28659243Sobrien	vm_pindex_t pindex;
28759243Sobrien	vm_prot_t prot;
28859243Sobrien	boolean_t wired;
28959243Sobrien
29059243Sobrien	map = &td->td_proc->p_vmspace->vm_map;
29159243Sobrien	if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE,
29259243Sobrien	    &entry, &key->info.shared.object, &pindex, &prot,
29359243Sobrien	    &wired) != KERN_SUCCESS) {
29459243Sobrien		return EFAULT;
29559243Sobrien	}
29659243Sobrien#endif
29759243Sobrien
29859243Sobrien#if defined(UMTX_DYNAMIC_SHARED)
29959243Sobrien	key->type = UMTX_SHARED;
30059243Sobrien	key->info.shared.offset = entry->offset + entry->start -
30159243Sobrien		(vm_offset_t)umtx;
30259243Sobrien	/*
30359243Sobrien	 * Add object reference, if we don't do this, a buggy application
30459243Sobrien	 * deallocates the object, the object will be reused by other
30559243Sobrien	 * applications, then unlock will wake wrong thread.
30659243Sobrien	 */
30759243Sobrien	vm_object_reference(key->info.shared.object);
30859243Sobrien	vm_map_lookup_done(map, entry);
30959243Sobrien#elif defined(UMTX_STATIC_SHARED)
31059243Sobrien	if (VM_INHERIT_SHARE == entry->inheritance) {
31159243Sobrien		key->type = UMTX_SHARED;
31259243Sobrien		key->info.shared.offset = entry->offset + entry->start -
31359243Sobrien			(vm_offset_t)umtx;
31459243Sobrien		vm_object_reference(key->info.shared.object);
31559243Sobrien	} else {
31659243Sobrien		key->type = UMTX_PRIVATE;
31759243Sobrien		key->info.private.umtx = umtx;
31859243Sobrien		key->info.private.pid  = td->td_proc->p_pid;
31959243Sobrien	}
32059243Sobrien	vm_map_lookup_done(map, entry);
32159243Sobrien#else
32259243Sobrien	key->type = UMTX_PRIVATE;
32359243Sobrien	key->info.private.umtx = umtx;
32459243Sobrien	key->info.private.pid  = td->td_proc->p_pid;
32559243Sobrien#endif
32659243Sobrien	return (0);
32759243Sobrien}
32859243Sobrien
32959243Sobrienstatic inline void
33059243Sobrienumtx_key_release(struct umtx_key *key)
33159243Sobrien{
33259243Sobrien	if (key->type == UMTX_SHARED)
33359243Sobrien		vm_object_deallocate(key->info.shared.object);
33459243Sobrien}
33559243Sobrien
33659243Sobrienstatic inline int
33759243Sobrienumtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq)
33859243Sobrien{
33959243Sobrien	int error;
34059243Sobrien
34159243Sobrien	if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0)
34259243Sobrien		return (error);
34359243Sobrien
34459243Sobrien	uq->uq_addr = (vm_offset_t)umtx;
34559243Sobrien	uq->uq_thread = td;
34659243Sobrien	umtxq_lock(&uq->uq_key);
34759243Sobrien	umtxq_insert(uq);
34859243Sobrien	umtxq_unlock(&uq->uq_key);
34959243Sobrien	return (0);
35059243Sobrien}
35159243Sobrien
35259243Sobrien#if defined(UMTX_DYNAMIC_SHARED)
35359243Sobrienstatic void
35459243Sobrienfork_handler(void *arg, struct proc *p1, struct proc *p2, int flags)
35559243Sobrien{
35659243Sobrien	vm_map_t map;
357	vm_map_entry_t entry;
358	vm_object_t object;
359	vm_pindex_t pindex;
360	vm_prot_t prot;
361	boolean_t wired;
362	struct umtx_key key;
363	LIST_HEAD(, umtx_q) workq;
364	struct umtx_q *uq;
365	struct thread *td;
366	int onq;
367
368	LIST_INIT(&workq);
369
370	/* Collect threads waiting on umtxq */
371	PROC_LOCK(p1);
372	FOREACH_THREAD_IN_PROC(p1, td) {
373		if (td->td_flags & TDF_UMTXQ) {
374			uq = td->td_umtxq;
375			if (uq)
376				LIST_INSERT_HEAD(&workq, uq, uq_rqnext);
377		}
378	}
379	PROC_UNLOCK(p1);
380
381	LIST_FOREACH(uq, &workq, uq_rqnext) {
382		map = &p1->p_vmspace->vm_map;
383		if (vm_map_lookup(&map, uq->uq_addr, VM_PROT_WRITE,
384		    &entry, &object, &pindex, &prot, &wired) != KERN_SUCCESS) {
385			continue;
386		}
387		key.type = UMTX_SHARED;
388		key.info.shared.object = object;
389		key.info.shared.offset = entry->offset + entry->start -
390			uq->uq_addr;
391		if (umtx_key_match(&key, &uq->uq_key)) {
392			vm_map_lookup_done(map, entry);
393			continue;
394		}
395
396		umtxq_lock(&uq->uq_key);
397		if (uq->uq_thread->td_flags & TDF_UMTXQ) {
398			umtxq_remove(uq);
399			onq = 1;
400		} else
401			onq = 0;
402		umtxq_unlock(&uq->uq_key);
403		if (onq) {
404			vm_object_deallocate(uq->uq_key.info.shared.object);
405			uq->uq_key = key;
406			umtxq_lock(&uq->uq_key);
407			umtxq_insert(uq);
408			umtxq_unlock(&uq->uq_key);
409			vm_object_reference(uq->uq_key.info.shared.object);
410		}
411		vm_map_lookup_done(map, entry);
412	}
413}
414#endif
415
416static int
417_do_lock(struct thread *td, struct umtx *umtx, long id, int timo)
418{
419	struct umtx_q uq;
420	intptr_t owner;
421	intptr_t old;
422	int error = 0;
423
424	/*
425	 * Care must be exercised when dealing with umtx structure.  It
426	 * can fault on any access.
427	 */
428
429	for (;;) {
430		/*
431		 * Try the uncontested case.  This should be done in userland.
432		 */
433		owner = casuptr((intptr_t *)&umtx->u_owner,
434		    UMTX_UNOWNED, id);
435
436		/* The acquire succeeded. */
437		if (owner == UMTX_UNOWNED)
438			return (0);
439
440		/* The address was invalid. */
441		if (owner == -1)
442			return (EFAULT);
443
444		/* If no one owns it but it is contested try to acquire it. */
445		if (owner == UMTX_CONTESTED) {
446			owner = casuptr((intptr_t *)&umtx->u_owner,
447			    UMTX_CONTESTED, id | UMTX_CONTESTED);
448
449			if (owner == UMTX_CONTESTED)
450				return (0);
451
452			/* The address was invalid. */
453			if (owner == -1)
454				return (EFAULT);
455
456			/* If this failed the lock has changed, restart. */
457			continue;
458		}
459
460		/*
461		 * If we caught a signal, we have retried and now
462		 * exit immediately.
463		 */
464		if (error || (error = umtxq_queue_me(td, umtx, &uq)) != 0)
465			return (error);
466
467		/*
468		 * Set the contested bit so that a release in user space
469		 * knows to use the system call for unlock.  If this fails
470		 * either some one else has acquired the lock or it has been
471		 * released.
472		 */
473		old = casuptr((intptr_t *)&umtx->u_owner, owner,
474		    owner | UMTX_CONTESTED);
475
476		/* The address was invalid. */
477		if (old == -1) {
478			umtxq_lock(&uq.uq_key);
479			umtxq_remove(&uq);
480			umtxq_unlock(&uq.uq_key);
481			umtx_key_release(&uq.uq_key);
482			return (EFAULT);
483		}
484
485		/*
486		 * We set the contested bit, sleep. Otherwise the lock changed
487		 * and we need to retry or we lost a race to the thread
488		 * unlocking the umtx.
489		 */
490		umtxq_lock(&uq.uq_key);
491		if (old == owner && (td->td_flags & TDF_UMTXQ)) {
492			error = umtxq_sleep(td, &uq.uq_key,
493				       td->td_priority | PCATCH | PDROP,
494				       "umtx", timo);
495			if (td->td_flags & TDF_UMTXQ) {
496				umtxq_lock(&uq.uq_key);
497				umtxq_remove(&uq);
498				umtxq_unlock(&uq.uq_key);
499			}
500		} else {
501			umtxq_remove(&uq);
502			umtxq_unlock(&uq.uq_key);
503			error = 0;
504		}
505		umtx_key_release(&uq.uq_key);
506	}
507
508	return (0);
509}
510
511static int
512do_lock(struct thread *td, struct umtx *umtx, long id,
513	struct timespec *abstime)
514{
515	struct timespec ts1, ts2;
516	struct timeval tv;
517	int timo, error;
518
519	if (abstime == NULL) {
520		error = _do_lock(td, umtx, id, 0);
521	} else {
522		for (;;) {
523			ts1 = *abstime;
524			getnanotime(&ts2);
525			timespecsub(&ts1, &ts2);
526			TIMESPEC_TO_TIMEVAL(&tv, &ts1);
527			if (tv.tv_sec < 0) {
528				error = ETIMEDOUT;
529				break;
530			}
531			timo = tvtohz(&tv);
532			error = _do_lock(td, umtx, id, timo);
533			if (error != ETIMEDOUT) {
534				if (error == ERESTART)
535					error = EINTR;
536				break;
537			}
538		}
539	}
540	return (error);
541}
542
543static int
544do_unlock(struct thread *td, struct umtx *umtx, long id)
545{
546	struct umtx_key key;
547	intptr_t owner;
548	intptr_t old;
549	int count, error;
550
551	/*
552	 * Make sure we own this mtx.
553	 *
554	 * XXX Need a {fu,su}ptr this is not correct on arch where
555	 * sizeof(intptr_t) != sizeof(long).
556	 */
557	if ((owner = fuword(&umtx->u_owner)) == -1)
558		return (EFAULT);
559
560	if ((owner & ~UMTX_CONTESTED) != id)
561		return (EPERM);
562
563	/* We should only ever be in here for contested locks */
564	if ((owner & UMTX_CONTESTED) == 0)
565		return (EINVAL);
566
567	/*
568	 * When unlocking the umtx, it must be marked as unowned if
569	 * there is zero or one thread only waiting for it.
570	 * Otherwise, it must be marked as contested.
571	 */
572	old = casuptr((intptr_t *)&umtx->u_owner, owner, UMTX_UNOWNED);
573	if (old == -1)
574		return (EFAULT);
575	if (old != owner)
576		return (EINVAL);
577
578	if ((error = umtx_key_get(td, umtx, &key)) != 0)
579		return (error);
580
581	/*
582	 * At the point, a new thread can lock the umtx before we
583	 * reach here, so contested bit will not be set, if there
584	 * are two or more threads on wait queue, we should set
585	 * contensted bit for them.
586	 */
587	count = umtxq_count(&key);
588	if (count <= 0) {
589		umtx_key_release(&key);
590		return (0);
591	}
592
593	/*
594	 * If there is second thread waiting on umtx, set contested bit,
595	 * if they are resumed before we reach here, it is harmless,
596	 * just a bit unefficient.
597	 */
598	if (count > 1) {
599		owner = UMTX_UNOWNED;
600		for (;;) {
601			old = casuptr((intptr_t *)&umtx->u_owner, owner,
602				    owner | UMTX_CONTESTED);
603			if (old == owner)
604				break;
605			if (old == -1) {
606				umtx_key_release(&key);
607				return (EFAULT);
608			}
609			owner = old;
610		}
611		/*
612		 * Another thread locked the umtx before us, so don't bother
613		 * to wake more threads, that thread will do it when it unlocks
614		 * the umtx.
615		 */
616		if ((owner & ~UMTX_CONTESTED) != 0) {
617			umtx_key_release(&key);
618			return (0);
619		}
620	}
621
622	/* Wake blocked thread. */
623	umtxq_signal(&key);
624	umtx_key_release(&key);
625
626	return (0);
627}
628
629static int
630do_unlock_and_wait(struct thread *td, struct umtx *umtx, long id, void *uaddr,
631	struct timespec *abstime)
632{
633	struct umtx_q uq;
634	intptr_t owner;
635	intptr_t old;
636	struct timespec ts1, ts2;
637	struct timeval tv;
638	int timo, error = 0;
639
640	if (umtx == uaddr)
641		return (EINVAL);
642
643	/*
644	 * Make sure we own this mtx.
645	 *
646	 * XXX Need a {fu,su}ptr this is not correct on arch where
647	 * sizeof(intptr_t) != sizeof(long).
648	 */
649	if ((owner = fuword(&umtx->u_owner)) == -1)
650		return (EFAULT);
651
652	if ((owner & ~UMTX_CONTESTED) != id)
653		return (EPERM);
654
655	if ((error = umtxq_queue_me(td, uaddr, &uq)) != 0)
656		return (error);
657
658	old = casuptr((intptr_t *)&umtx->u_owner, id, UMTX_UNOWNED);
659	if (old == -1) {
660		umtxq_lock(&uq.uq_key);
661		umtxq_remove(&uq);
662		umtxq_unlock(&uq.uq_key);
663		umtx_key_release(&uq.uq_key);
664		return (EFAULT);
665	}
666	if (old != id) {
667		error = do_unlock(td, umtx, id);
668		if (error) {
669			umtxq_lock(&uq.uq_key);
670			umtxq_remove(&uq);
671			umtxq_unlock(&uq.uq_key);
672			umtx_key_release(&uq.uq_key);
673			return (error);
674		}
675	}
676	if (abstime == NULL) {
677		umtxq_lock(&uq.uq_key);
678		if (td->td_flags & TDF_UMTXQ)
679			error = umtxq_sleep(td, &uq.uq_key,
680			       td->td_priority | PCATCH, "ucond", 0);
681		umtxq_remove(&uq);
682		umtxq_unlock(&uq.uq_key);
683		if (error == ERESTART)
684			error = EINTR;
685	} else {
686		for (;;) {
687			ts1 = *abstime;
688			getnanotime(&ts2);
689			timespecsub(&ts1, &ts2);
690			TIMESPEC_TO_TIMEVAL(&tv, &ts1);
691			if (tv.tv_sec < 0) {
692				error = ETIMEDOUT;
693				break;
694			}
695			timo = tvtohz(&tv);
696			umtxq_lock(&uq.uq_key);
697			if (td->td_flags & TDF_UMTXQ) {
698				error = umtxq_sleep(td, &uq.uq_key,
699						td->td_priority | PCATCH,
700						"ucond", timo);
701				if (!(td->td_flags & TDF_UMTXQ)) {
702					umtxq_unlock(&uq.uq_key);
703					error = 0;
704					break;
705				}
706				if (error != 0 && error != ETIMEDOUT) {
707					umtxq_unlock(&uq.uq_key);
708					if (error == ERESTART)
709						error = EINTR;
710					break;
711				}
712			} else {
713				umtxq_unlock(&uq.uq_key);
714				error = 0;
715				break;
716			}
717		}
718		if (td->td_flags & TDF_UMTXQ) {
719			umtxq_lock(&uq.uq_key);
720			umtxq_remove(&uq);
721			umtxq_unlock(&uq.uq_key);
722		}
723	}
724	umtx_key_release(&uq.uq_key);
725	return (error);
726}
727
728static int
729do_wake(struct thread *td, void *uaddr, int broadcast)
730{
731	struct umtx_key key;
732	int error;
733
734	if ((error = umtx_key_get(td, uaddr, &key)) != 0)
735		return (error);
736	if (!broadcast)
737		umtxq_signal(&key);
738	else
739		umtxq_broadcast(&key);
740	umtx_key_release(&key);
741	return (0);
742}
743
744int
745_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
746    /* struct umtx *umtx */
747{
748	return _do_lock(td, uap->umtx, td->td_tid, 0);
749}
750
751int
752_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
753    /* struct umtx *umtx */
754{
755	return do_unlock(td, uap->umtx, td->td_tid);
756}
757
758int
759_umtx_op(struct thread *td, struct _umtx_op_args *uap)
760{
761	struct timespec abstime;
762	struct timespec *ts;
763	int error;
764
765	switch(uap->op) {
766	case UMTX_OP_LOCK:
767		/* Allow a null timespec (wait forever). */
768		if (uap->abstime == NULL)
769			ts = NULL;
770		else {
771			error = copyin(uap->abstime, &abstime, sizeof(abstime));
772			if (error != 0)
773				return (error);
774			if (abstime.tv_nsec >= 1000000000 ||
775			    abstime.tv_nsec < 0)
776				return (EINVAL);
777			ts = &abstime;
778		}
779		return do_lock(td, uap->umtx, uap->id, ts);
780	case UMTX_OP_UNLOCK:
781		return do_unlock(td, uap->umtx, uap->id);
782	case UMTX_OP_UNLOCK_AND_WAIT:
783		/* Allow a null timespec (wait forever). */
784		if (uap->abstime == NULL)
785			ts = NULL;
786		else {
787			error = copyin(uap->abstime, &abstime, sizeof(abstime));
788			if (error != 0)
789				return (error);
790			if (abstime.tv_nsec >= 1000000000 ||
791			    abstime.tv_nsec < 0)
792				return (EINVAL);
793			ts = &abstime;
794		}
795		return do_unlock_and_wait(td, uap->umtx, uap->id,
796					  uap->uaddr, ts);
797	case UMTX_OP_WAKE:
798		return do_wake(td, uap->uaddr, uap->id);
799	default:
800		return (EINVAL);
801	}
802}
803