/* kern_umtx.c revision 157815 */
1/*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice unmodified, this list of conditions, and the following
11 *    disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 157815 2006-04-17 18:20:38Z jhb $");
30
31#include <sys/param.h>
32#include <sys/kernel.h>
33#include <sys/limits.h>
34#include <sys/lock.h>
35#include <sys/malloc.h>
36#include <sys/mutex.h>
37#include <sys/proc.h>
38#include <sys/sysent.h>
39#include <sys/systm.h>
40#include <sys/sysproto.h>
41#include <sys/eventhandler.h>
42#include <sys/thr.h>
43#include <sys/umtx.h>
44
45#include <vm/vm.h>
46#include <vm/vm_param.h>
47#include <vm/pmap.h>
48#include <vm/vm_map.h>
49#include <vm/vm_object.h>
50
/*
 * Key types: a private key is meaningful only within one process,
 * while a shared key identifies the umtx via its backing VM object
 * so it can match across processes.
 */
#define UMTX_PRIVATE	0
#define UMTX_SHARED	1

/*
 * Identity of a umtx used for hashing and matching sleep-queue
 * entries.  The 'both' view overlays the first two members of the
 * other variants so hashing/comparison can treat any key uniformly
 * as a (pointer, word) pair.
 */
struct umtx_key {
	int	type;				/* UMTX_PRIVATE or UMTX_SHARED. */
	union {
		struct {
			vm_object_t	object;	/* Backing VM object. */
			long		offset;	/* Identifying offset delta. */
		} shared;
		struct {
			struct umtx	*umtx;	/* User address of the umtx. */
			long		pid;	/* Owning process id. */
		} private;
		struct {
			void		*ptr;	/* Generic pointer view. */
			long		word;	/* Generic integer view. */
		} both;
	} info;
};
71
/*
 * Per-thread sleep-queue entry (td_umtxq); linked into a hash chain
 * while the thread is blocked on a umtx.
 */
struct umtx_q {
	LIST_ENTRY(umtx_q)	uq_next;	/* Linked list for the hash. */
	struct umtx_key		uq_key;		/* Umtx key. */
	struct thread		*uq_thread;	/* The thread waits on. */
	LIST_ENTRY(umtx_q)	uq_rqnext;	/* Linked list for requeuing. */
	vm_offset_t		uq_addr;	/* Umtx's virtual address. */
};
79
LIST_HEAD(umtx_head, umtx_q);
/*
 * One hash bucket: a mutex-protected list of waiters plus a busy
 * flag used to serialize wake/insert sequences on the chain.
 */
struct umtxq_chain {
	struct mtx		uc_lock;	/* Lock for this chain. */
	struct umtx_head	uc_queue;	/* List of sleep queues. */
#define	UCF_BUSY		0x01		/* Chain is marked busy. */
#define	UCF_WANT		0x02		/* A thread waits for !busy. */
	int			uc_flags;
};
88
/* Multiplicative hash constant (see umtxq_hash()). */
#define	GOLDEN_RATIO_PRIME	2654404609U
/* Number of hash chains; must stay in sync with UMTX_SHIFTS below. */
#define	UMTX_CHAINS		128
/* Keep the top 7 bits of the multiplied hash (2^7 == UMTX_CHAINS). */
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");

static void umtxq_init_chains(void *);
static int umtxq_hash(struct umtx_key *key);
static struct mtx *umtxq_mtx(int chain);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct thread *td, struct umtx_key *key,
	int prio, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(struct thread *td, struct umtx *umtx,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);

/* Chain mutexes/lists are set up early, right after event handlers. */
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL);
115
116struct umtx_q *
117umtxq_alloc(void)
118{
119	return (malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK));
120}
121
122void
123umtxq_free(struct umtx_q *uq)
124{
125	free(uq, M_UMTX);
126}
127
128static void
129umtxq_init_chains(void *arg __unused)
130{
131	int i;
132
133	for (i = 0; i < UMTX_CHAINS; ++i) {
134		mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
135			 MTX_DEF | MTX_DUPOK);
136		LIST_INIT(&umtxq_chains[i].uc_queue);
137		umtxq_chains[i].uc_flags = 0;
138	}
139}
140
141static inline int
142umtxq_hash(struct umtx_key *key)
143{
144	unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word;
145	return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
146}
147
148static inline int
149umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
150{
151	return (k1->type == k2->type &&
152		k1->info.both.ptr == k2->info.both.ptr &&
153	        k1->info.both.word == k2->info.both.word);
154}
155
156static inline struct mtx *
157umtxq_mtx(int chain)
158{
159	return (&umtxq_chains[chain].uc_lock);
160}
161
/*
 * Mark a key's chain busy, sleeping until any current holder clears
 * UCF_BUSY.  The caller must hold the chain lock; msleep() drops and
 * reacquires it across the wait, so the flag is rechecked in a loop.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	while (umtxq_chains[chain].uc_flags & UCF_BUSY) {
		/* Record interest so the unbusy path issues a wakeup. */
		umtxq_chains[chain].uc_flags |= UCF_WANT;
		msleep(&umtxq_chains[chain], umtxq_mtx(chain),
		    0, "umtxq_busy", 0);
	}
	umtxq_chains[chain].uc_flags |= UCF_BUSY;
}
175
/*
 * Clear a chain's busy flag and wake any threads blocked in
 * umtxq_busy().  The caller must hold the chain lock and have the
 * chain marked busy.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	KASSERT(umtxq_chains[chain].uc_flags & UCF_BUSY, ("not busy"));
	umtxq_chains[chain].uc_flags &= ~UCF_BUSY;
	if (umtxq_chains[chain].uc_flags & UCF_WANT) {
		/* UCF_WANT is consumed; new waiters will set it again. */
		umtxq_chains[chain].uc_flags &= ~UCF_WANT;
		wakeup(&umtxq_chains[chain]);
	}
}
189
/*
 * Acquire the chain lock covering the given key.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	mtx_lock(umtxq_mtx(umtxq_hash(key)));
}
196
/*
 * Release the chain lock covering the given key.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	mtx_unlock(umtxq_mtx(umtxq_hash(key)));
}
203
/*
 * Insert a thread onto the umtx queue.
 *
 * The chain lock must be held.  TDF_UMTXQ is set under sched_lock so
 * that the waker (umtxq_remove()) and the sleeping thread agree on
 * queue membership.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtx_head *head;
	int chain = umtxq_hash(&uq->uq_key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_INSERT_HEAD(head, uq, uq_next);
	mtx_lock_spin(&sched_lock);
	uq->uq_thread->td_flags |= TDF_UMTXQ;
	mtx_unlock_spin(&sched_lock);
}
220
/*
 * Remove thread from the umtx queue.
 *
 * The chain lock must be held.  TDF_UMTXQ doubles as the "still
 * queued / not yet woken" indicator checked by the sleep paths, so
 * it is cleared only after the list unlink.  A thread already
 * dequeued by umtxq_signal() is left alone.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	mtx_assert(umtxq_mtx(umtxq_hash(&uq->uq_key)), MA_OWNED);
	if (uq->uq_thread->td_flags & TDF_UMTXQ) {
		LIST_REMOVE(uq, uq_next);
		/* turning off TDF_UMTXQ should be the last thing. */
		mtx_lock_spin(&sched_lock);
		uq->uq_thread->td_flags &= ~TDF_UMTXQ;
		mtx_unlock_spin(&sched_lock);
	}
}
236
237static int
238umtxq_count(struct umtx_key *key)
239{
240	struct umtx_q *uq;
241	struct umtx_head *head;
242	int chain, count = 0;
243
244	chain = umtxq_hash(key);
245	mtx_assert(umtxq_mtx(chain), MA_OWNED);
246	head = &umtxq_chains[chain].uc_queue;
247	LIST_FOREACH(uq, head, uq_next) {
248		if (umtx_key_match(&uq->uq_key, key)) {
249			if (++count > 1)
250				break;
251		}
252	}
253	return (count);
254}
255
/*
 * Wake up to n_wake threads waiting on the given key and return the
 * number actually woken.  The chain lock must be held.  Each waiter
 * is dequeued (clearing TDF_UMTXQ) before wakeup() so the woken
 * thread sees itself off the queue.
 *
 * NOTE: n_wake == 0 still wakes one thread, because the count is
 * pre-incremented before the >= comparison; do_unlock() relies on
 * this.
 */
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
	struct umtx_q *uq, *next;
	struct umtx_head *head;
	struct thread *blocked = NULL;
	int chain, ret;

	ret = 0;
	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	/* Manual safe iteration: umtxq_remove() unlinks the current entry. */
	for (uq = LIST_FIRST(head); uq; uq = next) {
		next = LIST_NEXT(uq, uq_next);
		if (umtx_key_match(&uq->uq_key, key)) {
			blocked = uq->uq_thread;
			umtxq_remove(uq);
			wakeup(blocked);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}
280
281static inline int
282umtxq_sleep(struct thread *td, struct umtx_key *key, int priority,
283	    const char *wmesg, int timo)
284{
285	int chain = umtxq_hash(key);
286	int error = msleep(td, umtxq_mtx(chain), priority, wmesg, timo);
287	if (error == EWOULDBLOCK)
288		error = ETIMEDOUT;
289	return (error);
290}
291
/*
 * Build the umtx_key identifying the user address 'umtx'.  If the
 * backing map entry is inherit-shared, the key is the (VM object,
 * offset delta) pair so it matches across processes, and a reference
 * is taken on the object (dropped by umtx_key_release()).  Otherwise
 * the key is the (address, pid) pair, private to this process.
 *
 * Returns 0 on success or EFAULT if the address is not writably
 * mapped.
 */
static int
umtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE,
	    &entry, &key->info.shared.object, &pindex, &prot,
	    &wired) != KERN_SUCCESS) {
		return EFAULT;
	}

	if (VM_INHERIT_SHARE == entry->inheritance) {
		key->type = UMTX_SHARED;
		/*
		 * Note the reversed sign (start - addr, not addr - start):
		 * the offset is used only for hashing and equality in this
		 * file, so any per-address-unique value suffices.
		 */
		key->info.shared.offset = entry->offset + entry->start -
			(vm_offset_t)umtx;
		vm_object_reference(key->info.shared.object);
	} else {
		key->type = UMTX_PRIVATE;
		key->info.private.umtx = umtx;
		key->info.private.pid  = td->td_proc->p_pid;
	}
	vm_map_lookup_done(map, entry);
	return (0);
}
321
322static inline void
323umtx_key_release(struct umtx_key *key)
324{
325	if (key->type == UMTX_SHARED)
326		vm_object_deallocate(key->info.shared.object);
327}
328
/*
 * Resolve the key for 'umtx' and place the current thread on its
 * sleep queue.  On success the caller owns a key reference and must
 * eventually dequeue itself (or be woken) and call
 * umtx_key_release().  Returns umtx_key_get()'s error on failure.
 */
static inline int
umtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq)
{
	int error;

	if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0)
		return (error);

	uq->uq_addr = (vm_offset_t)umtx;
	uq->uq_thread = td;
	umtxq_lock(&uq->uq_key);
	/* XXX: the busy flag is not strictly needed for the wait path. */
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	return (0);
}
347
/*
 * Acquire the user mutex at 'umtx' for thread id 'id', sleeping up
 * to 'timo' ticks per attempt (0 = forever).  Implements the classic
 * contested-lock protocol: CAS the owner word, queue ourselves, set
 * the contested bit, then sleep and retry.  Returns 0 on success,
 * EFAULT on a bad user address, ETIMEDOUT on timeout, or a signal
 * error from msleep().
 */
static int
_do_lock(struct thread *td, struct umtx *umtx, long id, int timo)
{
	struct umtx_q *uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	uq = td->td_umtxq;
	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */

	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner,
		    UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error || (error = umtxq_queue_me(td, umtx, uq)) != 0)
			return (error);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid; undo the queueing and bail. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.  TDF_UMTXQ clear here means a waker
		 * already dequeued us, so skip the sleep.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner && (td->td_flags & TDF_UMTXQ)) {
			error = umtxq_sleep(td, &uq->uq_key, PCATCH,
				       "umtx", timo);
		}
		/* Dequeue (no-op if a waker beat us) and loop to retry. */
		umtxq_busy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
439
/*
 * Lock a umtx with an optional relative timeout.  The absolute
 * deadline is computed once up front; after each ETIMEDOUT wakeup
 * the remaining time is recomputed and the attempt retried, so
 * tvtohz() tick granularity cannot cut the total wait short.
 */
static int
do_lock(struct thread *td, struct umtx *umtx, long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock(td, umtx, id, 0);
	} else {
		/* ts = absolute deadline on the monotonic uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the remaining time: ts - now. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
	}
	/*
	 * This lets userland back off critical region if needed.
	 */
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
475
/*
 * Unlock the user mutex at 'umtx' held by thread id 'id'.  Only
 * called for contested locks (uncontested unlock happens in user
 * space).  Returns 0 on success, EFAULT on a bad address, EPERM if
 * the caller does not own the lock, or EINVAL if the lock state
 * changed under us.
 */
static int
do_unlock(struct thread *td, struct umtx *umtx, long id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr this is not correct on arch where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	if ((owner = fuword(&umtx->u_owner)) == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* We should only ever be in here for contested locks */
	if ((owner & UMTX_CONTESTED) == 0)
		return (EINVAL);

	if ((error = umtx_key_get(td, umtx, &key)) != 0)
		return (error);

	/*
	 * Busy the chain so the waiter count stays meaningful between
	 * the count and the CAS below; drop the chain lock across the
	 * user-space access, which may fault.
	 */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner,
			count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	/* n_wake of 0 wakes exactly one waiter (see umtxq_signal()). */
	umtxq_signal(&key, 0);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
527
/*
 * FUTEX-style wait: sleep until woken or timed out, but only if the
 * umtx owner word still equals 'id' after we are on the sleep queue
 * (queue-first ordering closes the lost-wakeup window against
 * kern_umtx_wake()).  A wakeup that clears TDF_UMTXQ counts as
 * success even if the sleep also timed out or caught a signal.
 */
static int
do_wait(struct thread *td, struct umtx *umtx, long id, struct timespec *timeout)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtxq_queue_me(td, umtx, uq)) != 0)
		return (error);
	tmp = fuword(&umtx->u_owner);
	if (tmp != id) {
		/* Value already changed: no sleep, report success. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		if (td->td_flags & TDF_UMTXQ)
			error = umtxq_sleep(td, &uq->uq_key,
			    PCATCH, "ucond", 0);
		if (!(td->td_flags & TDF_UMTXQ))
			error = 0;	/* Woken: override any sleep error. */
		else
			umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		/* Absolute deadline; re-sleep on early tick expiry. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			umtxq_lock(&uq->uq_key);
			if (td->td_flags & TDF_UMTXQ) {
				error = umtxq_sleep(td, &uq->uq_key, PCATCH,
					    "ucond", tvtohz(&tv));
			}
			if (!(td->td_flags & TDF_UMTXQ)) {
				/* Woken by a signal()er: success. */
				umtxq_unlock(&uq->uq_key);
				goto out;
			}
			umtxq_unlock(&uq->uq_key);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
591
592int
593kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
594{
595	struct umtx_key key;
596	int ret;
597
598	if ((ret = umtx_key_get(td, uaddr, &key)) != 0)
599		return (ret);
600	umtxq_lock(&key);
601	ret = umtxq_signal(&key, n_wake);
602	umtxq_unlock(&key);
603	umtx_key_release(&key);
604	return (0);
605}
606
607int
608_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
609    /* struct umtx *umtx */
610{
611	return _do_lock(td, uap->umtx, td->td_tid, 0);
612}
613
614int
615_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
616    /* struct umtx *umtx */
617{
618	return do_unlock(td, uap->umtx, td->td_tid);
619}
620
621int
622_umtx_op(struct thread *td, struct _umtx_op_args *uap)
623{
624	struct timespec timeout;
625	struct timespec *ts;
626	int error;
627
628	switch(uap->op) {
629	case UMTX_OP_LOCK:
630		/* Allow a null timespec (wait forever). */
631		if (uap->uaddr2 == NULL)
632			ts = NULL;
633		else {
634			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
635			if (error != 0)
636				break;
637			if (timeout.tv_nsec >= 1000000000 ||
638			    timeout.tv_nsec < 0) {
639				error = EINVAL;
640				break;
641			}
642			ts = &timeout;
643		}
644		error = do_lock(td, uap->umtx, uap->id, ts);
645		break;
646	case UMTX_OP_UNLOCK:
647		error = do_unlock(td, uap->umtx, uap->id);
648		break;
649	case UMTX_OP_WAIT:
650		/* Allow a null timespec (wait forever). */
651		if (uap->uaddr2 == NULL)
652			ts = NULL;
653		else {
654			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
655			if (error != 0)
656				break;
657			if (timeout.tv_nsec >= 1000000000 ||
658			    timeout.tv_nsec < 0) {
659				error = EINVAL;
660				break;
661			}
662			ts = &timeout;
663		}
664		error = do_wait(td, uap->umtx, uap->id, ts);
665		break;
666	case UMTX_OP_WAKE:
667		error = kern_umtx_wake(td, uap->umtx, uap->id);
668		break;
669	default:
670		error = EINVAL;
671		break;
672	}
673	return (error);
674}
675