uipc_sem.c revision 164033
1/*-
2 * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
3 * Copyright (c) 2003-2005 SPARTA, Inc.
4 * Copyright (c) 2005 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project in part by Network
8 * Associates Laboratories, the Security Research Division of Network
9 * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
10 * as part of the DARPA CHATS research program.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: head/sys/kern/uipc_sem.c 164033 2006-11-06 13:42:10Z rwatson $");
36
37#include "opt_mac.h"
38#include "opt_posix.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/sysproto.h>
43#include <sys/eventhandler.h>
44#include <sys/kernel.h>
45#include <sys/priv.h>
46#include <sys/proc.h>
47#include <sys/lock.h>
48#include <sys/mutex.h>
49#include <sys/module.h>
50#include <sys/condvar.h>
51#include <sys/sem.h>
52#include <sys/uio.h>
53#include <sys/syscall.h>
54#include <sys/stat.h>
55#include <sys/sysent.h>
56#include <sys/sysctl.h>
57#include <sys/time.h>
58#include <sys/malloc.h>
59#include <sys/fcntl.h>
60
61#include <posix4/ksem.h>
62#include <posix4/posix4.h>
63#include <posix4/semaphore.h>
64#include <posix4/_semaphore.h>
65
66#include <security/mac/mac_framework.h>
67
68static int sem_count_proc(struct proc *p);
69static struct ksem *sem_lookup_byname(const char *name);
70static int sem_create(struct thread *td, const char *name,
71    struct ksem **ksret, mode_t mode, unsigned int value);
72static void sem_free(struct ksem *ksnew);
73static int sem_perm(struct thread *td, struct ksem *ks);
74static void sem_enter(struct proc *p, struct ksem *ks);
75static int sem_leave(struct proc *p, struct ksem *ks);
76static void sem_exechook(void *arg, struct proc *p, struct image_params *imgp);
77static void sem_exithook(void *arg, struct proc *p);
78static void sem_forkhook(void *arg, struct proc *p1, struct proc *p2,
79    int flags);
80static int sem_hasopen(struct thread *td, struct ksem *ks);
81
82static int kern_sem_close(struct thread *td, semid_t id);
83static int kern_sem_post(struct thread *td, semid_t id);
84static int kern_sem_wait(struct thread *td, semid_t id, int tryflag,
85    struct timespec *abstime);
86static int kern_sem_init(struct thread *td, int dir, unsigned int value,
87    semid_t *idp);
88static int kern_sem_open(struct thread *td, int dir, const char *name,
89    int oflag, mode_t mode, unsigned int value, semid_t *idp);
90static int kern_sem_unlink(struct thread *td, const char *name);
91
92#ifndef SEM_MAX
93#define SEM_MAX	30
94#endif
95
96#define SEM_MAX_NAMELEN	14
97
98#define SEM_TO_ID(x)	((intptr_t)(x))
99#define ID_TO_SEM(x)	id_to_sem(x)
100
/*
 * Available semaphores go here.  This includes semaphores created via
 * sem_init and any semaphores created via sem_open that have not yet been
 * unlinked.
 */
105LIST_HEAD(, ksem) ksem_head = LIST_HEAD_INITIALIZER(&ksem_head);
/*
 * Semaphores that are still in use but have been sem_unlink()'d go here.
 */
109LIST_HEAD(, ksem) ksem_deadhead = LIST_HEAD_INITIALIZER(&ksem_deadhead);
110
111static struct mtx sem_lock;
112static MALLOC_DEFINE(M_SEM, "sems", "semaphore data");
113
114static int nsems = 0;
115SYSCTL_DECL(_p1003_1b);
116SYSCTL_INT(_p1003_1b, OID_AUTO, nsems, CTLFLAG_RD, &nsems, 0, "");
117
118static eventhandler_tag sem_exit_tag, sem_exec_tag, sem_fork_tag;
119
120#ifdef SEM_DEBUG
121#define DP(x)	printf x
122#else
123#define DP(x)
124#endif
125
126static __inline
127void
128sem_ref(struct ksem *ks)
129{
130
131	mtx_assert(&sem_lock, MA_OWNED);
132	ks->ks_ref++;
133	DP(("sem_ref: ks = %p, ref = %d\n", ks, ks->ks_ref));
134}
135
136static __inline
137void
138sem_rel(struct ksem *ks)
139{
140
141	mtx_assert(&sem_lock, MA_OWNED);
142	DP(("sem_rel: ks = %p, ref = %d\n", ks, ks->ks_ref - 1));
143	if (--ks->ks_ref == 0)
144		sem_free(ks);
145}
146
147static __inline struct ksem *id_to_sem(semid_t id);
148
149static __inline
150struct ksem *
151id_to_sem(semid_t id)
152{
153	struct ksem *ks;
154
155	mtx_assert(&sem_lock, MA_OWNED);
156	DP(("id_to_sem: id = %0x,%p\n", id, (struct ksem *)id));
157	LIST_FOREACH(ks, &ksem_head, ks_entry) {
158		DP(("id_to_sem: ks = %p\n", ks));
159		if (ks == (struct ksem *)id)
160			return (ks);
161	}
162	return (NULL);
163}
164
165static struct ksem *
166sem_lookup_byname(const char *name)
167{
168	struct ksem *ks;
169
170	mtx_assert(&sem_lock, MA_OWNED);
171	LIST_FOREACH(ks, &ksem_head, ks_entry)
172		if (ks->ks_name != NULL && strcmp(ks->ks_name, name) == 0)
173			return (ks);
174	return (NULL);
175}
176
177static int
178sem_create(struct thread *td, const char *name, struct ksem **ksret,
179    mode_t mode, unsigned int value)
180{
181	struct ksem *ret;
182	struct proc *p;
183	struct ucred *uc;
184	size_t len;
185	int error;
186
187	DP(("sem_create\n"));
188	p = td->td_proc;
189	uc = td->td_ucred;
190	if (value > SEM_VALUE_MAX)
191		return (EINVAL);
192	ret = malloc(sizeof(*ret), M_SEM, M_WAITOK | M_ZERO);
193	if (name != NULL) {
194		len = strlen(name);
195		if (len > SEM_MAX_NAMELEN) {
196			free(ret, M_SEM);
197			return (ENAMETOOLONG);
198		}
199		/* name must start with a '/' but not contain one. */
200		if (*name != '/' || len < 2 || index(name + 1, '/') != NULL) {
201			free(ret, M_SEM);
202			return (EINVAL);
203		}
204		ret->ks_name = malloc(len + 1, M_SEM, M_WAITOK);
205		strcpy(ret->ks_name, name);
206	} else {
207		ret->ks_name = NULL;
208	}
209	ret->ks_mode = mode;
210	ret->ks_value = value;
211	ret->ks_ref = 1;
212	ret->ks_waiters = 0;
213	ret->ks_uid = uc->cr_uid;
214	ret->ks_gid = uc->cr_gid;
215	ret->ks_onlist = 0;
216	cv_init(&ret->ks_cv, "sem");
217	LIST_INIT(&ret->ks_users);
218#ifdef MAC
219	mac_init_posix_sem(ret);
220	mac_create_posix_sem(uc, ret);
221#endif
222	if (name != NULL)
223		sem_enter(td->td_proc, ret);
224	*ksret = ret;
225	mtx_lock(&sem_lock);
226	if (nsems >= p31b_getcfg(CTL_P1003_1B_SEM_NSEMS_MAX)) {
227		sem_leave(td->td_proc, ret);
228		sem_free(ret);
229		error = ENFILE;
230	} else {
231		nsems++;
232		error = 0;
233	}
234	mtx_unlock(&sem_lock);
235	return (error);
236}
237
238#ifndef _SYS_SYSPROTO_H_
239struct ksem_init_args {
240	unsigned int value;
241	semid_t *idp;
242};
243int ksem_init(struct thread *td, struct ksem_init_args *uap);
244#endif
245int
246ksem_init(struct thread *td, struct ksem_init_args *uap)
247{
248	int error;
249
250	error = kern_sem_init(td, UIO_USERSPACE, uap->value, uap->idp);
251	return (error);
252}
253
254static int
255kern_sem_init(struct thread *td, int dir, unsigned int value, semid_t *idp)
256{
257	struct ksem *ks;
258	semid_t id;
259	int error;
260
261	error = sem_create(td, NULL, &ks, S_IRWXU | S_IRWXG, value);
262	if (error)
263		return (error);
264	id = SEM_TO_ID(ks);
265	if (dir == UIO_USERSPACE) {
266		error = copyout(&id, idp, sizeof(id));
267		if (error) {
268			mtx_lock(&sem_lock);
269			sem_rel(ks);
270			mtx_unlock(&sem_lock);
271			return (error);
272		}
273	} else {
274		*idp = id;
275	}
276	mtx_lock(&sem_lock);
277	LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
278	ks->ks_onlist = 1;
279	mtx_unlock(&sem_lock);
280	return (error);
281}
282
283#ifndef _SYS_SYSPROTO_H_
284struct ksem_open_args {
285	char *name;
286	int oflag;
287	mode_t mode;
288	unsigned int value;
289	semid_t *idp;
290};
291int ksem_open(struct thread *td, struct ksem_open_args *uap);
292#endif
293int
294ksem_open(struct thread *td, struct ksem_open_args *uap)
295{
296	char name[SEM_MAX_NAMELEN + 1];
297	size_t done;
298	int error;
299
300	error = copyinstr(uap->name, name, SEM_MAX_NAMELEN + 1, &done);
301	if (error)
302		return (error);
303	DP((">>> sem_open start\n"));
304	error = kern_sem_open(td, UIO_USERSPACE,
305	    name, uap->oflag, uap->mode, uap->value, uap->idp);
306	DP(("<<< sem_open end\n"));
307	return (error);
308}
309
310static int
311kern_sem_open(struct thread *td, int dir, const char *name, int oflag,
312    mode_t mode, unsigned int value, semid_t *idp)
313{
314	struct ksem *ksnew, *ks;
315	int error;
316	semid_t id;
317
318	ksnew = NULL;
319	mtx_lock(&sem_lock);
320	ks = sem_lookup_byname(name);
321	/*
322	 * If we found it but O_EXCL is set, error.
323	 */
324	if (ks != NULL && (oflag & O_EXCL) != 0) {
325		mtx_unlock(&sem_lock);
326		return (EEXIST);
327	}
328	/*
329	 * If we didn't find it...
330	 */
331	if (ks == NULL) {
332		/*
333		 * didn't ask for creation? error.
334		 */
335		if ((oflag & O_CREAT) == 0) {
336			mtx_unlock(&sem_lock);
337			return (ENOENT);
338		}
339		/*
340		 * We may block during creation, so drop the lock.
341		 */
342		mtx_unlock(&sem_lock);
343		error = sem_create(td, name, &ksnew, mode, value);
344		if (error != 0)
345			return (error);
346		id = SEM_TO_ID(ksnew);
347		if (dir == UIO_USERSPACE) {
348			DP(("about to copyout! %d to %p\n", id, idp));
349			error = copyout(&id, idp, sizeof(id));
350			if (error) {
351				mtx_lock(&sem_lock);
352				sem_leave(td->td_proc, ksnew);
353				sem_rel(ksnew);
354				mtx_unlock(&sem_lock);
355				return (error);
356			}
357		} else {
358			DP(("about to set! %d to %p\n", id, idp));
359			*idp = id;
360		}
361		/*
362		 * We need to make sure we haven't lost a race while
363		 * allocating during creation.
364		 */
365		mtx_lock(&sem_lock);
366		ks = sem_lookup_byname(name);
367		if (ks != NULL) {
368			/* we lost... */
369			sem_leave(td->td_proc, ksnew);
370			sem_rel(ksnew);
371			/* we lost and we can't loose... */
372			if ((oflag & O_EXCL) != 0) {
373				mtx_unlock(&sem_lock);
374				return (EEXIST);
375			}
376		} else {
377			DP(("sem_create: about to add to list...\n"));
378			LIST_INSERT_HEAD(&ksem_head, ksnew, ks_entry);
379			DP(("sem_create: setting list bit...\n"));
380			ksnew->ks_onlist = 1;
381			DP(("sem_create: done, about to unlock...\n"));
382		}
383	} else {
384#ifdef MAC
385		error = mac_check_posix_sem_open(td->td_ucred, ks);
386		if (error)
387			goto err_open;
388#endif
389		/*
390		 * if we aren't the creator, then enforce permissions.
391		 */
392		error = sem_perm(td, ks);
393		if (error)
394			goto err_open;
395		sem_ref(ks);
396		mtx_unlock(&sem_lock);
397		id = SEM_TO_ID(ks);
398		if (dir == UIO_USERSPACE) {
399			error = copyout(&id, idp, sizeof(id));
400			if (error) {
401				mtx_lock(&sem_lock);
402				sem_rel(ks);
403				mtx_unlock(&sem_lock);
404				return (error);
405			}
406		} else {
407			*idp = id;
408		}
409		sem_enter(td->td_proc, ks);
410		mtx_lock(&sem_lock);
411		sem_rel(ks);
412	}
413err_open:
414	mtx_unlock(&sem_lock);
415	return (error);
416}
417
418static int
419sem_perm(struct thread *td, struct ksem *ks)
420{
421	struct ucred *uc;
422
423	/*
424	 * XXXRW: This permission routine appears to be incorrect.  If the
425	 * user matches, we shouldn't go on to the group if the user
426	 * permissions don't allow the action?  Not changed for now.  To fix,
427	 * change from a series of if (); if (); to if () else if () else...
428	 */
429	uc = td->td_ucred;
430	DP(("sem_perm: uc(%d,%d) ks(%d,%d,%o)\n",
431	    uc->cr_uid, uc->cr_gid,
432	     ks->ks_uid, ks->ks_gid, ks->ks_mode));
433	if ((uc->cr_uid == ks->ks_uid) && (ks->ks_mode & S_IWUSR) != 0)
434		return (0);
435	if ((uc->cr_gid == ks->ks_gid) && (ks->ks_mode & S_IWGRP) != 0)
436		return (0);
437	if ((ks->ks_mode & S_IWOTH) != 0)
438		return (0);
439	return (priv_check(td, PRIV_SEM_WRITE));
440}
441
442static void
443sem_free(struct ksem *ks)
444{
445
446	nsems--;
447	if (ks->ks_onlist)
448		LIST_REMOVE(ks, ks_entry);
449	if (ks->ks_name != NULL)
450		free(ks->ks_name, M_SEM);
451	cv_destroy(&ks->ks_cv);
452	free(ks, M_SEM);
453}
454
455static __inline struct kuser *sem_getuser(struct proc *p, struct ksem *ks);
456
457static __inline struct kuser *
458sem_getuser(struct proc *p, struct ksem *ks)
459{
460	struct kuser *k;
461
462	LIST_FOREACH(k, &ks->ks_users, ku_next)
463		if (k->ku_pid == p->p_pid)
464			return (k);
465	return (NULL);
466}
467
468static int
469sem_hasopen(struct thread *td, struct ksem *ks)
470{
471
472	return ((ks->ks_name == NULL && sem_perm(td, ks) == 0)
473	    || sem_getuser(td->td_proc, ks) != NULL);
474}
475
476static int
477sem_leave(struct proc *p, struct ksem *ks)
478{
479	struct kuser *k;
480
481	DP(("sem_leave: ks = %p\n", ks));
482	k = sem_getuser(p, ks);
483	DP(("sem_leave: ks = %p, k = %p\n", ks, k));
484	if (k != NULL) {
485		LIST_REMOVE(k, ku_next);
486		sem_rel(ks);
487		DP(("sem_leave: about to free k\n"));
488		free(k, M_SEM);
489		DP(("sem_leave: returning\n"));
490		return (0);
491	}
492	return (EINVAL);
493}
494
495static void
496sem_enter(p, ks)
497	struct proc *p;
498	struct ksem *ks;
499{
500	struct kuser *ku, *k;
501
502	ku = malloc(sizeof(*ku), M_SEM, M_WAITOK);
503	ku->ku_pid = p->p_pid;
504	mtx_lock(&sem_lock);
505	k = sem_getuser(p, ks);
506	if (k != NULL) {
507		mtx_unlock(&sem_lock);
508		free(ku, M_TEMP);
509		return;
510	}
511	LIST_INSERT_HEAD(&ks->ks_users, ku, ku_next);
512	sem_ref(ks);
513	mtx_unlock(&sem_lock);
514}
515
516#ifndef _SYS_SYSPROTO_H_
517struct ksem_unlink_args {
518	char *name;
519};
520int ksem_unlink(struct thread *td, struct ksem_unlink_args *uap);
521#endif
522
523int
524ksem_unlink(struct thread *td, struct ksem_unlink_args *uap)
525{
526	char name[SEM_MAX_NAMELEN + 1];
527	size_t done;
528	int error;
529
530	error = copyinstr(uap->name, name, SEM_MAX_NAMELEN + 1, &done);
531	return (error ? error :
532	    kern_sem_unlink(td, name));
533}
534
535static int
536kern_sem_unlink(struct thread *td, const char *name)
537{
538	struct ksem *ks;
539	int error;
540
541	mtx_lock(&sem_lock);
542	ks = sem_lookup_byname(name);
543	if (ks != NULL) {
544#ifdef MAC
545		error = mac_check_posix_sem_unlink(td->td_ucred, ks);
546		if (error) {
547			mtx_unlock(&sem_lock);
548			return (error);
549		}
550#endif
551		error = sem_perm(td, ks);
552	} else
553		error = ENOENT;
554	DP(("sem_unlink: '%s' ks = %p, error = %d\n", name, ks, error));
555	if (error == 0) {
556		LIST_REMOVE(ks, ks_entry);
557		LIST_INSERT_HEAD(&ksem_deadhead, ks, ks_entry);
558		sem_rel(ks);
559	}
560	mtx_unlock(&sem_lock);
561	return (error);
562}
563
564#ifndef _SYS_SYSPROTO_H_
565struct ksem_close_args {
566	semid_t id;
567};
568int ksem_close(struct thread *td, struct ksem_close_args *uap);
569#endif
570
571int
572ksem_close(struct thread *td, struct ksem_close_args *uap)
573{
574
575	return (kern_sem_close(td, uap->id));
576}
577
578static int
579kern_sem_close(struct thread *td, semid_t id)
580{
581	struct ksem *ks;
582	int error;
583
584	error = EINVAL;
585	mtx_lock(&sem_lock);
586	ks = ID_TO_SEM(id);
587	/* this is not a valid operation for unnamed sems */
588	if (ks != NULL && ks->ks_name != NULL)
589		error = sem_leave(td->td_proc, ks);
590	mtx_unlock(&sem_lock);
591	return (error);
592}
593
594#ifndef _SYS_SYSPROTO_H_
595struct ksem_post_args {
596	semid_t id;
597};
598int ksem_post(struct thread *td, struct ksem_post_args *uap);
599#endif
600int
601ksem_post(struct thread *td, struct ksem_post_args *uap)
602{
603
604	return (kern_sem_post(td, uap->id));
605}
606
607static int
608kern_sem_post(struct thread *td, semid_t id)
609{
610	struct ksem *ks;
611	int error;
612
613	mtx_lock(&sem_lock);
614	ks = ID_TO_SEM(id);
615	if (ks == NULL || !sem_hasopen(td, ks)) {
616		error = EINVAL;
617		goto err;
618	}
619#ifdef MAC
620	error = mac_check_posix_sem_post(td->td_ucred, ks);
621	if (error)
622		goto err;
623#endif
624	if (ks->ks_value == SEM_VALUE_MAX) {
625		error = EOVERFLOW;
626		goto err;
627	}
628	++ks->ks_value;
629	if (ks->ks_waiters > 0)
630		cv_signal(&ks->ks_cv);
631	error = 0;
632err:
633	mtx_unlock(&sem_lock);
634	return (error);
635}
636
637#ifndef _SYS_SYSPROTO_H_
638struct ksem_wait_args {
639	semid_t id;
640};
641int ksem_wait(struct thread *td, struct ksem_wait_args *uap);
642#endif
643
644int
645ksem_wait(struct thread *td, struct ksem_wait_args *uap)
646{
647
648	return (kern_sem_wait(td, uap->id, 0, NULL));
649}
650
651#ifndef _SYS_SYSPROTO_H_
652struct ksem_timedwait_args {
653	semid_t id;
654	const struct timespec *abstime;
655};
656int ksem_timedwait(struct thread *td, struct ksem_timedwait_args *uap);
657#endif
658int
659ksem_timedwait(struct thread *td, struct ksem_timedwait_args *uap)
660{
661	struct timespec abstime;
662	struct timespec *ts;
663	int error;
664
665	/* We allow a null timespec (wait forever). */
666	if (uap->abstime == NULL)
667		ts = NULL;
668	else {
669		error = copyin(uap->abstime, &abstime, sizeof(abstime));
670		if (error != 0)
671			return (error);
672		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
673			return (EINVAL);
674		ts = &abstime;
675	}
676	return (kern_sem_wait(td, uap->id, 0, ts));
677}
678
679#ifndef _SYS_SYSPROTO_H_
680struct ksem_trywait_args {
681	semid_t id;
682};
683int ksem_trywait(struct thread *td, struct ksem_trywait_args *uap);
684#endif
685int
686ksem_trywait(struct thread *td, struct ksem_trywait_args *uap)
687{
688
689	return (kern_sem_wait(td, uap->id, 1, NULL));
690}
691
692static int
693kern_sem_wait(struct thread *td, semid_t id, int tryflag,
694    struct timespec *abstime)
695{
696	struct timespec ts1, ts2;
697	struct timeval tv;
698	struct ksem *ks;
699	int error;
700
701	DP((">>> kern_sem_wait entered!\n"));
702	mtx_lock(&sem_lock);
703	ks = ID_TO_SEM(id);
704	if (ks == NULL) {
705		DP(("kern_sem_wait ks == NULL\n"));
706		error = EINVAL;
707		goto err;
708	}
709	sem_ref(ks);
710	if (!sem_hasopen(td, ks)) {
711		DP(("kern_sem_wait hasopen failed\n"));
712		error = EINVAL;
713		goto err;
714	}
715#ifdef MAC
716	error = mac_check_posix_sem_wait(td->td_ucred, ks);
717	if (error) {
718		DP(("kern_sem_wait mac failed\n"));
719		goto err;
720	}
721#endif
722	DP(("kern_sem_wait value = %d, tryflag %d\n", ks->ks_value, tryflag));
723	if (ks->ks_value == 0) {
724		ks->ks_waiters++;
725		if (tryflag != 0)
726			error = EAGAIN;
727		else if (abstime == NULL)
728			error = cv_wait_sig(&ks->ks_cv, &sem_lock);
729		else {
730			for (;;) {
731				ts1 = *abstime;
732				getnanotime(&ts2);
733				timespecsub(&ts1, &ts2);
734				TIMESPEC_TO_TIMEVAL(&tv, &ts1);
735				if (tv.tv_sec < 0) {
736					error = ETIMEDOUT;
737					break;
738				}
739				error = cv_timedwait_sig(&ks->ks_cv,
740				    &sem_lock, tvtohz(&tv));
741				if (error != EWOULDBLOCK)
742					break;
743			}
744		}
745		ks->ks_waiters--;
746		if (error)
747			goto err;
748	}
749	ks->ks_value--;
750	error = 0;
751err:
752	if (ks != NULL)
753		sem_rel(ks);
754	mtx_unlock(&sem_lock);
755	DP(("<<< kern_sem_wait leaving, error = %d\n", error));
756	return (error);
757}
758
759#ifndef _SYS_SYSPROTO_H_
760struct ksem_getvalue_args {
761	semid_t id;
762	int *val;
763};
764int ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap);
765#endif
766int
767ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap)
768{
769	struct ksem *ks;
770	int error, val;
771
772	mtx_lock(&sem_lock);
773	ks = ID_TO_SEM(uap->id);
774	if (ks == NULL || !sem_hasopen(td, ks)) {
775		mtx_unlock(&sem_lock);
776		return (EINVAL);
777	}
778#ifdef MAC
779	error = mac_check_posix_sem_getvalue(td->td_ucred, ks);
780	if (error) {
781		mtx_unlock(&sem_lock);
782		return (error);
783	}
784#endif
785	val = ks->ks_value;
786	mtx_unlock(&sem_lock);
787	error = copyout(&val, uap->val, sizeof(val));
788	return (error);
789}
790
791#ifndef _SYS_SYSPROTO_H_
792struct ksem_destroy_args {
793	semid_t id;
794};
795int ksem_destroy(struct thread *td, struct ksem_destroy_args *uap);
796#endif
797int
798ksem_destroy(struct thread *td, struct ksem_destroy_args *uap)
799{
800	struct ksem *ks;
801	int error;
802
803	mtx_lock(&sem_lock);
804	ks = ID_TO_SEM(uap->id);
805	if (ks == NULL || !sem_hasopen(td, ks) ||
806	    ks->ks_name != NULL) {
807		error = EINVAL;
808		goto err;
809	}
810#ifdef MAC
811	error = mac_check_posix_sem_destroy(td->td_ucred, ks);
812	if (error)
813		goto err;
814#endif
815	if (ks->ks_waiters != 0) {
816		error = EBUSY;
817		goto err;
818	}
819	sem_rel(ks);
820	error = 0;
821err:
822	mtx_unlock(&sem_lock);
823	return (error);
824}
825
826/*
827 * Count the number of kusers associated with a proc, so as to guess at how
828 * many to allocate when forking.
829 */
830static int
831sem_count_proc(struct proc *p)
832{
833	struct ksem *ks;
834	struct kuser *ku;
835	int count;
836
837	mtx_assert(&sem_lock, MA_OWNED);
838
839	count = 0;
840	LIST_FOREACH(ks, &ksem_head, ks_entry) {
841		LIST_FOREACH(ku, &ks->ks_users, ku_next) {
842			if (ku->ku_pid == p->p_pid)
843				count++;
844		}
845	}
846	LIST_FOREACH(ks, &ksem_deadhead, ks_entry) {
847		LIST_FOREACH(ku, &ks->ks_users, ku_next) {
848			if (ku->ku_pid == p->p_pid)
849				count++;
850		}
851	}
852	return (count);
853}
854
855/*
856 * When a process forks, the child process must gain a reference to each open
857 * semaphore in the parent process, whether it is unlinked or not.  This
858 * requires allocating a kuser structure for each semaphore reference in the
859 * new process.  Because the set of semaphores in the parent can change while
860 * the fork is in progress, we have to handle races -- first we attempt to
861 * allocate enough storage to acquire references to each of the semaphores,
862 * then we enter the semaphores and release the temporary references.
863 */
864static void
865sem_forkhook(void *arg, struct proc *p1, struct proc *p2, int flags)
866{
867	struct ksem *ks, **sem_array;
868	int count, i, new_count;
869	struct kuser *ku;
870
871	mtx_lock(&sem_lock);
872	count = sem_count_proc(p1);
873	if (count == 0) {
874		mtx_unlock(&sem_lock);
875		return;
876	}
877race_lost:
878	mtx_assert(&sem_lock, MA_OWNED);
879	mtx_unlock(&sem_lock);
880	sem_array = malloc(sizeof(struct ksem *) * count, M_TEMP, M_WAITOK);
881	mtx_lock(&sem_lock);
882	new_count = sem_count_proc(p1);
883	if (count < new_count) {
884		/* Lost race, repeat and allocate more storage. */
885		free(sem_array, M_TEMP);
886		count = new_count;
887		goto race_lost;
888	}
889	/*
890	 * Given an array capable of storing an adequate number of semaphore
891	 * references, now walk the list of semaphores and acquire a new
892	 * reference for any semaphore opened by p1.
893	 */
894	count = new_count;
895	i = 0;
896	LIST_FOREACH(ks, &ksem_head, ks_entry) {
897		LIST_FOREACH(ku, &ks->ks_users, ku_next) {
898			if (ku->ku_pid == p1->p_pid) {
899				sem_ref(ks);
900				sem_array[i] = ks;
901				i++;
902				break;
903			}
904		}
905	}
906	LIST_FOREACH(ks, &ksem_deadhead, ks_entry) {
907		LIST_FOREACH(ku, &ks->ks_users, ku_next) {
908			if (ku->ku_pid == p1->p_pid) {
909				sem_ref(ks);
910				sem_array[i] = ks;
911				i++;
912				break;
913			}
914		}
915	}
916	mtx_unlock(&sem_lock);
917	KASSERT(i == count, ("sem_forkhook: i != count (%d, %d)", i, count));
918	/*
919	 * Now cause p2 to enter each of the referenced semaphores, then
920	 * release our temporary reference.  This is pretty inefficient.
921	 * Finally, free our temporary array.
922	 */
923	for (i = 0; i < count; i++) {
924		sem_enter(p2, sem_array[i]);
925		mtx_lock(&sem_lock);
926		sem_rel(sem_array[i]);
927		mtx_unlock(&sem_lock);
928	}
929	free(sem_array, M_TEMP);
930}
931
932static void
933sem_exechook(void *arg, struct proc *p, struct image_params *imgp __unused)
934{
935   	sem_exithook(arg, p);
936}
937
938static void
939sem_exithook(void *arg, struct proc *p)
940{
941	struct ksem *ks, *ksnext;
942
943	mtx_lock(&sem_lock);
944	ks = LIST_FIRST(&ksem_head);
945	while (ks != NULL) {
946		ksnext = LIST_NEXT(ks, ks_entry);
947		sem_leave(p, ks);
948		ks = ksnext;
949	}
950	ks = LIST_FIRST(&ksem_deadhead);
951	while (ks != NULL) {
952		ksnext = LIST_NEXT(ks, ks_entry);
953		sem_leave(p, ks);
954		ks = ksnext;
955	}
956	mtx_unlock(&sem_lock);
957}
958
959static int
960sem_modload(struct module *module, int cmd, void *arg)
961{
962        int error = 0;
963
964        switch (cmd) {
965        case MOD_LOAD:
966		mtx_init(&sem_lock, "sem", "semaphore", MTX_DEF);
967		p31b_setcfg(CTL_P1003_1B_SEM_NSEMS_MAX, SEM_MAX);
968		p31b_setcfg(CTL_P1003_1B_SEM_VALUE_MAX, SEM_VALUE_MAX);
969		sem_exit_tag = EVENTHANDLER_REGISTER(process_exit, sem_exithook,
970		    NULL, EVENTHANDLER_PRI_ANY);
971		sem_exec_tag = EVENTHANDLER_REGISTER(process_exec, sem_exechook,
972		    NULL, EVENTHANDLER_PRI_ANY);
973		sem_fork_tag = EVENTHANDLER_REGISTER(process_fork, sem_forkhook, NULL, EVENTHANDLER_PRI_ANY);
974                break;
975        case MOD_UNLOAD:
976		if (nsems != 0) {
977			error = EOPNOTSUPP;
978			break;
979		}
980		EVENTHANDLER_DEREGISTER(process_exit, sem_exit_tag);
981		EVENTHANDLER_DEREGISTER(process_exec, sem_exec_tag);
982		EVENTHANDLER_DEREGISTER(process_fork, sem_fork_tag);
983		mtx_destroy(&sem_lock);
984                break;
985        case MOD_SHUTDOWN:
986                break;
987        default:
988                error = EINVAL;
989                break;
990        }
991        return (error);
992}
993
994static moduledata_t sem_mod = {
995        "sem",
996        &sem_modload,
997        NULL
998};
999
1000SYSCALL_MODULE_HELPER(ksem_init);
1001SYSCALL_MODULE_HELPER(ksem_open);
1002SYSCALL_MODULE_HELPER(ksem_unlink);
1003SYSCALL_MODULE_HELPER(ksem_close);
1004SYSCALL_MODULE_HELPER(ksem_post);
1005SYSCALL_MODULE_HELPER(ksem_wait);
1006SYSCALL_MODULE_HELPER(ksem_timedwait);
1007SYSCALL_MODULE_HELPER(ksem_trywait);
1008SYSCALL_MODULE_HELPER(ksem_getvalue);
1009SYSCALL_MODULE_HELPER(ksem_destroy);
1010
1011DECLARE_MODULE(sem, sem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST);
1012MODULE_VERSION(sem, 1);
1013