1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 *	The Regents of the University of California.  All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 *    notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 *    notice, this list of conditions and the following disclaimer in the
45 *    documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 *    must display the following acknowledgement:
48 *	This product includes software developed by the University of
49 *	California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 *    may be used to endorse or promote products derived from this software
52 *    without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
67 */
68/*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections.  This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/filedesc.h>
78#include <sys/ioctl.h>
79#include <sys/file_internal.h>
80#include <sys/proc_internal.h>
81#include <sys/socketvar.h>
82#include <sys/uio_internal.h>
83#include <sys/kernel.h>
84#include <sys/stat.h>
85#include <sys/malloc.h>
86#include <sys/sysproto.h>
87
88#include <sys/mount_internal.h>
89#include <sys/protosw.h>
90#include <sys/ev.h>
91#include <sys/user.h>
92#include <sys/kdebug.h>
93#include <sys/poll.h>
94#include <sys/event.h>
95#include <sys/eventvar.h>
96
97#include <mach/mach_types.h>
98#include <kern/kern_types.h>
99#include <kern/assert.h>
100#include <kern/kalloc.h>
101#include <kern/thread.h>
102#include <kern/clock.h>
103
104#include <sys/mbuf.h>
105#include <sys/socket.h>
106#include <sys/socketvar.h>
107#include <sys/errno.h>
108#include <sys/syscall.h>
109#include <sys/pipe.h>
110
111#include <bsm/audit_kernel.h>
112
113#include <net/if.h>
114#include <net/route.h>
115
116#include <netinet/in.h>
117#include <netinet/in_systm.h>
118#include <netinet/ip.h>
119#include <netinet/in_pcb.h>
120#include <netinet/ip_var.h>
121#include <netinet/ip6.h>
122#include <netinet/tcp.h>
123#include <netinet/tcp_fsm.h>
124#include <netinet/tcp_seq.h>
125#include <netinet/tcp_timer.h>
126#include <netinet/tcp_var.h>
127#include <netinet/tcpip.h>
128#include <netinet/tcp_debug.h>
129/* for wait queue based select */
130#include <kern/wait_queue.h>
131#include <kern/kalloc.h>
132#include <sys/vnode_internal.h>
133
134/* XXX should be in a header file somewhere */
135void evsofree(struct socket *);
136void evpipefree(struct pipe *);
137void postpipeevent(struct pipe *, int);
138void postevent(struct socket *, struct sockbuf *, int);
139extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
140
141int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
142int wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
143extern void	*get_bsduthreadarg(thread_t);
144extern int	*get_bsduthreadrval(thread_t);
145
146__private_extern__ int	dofileread(vfs_context_t ctx, struct fileproc *fp,
147								   user_addr_t bufp, user_size_t nbyte,
148								   off_t offset, int flags, user_ssize_t *retval);
149__private_extern__ int	dofilewrite(vfs_context_t ctx, struct fileproc *fp,
150									user_addr_t bufp, user_size_t nbyte,
151									off_t offset, int flags, user_ssize_t *retval);
152__private_extern__ int	preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
153__private_extern__ void	donefileread(struct proc *p, struct fileproc *fp_ret, int fd);
154
155#if NETAT
156extern int appletalk_inited;
157#endif /* NETAT */
158
159#define f_flag f_fglob->fg_flag
160#define f_type f_fglob->fg_type
161#define f_msgcount f_fglob->fg_msgcount
162#define f_cred f_fglob->fg_cred
163#define f_ops f_fglob->fg_ops
164#define f_offset f_fglob->fg_offset
165#define f_data f_fglob->fg_data
166
167/*
168 * Read system call.
169 *
170 * Returns:	0			Success
171 *	preparefileread:EBADF
172 *	preparefileread:ESPIPE
173 *	preparefileread:ENXIO
174 *	preparefileread:EBADF
175 *	dofileread:???
176 */
177int
178read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
179{
180	__pthread_testcancel(1);
181	return(read_nocancel(p, (struct read_nocancel_args *)uap, retval));
182}
183
184int
185read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
186{
187	struct fileproc *fp;
188	int error;
189	int fd = uap->fd;
190
191	if ( (error = preparefileread(p, &fp, fd, 0)) )
192	        return (error);
193
194	error = dofileread(vfs_context_current(), fp, uap->cbuf, uap->nbyte,
195			   (off_t)-1, 0, retval);
196
197	donefileread(p, fp, fd);
198
199	return (error);
200}
201
202/*
203 * Pread system call
204 *
205 * Returns:	0			Success
206 *	preparefileread:EBADF
207 *	preparefileread:ESPIPE
208 *	preparefileread:ENXIO
209 *	preparefileread:EBADF
210 *	dofileread:???
211 */
212int
213pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
214{
215	__pthread_testcancel(1);
216	return(pread_nocancel(p, (struct pread_nocancel_args *)uap, retval));
217}
218
219int
220pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
221{
222	struct fileproc *fp = NULL;	/* fp set by preparefileread() */
223	int fd = uap->fd;
224	int error;
225
226	if ( (error = preparefileread(p, &fp, fd, 1)) )
227		goto out;
228
229	error = dofileread(vfs_context_current(), fp, uap->buf, uap->nbyte,
230			uap->offset, FOF_OFFSET, retval);
231
232	donefileread(p, fp, fd);
233
234	if (!error)
235	    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
236	      uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
237
238out:
239	return (error);
240}
241
242/*
243 * Code common for read and pread
244 */
245
246void
247donefileread(struct proc *p, struct fileproc *fp, int fd)
248{
249	proc_fdlock_spin(p);
250
251	fp->f_flags &= ~FP_INCHRREAD;
252
253	fp_drop(p, fd, fp, 1);
254        proc_fdunlock(p);
255}
256
257/*
258 * Returns:	0			Success
259 *		EBADF
260 *		ESPIPE
261 *		ENXIO
262 *	fp_lookup:EBADF
263 *	fo_read:???
264 */
265int
266preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
267{
268	vnode_t vp;
269	int 	error;
270	struct fileproc *fp;
271
272	proc_fdlock_spin(p);
273
274	error = fp_lookup(p, fd, &fp, 1);
275
276	if (error) {
277	        proc_fdunlock(p);
278		return (error);
279	}
280	if ((fp->f_flag & FREAD) == 0) {
281	        error = EBADF;
282		goto out;
283	}
284	if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
285	        error = ESPIPE;
286		goto out;
287	}
288	if (fp->f_type == DTYPE_VNODE) {
289		vp = (struct vnode *)fp->f_fglob->fg_data;
290
291		if (check_for_pread && (vnode_isfifo(vp))) {
292			error = ESPIPE;
293			goto out;
294		}
295		if (check_for_pread && (vp->v_flag & VISTTY)) {
296			error = ENXIO;
297			goto out;
298		}
299		if (vp->v_type == VCHR)
300			fp->f_flags |= FP_INCHRREAD;
301	}
302
303	*fp_ret = fp;
304
305        proc_fdunlock(p);
306	return (0);
307
308out:
309	fp_drop(p, fd, fp, 1);
310	proc_fdunlock(p);
311	return (error);
312}
313
314
315/*
316 * Returns:	0			Success
317 *		EINVAL
318 *	fo_read:???
319 */
320__private_extern__ int
321dofileread(vfs_context_t ctx, struct fileproc *fp,
322	   user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
323	   user_ssize_t *retval)
324{
325	uio_t auio;
326	user_ssize_t bytecnt;
327	long error = 0;
328	char uio_buf[ UIO_SIZEOF(1) ];
329
330	// LP64todo - do we want to raise this?
331	if (nbyte > INT_MAX)
332		return (EINVAL);
333
334	if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
335		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
336									  &uio_buf[0], sizeof(uio_buf));
337	} else {
338		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
339									  &uio_buf[0], sizeof(uio_buf));
340	}
341	uio_addiov(auio, bufp, nbyte);
342
343	bytecnt = nbyte;
344
345	if ((error = fo_read(fp, auio, flags, ctx))) {
346		if (uio_resid(auio) != bytecnt && (error == ERESTART ||
347			error == EINTR || error == EWOULDBLOCK))
348			error = 0;
349	}
350	bytecnt -= uio_resid(auio);
351
352	*retval = bytecnt;
353
354	return (error);
355}
356
357/*
358 * Scatter read system call.
359 *
360 * Returns:	0			Success
361 *		EINVAL
362 *		ENOMEM
363 *	copyin:EFAULT
364 *	rd_uio:???
365 */
366int
367readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
368{
369	__pthread_testcancel(1);
370	return(readv_nocancel(p, (struct readv_nocancel_args *)uap, retval));
371}
372
373int
374readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
375{
376	uio_t auio = NULL;
377	int error;
378	int size_of_iovec;
379	struct user_iovec *iovp;
380
381	/* Verify range bedfore calling uio_create() */
382	if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
383		return (EINVAL);
384
385	/* allocate a uio large enough to hold the number of iovecs passed */
386	auio = uio_create(uap->iovcnt, 0,
387				  (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
388				  UIO_READ);
389
390	/* get location of iovecs within the uio.  then copyin the iovecs from
391	 * user space.
392	 */
393	iovp = uio_iovsaddr(auio);
394	if (iovp == NULL) {
395		error = ENOMEM;
396		goto ExitThisRoutine;
397	}
398	size_of_iovec = (IS_64BIT_PROCESS(p) ? sizeof(struct user_iovec) : sizeof(struct iovec));
399	error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec));
400	if (error) {
401		goto ExitThisRoutine;
402	}
403
404	/* finalize uio_t for use and do the IO
405	 */
406	uio_calculateresid(auio);
407	error = rd_uio(p, uap->fd, auio, retval);
408
409ExitThisRoutine:
410	if (auio != NULL) {
411		uio_free(auio);
412	}
413	return (error);
414}
415
416/*
417 * Write system call
418 *
419 * Returns:	0			Success
420 *		EBADF
421 *	fp_lookup:EBADF
422 *	dofilewrite:???
423 */
424int
425write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
426{
427	__pthread_testcancel(1);
428	return(write_nocancel(p, (struct write_nocancel_args *)uap, retval));
429
430}
431
432int
433write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
434{
435	struct fileproc *fp;
436	int error;
437	int fd = uap->fd;
438
439	error = fp_lookup(p,fd,&fp,0);
440	if (error)
441		return(error);
442	if ((fp->f_flag & FWRITE) == 0) {
443		error = EBADF;
444	} else {
445		struct vfs_context context = *(vfs_context_current());
446		context.vc_ucred = fp->f_fglob->fg_cred;
447
448		error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
449			(off_t)-1, 0, retval);
450	}
451	if (error == 0)
452	        fp_drop_written(p, fd, fp);
453	else
454	        fp_drop(p, fd, fp, 0);
455	return(error);
456}
457
458/*
459 * pwrite system call
460 *
461 * Returns:	0			Success
462 *		EBADF
463 *		ESPIPE
464 *		ENXIO
465 *		EINVAL
466 *	fp_lookup:EBADF
467 *	dofilewrite:???
468 */
469int
470pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
471{
472	__pthread_testcancel(1);
473	return(pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval));
474}
475
476int
477pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
478{
479        struct fileproc *fp;
480        int error;
481	int fd = uap->fd;
482	vnode_t vp  = (vnode_t)0;
483
484	error = fp_lookup(p,fd,&fp,0);
485	if (error)
486		return(error);
487
488	if ((fp->f_flag & FWRITE) == 0) {
489		error = EBADF;
490	} else {
491		struct vfs_context context = *vfs_context_current();
492		context.vc_ucred = fp->f_fglob->fg_cred;
493
494		if (fp->f_type != DTYPE_VNODE) {
495			error = ESPIPE;
496			goto errout;
497		}
498		vp = (vnode_t)fp->f_fglob->fg_data;
499		if (vnode_isfifo(vp)) {
500			error = ESPIPE;
501			goto errout;
502		}
503		if ((vp->v_flag & VISTTY)) {
504			error = ENXIO;
505			goto errout;
506		}
507		if (uap->offset == (off_t)-1) {
508			error = EINVAL;
509			goto errout;
510		}
511
512		    error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
513			uap->offset, FOF_OFFSET, retval);
514        }
515errout:
516	if (error == 0)
517	        fp_drop_written(p, fd, fp);
518	else
519	        fp_drop(p, fd, fp, 0);
520
521	if (!error)
522	    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
523	      uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
524
525        return(error);
526}
527
528/*
529 * Returns:	0			Success
530 *		EINVAL
531 *	<fo_write>:EPIPE
532 *	<fo_write>:???			[indirect through struct fileops]
533 */
534__private_extern__ int
535dofilewrite(vfs_context_t ctx, struct fileproc *fp,
536	    user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
537	    user_ssize_t *retval)
538{
539	uio_t auio;
540	long error = 0;
541	user_ssize_t bytecnt;
542	char uio_buf[ UIO_SIZEOF(1) ];
543
544	// LP64todo - do we want to raise this?
545	if (nbyte > INT_MAX)
546		return (EINVAL);
547
548	if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
549		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
550									  &uio_buf[0], sizeof(uio_buf));
551	} else {
552		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
553									  &uio_buf[0], sizeof(uio_buf));
554	}
555	uio_addiov(auio, bufp, nbyte);
556
557	bytecnt = nbyte;
558	if ((error = fo_write(fp, auio, flags, ctx))) {
559		if (uio_resid(auio) != bytecnt && (error == ERESTART ||
560			error == EINTR || error == EWOULDBLOCK))
561			error = 0;
562		/* The socket layer handles SIGPIPE */
563		if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
564			/* XXX Raise the signal on the thread? */
565			psignal(vfs_context_proc(ctx), SIGPIPE);
566		}
567	}
568	bytecnt -= uio_resid(auio);
569	*retval = bytecnt;
570
571	return (error);
572}
573
574/*
575 * Gather write system call
576 */
577int
578writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
579{
580	__pthread_testcancel(1);
581	return(writev_nocancel(p, (struct writev_nocancel_args *)uap, retval));
582}
583
584int
585writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
586{
587	uio_t auio = NULL;
588	int error;
589	int size_of_iovec;
590	struct user_iovec *iovp;
591
592	/* Verify range bedfore calling uio_create() */
593	if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
594		return (EINVAL);
595
596	/* allocate a uio large enough to hold the number of iovecs passed */
597	auio = uio_create(uap->iovcnt, 0,
598				  (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
599				  UIO_WRITE);
600
601	/* get location of iovecs within the uio.  then copyin the iovecs from
602	 * user space.
603	 */
604	iovp = uio_iovsaddr(auio);
605	if (iovp == NULL) {
606		error = ENOMEM;
607		goto ExitThisRoutine;
608	}
609	size_of_iovec = (IS_64BIT_PROCESS(p) ? sizeof(struct user_iovec) : sizeof(struct iovec));
610	error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec));
611	if (error) {
612		goto ExitThisRoutine;
613	}
614
615	/* finalize uio_t for use and do the IO
616	 */
617	uio_calculateresid(auio);
618	error = wr_uio(p, uap->fd, auio, retval);
619
620ExitThisRoutine:
621	if (auio != NULL) {
622		uio_free(auio);
623	}
624	return (error);
625}
626
627
628int
629wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
630{
631	struct fileproc *fp;
632	int error;
633	user_ssize_t count;
634	struct vfs_context context = *vfs_context_current();
635
636	error = fp_lookup(p,fdes,&fp,0);
637	if (error)
638		return(error);
639
640	if ((fp->f_flag & FWRITE) == 0) {
641		error = EBADF;
642		goto out;
643	}
644	count = uio_resid(uio);
645
646	context.vc_ucred = fp->f_cred;
647	error = fo_write(fp, uio, 0, &context);
648	if (error) {
649		if (uio_resid(uio) != count && (error == ERESTART ||
650						error == EINTR || error == EWOULDBLOCK))
651		        error = 0;
652		/* The socket layer handles SIGPIPE */
653		if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
654		        psignal(p, SIGPIPE);
655	}
656	*retval = count - uio_resid(uio);
657
658out:
659	if ( (error == 0) )
660	        fp_drop_written(p, fdes, fp);
661	else
662	        fp_drop(p, fdes, fp, 0);
663	return(error);
664}
665
666
667int
668rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
669{
670	struct fileproc *fp;
671	int error;
672	user_ssize_t count;
673	struct vfs_context context = *vfs_context_current();
674
675	if ( (error = preparefileread(p, &fp, fdes, 0)) )
676	        return (error);
677
678	count = uio_resid(uio);
679
680	context.vc_ucred = fp->f_cred;
681
682	error = fo_read(fp, uio, 0, &context);
683
684	if (error) {
685	        if (uio_resid(uio) != count && (error == ERESTART ||
686						error == EINTR || error == EWOULDBLOCK))
687		        error = 0;
688	}
689	*retval = count - uio_resid(uio);
690
691	donefileread(p, fp, fdes);
692
693	return (error);
694}
695
696/*
697 * Ioctl system call
698 *
699 * Returns:	0			Success
700 *		EBADF
701 *		ENOTTY
702 *		ENOMEM
703 *		ESRCH
704 *	copyin:EFAULT
705 *	copyoutEFAULT
706 *	fp_lookup:EBADF			Bad file descriptor
707 *	fo_ioctl:???
708 */
709int
710ioctl(struct proc *p, struct ioctl_args *uap, __unused register_t *retval)
711{
712	struct fileproc *fp;
713	u_long com;
714	int error = 0;
715	u_int size;
716	caddr_t datap, memp;
717	boolean_t is64bit;
718	int tmp;
719#define STK_PARAMS	128
720	char stkbuf[STK_PARAMS];
721	int fd = uap->fd;
722	struct vfs_context context = *vfs_context_current();
723
724	AUDIT_ARG(fd, uap->fd);
725	AUDIT_ARG(cmd, CAST_DOWN(int, uap->com)); /* LP64todo: uap->com is a user-land long */
726	AUDIT_ARG(addr, uap->data);
727
728	is64bit = proc_is64bit(p);
729
730	proc_fdlock(p);
731	error = fp_lookup(p,fd,&fp,1);
732	if (error)  {
733		proc_fdunlock(p);
734		return(error);
735	}
736
737	AUDIT_ARG(file, p, fp);
738
739	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
740			error = EBADF;
741			goto out;
742	}
743
744	context.vc_ucred = fp->f_fglob->fg_cred;
745
746#if CONFIG_MACF
747	error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, uap->com);
748	if (error)
749		goto out;
750#endif
751
752#if NETAT
753	/*
754	 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
755	 * while implementing an ATioctl system call
756	 */
757	{
758		if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
759			u_long  fixed_command;
760
761#ifdef APPLETALK_DEBUG
762			kprintf("ioctl: special AppleTalk \n");
763#endif
764			datap = &stkbuf[0];
765			*(user_addr_t *)datap = uap->data;
766			fixed_command = _IOW(0, 0xff99, uap->data);
767			error = fo_ioctl(fp, fixed_command, datap, &context);
768			goto out;
769		}
770	}
771
772#endif /* NETAT */
773
774
775	switch (com = uap->com) {
776	case FIONCLEX:
777		*fdflags(p, uap->fd) &= ~UF_EXCLOSE;
778		error =0;
779		goto out;
780	case FIOCLEX:
781		*fdflags(p, uap->fd) |= UF_EXCLOSE;
782		error =0;
783		goto out;
784	}
785
786	/*
787	 * Interpret high order word to find amount of data to be
788	 * copied to/from the user's address space.
789	 */
790	size = IOCPARM_LEN(com);
791	if (size > IOCPARM_MAX) {
792			error = ENOTTY;
793			goto out;
794	}
795	memp = NULL;
796	if (size > sizeof (stkbuf)) {
797		proc_fdunlock(p);
798		if ((memp = (caddr_t)kalloc(size)) == 0) {
799			proc_fdlock(p);
800			error = ENOMEM;
801			goto out;
802		}
803		proc_fdlock(p);
804		datap = memp;
805	} else
806		datap = &stkbuf[0];
807	if (com&IOC_IN) {
808		if (size) {
809			proc_fdunlock(p);
810			error = copyin(uap->data, datap, size);
811			if (error) {
812				if (memp)
813					kfree(memp, size);
814				proc_fdlock(p);
815				goto out;
816			}
817			proc_fdlock(p);
818		} else {
819			/* XXX - IOC_IN and no size?  we should proably return an error here!! */
820			if (is64bit) {
821				*(user_addr_t *)datap = uap->data;
822			}
823			else {
824				*(uint32_t *)datap = (uint32_t)uap->data;
825			}
826		}
827	} else if ((com&IOC_OUT) && size)
828		/*
829		 * Zero the buffer so the user always
830		 * gets back something deterministic.
831		 */
832		bzero(datap, size);
833	else if (com&IOC_VOID) {
834		/* XXX - this is odd since IOC_VOID means no parameters */
835		if (is64bit) {
836			*(user_addr_t *)datap = uap->data;
837		}
838		else {
839			*(uint32_t *)datap = (uint32_t)uap->data;
840		}
841	}
842
843	switch (com) {
844
845	case FIONBIO:
846		if ( (tmp = *(int *)datap) )
847			fp->f_flag |= FNONBLOCK;
848		else
849			fp->f_flag &= ~FNONBLOCK;
850		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
851		break;
852
853	case FIOASYNC:
854		if ( (tmp = *(int *)datap) )
855			fp->f_flag |= FASYNC;
856		else
857			fp->f_flag &= ~FASYNC;
858		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
859		break;
860
861	case FIOSETOWN:
862		tmp = *(int *)datap;
863		if (fp->f_type == DTYPE_SOCKET) {
864			((struct socket *)fp->f_data)->so_pgid = tmp;
865			error = 0;
866			break;
867		}
868		if (fp->f_type == DTYPE_PIPE) {
869		        error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
870			break;
871		}
872		if (tmp <= 0) {
873			tmp = -tmp;
874		} else {
875			struct proc *p1 = proc_find(tmp);
876			if (p1 == 0) {
877				error = ESRCH;
878				break;
879			}
880			tmp = p1->p_pgrpid;
881			proc_rele(p1);
882		}
883		error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
884		break;
885
886	case FIOGETOWN:
887		if (fp->f_type == DTYPE_SOCKET) {
888			error = 0;
889			*(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
890			break;
891		}
892		error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
893		*(int *)datap = -*(int *)datap;
894		break;
895
896	default:
897		error = fo_ioctl(fp, com, datap, &context);
898		/*
899		 * Copy any data to user, size was
900		 * already set and checked above.
901		 */
902		if (error == 0 && (com&IOC_OUT) && size)
903			error = copyout(datap, uap->data, (u_int)size);
904		break;
905	}
906	proc_fdunlock(p);
907	if (memp)
908		kfree(memp, size);
909	proc_fdlock(p);
910out:
911	fp_drop(p, fd, fp, 1);
912	proc_fdunlock(p);
913	return(error);
914}
915
916int	selwait, nselcoll;
917#define SEL_FIRSTPASS 1
918#define SEL_SECONDPASS 2
919extern int selcontinue(int error);
920extern int selprocess(int error, int sel_pass);
921static int selscan(struct proc *p, struct _select * sel,
922			int nfd, register_t *retval, int sel_pass, wait_queue_sub_t wqsub);
923static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
924			int nfd, int * count, int *kfcount);
925static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
926extern uint64_t	tvtoabstime(struct timeval	*tvp);
927
928/*
929 * Select system call.
930 *
931 * Returns:	0			Success
932 *		EINVAL			Invalid argument
933 *		EAGAIN			Nonconformant error if allocation fails
934 *	selprocess:???
935 */
936int
937select(struct proc *p, struct select_args *uap, register_t *retval)
938{
939	__pthread_testcancel(1);
940	return(select_nocancel(p, (struct select_nocancel_args *)uap, retval));
941}
942
943int
944select_nocancel(struct proc *p, struct select_nocancel_args *uap, register_t *retval)
945{
946	int error = 0;
947	u_int ni, nw, size;
948	thread_t th_act;
949	struct uthread	*uth;
950	struct _select *sel;
951	int needzerofill = 1;
952	int count = 0;
953	int kfcount = 0;
954
955	th_act = current_thread();
956	uth = get_bsdthread_info(th_act);
957	sel = &uth->uu_select;
958	retval = (int *)get_bsduthreadrval(th_act);
959	*retval = 0;
960
961	if (uap->nd < 0) {
962		return (EINVAL);
963	}
964
965	/* select on thread of process that already called proc_exit() */
966	if (p->p_fd == NULL) {
967		return (EBADF);
968	}
969
970	if (uap->nd > p->p_fd->fd_nfiles)
971		uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
972
973	nw = howmany(uap->nd, NFDBITS);
974	ni = nw * sizeof(fd_mask);
975
976	/*
977	 * if the previously allocated space for the bits is smaller than
978	 * what is requested or no space has yet been allocated for this
979	 * thread, allocate enough space now.
980	 *
981	 * Note: If this process fails, select() will return EAGAIN; this
982	 * is the same thing pool() returns in a no-memory situation, but
983	 * it is not a POSIX compliant error code for select().
984	 */
985	if (sel->nbytes < (3 * ni)) {
986		int nbytes = 3 * ni;
987
988		/* Free previous allocation, if any */
989		if (sel->ibits != NULL)
990			FREE(sel->ibits, M_TEMP);
991		if (sel->obits != NULL) {
992			FREE(sel->obits, M_TEMP);
993			/* NULL out; subsequent ibits allocation may fail */
994			sel->obits = NULL;
995		}
996
997		MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
998		if (sel->ibits == NULL)
999			return (EAGAIN);
1000		MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
1001		if (sel->obits == NULL) {
1002			FREE(sel->ibits, M_TEMP);
1003			sel->ibits = NULL;
1004			return (EAGAIN);
1005		}
1006		sel->nbytes = nbytes;
1007		needzerofill = 0;
1008	}
1009
1010	if (needzerofill) {
1011		bzero((caddr_t)sel->ibits, sel->nbytes);
1012		bzero((caddr_t)sel->obits, sel->nbytes);
1013	}
1014
1015	/*
1016	 * get the bits from the user address space
1017	 */
1018#define	getbits(name, x) \
1019	do { \
1020		if (uap->name && (error = copyin(uap->name, \
1021			(caddr_t)&sel->ibits[(x) * nw], ni))) \
1022			goto continuation; \
1023	} while (0)
1024
1025	getbits(in, 0);
1026	getbits(ou, 1);
1027	getbits(ex, 2);
1028#undef	getbits
1029
1030	if (uap->tv) {
1031		struct timeval atv;
1032		if (IS_64BIT_PROCESS(p)) {
1033			struct user_timeval atv64;
1034			error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
1035			/* Loses resolution - assume timeout < 68 years */
1036			atv.tv_sec = atv64.tv_sec;
1037			atv.tv_usec = atv64.tv_usec;
1038		} else {
1039			error = copyin(uap->tv, (caddr_t)&atv, sizeof(atv));
1040		}
1041		if (error)
1042			goto continuation;
1043		if (itimerfix(&atv)) {
1044			error = EINVAL;
1045			goto continuation;
1046		}
1047
1048		clock_absolutetime_interval_to_deadline(
1049										tvtoabstime(&atv), &sel->abstime);
1050	}
1051	else
1052		sel->abstime = 0;
1053
1054	sel->kfcount = 0;
1055	if ( (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &kfcount)) ) {
1056			goto continuation;
1057	}
1058	sel->count = count;
1059	sel->kfcount = kfcount;
1060	size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
1061	if (uth->uu_allocsize) {
1062		if (uth->uu_wqset == 0)
1063			panic("select: wql memory smashed");
1064		/* needed for the select now */
1065		if (size > uth->uu_allocsize) {
1066			kfree(uth->uu_wqset,  uth->uu_allocsize);
1067			uth->uu_allocsize = size;
1068			uth->uu_wqset = (wait_queue_set_t)kalloc(size);
1069			if (uth->uu_wqset == (wait_queue_set_t)NULL)
1070				panic("failed to allocate memory for waitqueue\n");
1071		}
1072	} else {
1073		sel->count = count;
1074		uth->uu_allocsize = size;
1075		uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
1076		if (uth->uu_wqset == (wait_queue_set_t)NULL)
1077			panic("failed to allocate memory for waitqueue\n");
1078	}
1079	bzero(uth->uu_wqset, size);
1080	sel->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET;
1081	wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));
1082
1083continuation:
1084	return selprocess(error, SEL_FIRSTPASS);
1085}
1086
1087int
1088selcontinue(int error)
1089{
1090	return selprocess(error, SEL_SECONDPASS);
1091}
1092
1093int
1094selprocess(int error, int sel_pass)
1095{
1096	int ncoll;
1097	u_int ni, nw;
1098	thread_t th_act;
1099	struct uthread	*uth;
1100	struct proc *p;
1101	struct select_args *uap;
1102	int *retval;
1103	struct _select *sel;
1104	int unwind = 1;
1105	int prepost = 0;
1106	int somewakeup = 0;
1107	int doretry = 0;
1108	wait_result_t wait_result;
1109
1110	p = current_proc();
1111	th_act = current_thread();
1112	uap = (struct select_args *)get_bsduthreadarg(th_act);
1113	retval = (int *)get_bsduthreadrval(th_act);
1114	uth = get_bsdthread_info(th_act);
1115	sel = &uth->uu_select;
1116
1117	/* if it is first pass wait queue is not setup yet */
1118	if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
1119			unwind = 0;
1120	if (sel->count == 0)
1121			unwind = 0;
1122retry:
1123	if (error != 0) {
1124	  goto done;
1125	}
1126
1127	ncoll = nselcoll;
1128	OSBitOrAtomic(P_SELECT, (UInt32 *)&p->p_flag);
1129	/* skip scans if the select is just for timeouts */
1130	if (sel->count) {
1131		if (sel_pass == SEL_FIRSTPASS)
1132			wait_queue_sub_clearrefs(uth->uu_wqset);
1133
1134		error = selscan(p, sel, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset);
1135		if (error || *retval) {
1136			goto done;
1137		}
1138		if (prepost) {
1139			/* if the select of log, then we canwakeup and discover some one
1140		 	* else already read the data; go toselct again if time permits
1141		 	*/
1142		 	prepost = 0;
1143		 	doretry = 1;
1144		}
1145		if (somewakeup) {
1146		 	somewakeup = 0;
1147		 	doretry = 1;
1148		}
1149	}
1150
1151	if (uap->tv) {
1152		uint64_t	now;
1153
1154		clock_get_uptime(&now);
1155		if (now >= sel->abstime)
1156			goto done;
1157	}
1158
1159	if (doretry) {
1160		/* cleanup obits and try again */
1161		doretry = 0;
1162		sel_pass = SEL_FIRSTPASS;
1163		goto retry;
1164	}
1165
1166	/*
1167	 * To effect a poll, the timeout argument should be
1168	 * non-nil, pointing to a zero-valued timeval structure.
1169	 */
1170	if (uap->tv && sel->abstime == 0) {
1171		goto done;
1172	}
1173
1174	/* No spurious wakeups due to colls,no need to check for them */
1175	 if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
1176		sel_pass = SEL_FIRSTPASS;
1177		goto retry;
1178	}
1179
1180	OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
1181
1182	/* if the select is just for timeout skip check */
1183	if (sel->count &&(sel_pass == SEL_SECONDPASS))
1184		panic("selprocess: 2nd pass assertwaiting");
1185
1186	/* Wait Queue Subordinate has waitqueue as first element */
1187	wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqset,
1188					     &selwait, THREAD_ABORTSAFE, sel->abstime);
1189	if (wait_result != THREAD_AWAKENED) {
1190		/* there are no preposted events */
1191		error = tsleep1(NULL, PSOCK | PCATCH,
1192				"select", 0, selcontinue);
1193	} else  {
1194		prepost = 1;
1195		error = 0;
1196	}
1197
1198	sel_pass = SEL_SECONDPASS;
1199	if (error == 0) {
1200		if (!prepost)
1201			somewakeup =1;
1202		goto retry;
1203	}
1204done:
1205	if (unwind) {
1206		wait_subqueue_unlink_all(uth->uu_wqset);
1207		seldrop(p, sel->ibits, uap->nd);
1208	}
1209	OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
1210	/* select is not restarted after signals... */
1211	if (error == ERESTART)
1212		error = EINTR;
1213	if (error == EWOULDBLOCK)
1214		error = 0;
1215	nw = howmany(uap->nd, NFDBITS);
1216	ni = nw * sizeof(fd_mask);
1217
1218#define	putbits(name, x) \
1219	do { \
1220		if (uap->name && (error2 = \
1221			copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
1222			error = error2; \
1223	} while (0)
1224
1225	if (error == 0) {
1226		int error2;
1227
1228		putbits(in, 0);
1229		putbits(ou, 1);
1230		putbits(ex, 2);
1231#undef putbits
1232	}
1233	return(error);
1234}
1235
1236static int
1237selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval,
1238	int sel_pass, wait_queue_sub_t wqsub)
1239{
1240	struct filedesc *fdp = p->p_fd;
1241	int msk, i, j, fd;
1242	u_int32_t bits;
1243	struct fileproc *fp;
1244	int n = 0;
1245	int nc = 0;
1246	static int flag[3] = { FREAD, FWRITE, 0 };
1247	u_int32_t *iptr, *optr;
1248	u_int nw;
1249	u_int32_t *ibits, *obits;
1250	char * wql;
1251	char * wql_ptr;
1252	int count, kfcount;
1253	boolean_t funnel_state;
1254	vnode_t vp;
1255	struct vfs_context context = *vfs_context_current();
1256
1257	/*
1258	 * Problems when reboot; due to MacOSX signal probs
1259	 * in Beaker1C ; verify that the p->p_fd is valid
1260	 */
1261	if (fdp == NULL) {
1262		*retval=0;
1263		return(EIO);
1264	}
1265	ibits = sel->ibits;
1266	obits = sel->obits;
1267	wql = sel->wql;
1268
1269	nw = howmany(nfd, NFDBITS);
1270
1271	count = sel->count;
1272	kfcount = sel->kfcount;
1273
1274	if (kfcount > count)
1275		panic("selscan: count < kfcount");
1276
1277	if (kfcount != 0) {
1278		funnel_state = thread_funnel_set(kernel_flock, TRUE);
1279
1280		proc_fdlock(p);
1281		for (msk = 0; msk < 3; msk++) {
1282			iptr = (u_int32_t *)&ibits[msk * nw];
1283			optr = (u_int32_t *)&obits[msk * nw];
1284
1285			for (i = 0; i < nfd; i += NFDBITS) {
1286				bits = iptr[i/NFDBITS];
1287
1288				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1289					bits &= ~(1 << j);
1290					fp = fdp->fd_ofiles[fd];
1291
1292					if (fp == NULL ||
1293						(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1294						proc_fdunlock(p);
1295						thread_funnel_set(kernel_flock, funnel_state);
1296						return(EBADF);
1297					}
1298					if (sel_pass == SEL_SECONDPASS) {
1299						wql_ptr = (char *)0;
1300						fp->f_flags &= ~FP_INSELECT;
1301						fp->f_waddr = (void *)0;
1302					} else {
1303					        wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
1304						fp->f_flags |= FP_INSELECT;
1305						fp->f_waddr = (void *)wqsub;
1306					}
1307
1308					context.vc_ucred = fp->f_cred;
1309
1310					if (fp->f_ops && (fp->f_type == DTYPE_VNODE)
1311							&& ((vp = (struct vnode *)fp->f_data)  != NULLVP)
1312							&& (vp->v_type == VCHR)
1313						&& fo_select(fp, flag[msk], wql_ptr, &context)) {
1314						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1315						n++;
1316					}
1317					nc++;
1318				}
1319			}
1320		}
1321		proc_fdunlock(p);
1322		thread_funnel_set(kernel_flock, funnel_state);
1323	}
1324
1325	nc = 0;
1326	if (kfcount != count) {
1327		proc_fdlock(p);
1328		for (msk = 0; msk < 3; msk++) {
1329			iptr = (u_int32_t *)&ibits[msk * nw];
1330			optr = (u_int32_t *)&obits[msk * nw];
1331
1332			for (i = 0; i < nfd; i += NFDBITS) {
1333				bits = iptr[i/NFDBITS];
1334
1335				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1336					bits &= ~(1 << j);
1337					fp = fdp->fd_ofiles[fd];
1338
1339					if (fp == NULL ||
1340						(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1341						proc_fdunlock(p);
1342						return(EBADF);
1343					}
1344					if (sel_pass == SEL_SECONDPASS) {
1345						wql_ptr = (char *)0;
1346						fp->f_flags &= ~FP_INSELECT;
1347						fp->f_waddr = (void *)0;
1348					} else {
1349					        wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
1350						fp->f_flags |= FP_INSELECT;
1351						fp->f_waddr = (void *)wqsub;
1352					}
1353
1354					context.vc_ucred = fp->f_cred;
1355
1356					if ((fp->f_ops &&
1357						((fp->f_type != DTYPE_VNODE)
1358						|| (((vp = (struct vnode *)fp->f_data)  != NULLVP)
1359							&& (vp->v_type != VCHR))
1360						)
1361						&& fo_select(fp, flag[msk], wql_ptr, &context))) {
1362						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1363						n++;
1364					}
1365					nc++;
1366				}
1367			}
1368		}
1369		proc_fdunlock(p);
1370	}
1371	*retval = n;
1372	return (0);
1373}
1374
1375int poll_callback(struct kqueue *, struct kevent *, void *);
1376
1377struct poll_continue_args {
1378	user_addr_t pca_fds;
1379	u_int pca_nfds;
1380	u_int pca_rfds;
1381};
1382
1383int
1384poll(struct proc *p, struct poll_args *uap, register_t *retval)
1385{
1386	__pthread_testcancel(1);
1387	return(poll_nocancel(p, (struct poll_nocancel_args *)uap, retval));
1388}
1389
1390
1391int
1392poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval)
1393{
1394	struct poll_continue_args *cont;
1395	struct pollfd *fds;
1396	struct kqueue *kq;
1397	struct timeval atv;
1398	int ncoll, error = 0;
1399	u_int nfds = uap->nfds;
1400	u_int rfds = 0;
1401	u_int i;
1402	size_t ni;
1403
1404	/*
1405	 * This is kinda bogus.  We have fd limits, but that is not
1406	 * really related to the size of the pollfd array.  Make sure
1407	 * we let the process use at least FD_SETSIZE entries and at
1408	 * least enough for the current limits.  We want to be reasonably
1409	 * safe, but not overly restrictive.
1410	 */
1411	if (nfds > OPEN_MAX ||
1412	    (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && (proc_suser(p) || nfds > FD_SETSIZE)))
1413		return (EINVAL);
1414
1415	kq = kqueue_alloc(p);
1416	if (kq == NULL)
1417		return (EAGAIN);
1418
1419	ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args);
1420	MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK);
1421	if (NULL == cont) {
1422		error = EAGAIN;
1423		goto out;
1424	}
1425
1426	fds = (struct pollfd *)&cont[1];
1427	error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
1428	if (error)
1429		goto out;
1430
1431	if (uap->timeout != -1) {
1432		struct timeval rtv;
1433
1434		atv.tv_sec = uap->timeout / 1000;
1435		atv.tv_usec = (uap->timeout % 1000) * 1000;
1436		if (itimerfix(&atv)) {
1437			error = EINVAL;
1438			goto out;
1439		}
1440		getmicrouptime(&rtv);
1441		timevaladd(&atv, &rtv);
1442	} else {
1443		atv.tv_sec = 0;
1444		atv.tv_usec = 0;
1445	}
1446
1447	/* JMM - all this P_SELECT stuff is bogus */
1448	ncoll = nselcoll;
1449	OSBitOrAtomic(P_SELECT, (UInt32 *)&p->p_flag);
1450	for (i = 0; i < nfds; i++) {
1451		short events = fds[i].events;
1452		struct kevent kev;
1453		int kerror = 0;
1454
1455		/* per spec, ignore fd values below zero */
1456		if (fds[i].fd < 0) {
1457			fds[i].revents = 0;
1458			continue;
1459		}
1460
1461		/* convert the poll event into a kqueue kevent */
1462		kev.ident = fds[i].fd;
1463		kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
1464		kev.fflags = NOTE_LOWAT;
1465		kev.data = 1; /* efficiency be damned: any data should trigger */
1466		kev.udata = CAST_USER_ADDR_T(&fds[i]);
1467
1468		/* Handle input events */
1469		if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
1470			kev.filter = EVFILT_READ;
1471			if (!(events & ( POLLIN | POLLRDNORM )))
1472				kev.flags |= EV_OOBAND;
1473			kerror = kevent_register(kq, &kev, p);
1474		}
1475
1476		/* Handle output events */
1477		if (kerror == 0 &&
1478		    events & ( POLLOUT | POLLWRNORM | POLLWRBAND )) {
1479			kev.filter = EVFILT_WRITE;
1480			kerror = kevent_register(kq, &kev, p);
1481		}
1482
1483		/* Handle BSD extension vnode events */
1484		if (kerror == 0 &&
1485		    events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE )) {
1486			kev.filter = EVFILT_VNODE;
1487			kev.fflags = 0;
1488			if (events & POLLEXTEND)
1489				kev.fflags |= NOTE_EXTEND;
1490			if (events & POLLATTRIB)
1491				kev.fflags |= NOTE_ATTRIB;
1492			if (events & POLLNLINK)
1493				kev.fflags |= NOTE_LINK;
1494			if (events & POLLWRITE)
1495				kev.fflags |= NOTE_WRITE;
1496			kerror = kevent_register(kq, &kev, p);
1497		}
1498
1499		if (kerror != 0) {
1500			fds[i].revents = POLLNVAL;
1501			rfds++;
1502		} else
1503			fds[i].revents = 0;
1504	}
1505
1506	/* Did we have any trouble registering? */
1507	if (rfds > 0)
1508		goto done;
1509
1510	/* scan for, and possibly wait for, the kevents to trigger */
1511	cont->pca_fds = uap->fds;
1512	cont->pca_nfds = nfds;
1513	cont->pca_rfds = rfds;
1514	error = kevent_scan(kq, poll_callback, NULL, cont, &atv, p);
1515	rfds = cont->pca_rfds;
1516
1517 done:
1518	OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
1519	/* poll is not restarted after signals... */
1520	if (error == ERESTART)
1521		error = EINTR;
1522	if (error == EWOULDBLOCK)
1523		error = 0;
1524	if (error == 0) {
1525		error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
1526		*retval = rfds;
1527	}
1528
1529 out:
1530	if (NULL != cont)
1531		FREE(cont, M_TEMP);
1532
1533	kqueue_dealloc(kq);
1534	return (error);
1535}
1536
1537int
1538poll_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
1539{
1540	struct poll_continue_args *cont = (struct poll_continue_args *)data;
1541	struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
1542	short mask;
1543
1544	/* convert the results back into revents */
1545	if (kevp->flags & EV_EOF)
1546		fds->revents |= POLLHUP;
1547	if (kevp->flags & EV_ERROR)
1548		fds->revents |= POLLERR;
1549
1550	switch (kevp->filter) {
1551	case EVFILT_READ:
1552		if (fds->revents & POLLHUP)
1553			mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND );
1554		else {
1555			mask = 0;
1556			if (kevp->data != 0)
1557				mask |= (POLLIN | POLLRDNORM );
1558			if (kevp->flags & EV_OOBAND)
1559				mask |= ( POLLPRI | POLLRDBAND );
1560		}
1561		fds->revents |= (fds->events & mask);
1562		break;
1563
1564	case EVFILT_WRITE:
1565		if (!(fds->revents & POLLHUP))
1566			fds->revents |= (fds->events & ( POLLOUT | POLLWRNORM | POLLWRBAND ));
1567		break;
1568
1569	case EVFILT_VNODE:
1570		if (kevp->fflags & NOTE_EXTEND)
1571			fds->revents |= (fds->events & POLLEXTEND);
1572		if (kevp->fflags & NOTE_ATTRIB)
1573			fds->revents |= (fds->events & POLLATTRIB);
1574		if (kevp->fflags & NOTE_LINK)
1575			fds->revents |= (fds->events & POLLNLINK);
1576		if (kevp->fflags & NOTE_WRITE)
1577			fds->revents |= (fds->events & POLLWRITE);
1578		break;
1579	}
1580
1581	if (fds->revents)
1582		cont->pca_rfds++;
1583
1584	return 0;
1585}
1586
1587int
1588seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
1589{
1590
1591	return (1);
1592}
1593
1594static int
1595selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
1596		 int nfd, int *countp, int * kfcountp)
1597{
1598	struct filedesc *fdp = p->p_fd;
1599	int msk, i, j, fd;
1600	u_int32_t bits;
1601	struct fileproc *fp;
1602	int n = 0;
1603	u_int32_t *iptr;
1604	u_int nw;
1605	int error=0;
1606	int kfc = 0;
1607	int dropcount;
1608	vnode_t vp;
1609
1610	/*
1611	 * Problems when reboot; due to MacOSX signal probs
1612	 * in Beaker1C ; verify that the p->p_fd is valid
1613	 */
1614	if (fdp == NULL) {
1615		*countp = 0;
1616		*kfcountp = 0;
1617		return(EIO);
1618	}
1619	nw = howmany(nfd, NFDBITS);
1620
1621	proc_fdlock(p);
1622	for (msk = 0; msk < 3; msk++) {
1623		iptr = (u_int32_t *)&ibits[msk * nw];
1624		for (i = 0; i < nfd; i += NFDBITS) {
1625			bits = iptr[i/NFDBITS];
1626			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1627				bits &= ~(1 << j);
1628				fp = fdp->fd_ofiles[fd];
1629				if (fp == NULL ||
1630					(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1631						*countp = 0;
1632						*kfcountp = 0;
1633						error = EBADF;
1634						goto bad;
1635				}
1636				fp->f_iocount++;
1637				if ((fp->f_type == DTYPE_VNODE)
1638						&& ((vp = (struct vnode *)fp->f_data)  != NULLVP)
1639						&& (vp->v_type == VCHR) )
1640					kfc++;
1641
1642				n++;
1643			}
1644		}
1645	}
1646	proc_fdunlock(p);
1647
1648	*countp = n;
1649	*kfcountp = kfc;
1650	return (0);
1651bad:
1652	dropcount = 0;
1653
1654	if (n== 0)
1655		goto out;
1656	/* undo the iocounts */
1657	for (msk = 0; msk < 3; msk++) {
1658		iptr = (u_int32_t *)&ibits[msk * nw];
1659		for (i = 0; i < nfd; i += NFDBITS) {
1660			bits = iptr[i/NFDBITS];
1661			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1662				bits &= ~(1 << j);
1663				fp = fdp->fd_ofiles[fd];
1664				if (dropcount >= n)
1665					goto out;
1666				fp->f_iocount--;
1667
1668				if (p->p_fpdrainwait && fp->f_iocount == 0) {
1669				        p->p_fpdrainwait = 0;
1670					wakeup(&p->p_fpdrainwait);
1671				}
1672				dropcount++;
1673			}
1674		}
1675	}
1676out:
1677	proc_fdunlock(p);
1678	return(error);
1679}
1680
1681static int
1682seldrop(struct proc *p, u_int32_t *ibits, int nfd)
1683{
1684	struct filedesc *fdp = p->p_fd;
1685	int msk, i, j, fd;
1686	u_int32_t bits;
1687	struct fileproc *fp;
1688	int n = 0;
1689	u_int32_t *iptr;
1690	u_int nw;
1691
1692	/*
1693	 * Problems when reboot; due to MacOSX signal probs
1694	 * in Beaker1C ; verify that the p->p_fd is valid
1695	 */
1696	if (fdp == NULL) {
1697		return(EIO);
1698	}
1699
1700	nw = howmany(nfd, NFDBITS);
1701
1702
1703	proc_fdlock(p);
1704	for (msk = 0; msk < 3; msk++) {
1705		iptr = (u_int32_t *)&ibits[msk * nw];
1706		for (i = 0; i < nfd; i += NFDBITS) {
1707			bits = iptr[i/NFDBITS];
1708			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1709				bits &= ~(1 << j);
1710				fp = fdp->fd_ofiles[fd];
1711				if (fp == NULL
1712#if 0
1713			/* if you are here then it is being closed */
1714					|| (fdp->fd_ofileflags[fd] & UF_RESERVED)
1715#endif
1716					) {
1717						proc_fdunlock(p);
1718						return(EBADF);
1719				}
1720				n++;
1721				fp->f_iocount--;
1722				fp->f_flags &= ~FP_INSELECT;
1723
1724				if (p->p_fpdrainwait && fp->f_iocount == 0) {
1725				        p->p_fpdrainwait = 0;
1726					wakeup(&p->p_fpdrainwait);
1727				}
1728			}
1729		}
1730	}
1731	proc_fdunlock(p);
1732	return (0);
1733}
1734
1735/*
1736 * Record a select request.
1737 */
1738void
1739selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql)
1740{
1741	thread_t	cur_act = current_thread();
1742	struct uthread * ut = get_bsdthread_info(cur_act);
1743
1744	/* need to look at collisions */
1745
1746	if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
1747		return;
1748	}
1749
1750	/*do not record if this is second pass of select */
1751	if((p_wql == (void *)0)) {
1752		return;
1753	}
1754
1755	if ((sip->si_flags & SI_INITED) == 0) {
1756		wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO);
1757		sip->si_flags |= SI_INITED;
1758		sip->si_flags &= ~SI_CLEAR;
1759	}
1760
1761	if (sip->si_flags & SI_RECORDED) {
1762		sip->si_flags |= SI_COLL;
1763	} else
1764		sip->si_flags &= ~SI_COLL;
1765
1766	sip->si_flags |= SI_RECORDED;
1767	if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqset))
1768		wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqset,
1769					(wait_queue_link_t)p_wql);
1770
1771	return;
1772}
1773
1774void
1775selwakeup(struct selinfo *sip)
1776{
1777
1778	if ((sip->si_flags & SI_INITED) == 0) {
1779		return;
1780	}
1781
1782	if (sip->si_flags & SI_COLL) {
1783		nselcoll++;
1784		sip->si_flags &= ~SI_COLL;
1785#if 0
1786		/* will not  support */
1787		//wakeup((caddr_t)&selwait);
1788#endif
1789	}
1790
1791	if (sip->si_flags & SI_RECORDED) {
1792		wait_queue_wakeup_all(&sip->si_wait_queue, &selwait, THREAD_AWAKENED);
1793		sip->si_flags &= ~SI_RECORDED;
1794	}
1795
1796}
1797
1798void
1799selthreadclear(struct selinfo *sip)
1800{
1801
1802	if ((sip->si_flags & SI_INITED) == 0) {
1803		return;
1804	}
1805	if (sip->si_flags & SI_RECORDED) {
1806			selwakeup(sip);
1807			sip->si_flags &= ~(SI_RECORDED | SI_COLL);
1808	}
1809	sip->si_flags |= SI_CLEAR;
1810	wait_queue_unlinkall_nofree(&sip->si_wait_queue);
1811}
1812
1813
1814
1815
1816#define DBG_POST	0x10
1817#define DBG_WATCH	0x11
1818#define DBG_WAIT	0x12
1819#define DBG_MOD		0x13
1820#define DBG_EWAKEUP	0x14
1821#define DBG_ENQUEUE	0x15
1822#define DBG_DEQUEUE	0x16
1823
1824#define DBG_MISC_POST MISCDBG_CODE(DBG_EVENT,DBG_POST)
1825#define DBG_MISC_WATCH MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
1826#define DBG_MISC_WAIT MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
1827#define DBG_MISC_MOD MISCDBG_CODE(DBG_EVENT,DBG_MOD)
1828#define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
1829#define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
1830#define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)
1831
1832
1833#define EVPROCDEQUE(p, evq)	do {				\
1834	proc_lock(p);						\
1835	if (evq->ee_flags & EV_QUEUED) {			\
1836	        TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);	\
1837		evq->ee_flags &= ~EV_QUEUED;			\
1838	}							\
1839	proc_unlock(p);						\
1840} while (0);
1841
1842
1843/*
1844 * called upon socket close. deque and free all events for
1845 * the socket...  socket must be locked by caller.
1846 */
1847void
1848evsofree(struct socket *sp)
1849{
1850        struct eventqelt *evq, *next;
1851	proc_t 	p;
1852
1853	if (sp == NULL)
1854	        return;
1855
1856	for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) {
1857	        next = evq->ee_slist.tqe_next;
1858		p = evq->ee_proc;
1859
1860		if (evq->ee_flags & EV_QUEUED) {
1861		        EVPROCDEQUE(p, evq);
1862		}
1863		TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q
1864		FREE(evq, M_TEMP);
1865	}
1866}
1867
1868
1869/*
1870 * called upon pipe close. deque and free all events for
1871 * the pipe... pipe must be locked by caller
1872 */
1873void
1874evpipefree(struct pipe *cpipe)
1875{
1876        struct eventqelt *evq, *next;
1877	proc_t 	p;
1878
1879	for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) {
1880	        next = evq->ee_slist.tqe_next;
1881		p = evq->ee_proc;
1882
1883		EVPROCDEQUE(p, evq);
1884
1885		TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q
1886		FREE(evq, M_TEMP);
1887	}
1888}
1889
1890
1891/*
1892 * enqueue this event if it's not already queued. wakeup
1893 * the proc if we do queue this event to it...
1894 * entered with proc lock held... we drop it before
1895 * doing the wakeup and return in that state
1896 */
1897static void
1898evprocenque(struct eventqelt *evq)
1899{
1900        proc_t	p;
1901
1902	assert(evq);
1903	p = evq->ee_proc;
1904
1905	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, (uint32_t)evq, evq->ee_flags, evq->ee_eventmask,0,0);
1906
1907	proc_lock(p);
1908
1909	if (evq->ee_flags & EV_QUEUED) {
1910	        proc_unlock(p);
1911
1912	        KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1913		return;
1914	}
1915	evq->ee_flags |= EV_QUEUED;
1916
1917	TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist);
1918
1919	proc_unlock(p);
1920
1921	wakeup(&p->p_evlist);
1922
1923	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1924}
1925
1926
1927/*
1928 * pipe lock must be taken by the caller
1929 */
1930void
1931postpipeevent(struct pipe *pipep, int event)
1932{
1933	int	mask;
1934	struct eventqelt *evq;
1935
1936	if (pipep == NULL)
1937	        return;
1938	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0);
1939
1940	for (evq = pipep->pipe_evlist.tqh_first;
1941	     evq != NULL; evq = evq->ee_slist.tqe_next) {
1942
1943	        if (evq->ee_eventmask == 0)
1944		        continue;
1945	        mask = 0;
1946
1947		switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) {
1948
1949		case EV_RWBYTES:
1950		  if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) {
1951		          mask |= EV_RE;
1952			  evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
1953		  }
1954		  if ((evq->ee_eventmask & EV_WR) &&
1955		      (pipep->pipe_buffer.size - pipep->pipe_buffer.cnt) >= PIPE_BUF) {
1956
1957		          if (pipep->pipe_state & PIPE_EOF) {
1958			          mask |= EV_WR|EV_RESET;
1959				  break;
1960			  }
1961			  mask |= EV_WR;
1962			  evq->ee_req.er_wcnt = pipep->pipe_buffer.size - pipep->pipe_buffer.cnt;
1963		  }
1964		  break;
1965
1966		case EV_WCLOSED:
1967		case EV_RCLOSED:
1968		  if ((evq->ee_eventmask & EV_RE)) {
1969		          mask |= EV_RE|EV_RCLOSED;
1970		  }
1971		  if ((evq->ee_eventmask & EV_WR)) {
1972		          mask |= EV_WR|EV_WCLOSED;
1973		  }
1974		  break;
1975
1976		default:
1977		  return;
1978		}
1979		if (mask) {
1980		        /*
1981			 * disarm... postevents are nops until this event is 'read' via
1982			 * waitevent and then re-armed via modwatch
1983			 */
1984		        evq->ee_eventmask = 0;
1985
1986			/*
1987			 * since events are disarmed until after the waitevent
1988			 * the ee_req.er_xxxx fields can't change once we've
1989			 * inserted this event into the proc queue...
1990			 * therefore, the waitevent will see a 'consistent'
1991			 * snapshot of the event, even though it won't hold
1992			 * the pipe lock, and we're updating the event outside
1993			 * of the proc lock, which it will hold
1994			 */
1995		        evq->ee_req.er_eventbits |= mask;
1996
1997			KERNEL_DEBUG(DBG_MISC_POST, (uint32_t)evq, evq->ee_req.er_eventbits, mask, 1,0);
1998
1999			evprocenque(evq);
2000		}
2001	}
2002	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0);
2003}
2004
2005#if SOCKETS
2006/*
2007 * given either a sockbuf or a socket run down the
2008 * event list and queue ready events found...
2009 * the socket must be locked by the caller
2010 */
2011void
2012postevent(struct socket *sp, struct sockbuf *sb, int event)
2013{
2014        int	mask;
2015	struct	eventqelt *evq;
2016	struct	tcpcb *tp;
2017
2018	if (sb)
2019	        sp = sb->sb_so;
2020	if (sp == NULL)
2021	        return;
2022
2023	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0);
2024
2025	for (evq = sp->so_evlist.tqh_first;
2026	     evq != NULL; evq = evq->ee_slist.tqe_next) {
2027
2028	        if (evq->ee_eventmask == 0)
2029		        continue;
2030	        mask = 0;
2031
2032		/* ready for reading:
2033		   - byte cnt >= receive low water mark
2034		   - read-half of conn closed
2035		   - conn pending for listening sock
2036		   - socket error pending
2037
2038		   ready for writing
2039		   - byte cnt avail >= send low water mark
2040		   - write half of conn closed
2041		   - socket error pending
2042		   - non-blocking conn completed successfully
2043
2044		   exception pending
2045		   - out of band data
2046		   - sock at out of band mark
2047		*/
2048
2049		switch (event & EV_DMASK) {
2050
2051		case EV_OOB:
2052		  if ((evq->ee_eventmask & EV_EX)) {
2053		          if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
2054			          mask |= EV_EX|EV_OOB;
2055		  }
2056		  break;
2057
2058		case EV_RWBYTES|EV_OOB:
2059		  if ((evq->ee_eventmask & EV_EX)) {
2060		          if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
2061			          mask |= EV_EX|EV_OOB;
2062		  }
2063		  /*
2064		   * fall into the next case
2065		   */
2066		case EV_RWBYTES:
2067		  if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
2068		          if (sp->so_error) {
2069			          if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
2070				          if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
2071					      (tp->t_state == TCPS_CLOSED)) {
2072					          mask |= EV_RE|EV_RESET;
2073						  break;
2074					  }
2075				  }
2076			  }
2077			  mask |= EV_RE;
2078			  evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
2079
2080			  if (sp->so_state & SS_CANTRCVMORE) {
2081			          mask |= EV_FIN;
2082				  break;
2083			  }
2084		  }
2085		  if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
2086		          if (sp->so_error) {
2087			          if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
2088				          if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
2089					      (tp->t_state == TCPS_CLOSED)) {
2090					          mask |= EV_WR|EV_RESET;
2091						  break;
2092					  }
2093				  }
2094			  }
2095			  mask |= EV_WR;
2096			  evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
2097		  }
2098		  break;
2099
2100		case EV_RCONN:
2101		  if ((evq->ee_eventmask & EV_RE)) {
2102			  mask |= EV_RE|EV_RCONN;
2103		          evq->ee_req.er_rcnt = sp->so_qlen + 1;  // incl this one
2104		  }
2105		  break;
2106
2107		case EV_WCONN:
2108		  if ((evq->ee_eventmask & EV_WR)) {
2109		          mask |= EV_WR|EV_WCONN;
2110		  }
2111		  break;
2112
2113		case EV_RCLOSED:
2114		  if ((evq->ee_eventmask & EV_RE)) {
2115		          mask |= EV_RE|EV_RCLOSED;
2116		  }
2117		  break;
2118
2119		case EV_WCLOSED:
2120		  if ((evq->ee_eventmask & EV_WR)) {
2121		          mask |= EV_WR|EV_WCLOSED;
2122		  }
2123		  break;
2124
2125		case EV_FIN:
2126		  if (evq->ee_eventmask & EV_RE) {
2127		          mask |= EV_RE|EV_FIN;
2128		  }
2129		  break;
2130
2131		case EV_RESET:
2132		case EV_TIMEOUT:
2133		  if (evq->ee_eventmask & EV_RE) {
2134		          mask |= EV_RE | event;
2135		  }
2136		  if (evq->ee_eventmask & EV_WR) {
2137		          mask |= EV_WR | event;
2138		  }
2139		  break;
2140
2141		default:
2142		  KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0);
2143		  return;
2144		} /* switch */
2145
2146		KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0);
2147
2148		if (mask) {
2149		        /*
2150			 * disarm... postevents are nops until this event is 'read' via
2151			 * waitevent and then re-armed via modwatch
2152			 */
2153		        evq->ee_eventmask = 0;
2154
2155			/*
2156			 * since events are disarmed until after the waitevent
2157			 * the ee_req.er_xxxx fields can't change once we've
2158			 * inserted this event into the proc queue...
2159			 * since waitevent can't see this event until we
2160			 * enqueue it, waitevent will see a 'consistent'
2161			 * snapshot of the event, even though it won't hold
2162			 * the socket lock, and we're updating the event outside
2163			 * of the proc lock, which it will hold
2164			 */
2165		        evq->ee_req.er_eventbits |= mask;
2166
2167			evprocenque(evq);
2168		}
2169	}
2170	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0);
2171}
2172#endif /* SOCKETS */
2173
2174
2175/*
2176 * watchevent system call. user passes us an event to watch
2177 * for. we malloc an event object, initialize it, and queue
2178 * it to the open socket. when the event occurs, postevent()
2179 * will enque it back to our proc where we can retrieve it
2180 * via waitevent().
2181 *
2182 * should this prevent duplicate events on same socket?
2183 *
2184 * Returns:
2185 *		ENOMEM			No memory for operation
2186 *	copyin:EFAULT
2187 */
2188int
2189watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval)
2190{
2191	struct eventqelt *evq = (struct eventqelt *)0;
2192	struct eventqelt *np = NULL;
2193	struct eventreq64 *erp;
2194	struct fileproc *fp = NULL;
2195	int error;
2196
2197	KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
2198
2199	// get a qelt and fill with users req
2200	MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
2201
2202	if (evq == NULL)
2203		return (ENOMEM);
2204	erp = &evq->ee_req;
2205
2206	// get users request pkt
2207
2208	if (IS_64BIT_PROCESS(p)) {
2209	        error = copyin(uap->u_req, (caddr_t)erp, sizeof(struct eventreq64));
2210	} else {
2211	        struct eventreq32 er32;
2212
2213	        error = copyin(uap->u_req, (caddr_t)&er32, sizeof(struct eventreq32));
2214		if (error == 0) {
2215		       /*
2216			* the user only passes in the
2217			* er_type, er_handle and er_data...
2218			* the other fields are initialized
2219			* below, so don't bother to copy
2220			*/
2221		        erp->er_type = er32.er_type;
2222		        erp->er_handle = er32.er_handle;
2223		        erp->er_data = (user_addr_t)er32.er_data;
2224		}
2225	}
2226	if (error) {
2227	        FREE(evq, M_TEMP);
2228		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2229
2230		return(error);
2231	}
2232	KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
2233
2234	// validate, freeing qelt if errors
2235	error = 0;
2236	proc_fdlock(p);
2237
2238	if (erp->er_type != EV_FD) {
2239		error = EINVAL;
2240	} else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2241		error = EBADF;
2242#if SOCKETS
2243	} else if (fp->f_type == DTYPE_SOCKET) {
2244		socket_lock((struct socket *)fp->f_data, 1);
2245		np = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2246#endif /* SOCKETS */
2247	} else if (fp->f_type == DTYPE_PIPE) {
2248		PIPE_LOCK((struct pipe *)fp->f_data);
2249		np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2250	} else {
2251		fp_drop(p, erp->er_handle, fp, 1);
2252		error = EINVAL;
2253	}
2254	proc_fdunlock(p);
2255
2256	if (error) {
2257		FREE(evq, M_TEMP);
2258
2259		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2260		return(error);
2261	}
2262
2263	/*
2264	 * only allow one watch per file per proc
2265	 */
2266	for ( ; np != NULL; np = np->ee_slist.tqe_next) {
2267		if (np->ee_proc == p) {
2268#if SOCKETS
2269			if (fp->f_type == DTYPE_SOCKET)
2270				socket_unlock((struct socket *)fp->f_data, 1);
2271			else
2272#endif /* SOCKETS */
2273				PIPE_UNLOCK((struct pipe *)fp->f_data);
2274			fp_drop(p, erp->er_handle, fp, 0);
2275			FREE(evq, M_TEMP);
2276
2277			KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2278			return(EINVAL);
2279		}
2280	}
2281	erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
2282	evq->ee_proc = p;
2283	evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2284	evq->ee_flags = 0;
2285
2286#if SOCKETS
2287	if (fp->f_type == DTYPE_SOCKET) {
2288		TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2289		postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events
2290
2291		socket_unlock((struct socket *)fp->f_data, 1);
2292	} else
2293#endif /* SOCKETS */
2294	{
2295		TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2296		postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES);
2297
2298		PIPE_UNLOCK((struct pipe *)fp->f_data);
2299	}
2300	fp_drop_event(p, erp->er_handle, fp);
2301
2302	KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
2303	return(0);
2304}
2305
2306
2307
2308/*
2309 * waitevent system call.
2310 * grabs the next waiting event for this proc and returns
2311 * it. if no events, user can request to sleep with timeout
2312 * or without or poll mode
2313 *    ((tv != NULL && interval == 0) || tv == -1)
2314 */
2315int
2316waitevent(proc_t p, struct waitevent_args *uap, int *retval)
2317{
2318        int error = 0;
2319	struct eventqelt *evq;
2320	struct eventreq64 *erp;
2321	uint64_t abstime, interval;
2322	boolean_t fast_poll = FALSE;
2323	union {
2324	        struct eventreq64 er64;
2325	        struct eventreq32 er32;
2326	} uer;
2327
2328	interval = 0;
2329
2330	if (uap->tv) {
2331		struct timeval atv;
2332		/*
2333		 * check for fast poll method
2334		 */
2335		if (IS_64BIT_PROCESS(p)) {
2336		        if (uap->tv == (user_addr_t)-1)
2337			        fast_poll = TRUE;
2338		} else if (uap->tv == (user_addr_t)((uint32_t)-1))
2339		        fast_poll = TRUE;
2340
2341		if (fast_poll == TRUE) {
2342		        if (p->p_evlist.tqh_first == NULL) {
2343				KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_NONE, -1,0,0,0,0);
2344				/*
2345				 * poll failed
2346				 */
2347			        *retval = 1;
2348				return (0);
2349			}
2350			proc_lock(p);
2351			goto retry;
2352		}
2353		error = copyin(uap->tv, (caddr_t)&atv, sizeof (atv));
2354
2355		if (error)
2356			return(error);
2357		if (itimerfix(&atv)) {
2358			error = EINVAL;
2359			return(error);
2360		}
2361		interval = tvtoabstime(&atv);
2362	}
2363	KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);
2364
2365	proc_lock(p);
2366retry:
2367	if ((evq = p->p_evlist.tqh_first) != NULL) {
2368	        /*
2369		 * found one... make a local copy while it's still on the queue
2370		 * to prevent it from changing while in the midst of copying
2371		 * don't want to hold the proc lock across a copyout because
2372		 * it might block on a page fault at the target in user space
2373		 */
2374	        erp = &evq->ee_req;
2375
2376		if (IS_64BIT_PROCESS(p))
2377		        bcopy((caddr_t)erp, (caddr_t)&uer.er64, sizeof (struct eventreq64));
2378		else {
2379		        uer.er32.er_type  = erp->er_type;
2380		        uer.er32.er_handle  = erp->er_handle;
2381		        uer.er32.er_data  = (uint32_t)erp->er_data;
2382		        uer.er32.er_ecnt  = erp->er_ecnt;
2383		        uer.er32.er_rcnt  = erp->er_rcnt;
2384		        uer.er32.er_wcnt  = erp->er_wcnt;
2385		        uer.er32.er_eventbits = erp->er_eventbits;
2386		}
2387	        TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);
2388
2389		evq->ee_flags &= ~EV_QUEUED;
2390
2391		proc_unlock(p);
2392
2393		if (IS_64BIT_PROCESS(p))
2394		        error = copyout((caddr_t)&uer.er64, uap->u_req, sizeof(struct eventreq64));
2395		else
2396		        error = copyout((caddr_t)&uer.er32, uap->u_req, sizeof(struct eventreq32));
2397
2398		KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
2399			     evq->ee_req.er_handle,evq->ee_req.er_eventbits,(uint32_t)evq,0);
2400		return (error);
2401	}
2402	else {
2403		if (uap->tv && interval == 0) {
2404			proc_unlock(p);
2405			*retval = 1;  // poll failed
2406
2407			KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
2408			return (error);
2409		}
2410		if (interval != 0)
2411			clock_absolutetime_interval_to_deadline(interval, &abstime);
2412		else
2413		        abstime = 0;
2414
2415		KERNEL_DEBUG(DBG_MISC_WAIT, 1,(uint32_t)&p->p_evlist,0,0,0);
2416
2417		error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime);
2418
2419		KERNEL_DEBUG(DBG_MISC_WAIT, 2,(uint32_t)&p->p_evlist,0,0,0);
2420
2421		if (error == 0)
2422			goto retry;
2423		if (error == ERESTART)
2424			error = EINTR;
2425		if (error == EWOULDBLOCK) {
2426			*retval = 1;
2427			error = 0;
2428		}
2429	}
2430	proc_unlock(p);
2431
2432	KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
2433	return (error);
2434}
2435
2436
2437/*
2438 * modwatch system call. user passes in event to modify.
2439 * if we find it we reset the event bits and que/deque event
2440 * it needed.
2441 */
2442int
2443modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval)
2444{
2445	struct eventreq64 er;
2446	struct eventreq64 *erp = &er;
2447	struct eventqelt *evq = NULL;	/* protected by error return */
2448	int error;
2449	struct fileproc *fp;
2450	int flag;
2451
2452	KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
2453
2454	/*
2455	 * get user's request pkt
2456	 * just need the er_type and er_handle which sit above the
2457	 * problematic er_data (32/64 issue)... so only copy in
2458	 * those 2 fields
2459	 */
2460	if ((error = copyin(uap->u_req, (caddr_t)erp, sizeof(er.er_type) + sizeof(er.er_handle)))) {
2461	        KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2462	        return(error);
2463	}
2464	proc_fdlock(p);
2465
2466	if (erp->er_type != EV_FD) {
2467		error = EINVAL;
2468	} else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2469		error = EBADF;
2470#if SOCKETS
2471	} else if (fp->f_type == DTYPE_SOCKET) {
2472		socket_lock((struct socket *)fp->f_data, 1);
2473		evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2474#endif /* SOCKETS */
2475	} else if (fp->f_type == DTYPE_PIPE) {
2476		PIPE_LOCK((struct pipe *)fp->f_data);
2477		evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2478	} else {
2479		fp_drop(p, erp->er_handle, fp, 1);
2480		error = EINVAL;
2481	}
2482
2483	if (error) {
2484		proc_fdunlock(p);
2485		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2486		return(error);
2487	}
2488
2489	if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) {
2490		fp->f_flags &= ~FP_WAITEVENT;
2491	}
2492	proc_fdunlock(p);
2493
2494	// locate event if possible
2495	for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2496	        if (evq->ee_proc == p)
2497		        break;
2498	}
2499	if (evq == NULL) {
2500#if SOCKETS
2501		if (fp->f_type == DTYPE_SOCKET)
2502			socket_unlock((struct socket *)fp->f_data, 1);
2503		else
2504#endif /* SOCKETS */
2505			PIPE_UNLOCK((struct pipe *)fp->f_data);
2506		fp_drop(p, erp->er_handle, fp, 0);
2507		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
2508		return(EINVAL);
2509	}
2510	KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
2511
2512	if (uap->u_eventmask == EV_RM) {
2513		EVPROCDEQUE(p, evq);
2514
2515#if SOCKETS
2516		if (fp->f_type == DTYPE_SOCKET) {
2517			TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2518			socket_unlock((struct socket *)fp->f_data, 1);
2519		} else
2520#endif /* SOCKETS */
2521		{
2522			TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2523			PIPE_UNLOCK((struct pipe *)fp->f_data);
2524		}
2525		fp_drop(p, erp->er_handle, fp, 0);
2526		FREE(evq, M_TEMP);
2527		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
2528		return(0);
2529	}
2530	switch (uap->u_eventmask & EV_MASK) {
2531
2532	case 0:
2533		flag = 0;
2534		break;
2535
2536	case EV_RE:
2537	case EV_WR:
2538	case EV_RE|EV_WR:
2539		flag = EV_RWBYTES;
2540		break;
2541
2542	case EV_EX:
2543		flag = EV_OOB;
2544		break;
2545
2546	case EV_EX|EV_RE:
2547	case EV_EX|EV_WR:
2548	case EV_EX|EV_RE|EV_WR:
2549		flag = EV_OOB|EV_RWBYTES;
2550		break;
2551
2552	default:
2553#if SOCKETS
2554		if (fp->f_type == DTYPE_SOCKET)
2555			socket_unlock((struct socket *)fp->f_data, 1);
2556		else
2557#endif /* SOCKETS */
2558			PIPE_UNLOCK((struct pipe *)fp->f_data);
2559		fp_drop(p, erp->er_handle, fp, 0);
2560		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2561		return(EINVAL);
2562	}
2563	/*
2564	 * since we're holding the socket/pipe lock, the event
2565	 * cannot go from the unqueued state to the queued state
2566	 * however, it can go from the queued state to the unqueued state
2567	 * since that direction is protected by the proc_lock...
2568	 * so do a quick check for EV_QUEUED w/o holding the proc lock
2569	 * since by far the common case will be NOT EV_QUEUED, this saves
2570	 * us taking the proc_lock the majority of the time
2571	 */
2572	if (evq->ee_flags & EV_QUEUED) {
2573		/*
2574		 * EVPROCDEQUE will recheck the state after it grabs the proc_lock
2575		 */
2576		EVPROCDEQUE(p, evq);
2577	}
2578	/*
2579	 * while the event is off the proc queue and
2580	 * we're holding the socket/pipe lock
2581	 * it's safe to update these fields...
2582	 */
2583	evq->ee_req.er_eventbits = 0;
2584	evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2585
2586#if SOCKETS
2587	if (fp->f_type == DTYPE_SOCKET) {
2588		postevent((struct socket *)fp->f_data, 0, flag);
2589		socket_unlock((struct socket *)fp->f_data, 1);
2590	} else
2591#endif /* SOCKETS */
2592	{
2593		postpipeevent((struct pipe *)fp->f_data, flag);
2594		PIPE_UNLOCK((struct pipe *)fp->f_data);
2595	}
2596	fp_drop(p, erp->er_handle, fp, 0);
2597	KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,(uint32_t)fp->f_data,flag,0);
2598	return(0);
2599}
2600
2601/* this routine is called from the close of fd with proc_fdlock held */
2602int
2603waitevent_close(struct proc *p, struct fileproc *fp)
2604{
2605	struct eventqelt *evq;
2606
2607
2608	fp->f_flags &= ~FP_WAITEVENT;
2609
2610#if SOCKETS
2611	if (fp->f_type == DTYPE_SOCKET) {
2612	        socket_lock((struct socket *)fp->f_data, 1);
2613		evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2614	} else
2615#endif /* SOCKETS */
2616	if (fp->f_type == DTYPE_PIPE) {
2617	        PIPE_LOCK((struct pipe *)fp->f_data);
2618		evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2619	}
2620	else {
2621		return(EINVAL);
2622	}
2623	proc_fdunlock(p);
2624
2625
2626	// locate event if possible
2627	for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2628	        if (evq->ee_proc == p)
2629		        break;
2630	}
2631	if (evq == NULL) {
2632#if SOCKETS
2633	        if (fp->f_type == DTYPE_SOCKET)
2634		        socket_unlock((struct socket *)fp->f_data, 1);
2635		else
2636#endif /* SOCKETS */
2637		        PIPE_UNLOCK((struct pipe *)fp->f_data);
2638
2639		proc_fdlock(p);
2640
2641		return(EINVAL);
2642	}
2643	EVPROCDEQUE(p, evq);
2644
2645#if SOCKETS
2646	if (fp->f_type == DTYPE_SOCKET) {
2647		TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2648		socket_unlock((struct socket *)fp->f_data, 1);
2649	} else
2650#endif /* SOCKETS */
2651	{
2652		TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2653		PIPE_UNLOCK((struct pipe *)fp->f_data);
2654	}
2655	FREE(evq, M_TEMP);
2656
2657	proc_fdlock(p);
2658
2659	return(0);
2660}
2661
2662
2663/*
2664 * gethostuuid
2665 *
2666 * Description:	Get the host UUID from IOKit and return it to user space.
2667 *
2668 * Parameters:	uuid_buf		Pointer to buffer to receive UUID
2669 *		timeout			Timespec for timout
2670 *
2671 * Returns:	0			Success
2672 *		EWOULDBLOCK		Timeout is too short
2673 *		copyout:EFAULT		Bad user buffer
2674 *
2675 * Notes:	A timeout seems redundant, since if it's tolerable to not
2676 *		have a system UUID in hand, then why ask for one?
2677 */
2678int
2679gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused register_t *retval)
2680{
2681	kern_return_t kret;
2682	int error;
2683	mach_timespec_t mach_ts;	/* for IOKit call */
2684	__darwin_uuid_t uuid_kern;	/* for IOKit call */
2685
2686	/* Convert the 32/64 bit timespec into a mach_timespec_t */
2687	if ( proc_is64bit(p) ) {
2688		struct user_timespec ts;
2689		error = copyin(uap->timeoutp, &ts, sizeof(ts));
2690		if (error)
2691			return (error);
2692		mach_ts.tv_sec = ts.tv_sec;
2693		mach_ts.tv_nsec = ts.tv_nsec;
2694	} else {
2695		struct timespec ts;
2696		error = copyin(uap->timeoutp, &ts, sizeof(ts) );
2697		if (error)
2698			return (error);
2699		mach_ts.tv_sec = ts.tv_sec;
2700		mach_ts.tv_nsec = ts.tv_nsec;
2701	}
2702
2703	/* Call IOKit with the stack buffer to get the UUID */
2704	kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);
2705
2706	/*
2707	 * If we get it, copy out the data to the user buffer; note that a
2708	 * uuid_t is an array of characters, so this is size invariant for
2709	 * 32 vs. 64 bit.
2710	 */
2711	if (kret == KERN_SUCCESS) {
2712		error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern));
2713	} else {
2714		error = EWOULDBLOCK;
2715	}
2716
2717	return (error);
2718}
2719