sys_generic.c revision 36846
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
39 * $Id: sys_generic.c,v 1.38 1998/05/17 11:52:51 phk Exp $
40 */
41
42#include "opt_ktrace.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/sysproto.h>
47#include <sys/filedesc.h>
48#include <sys/filio.h>
49#include <sys/ttycom.h>
50#include <sys/fcntl.h>
51#include <sys/file.h>
52#include <sys/proc.h>
53#include <sys/signalvar.h>
54#include <sys/socketvar.h>
55#include <sys/uio.h>
56#include <sys/kernel.h>
57#include <sys/malloc.h>
58#include <sys/poll.h>
59#include <sys/sysent.h>
60#ifdef KTRACE
61#include <sys/ktrace.h>
62#endif
63
64static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
65static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
66MALLOC_DEFINE(M_IOV, "iov", "large iov's");
67
68static int	pollscan __P((struct proc *, struct pollfd *, int));
69static int	selscan __P((struct proc *, fd_mask **, fd_mask **, int));
70
71/*
72 * Read system call.
73 */
74#ifndef _SYS_SYSPROTO_H_
75struct read_args {
76	int	fd;
77	char	*buf;
78	u_int	nbyte;
79};
80#endif
81/* ARGSUSED */
82int
83read(p, uap)
84	struct proc *p;
85	register struct read_args *uap;
86{
87	register struct file *fp;
88	register struct filedesc *fdp = p->p_fd;
89	struct uio auio;
90	struct iovec aiov;
91	long cnt, error = 0;
92#ifdef KTRACE
93	struct iovec ktriov;
94#endif
95
96	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
97	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
98	    (fp->f_flag & FREAD) == 0)
99		return (EBADF);
100	aiov.iov_base = (caddr_t)uap->buf;
101	aiov.iov_len = uap->nbyte;
102	auio.uio_iov = &aiov;
103	auio.uio_iovcnt = 1;
104	auio.uio_offset = -1;
105
106	auio.uio_resid = uap->nbyte;
107	if (auio.uio_resid < 0)
108		return (EINVAL);
109
110	auio.uio_rw = UIO_READ;
111	auio.uio_segflg = UIO_USERSPACE;
112	auio.uio_procp = p;
113#ifdef KTRACE
114	/*
115	 * if tracing, save a copy of iovec
116	 */
117	if (KTRPOINT(p, KTR_GENIO))
118		ktriov = aiov;
119#endif
120	cnt = uap->nbyte;
121	if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred)))
122		if (auio.uio_resid != cnt && (error == ERESTART ||
123		    error == EINTR || error == EWOULDBLOCK))
124			error = 0;
125	cnt -= auio.uio_resid;
126#ifdef KTRACE
127	if (KTRPOINT(p, KTR_GENIO) && error == 0)
128		ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktriov, cnt, error);
129#endif
130	p->p_retval[0] = cnt;
131	return (error);
132}
133
134/*
135 * Scatter read system call.
136 */
137#ifndef _SYS_SYSPROTO_H_
138struct readv_args {
139	int	fd;
140	struct	iovec *iovp;
141	u_int	iovcnt;
142};
143#endif
144int
145readv(p, uap)
146	struct proc *p;
147	register struct readv_args *uap;
148{
149	register struct file *fp;
150	register struct filedesc *fdp = p->p_fd;
151	struct uio auio;
152	register struct iovec *iov;
153	struct iovec *needfree;
154	struct iovec aiov[UIO_SMALLIOV];
155	long i, cnt, error = 0;
156	u_int iovlen;
157#ifdef KTRACE
158	struct iovec *ktriov = NULL;
159#endif
160
161	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
162	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
163	    (fp->f_flag & FREAD) == 0)
164		return (EBADF);
165	/* note: can't use iovlen until iovcnt is validated */
166	iovlen = uap->iovcnt * sizeof (struct iovec);
167	if (uap->iovcnt > UIO_SMALLIOV) {
168		if (uap->iovcnt > UIO_MAXIOV)
169			return (EINVAL);
170		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
171		needfree = iov;
172	} else {
173		iov = aiov;
174		needfree = NULL;
175	}
176	auio.uio_iov = iov;
177	auio.uio_iovcnt = uap->iovcnt;
178	auio.uio_rw = UIO_READ;
179	auio.uio_segflg = UIO_USERSPACE;
180	auio.uio_procp = p;
181	auio.uio_offset = -1;
182	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
183		goto done;
184	auio.uio_resid = 0;
185	for (i = 0; i < uap->iovcnt; i++) {
186		auio.uio_resid += iov->iov_len;
187		if (auio.uio_resid < 0) {
188			error = EINVAL;
189			goto done;
190		}
191		iov++;
192	}
193#ifdef KTRACE
194	/*
195	 * if tracing, save a copy of iovec
196	 */
197	if (KTRPOINT(p, KTR_GENIO))  {
198		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
199		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
200	}
201#endif
202	cnt = auio.uio_resid;
203	if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred)))
204		if (auio.uio_resid != cnt && (error == ERESTART ||
205		    error == EINTR || error == EWOULDBLOCK))
206			error = 0;
207	cnt -= auio.uio_resid;
208#ifdef KTRACE
209	if (ktriov != NULL) {
210		if (error == 0)
211			ktrgenio(p->p_tracep, uap->fd, UIO_READ, ktriov,
212			    cnt, error);
213		FREE(ktriov, M_TEMP);
214	}
215#endif
216	p->p_retval[0] = cnt;
217done:
218	if (needfree)
219		FREE(needfree, M_IOV);
220	return (error);
221}
222
223/*
224 * Write system call
225 */
226#ifndef _SYS_SYSPROTO_H_
227struct write_args {
228	int	fd;
229	char	*buf;
230	u_int	nbyte;
231};
232#endif
233int
234write(p, uap)
235	struct proc *p;
236	register struct write_args *uap;
237{
238	register struct file *fp;
239	register struct filedesc *fdp = p->p_fd;
240	struct uio auio;
241	struct iovec aiov;
242	long cnt, error = 0;
243#ifdef KTRACE
244	struct iovec ktriov;
245#endif
246
247	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
248	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
249	    (fp->f_flag & FWRITE) == 0)
250		return (EBADF);
251	aiov.iov_base = (caddr_t)uap->buf;
252	aiov.iov_len = uap->nbyte;
253	auio.uio_iov = &aiov;
254	auio.uio_iovcnt = 1;
255	auio.uio_offset = -1;
256	auio.uio_resid = uap->nbyte;
257	auio.uio_rw = UIO_WRITE;
258	auio.uio_segflg = UIO_USERSPACE;
259	auio.uio_procp = p;
260#ifdef KTRACE
261	/*
262	 * if tracing, save a copy of iovec
263	 */
264	if (KTRPOINT(p, KTR_GENIO))
265		ktriov = aiov;
266#endif
267	cnt = uap->nbyte;
268	if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) {
269		if (auio.uio_resid != cnt && (error == ERESTART ||
270		    error == EINTR || error == EWOULDBLOCK))
271			error = 0;
272		if (error == EPIPE)
273			psignal(p, SIGPIPE);
274	}
275	cnt -= auio.uio_resid;
276#ifdef KTRACE
277	if (KTRPOINT(p, KTR_GENIO) && error == 0)
278		ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
279		    &ktriov, cnt, error);
280#endif
281	p->p_retval[0] = cnt;
282	return (error);
283}
284
285/*
286 * Gather write system call
287 */
288#ifndef _SYS_SYSPROTO_H_
289struct writev_args {
290	int	fd;
291	struct	iovec *iovp;
292	u_int	iovcnt;
293};
294#endif
295int
296writev(p, uap)
297	struct proc *p;
298	register struct writev_args *uap;
299{
300	register struct file *fp;
301	register struct filedesc *fdp = p->p_fd;
302	struct uio auio;
303	register struct iovec *iov;
304	struct iovec *needfree;
305	struct iovec aiov[UIO_SMALLIOV];
306	long i, cnt, error = 0;
307	u_int iovlen;
308#ifdef KTRACE
309	struct iovec *ktriov = NULL;
310#endif
311
312	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
313	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
314	    (fp->f_flag & FWRITE) == 0)
315		return (EBADF);
316	/* note: can't use iovlen until iovcnt is validated */
317	iovlen = uap->iovcnt * sizeof (struct iovec);
318	if (uap->iovcnt > UIO_SMALLIOV) {
319		if (uap->iovcnt > UIO_MAXIOV)
320			return (EINVAL);
321		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
322		needfree = iov;
323	} else {
324		iov = aiov;
325		needfree = NULL;
326	}
327	auio.uio_iov = iov;
328	auio.uio_iovcnt = uap->iovcnt;
329	auio.uio_rw = UIO_WRITE;
330	auio.uio_segflg = UIO_USERSPACE;
331	auio.uio_procp = p;
332	auio.uio_offset = -1;
333	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
334		goto done;
335	auio.uio_resid = 0;
336	for (i = 0; i < uap->iovcnt; i++) {
337		auio.uio_resid += iov->iov_len;
338		if (auio.uio_resid < 0) {
339			error = EINVAL;
340			goto done;
341		}
342		iov++;
343	}
344#ifdef KTRACE
345	/*
346	 * if tracing, save a copy of iovec
347	 */
348	if (KTRPOINT(p, KTR_GENIO))  {
349		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
350		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
351	}
352#endif
353	cnt = auio.uio_resid;
354	if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) {
355		if (auio.uio_resid != cnt && (error == ERESTART ||
356		    error == EINTR || error == EWOULDBLOCK))
357			error = 0;
358		if (error == EPIPE)
359			psignal(p, SIGPIPE);
360	}
361	cnt -= auio.uio_resid;
362#ifdef KTRACE
363	if (ktriov != NULL) {
364		if (error == 0)
365			ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
366				ktriov, cnt, error);
367		FREE(ktriov, M_TEMP);
368	}
369#endif
370	p->p_retval[0] = cnt;
371done:
372	if (needfree)
373		FREE(needfree, M_IOV);
374	return (error);
375}
376
377/*
378 * Ioctl system call
379 */
380#ifndef _SYS_SYSPROTO_H_
381struct ioctl_args {
382	int	fd;
383	int	com;
384	caddr_t	data;
385};
386#endif
387/* ARGSUSED */
388int
389ioctl(p, uap)
390	struct proc *p;
391	register struct ioctl_args *uap;
392{
393	register struct file *fp;
394	register struct filedesc *fdp;
395	register u_long com;
396	int error;
397	register u_int size;
398	caddr_t data, memp;
399	int tmp;
400#define STK_PARAMS	128
401	char stkbuf[STK_PARAMS];
402
403	fdp = p->p_fd;
404	if ((u_int)uap->fd >= fdp->fd_nfiles ||
405	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
406		return (EBADF);
407
408	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
409		return (EBADF);
410
411	switch (com = uap->com) {
412	case FIONCLEX:
413		fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
414		return (0);
415	case FIOCLEX:
416		fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
417		return (0);
418	}
419
420	/*
421	 * Interpret high order word to find amount of data to be
422	 * copied to/from the user's address space.
423	 */
424	size = IOCPARM_LEN(com);
425	if (size > IOCPARM_MAX)
426		return (ENOTTY);
427	memp = NULL;
428	if (size > sizeof (stkbuf)) {
429		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
430		data = memp;
431	} else
432		data = stkbuf;
433	if (com&IOC_IN) {
434		if (size) {
435			error = copyin(uap->data, data, (u_int)size);
436			if (error) {
437				if (memp)
438					free(memp, M_IOCTLOPS);
439				return (error);
440			}
441		} else
442			*(caddr_t *)data = uap->data;
443	} else if ((com&IOC_OUT) && size)
444		/*
445		 * Zero the buffer so the user always
446		 * gets back something deterministic.
447		 */
448		bzero(data, size);
449	else if (com&IOC_VOID)
450		*(caddr_t *)data = uap->data;
451
452	switch (com) {
453
454	case FIONBIO:
455		if ((tmp = *(int *)data))
456			fp->f_flag |= FNONBLOCK;
457		else
458			fp->f_flag &= ~FNONBLOCK;
459		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
460		break;
461
462	case FIOASYNC:
463		if ((tmp = *(int *)data))
464			fp->f_flag |= FASYNC;
465		else
466			fp->f_flag &= ~FASYNC;
467		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
468		break;
469
470	case FIOSETOWN:
471		tmp = *(int *)data;
472		if (fp->f_type == DTYPE_SOCKET) {
473			((struct socket *)fp->f_data)->so_pgid = tmp;
474			error = 0;
475			break;
476		}
477		if (tmp <= 0) {
478			tmp = -tmp;
479		} else {
480			struct proc *p1 = pfind(tmp);
481			if (p1 == 0) {
482				error = ESRCH;
483				break;
484			}
485			tmp = p1->p_pgrp->pg_id;
486		}
487		error = (*fp->f_ops->fo_ioctl)
488			(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
489		break;
490
491	case FIOGETOWN:
492		if (fp->f_type == DTYPE_SOCKET) {
493			error = 0;
494			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
495			break;
496		}
497		error = (*fp->f_ops->fo_ioctl)(fp, (int)TIOCGPGRP, data, p);
498		*(int *)data = -*(int *)data;
499		break;
500
501	default:
502		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
503		/*
504		 * Copy any data to user, size was
505		 * already set and checked above.
506		 */
507		if (error == 0 && (com&IOC_OUT) && size)
508			error = copyout(data, uap->data, (u_int)size);
509		break;
510	}
511	if (memp)
512		free(memp, M_IOCTLOPS);
513	return (error);
514}
515
516static int	nselcoll;
517int	selwait;
518
519/*
520 * Select system call.
521 */
522#ifndef _SYS_SYSPROTO_H_
523struct select_args {
524	int	nd;
525	fd_set	*in, *ou, *ex;
526	struct	timeval *tv;
527};
528#endif
529int
530select(p, uap)
531	register struct proc *p;
532	register struct select_args *uap;
533{
534	/*
535	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
536	 * infds with the new FD_SETSIZE of 1024, and more than enough for
537	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
538	 * of 256.
539	 */
540	fd_mask s_selbits[howmany(2048, NFDBITS)];
541	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
542	struct timeval atv, rtv, ttv;
543	int s, ncoll, error, timo;
544	u_int nbufbytes, ncpbytes, nfdbits;
545
546	if (uap->nd < 0)
547		return (EINVAL);
548	if (uap->nd > p->p_fd->fd_nfiles)
549		uap->nd = p->p_fd->fd_nfiles;   /* forgiving; slightly wrong */
550
551	/*
552	 * Allocate just enough bits for the non-null fd_sets.  Use the
553	 * preallocated auto buffer if possible.
554	 */
555	nfdbits = roundup(uap->nd, NFDBITS);
556	ncpbytes = nfdbits / NBBY;
557	nbufbytes = 0;
558	if (uap->in != NULL)
559		nbufbytes += 2 * ncpbytes;
560	if (uap->ou != NULL)
561		nbufbytes += 2 * ncpbytes;
562	if (uap->ex != NULL)
563		nbufbytes += 2 * ncpbytes;
564	if (nbufbytes <= sizeof s_selbits)
565		selbits = &s_selbits[0];
566	else
567		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
568
569	/*
570	 * Assign pointers into the bit buffers and fetch the input bits.
571	 * Put the output buffers together so that they can be bzeroed
572	 * together.
573	 */
574	sbp = selbits;
575#define	getbits(name, x) \
576	do {								\
577		if (uap->name == NULL)					\
578			ibits[x] = NULL;				\
579		else {							\
580			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;	\
581			obits[x] = sbp;					\
582			sbp += ncpbytes / sizeof *sbp;			\
583			error = copyin(uap->name, ibits[x], ncpbytes);	\
584			if (error != 0)					\
585				goto done;				\
586		}							\
587	} while (0)
588	getbits(in, 0);
589	getbits(ou, 1);
590	getbits(ex, 2);
591#undef	getbits
592	if (nbufbytes != 0)
593		bzero(selbits, nbufbytes / 2);
594
595	if (uap->tv) {
596		error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
597			sizeof (atv));
598		if (error)
599			goto done;
600		if (itimerfix(&atv)) {
601			error = EINVAL;
602			goto done;
603		}
604		getmicrouptime(&rtv);
605		timevaladd(&atv, &rtv);
606	} else
607		atv.tv_sec = 0;
608	timo = 0;
609retry:
610	ncoll = nselcoll;
611	p->p_flag |= P_SELECT;
612	error = selscan(p, ibits, obits, uap->nd);
613	if (error || p->p_retval[0])
614		goto done;
615	if (atv.tv_sec) {
616		getmicrouptime(&rtv);
617		if (timevalcmp(&rtv, &atv, >=))
618			goto done;
619		ttv = atv;
620		timevalsub(&ttv, &rtv);
621		timo = ttv.tv_sec > 24 * 60 * 60 ?
622		    24 * 60 * 60 * hz : tvtohz(&ttv);
623	}
624	s = splhigh();
625	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
626		splx(s);
627		goto retry;
628	}
629	p->p_flag &= ~P_SELECT;
630	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
631	splx(s);
632	if (error == 0)
633		goto retry;
634done:
635	p->p_flag &= ~P_SELECT;
636	/* select is not restarted after signals... */
637	if (error == ERESTART)
638		error = EINTR;
639	if (error == EWOULDBLOCK)
640		error = 0;
641#define	putbits(name, x) \
642	if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
643		error = error2;
644	if (error == 0) {
645		int error2;
646
647		putbits(in, 0);
648		putbits(ou, 1);
649		putbits(ex, 2);
650#undef putbits
651	}
652	if (selbits != &s_selbits[0])
653		free(selbits, M_SELECT);
654	return (error);
655}
656
657static int
658selscan(p, ibits, obits, nfd)
659	struct proc *p;
660	fd_mask **ibits, **obits;
661	int nfd;
662{
663	register struct filedesc *fdp = p->p_fd;
664	register int msk, i, j, fd;
665	register fd_mask bits;
666	struct file *fp;
667	int n = 0;
668	/* Note: backend also returns POLLHUP/POLLERR if appropriate. */
669	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
670
671	for (msk = 0; msk < 3; msk++) {
672		if (ibits[msk] == NULL)
673			continue;
674		for (i = 0; i < nfd; i += NFDBITS) {
675			bits = ibits[msk][i/NFDBITS];
676			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
677				bits &= ~(1 << j);
678				fp = fdp->fd_ofiles[fd];
679				if (fp == NULL)
680					return (EBADF);
681				if ((*fp->f_ops->fo_poll)(fp, flag[msk],
682				    fp->f_cred, p)) {
683					obits[msk][(fd)/NFDBITS] |=
684						(1 << ((fd) % NFDBITS));
685					n++;
686				}
687			}
688		}
689	}
690	p->p_retval[0] = n;
691	return (0);
692}
693
694/*
695 * Poll system call.
696 */
697#ifndef _SYS_SYSPROTO_H_
698struct poll_args {
699	struct pollfd *fds;
700	u_int	nfds;
701	int	timeout;
702};
703#endif
704int
705poll(p, uap)
706	register struct proc *p;
707	register struct poll_args *uap;
708{
709	caddr_t bits;
710	char smallbits[32 * sizeof(struct pollfd)];
711	struct timeval atv, rtv, ttv;
712	int s, ncoll, error = 0, timo;
713	size_t ni;
714
715	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
716		/* forgiving; slightly wrong */
717		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
718	}
719	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
720	if (ni > sizeof(smallbits))
721		bits = malloc(ni, M_TEMP, M_WAITOK);
722	else
723		bits = smallbits;
724	error = copyin(SCARG(uap, fds), bits, ni);
725	if (error)
726		goto done;
727	if (SCARG(uap, timeout) != INFTIM) {
728		atv.tv_sec = SCARG(uap, timeout) / 1000;
729		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
730		if (itimerfix(&atv)) {
731			error = EINVAL;
732			goto done;
733		}
734		getmicrouptime(&rtv);
735		timevaladd(&atv, &rtv);
736	} else
737		atv.tv_sec = 0;
738	timo = 0;
739retry:
740	ncoll = nselcoll;
741	p->p_flag |= P_SELECT;
742	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds));
743	if (error || p->p_retval[0])
744		goto done;
745	if (atv.tv_sec) {
746		getmicrouptime(&rtv);
747		if (timevalcmp(&rtv, &atv, >=))
748			goto done;
749		ttv = atv;
750		timevalsub(&ttv, &rtv);
751		timo = ttv.tv_sec > 24 * 60 * 60 ?
752		    24 * 60 * 60 * hz : tvtohz(&ttv);
753	}
754	s = splhigh();
755	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
756		splx(s);
757		goto retry;
758	}
759	p->p_flag &= ~P_SELECT;
760	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
761	splx(s);
762	if (error == 0)
763		goto retry;
764done:
765	p->p_flag &= ~P_SELECT;
766	/* poll is not restarted after signals... */
767	if (error == ERESTART)
768		error = EINTR;
769	if (error == EWOULDBLOCK)
770		error = 0;
771	if (error == 0) {
772		error = copyout(bits, SCARG(uap, fds), ni);
773		if (error)
774			goto out;
775	}
776out:
777	if (ni > sizeof(smallbits))
778		free(bits, M_TEMP);
779	return (error);
780}
781
782static int
783pollscan(p, fds, nfd)
784	struct proc *p;
785	struct pollfd *fds;
786	int nfd;
787{
788	register struct filedesc *fdp = p->p_fd;
789	int i;
790	struct file *fp;
791	int n = 0;
792
793	for (i = 0; i < nfd; i++, fds++) {
794		if ((u_int)fds->fd >= fdp->fd_nfiles) {
795			fds->revents = POLLNVAL;
796			n++;
797		} else {
798			fp = fdp->fd_ofiles[fds->fd];
799			if (fp == 0) {
800				fds->revents = POLLNVAL;
801				n++;
802			} else {
803				/*
804				 * Note: backend also returns POLLHUP and
805				 * POLLERR if appropriate.
806				 */
807				fds->revents = (*fp->f_ops->fo_poll)(fp,
808				    fds->events, fp->f_cred, p);
809				if (fds->revents != 0)
810					n++;
811			}
812		}
813	}
814	p->p_retval[0] = n;
815	return (0);
816}
817
818/*
819 * OpenBSD poll system call.
820 * XXX this isn't quite a true representation..  OpenBSD uses select ops.
821 */
822#ifndef _SYS_SYSPROTO_H_
823struct openbsd_poll_args {
824	struct pollfd *fds;
825	u_int	nfds;
826	int	timeout;
827};
828#endif
829int
830openbsd_poll(p, uap)
831	register struct proc *p;
832	register struct openbsd_poll_args *uap;
833{
834	return (poll(p, (struct poll_args *)uap));
835}
836
/*
 * Poll routine that reports every requested normal read or write event
 * as ready.  Only the POLLIN, POLLOUT, POLLRDNORM and POLLWRNORM bits of
 * events are passed through; dev and p are not examined.
 */
/*ARGSUSED*/
int
seltrue(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{
	int ready;

	ready = POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM;
	ready &= events;
	return (ready);
}
847
848/*
849 * Record a select request.
850 */
851void
852selrecord(selector, sip)
853	struct proc *selector;
854	struct selinfo *sip;
855{
856	struct proc *p;
857	pid_t mypid;
858
859	mypid = selector->p_pid;
860	if (sip->si_pid == mypid)
861		return;
862	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
863	    p->p_wchan == (caddr_t)&selwait)
864		sip->si_flags |= SI_COLL;
865	else
866		sip->si_pid = mypid;
867}
868
869/*
870 * Do a wakeup when a selectable event occurs.
871 */
872void
873selwakeup(sip)
874	register struct selinfo *sip;
875{
876	register struct proc *p;
877	int s;
878
879	if (sip->si_pid == 0)
880		return;
881	if (sip->si_flags & SI_COLL) {
882		nselcoll++;
883		sip->si_flags &= ~SI_COLL;
884		wakeup((caddr_t)&selwait);
885	}
886	p = pfind(sip->si_pid);
887	sip->si_pid = 0;
888	if (p != NULL) {
889		s = splhigh();
890		if (p->p_wchan == (caddr_t)&selwait) {
891			if (p->p_stat == SSLEEP)
892				setrunnable(p);
893			else
894				unsleep(p);
895		} else if (p->p_flag & P_SELECT)
896			p->p_flag &= ~P_SELECT;
897		splx(s);
898	}
899}
900