Deleted Added
sdiff udiff text old ( 69733 ) new ( 70834 )
full compact
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94
39 * $FreeBSD: head/sys/kern/sys_generic.c 69733 2000-12-07 23:45:57Z dillon $
40 */
41
42#include "opt_ktrace.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/sysproto.h>
47#include <sys/filedesc.h>
48#include <sys/filio.h>
49#include <sys/fcntl.h>
50#include <sys/file.h>
51#include <sys/proc.h>
52#include <sys/signalvar.h>
53#include <sys/socketvar.h>
54#include <sys/uio.h>
55#include <sys/kernel.h>
56#include <sys/malloc.h>
57#include <sys/poll.h>
58#include <sys/sysctl.h>
59#include <sys/sysent.h>
60#include <sys/bio.h>
61#include <sys/buf.h>
62#ifdef KTRACE
63#include <sys/ktrace.h>
64#endif
65#include <vm/vm.h>
66#include <vm/vm_page.h>
67
68#include <machine/limits.h>
69
70static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
71static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
72MALLOC_DEFINE(M_IOV, "iov", "large iov's");
73
74static int pollscan __P((struct proc *, struct pollfd *, int));
75static int selscan __P((struct proc *, fd_mask **, fd_mask **, int));
76static int dofileread __P((struct proc *, struct file *, int, void *,
77 size_t, off_t, int));
78static int dofilewrite __P((struct proc *, struct file *, int,
79 const void *, size_t, off_t, int));
80
81struct file*
82holdfp(fdp, fd, flag)
83 struct filedesc* fdp;
84 int fd, flag;
85{
86 struct file* fp;
87
88 if (((u_int)fd) >= fdp->fd_nfiles ||
89 (fp = fdp->fd_ofiles[fd]) == NULL ||
90 (fp->f_flag & flag) == 0) {
91 return (NULL);
92 }
93 fhold(fp);
94 return (fp);
95}
96
97/*
98 * Read system call.
99 */
100#ifndef _SYS_SYSPROTO_H_
101struct read_args {
102 int fd;
103 void *buf;
104 size_t nbyte;
105};
106#endif
107int
108read(p, uap)
109 struct proc *p;
110 register struct read_args *uap;
111{
112 register struct file *fp;
113 int error;
114
115 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
116 return (EBADF);
117 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0);
118 fdrop(fp, p);
119 return(error);
120}
121
122/*
123 * Pread system call
124 */
125#ifndef _SYS_SYSPROTO_H_
126struct pread_args {
127 int fd;
128 void *buf;
129 size_t nbyte;
130 int pad;
131 off_t offset;
132};
133#endif
134int
135pread(p, uap)
136 struct proc *p;
137 register struct pread_args *uap;
138{
139 register struct file *fp;
140 int error;
141
142 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
143 return (EBADF);
144 if (fp->f_type != DTYPE_VNODE) {
145 error = ESPIPE;
146 } else {
147 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte,
148 uap->offset, FOF_OFFSET);
149 }
150 fdrop(fp, p);
151 return(error);
152}
153
154/*
155 * Code common for read and pread
156 */
157int
158dofileread(p, fp, fd, buf, nbyte, offset, flags)
159 struct proc *p;
160 struct file *fp;
161 int fd, flags;
162 void *buf;
163 size_t nbyte;
164 off_t offset;
165{
166 struct uio auio;
167 struct iovec aiov;
168 long cnt, error = 0;
169#ifdef KTRACE
170 struct iovec ktriov;
171 struct uio ktruio;
172 int didktr = 0;
173#endif
174
175 aiov.iov_base = (caddr_t)buf;
176 aiov.iov_len = nbyte;
177 auio.uio_iov = &aiov;
178 auio.uio_iovcnt = 1;
179 auio.uio_offset = offset;
180 if (nbyte > INT_MAX)
181 return (EINVAL);
182 auio.uio_resid = nbyte;
183 auio.uio_rw = UIO_READ;
184 auio.uio_segflg = UIO_USERSPACE;
185 auio.uio_procp = p;
186#ifdef KTRACE
187 /*
188 * if tracing, save a copy of iovec
189 */
190 if (KTRPOINT(p, KTR_GENIO)) {
191 ktriov = aiov;
192 ktruio = auio;
193 didktr = 1;
194 }
195#endif
196 cnt = nbyte;
197
198 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) {
199 if (auio.uio_resid != cnt && (error == ERESTART ||
200 error == EINTR || error == EWOULDBLOCK))
201 error = 0;
202 }
203 cnt -= auio.uio_resid;
204#ifdef KTRACE
205 if (didktr && error == 0) {
206 ktruio.uio_iov = &ktriov;
207 ktruio.uio_resid = cnt;
208 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error);
209 }
210#endif
211 p->p_retval[0] = cnt;
212 return (error);
213}
214
215/*
216 * Scatter read system call.
217 */
218#ifndef _SYS_SYSPROTO_H_
219struct readv_args {
220 int fd;
221 struct iovec *iovp;
222 u_int iovcnt;
223};
224#endif
225int
226readv(p, uap)
227 struct proc *p;
228 register struct readv_args *uap;
229{
230 register struct file *fp;
231 register struct filedesc *fdp = p->p_fd;
232 struct uio auio;
233 register struct iovec *iov;
234 struct iovec *needfree;
235 struct iovec aiov[UIO_SMALLIOV];
236 long i, cnt, error = 0;
237 u_int iovlen;
238#ifdef KTRACE
239 struct iovec *ktriov = NULL;
240 struct uio ktruio;
241#endif
242
243 if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL)
244 return (EBADF);
245 /* note: can't use iovlen until iovcnt is validated */
246 iovlen = uap->iovcnt * sizeof (struct iovec);
247 if (uap->iovcnt > UIO_SMALLIOV) {
248 if (uap->iovcnt > UIO_MAXIOV)
249 return (EINVAL);
250 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
251 needfree = iov;
252 } else {
253 iov = aiov;
254 needfree = NULL;
255 }
256 auio.uio_iov = iov;
257 auio.uio_iovcnt = uap->iovcnt;
258 auio.uio_rw = UIO_READ;
259 auio.uio_segflg = UIO_USERSPACE;
260 auio.uio_procp = p;
261 auio.uio_offset = -1;
262 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
263 goto done;
264 auio.uio_resid = 0;
265 for (i = 0; i < uap->iovcnt; i++) {
266 if (iov->iov_len > INT_MAX - auio.uio_resid) {
267 error = EINVAL;
268 goto done;
269 }
270 auio.uio_resid += iov->iov_len;
271 iov++;
272 }
273#ifdef KTRACE
274 /*
275 * if tracing, save a copy of iovec
276 */
277 if (KTRPOINT(p, KTR_GENIO)) {
278 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
279 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
280 ktruio = auio;
281 }
282#endif
283 cnt = auio.uio_resid;
284 if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) {
285 if (auio.uio_resid != cnt && (error == ERESTART ||
286 error == EINTR || error == EWOULDBLOCK))
287 error = 0;
288 }
289 cnt -= auio.uio_resid;
290#ifdef KTRACE
291 if (ktriov != NULL) {
292 if (error == 0) {
293 ktruio.uio_iov = ktriov;
294 ktruio.uio_resid = cnt;
295 ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio,
296 error);
297 }
298 FREE(ktriov, M_TEMP);
299 }
300#endif
301 p->p_retval[0] = cnt;
302done:
303 fdrop(fp, p);
304 if (needfree)
305 FREE(needfree, M_IOV);
306 return (error);
307}
308
309/*
310 * Write system call
311 */
312#ifndef _SYS_SYSPROTO_H_
313struct write_args {
314 int fd;
315 const void *buf;
316 size_t nbyte;
317};
318#endif
319int
320write(p, uap)
321 struct proc *p;
322 register struct write_args *uap;
323{
324 register struct file *fp;
325 int error;
326
327 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
328 return (EBADF);
329 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0);
330 fdrop(fp, p);
331 return(error);
332}
333
334/*
335 * Pwrite system call
336 */
337#ifndef _SYS_SYSPROTO_H_
338struct pwrite_args {
339 int fd;
340 const void *buf;
341 size_t nbyte;
342 int pad;
343 off_t offset;
344};
345#endif
346int
347pwrite(p, uap)
348 struct proc *p;
349 register struct pwrite_args *uap;
350{
351 register struct file *fp;
352 int error;
353
354 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
355 return (EBADF);
356 if (fp->f_type != DTYPE_VNODE) {
357 error = ESPIPE;
358 } else {
359 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte,
360 uap->offset, FOF_OFFSET);
361 }
362 fdrop(fp, p);
363 return(error);
364}
365
366static int
367dofilewrite(p, fp, fd, buf, nbyte, offset, flags)
368 struct proc *p;
369 struct file *fp;
370 int fd, flags;
371 const void *buf;
372 size_t nbyte;
373 off_t offset;
374{
375 struct uio auio;
376 struct iovec aiov;
377 long cnt, error = 0;
378#ifdef KTRACE
379 struct iovec ktriov;
380 struct uio ktruio;
381 int didktr = 0;
382#endif
383
384 aiov.iov_base = (void *)(uintptr_t)buf;
385 aiov.iov_len = nbyte;
386 auio.uio_iov = &aiov;
387 auio.uio_iovcnt = 1;
388 auio.uio_offset = offset;
389 if (nbyte > INT_MAX)
390 return (EINVAL);
391 auio.uio_resid = nbyte;
392 auio.uio_rw = UIO_WRITE;
393 auio.uio_segflg = UIO_USERSPACE;
394 auio.uio_procp = p;
395#ifdef KTRACE
396 /*
397 * if tracing, save a copy of iovec and uio
398 */
399 if (KTRPOINT(p, KTR_GENIO)) {
400 ktriov = aiov;
401 ktruio = auio;
402 didktr = 1;
403 }
404#endif
405 cnt = nbyte;
406 if (fp->f_type == DTYPE_VNODE)
407 bwillwrite();
408 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) {
409 if (auio.uio_resid != cnt && (error == ERESTART ||
410 error == EINTR || error == EWOULDBLOCK))
411 error = 0;
412 if (error == EPIPE)
413 psignal(p, SIGPIPE);
414 }
415 cnt -= auio.uio_resid;
416#ifdef KTRACE
417 if (didktr && error == 0) {
418 ktruio.uio_iov = &ktriov;
419 ktruio.uio_resid = cnt;
420 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error);
421 }
422#endif
423 p->p_retval[0] = cnt;
424 return (error);
425}
426
427/*
428 * Gather write system call
429 */
430#ifndef _SYS_SYSPROTO_H_
431struct writev_args {
432 int fd;
433 struct iovec *iovp;
434 u_int iovcnt;
435};
436#endif
437int
438writev(p, uap)
439 struct proc *p;
440 register struct writev_args *uap;
441{
442 register struct file *fp;
443 register struct filedesc *fdp = p->p_fd;
444 struct uio auio;
445 register struct iovec *iov;
446 struct iovec *needfree;
447 struct iovec aiov[UIO_SMALLIOV];
448 long i, cnt, error = 0;
449 u_int iovlen;
450#ifdef KTRACE
451 struct iovec *ktriov = NULL;
452 struct uio ktruio;
453#endif
454
455 if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL)
456 return (EBADF);
457 /* note: can't use iovlen until iovcnt is validated */
458 iovlen = uap->iovcnt * sizeof (struct iovec);
459 if (uap->iovcnt > UIO_SMALLIOV) {
460 if (uap->iovcnt > UIO_MAXIOV) {
461 needfree = NULL;
462 error = EINVAL;
463 goto done;
464 }
465 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
466 needfree = iov;
467 } else {
468 iov = aiov;
469 needfree = NULL;
470 }
471 auio.uio_iov = iov;
472 auio.uio_iovcnt = uap->iovcnt;
473 auio.uio_rw = UIO_WRITE;
474 auio.uio_segflg = UIO_USERSPACE;
475 auio.uio_procp = p;
476 auio.uio_offset = -1;
477 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
478 goto done;
479 auio.uio_resid = 0;
480 for (i = 0; i < uap->iovcnt; i++) {
481 if (iov->iov_len > INT_MAX - auio.uio_resid) {
482 error = EINVAL;
483 goto done;
484 }
485 auio.uio_resid += iov->iov_len;
486 iov++;
487 }
488#ifdef KTRACE
489 /*
490 * if tracing, save a copy of iovec and uio
491 */
492 if (KTRPOINT(p, KTR_GENIO)) {
493 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
494 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
495 ktruio = auio;
496 }
497#endif
498 cnt = auio.uio_resid;
499 if (fp->f_type == DTYPE_VNODE)
500 bwillwrite();
501 if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) {
502 if (auio.uio_resid != cnt && (error == ERESTART ||
503 error == EINTR || error == EWOULDBLOCK))
504 error = 0;
505 if (error == EPIPE)
506 psignal(p, SIGPIPE);
507 }
508 cnt -= auio.uio_resid;
509#ifdef KTRACE
510 if (ktriov != NULL) {
511 if (error == 0) {
512 ktruio.uio_iov = ktriov;
513 ktruio.uio_resid = cnt;
514 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio,
515 error);
516 }
517 FREE(ktriov, M_TEMP);
518 }
519#endif
520 p->p_retval[0] = cnt;
521done:
522 fdrop(fp, p);
523 if (needfree)
524 FREE(needfree, M_IOV);
525 return (error);
526}
527
528/*
529 * Ioctl system call
530 */
531#ifndef _SYS_SYSPROTO_H_
532struct ioctl_args {
533 int fd;
534 u_long com;
535 caddr_t data;
536};
537#endif
538/* ARGSUSED */
539int
540ioctl(p, uap)
541 struct proc *p;
542 register struct ioctl_args *uap;
543{
544 register struct file *fp;
545 register struct filedesc *fdp;
546 register u_long com;
547 int error;
548 register u_int size;
549 caddr_t data, memp;
550 int tmp;
551#define STK_PARAMS 128
552 union {
553 char stkbuf[STK_PARAMS];
554 long align;
555 } ubuf;
556
557 fdp = p->p_fd;
558 if ((u_int)uap->fd >= fdp->fd_nfiles ||
559 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
560 return (EBADF);
561
562 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
563 return (EBADF);
564
565 switch (com = uap->com) {
566 case FIONCLEX:
567 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
568 return (0);
569 case FIOCLEX:
570 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
571 return (0);
572 }
573
574 /*
575 * Interpret high order word to find amount of data to be
576 * copied to/from the user's address space.
577 */
578 size = IOCPARM_LEN(com);
579 if (size > IOCPARM_MAX)
580 return (ENOTTY);
581
582 fhold(fp);
583
584 memp = NULL;
585 if (size > sizeof (ubuf.stkbuf)) {
586 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
587 data = memp;
588 } else {
589 data = ubuf.stkbuf;
590 }
591 if (com&IOC_IN) {
592 if (size) {
593 error = copyin(uap->data, data, (u_int)size);
594 if (error) {
595 if (memp)
596 free(memp, M_IOCTLOPS);
597 fdrop(fp, p);
598 return (error);
599 }
600 } else {
601 *(caddr_t *)data = uap->data;
602 }
603 } else if ((com&IOC_OUT) && size) {
604 /*
605 * Zero the buffer so the user always
606 * gets back something deterministic.
607 */
608 bzero(data, size);
609 } else if (com&IOC_VOID) {
610 *(caddr_t *)data = uap->data;
611 }
612
613 switch (com) {
614
615 case FIONBIO:
616 if ((tmp = *(int *)data))
617 fp->f_flag |= FNONBLOCK;
618 else
619 fp->f_flag &= ~FNONBLOCK;
620 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
621 break;
622
623 case FIOASYNC:
624 if ((tmp = *(int *)data))
625 fp->f_flag |= FASYNC;
626 else
627 fp->f_flag &= ~FASYNC;
628 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
629 break;
630
631 default:
632 error = fo_ioctl(fp, com, data, p);
633 /*
634 * Copy any data to user, size was
635 * already set and checked above.
636 */
637 if (error == 0 && (com&IOC_OUT) && size)
638 error = copyout(data, uap->data, (u_int)size);
639 break;
640 }
641 if (memp)
642 free(memp, M_IOCTLOPS);
643 fdrop(fp, p);
644 return (error);
645}
646
647static int nselcoll; /* Select collisions since boot */
648int selwait;
649SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
650
651/*
652 * Select system call.
653 */
654#ifndef _SYS_SYSPROTO_H_
655struct select_args {
656 int nd;
657 fd_set *in, *ou, *ex;
658 struct timeval *tv;
659};
660#endif
661int
662select(p, uap)
663 register struct proc *p;
664 register struct select_args *uap;
665{
666 /*
667 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
668 * infds with the new FD_SETSIZE of 1024, and more than enough for
669 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
670 * of 256.
671 */
672 fd_mask s_selbits[howmany(2048, NFDBITS)];
673 fd_mask *ibits[3], *obits[3], *selbits, *sbp;
674 struct timeval atv, rtv, ttv;
675 int s, ncoll, error, timo;
676 u_int nbufbytes, ncpbytes, nfdbits;
677
678 if (uap->nd < 0)
679 return (EINVAL);
680 if (uap->nd > p->p_fd->fd_nfiles)
681 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
682
683 /*
684 * Allocate just enough bits for the non-null fd_sets. Use the
685 * preallocated auto buffer if possible.
686 */
687 nfdbits = roundup(uap->nd, NFDBITS);
688 ncpbytes = nfdbits / NBBY;
689 nbufbytes = 0;
690 if (uap->in != NULL)
691 nbufbytes += 2 * ncpbytes;
692 if (uap->ou != NULL)
693 nbufbytes += 2 * ncpbytes;
694 if (uap->ex != NULL)
695 nbufbytes += 2 * ncpbytes;
696 if (nbufbytes <= sizeof s_selbits)
697 selbits = &s_selbits[0];
698 else
699 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
700
701 /*
702 * Assign pointers into the bit buffers and fetch the input bits.
703 * Put the output buffers together so that they can be bzeroed
704 * together.
705 */
706 sbp = selbits;
707#define getbits(name, x) \
708 do { \
709 if (uap->name == NULL) \
710 ibits[x] = NULL; \
711 else { \
712 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \
713 obits[x] = sbp; \
714 sbp += ncpbytes / sizeof *sbp; \
715 error = copyin(uap->name, ibits[x], ncpbytes); \
716 if (error != 0) \
717 goto done; \
718 } \
719 } while (0)
720 getbits(in, 0);
721 getbits(ou, 1);
722 getbits(ex, 2);
723#undef getbits
724 if (nbufbytes != 0)
725 bzero(selbits, nbufbytes / 2);
726
727 if (uap->tv) {
728 error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
729 sizeof (atv));
730 if (error)
731 goto done;
732 if (itimerfix(&atv)) {
733 error = EINVAL;
734 goto done;
735 }
736 getmicrouptime(&rtv);
737 timevaladd(&atv, &rtv);
738 } else {
739 atv.tv_sec = 0;
740 atv.tv_usec = 0;
741 }
742 timo = 0;
743retry:
744 ncoll = nselcoll;
745 p->p_flag |= P_SELECT;
746 error = selscan(p, ibits, obits, uap->nd);
747 if (error || p->p_retval[0])
748 goto done;
749 if (atv.tv_sec || atv.tv_usec) {
750 getmicrouptime(&rtv);
751 if (timevalcmp(&rtv, &atv, >=))
752 goto done;
753 ttv = atv;
754 timevalsub(&ttv, &rtv);
755 timo = ttv.tv_sec > 24 * 60 * 60 ?
756 24 * 60 * 60 * hz : tvtohz(&ttv);
757 }
758 s = splhigh();
759 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
760 splx(s);
761 goto retry;
762 }
763 p->p_flag &= ~P_SELECT;
764
765 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
766
767 splx(s);
768 if (error == 0)
769 goto retry;
770done:
771 p->p_flag &= ~P_SELECT;
772 /* select is not restarted after signals... */
773 if (error == ERESTART)
774 error = EINTR;
775 if (error == EWOULDBLOCK)
776 error = 0;
777#define putbits(name, x) \
778 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
779 error = error2;
780 if (error == 0) {
781 int error2;
782
783 putbits(in, 0);
784 putbits(ou, 1);
785 putbits(ex, 2);
786#undef putbits
787 }
788 if (selbits != &s_selbits[0])
789 free(selbits, M_SELECT);
790 return (error);
791}
792
793static int
794selscan(p, ibits, obits, nfd)
795 struct proc *p;
796 fd_mask **ibits, **obits;
797 int nfd;
798{
799 struct filedesc *fdp = p->p_fd;
800 int msk, i, fd;
801 fd_mask bits;
802 struct file *fp;
803 int n = 0;
804 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */
805 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
806
807 for (msk = 0; msk < 3; msk++) {
808 if (ibits[msk] == NULL)
809 continue;
810 for (i = 0; i < nfd; i += NFDBITS) {
811 bits = ibits[msk][i/NFDBITS];
812 /* ffs(int mask) not portable, fd_mask is long */
813 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
814 if (!(bits & 1))
815 continue;
816 fp = fdp->fd_ofiles[fd];
817 if (fp == NULL)
818 return (EBADF);
819 if (fo_poll(fp, flag[msk], fp->f_cred, p)) {
820 obits[msk][(fd)/NFDBITS] |=
821 ((fd_mask)1 << ((fd) % NFDBITS));
822 n++;
823 }
824 }
825 }
826 }
827 p->p_retval[0] = n;
828 return (0);
829}
830
831/*
832 * Poll system call.
833 */
834#ifndef _SYS_SYSPROTO_H_
835struct poll_args {
836 struct pollfd *fds;
837 u_int nfds;
838 int timeout;
839};
840#endif
841int
842poll(p, uap)
843 register struct proc *p;
844 register struct poll_args *uap;
845{
846 caddr_t bits;
847 char smallbits[32 * sizeof(struct pollfd)];
848 struct timeval atv, rtv, ttv;
849 int s, ncoll, error = 0, timo;
850 size_t ni;
851
852 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
853 /* forgiving; slightly wrong */
854 SCARG(uap, nfds) = p->p_fd->fd_nfiles;
855 }
856 ni = SCARG(uap, nfds) * sizeof(struct pollfd);
857 if (ni > sizeof(smallbits))
858 bits = malloc(ni, M_TEMP, M_WAITOK);
859 else
860 bits = smallbits;
861 error = copyin(SCARG(uap, fds), bits, ni);
862 if (error)
863 goto done;
864 if (SCARG(uap, timeout) != INFTIM) {
865 atv.tv_sec = SCARG(uap, timeout) / 1000;
866 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
867 if (itimerfix(&atv)) {
868 error = EINVAL;
869 goto done;
870 }
871 getmicrouptime(&rtv);
872 timevaladd(&atv, &rtv);
873 } else {
874 atv.tv_sec = 0;
875 atv.tv_usec = 0;
876 }
877 timo = 0;
878retry:
879 ncoll = nselcoll;
880 p->p_flag |= P_SELECT;
881 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds));
882 if (error || p->p_retval[0])
883 goto done;
884 if (atv.tv_sec || atv.tv_usec) {
885 getmicrouptime(&rtv);
886 if (timevalcmp(&rtv, &atv, >=))
887 goto done;
888 ttv = atv;
889 timevalsub(&ttv, &rtv);
890 timo = ttv.tv_sec > 24 * 60 * 60 ?
891 24 * 60 * 60 * hz : tvtohz(&ttv);
892 }
893 s = splhigh();
894 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
895 splx(s);
896 goto retry;
897 }
898 p->p_flag &= ~P_SELECT;
899 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
900 splx(s);
901 if (error == 0)
902 goto retry;
903done:
904 p->p_flag &= ~P_SELECT;
905 /* poll is not restarted after signals... */
906 if (error == ERESTART)
907 error = EINTR;
908 if (error == EWOULDBLOCK)
909 error = 0;
910 if (error == 0) {
911 error = copyout(bits, SCARG(uap, fds), ni);
912 if (error)
913 goto out;
914 }
915out:
916 if (ni > sizeof(smallbits))
917 free(bits, M_TEMP);
918 return (error);
919}
920
921static int
922pollscan(p, fds, nfd)
923 struct proc *p;
924 struct pollfd *fds;
925 int nfd;
926{
927 register struct filedesc *fdp = p->p_fd;
928 int i;
929 struct file *fp;
930 int n = 0;
931
932 for (i = 0; i < nfd; i++, fds++) {
933 if (fds->fd >= fdp->fd_nfiles) {
934 fds->revents = POLLNVAL;
935 n++;
936 } else if (fds->fd < 0) {
937 fds->revents = 0;
938 } else {
939 fp = fdp->fd_ofiles[fds->fd];
940 if (fp == NULL) {
941 fds->revents = POLLNVAL;
942 n++;
943 } else {
944 /*
945 * Note: backend also returns POLLHUP and
946 * POLLERR if appropriate.
947 */
948 fds->revents = fo_poll(fp, fds->events,
949 fp->f_cred, p);
950 if (fds->revents != 0)
951 n++;
952 }
953 }
954 }
955 p->p_retval[0] = n;
956 return (0);
957}
958
959/*
960 * OpenBSD poll system call.
961 * XXX this isn't quite a true representation.. OpenBSD uses select ops.
962 */
963#ifndef _SYS_SYSPROTO_H_
964struct openbsd_poll_args {
965 struct pollfd *fds;
966 u_int nfds;
967 int timeout;
968};
969#endif
970int
971openbsd_poll(p, uap)
972 register struct proc *p;
973 register struct openbsd_poll_args *uap;
974{
975 return (poll(p, (struct poll_args *)uap));
976}
977
978/*ARGSUSED*/
979int
980seltrue(dev, events, p)
981 dev_t dev;
982 int events;
983 struct proc *p;
984{
985
986 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
987}
988
989/*
990 * Record a select request.
991 */
992void
993selrecord(selector, sip)
994 struct proc *selector;
995 struct selinfo *sip;
996{
997 struct proc *p;
998 pid_t mypid;
999
1000 mypid = selector->p_pid;
1001 if (sip->si_pid == mypid)
1002 return;
1003 if (sip->si_pid && (p = pfind(sip->si_pid)) &&
1004 p->p_wchan == (caddr_t)&selwait)
1005 sip->si_flags |= SI_COLL;
1006 else
1007 sip->si_pid = mypid;
1008}
1009
1010/*
1011 * Do a wakeup when a selectable event occurs.
1012 */
1013void
1014selwakeup(sip)
1015 register struct selinfo *sip;
1016{
1017 register struct proc *p;
1018 int s;
1019
1020 if (sip->si_pid == 0)
1021 return;
1022 if (sip->si_flags & SI_COLL) {
1023 nselcoll++;
1024 sip->si_flags &= ~SI_COLL;
1025 wakeup((caddr_t)&selwait);
1026 }
1027 p = pfind(sip->si_pid);
1028 sip->si_pid = 0;
1029 if (p != NULL) {
1030 s = splhigh();
1031 mtx_enter(&sched_lock, MTX_SPIN);
1032 if (p->p_wchan == (caddr_t)&selwait) {
1033 if (p->p_stat == SSLEEP)
1034 setrunnable(p);
1035 else
1036 unsleep(p);
1037 } else if (p->p_flag & P_SELECT)
1038 p->p_flag &= ~P_SELECT;
1039 mtx_exit(&sched_lock, MTX_SPIN);
1040 splx(s);
1041 }
1042}