Deleted Added
full compact
sys_generic.c (89306) sys_generic.c (89319)
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94
39 * $FreeBSD: head/sys/kern/sys_generic.c 89306 2002-01-13 11:58:06Z alfred $
39 * $FreeBSD: head/sys/kern/sys_generic.c 89319 2002-01-14 00:13:45Z alfred $
40 */
41
42#include "opt_ktrace.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/sysproto.h>
47#include <sys/filedesc.h>
48#include <sys/filio.h>
49#include <sys/fcntl.h>
50#include <sys/file.h>
51#include <sys/proc.h>
52#include <sys/signalvar.h>
53#include <sys/socketvar.h>
54#include <sys/uio.h>
55#include <sys/kernel.h>
56#include <sys/malloc.h>
57#include <sys/poll.h>
58#include <sys/resourcevar.h>
59#include <sys/selinfo.h>
60#include <sys/sysctl.h>
61#include <sys/sysent.h>
62#include <sys/bio.h>
63#include <sys/buf.h>
64#include <sys/condvar.h>
65#ifdef KTRACE
66#include <sys/ktrace.h>
67#endif
68#include <vm/vm.h>
69#include <vm/vm_page.h>
70
71#include <machine/limits.h>
72
73static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
74static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
75MALLOC_DEFINE(M_IOV, "iov", "large iov's");
76
77static int pollscan __P((struct thread *, struct pollfd *, u_int));
78static int pollholddrop __P((struct thread *, struct pollfd *, u_int, int));
79static int selscan __P((struct thread *, fd_mask **, fd_mask **, int));
80static int selholddrop __P((struct thread *, fd_mask *, fd_mask *, int, int));
81static int dofileread __P((struct thread *, struct file *, int, void *,
82 size_t, off_t, int));
83static int dofilewrite __P((struct thread *, struct file *, int,
84 const void *, size_t, off_t, int));
85
86struct file*
87holdfp(fdp, fd, flag)
88 struct filedesc* fdp;
89 int fd, flag;
90{
91 struct file* fp;
92
93 FILEDESC_LOCK(fdp);
94 if (((u_int)fd) >= fdp->fd_nfiles ||
95 (fp = fdp->fd_ofiles[fd]) == NULL) {
96 FILEDESC_UNLOCK(fdp);
97 return (NULL);
98 }
99 FILE_LOCK(fp);
100 FILEDESC_UNLOCK(fdp);
101 if ((fp->f_flag & flag) == 0) {
102 FILE_UNLOCK(fp);
103 return (NULL);
104 }
105 fp->f_count++;
106 FILE_UNLOCK(fp);
107 return (fp);
108}
109
110/*
111 * Read system call.
112 */
113#ifndef _SYS_SYSPROTO_H_
114struct read_args {
115 int fd;
116 void *buf;
117 size_t nbyte;
118};
119#endif
120/*
121 * MPSAFE
122 */
123int
124read(td, uap)
125 struct thread *td;
126 struct read_args *uap;
127{
128 struct file *fp;
129 int error;
130
131 mtx_lock(&Giant);
132 if ((error = fget_read(td, uap->fd, &fp)) == 0) {
133 error = dofileread(td, fp, uap->fd, uap->buf,
134 uap->nbyte, (off_t)-1, 0);
135 fdrop(fp, td);
136 }
137 mtx_unlock(&Giant);
138 return(error);
139}
140
141/*
142 * Pread system call
143 */
144#ifndef _SYS_SYSPROTO_H_
145struct pread_args {
146 int fd;
147 void *buf;
148 size_t nbyte;
149 int pad;
150 off_t offset;
151};
152#endif
153/*
154 * MPSAFE
155 */
156int
157pread(td, uap)
158 struct thread *td;
159 struct pread_args *uap;
160{
161 struct file *fp;
162 int error;
163
164 fp = holdfp(td->td_proc->p_fd, uap->fd, FREAD);
165 if (fp == NULL)
166 return (EBADF);
167 if (fp->f_type != DTYPE_VNODE) {
168 error = ESPIPE;
169 } else {
170 mtx_lock(&Giant);
171 error = dofileread(td, fp, uap->fd, uap->buf, uap->nbyte,
172 uap->offset, FOF_OFFSET);
173 mtx_unlock(&Giant);
174 }
175 fdrop(fp, td);
176 return(error);
177}
178
179/*
180 * Code common for read and pread
181 */
182int
183dofileread(td, fp, fd, buf, nbyte, offset, flags)
184 struct thread *td;
185 struct file *fp;
186 int fd, flags;
187 void *buf;
188 size_t nbyte;
189 off_t offset;
190{
191 struct uio auio;
192 struct iovec aiov;
193 long cnt, error = 0;
194#ifdef KTRACE
195 struct iovec ktriov;
196 struct uio ktruio;
197 int didktr = 0;
198#endif
199
200 aiov.iov_base = (caddr_t)buf;
201 aiov.iov_len = nbyte;
202 auio.uio_iov = &aiov;
203 auio.uio_iovcnt = 1;
204 auio.uio_offset = offset;
205 if (nbyte > INT_MAX)
206 return (EINVAL);
207 auio.uio_resid = nbyte;
208 auio.uio_rw = UIO_READ;
209 auio.uio_segflg = UIO_USERSPACE;
210 auio.uio_td = td;
211#ifdef KTRACE
212 /*
213 * if tracing, save a copy of iovec
214 */
215 if (KTRPOINT(td->td_proc, KTR_GENIO)) {
216 ktriov = aiov;
217 ktruio = auio;
218 didktr = 1;
219 }
220#endif
221 cnt = nbyte;
222
223 if ((error = fo_read(fp, &auio, fp->f_cred, flags, td))) {
224 if (auio.uio_resid != cnt && (error == ERESTART ||
225 error == EINTR || error == EWOULDBLOCK))
226 error = 0;
227 }
228 cnt -= auio.uio_resid;
229#ifdef KTRACE
230 if (didktr && error == 0) {
231 ktruio.uio_iov = &ktriov;
232 ktruio.uio_resid = cnt;
233 ktrgenio(td->td_proc->p_tracep, fd, UIO_READ, &ktruio, error);
234 }
235#endif
236 td->td_retval[0] = cnt;
237 return (error);
238}
239
240/*
241 * Scatter read system call.
242 */
243#ifndef _SYS_SYSPROTO_H_
244struct readv_args {
245 int fd;
246 struct iovec *iovp;
247 u_int iovcnt;
248};
249#endif
250/*
251 * MPSAFE
252 */
253int
254readv(td, uap)
255 struct thread *td;
256 struct readv_args *uap;
257{
258 struct file *fp;
259 struct uio auio;
260 struct iovec *iov;
261 struct iovec *needfree;
262 struct iovec aiov[UIO_SMALLIOV];
263 long i, cnt, error = 0;
264 u_int iovlen;
265#ifdef KTRACE
266 struct iovec *ktriov = NULL;
267 struct uio ktruio;
268#endif
269 mtx_lock(&Giant);
270
271 if ((error = fget_read(td, uap->fd, &fp)) != 0)
272 goto done2;
273 /* note: can't use iovlen until iovcnt is validated */
274 iovlen = uap->iovcnt * sizeof (struct iovec);
275 if (uap->iovcnt > UIO_SMALLIOV) {
276 if (uap->iovcnt > UIO_MAXIOV) {
277 error = EINVAL;
278 goto done2;
279 }
280 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
281 needfree = iov;
282 } else {
283 iov = aiov;
284 needfree = NULL;
285 }
286 auio.uio_iov = iov;
287 auio.uio_iovcnt = uap->iovcnt;
288 auio.uio_rw = UIO_READ;
289 auio.uio_segflg = UIO_USERSPACE;
290 auio.uio_td = td;
291 auio.uio_offset = -1;
292 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
293 goto done;
294 auio.uio_resid = 0;
295 for (i = 0; i < uap->iovcnt; i++) {
296 if (iov->iov_len > INT_MAX - auio.uio_resid) {
297 error = EINVAL;
298 goto done;
299 }
300 auio.uio_resid += iov->iov_len;
301 iov++;
302 }
303#ifdef KTRACE
304 /*
305 * if tracing, save a copy of iovec
306 */
307 if (KTRPOINT(td->td_proc, KTR_GENIO)) {
308 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
309 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
310 ktruio = auio;
311 }
312#endif
313 cnt = auio.uio_resid;
314 if ((error = fo_read(fp, &auio, fp->f_cred, 0, td))) {
315 if (auio.uio_resid != cnt && (error == ERESTART ||
316 error == EINTR || error == EWOULDBLOCK))
317 error = 0;
318 }
319 cnt -= auio.uio_resid;
320#ifdef KTRACE
321 if (ktriov != NULL) {
322 if (error == 0) {
323 ktruio.uio_iov = ktriov;
324 ktruio.uio_resid = cnt;
325 ktrgenio(td->td_proc->p_tracep, uap->fd, UIO_READ, &ktruio,
326 error);
327 }
328 FREE(ktriov, M_TEMP);
329 }
330#endif
331 td->td_retval[0] = cnt;
332done:
333 fdrop(fp, td);
334 if (needfree)
335 FREE(needfree, M_IOV);
336done2:
337 mtx_unlock(&Giant);
338 return (error);
339}
340
341/*
342 * Write system call
343 */
344#ifndef _SYS_SYSPROTO_H_
345struct write_args {
346 int fd;
347 const void *buf;
348 size_t nbyte;
349};
350#endif
351/*
352 * MPSAFE
353 */
354int
355write(td, uap)
356 struct thread *td;
357 struct write_args *uap;
358{
359 struct file *fp;
360 int error;
361
362 mtx_lock(&Giant);
363 if ((error = fget_write(td, uap->fd, &fp)) == 0) {
364 error = dofilewrite(td, fp, uap->fd, uap->buf, uap->nbyte,
365 (off_t)-1, 0);
366 fdrop(fp, td);
367 } else {
368 error = EBADF; /* XXX this can't be right */
369 }
370 mtx_unlock(&Giant);
371 return(error);
372}
373
374/*
375 * Pwrite system call
376 */
377#ifndef _SYS_SYSPROTO_H_
378struct pwrite_args {
379 int fd;
380 const void *buf;
381 size_t nbyte;
382 int pad;
383 off_t offset;
384};
385#endif
386/*
387 * MPSAFE
388 */
389int
390pwrite(td, uap)
391 struct thread *td;
392 struct pwrite_args *uap;
393{
394 struct file *fp;
395 int error;
396
397 mtx_lock(&Giant);
398 if ((error = fget_write(td, uap->fd, &fp)) == 0) {
399 if (fp->f_type == DTYPE_VNODE) {
400 error = dofilewrite(td, fp, uap->fd, uap->buf,
401 uap->nbyte, uap->offset, FOF_OFFSET);
402 } else {
403 error = ESPIPE;
404 }
405 fdrop(fp, td);
406 } else {
407 error = EBADF; /* this can't be right */
408 }
409 return(error);
410}
411
412static int
413dofilewrite(td, fp, fd, buf, nbyte, offset, flags)
414 struct thread *td;
415 struct file *fp;
416 int fd, flags;
417 const void *buf;
418 size_t nbyte;
419 off_t offset;
420{
421 struct uio auio;
422 struct iovec aiov;
423 long cnt, error = 0;
424#ifdef KTRACE
425 struct iovec ktriov;
426 struct uio ktruio;
427 int didktr = 0;
428#endif
429
430 aiov.iov_base = (void *)(uintptr_t)buf;
431 aiov.iov_len = nbyte;
432 auio.uio_iov = &aiov;
433 auio.uio_iovcnt = 1;
434 auio.uio_offset = offset;
435 if (nbyte > INT_MAX)
436 return (EINVAL);
437 auio.uio_resid = nbyte;
438 auio.uio_rw = UIO_WRITE;
439 auio.uio_segflg = UIO_USERSPACE;
440 auio.uio_td = td;
441#ifdef KTRACE
442 /*
443 * if tracing, save a copy of iovec and uio
444 */
445 if (KTRPOINT(td->td_proc, KTR_GENIO)) {
446 ktriov = aiov;
447 ktruio = auio;
448 didktr = 1;
449 }
450#endif
451 cnt = nbyte;
452 if (fp->f_type == DTYPE_VNODE)
453 bwillwrite();
454 if ((error = fo_write(fp, &auio, fp->f_cred, flags, td))) {
455 if (auio.uio_resid != cnt && (error == ERESTART ||
456 error == EINTR || error == EWOULDBLOCK))
457 error = 0;
458 if (error == EPIPE) {
459 PROC_LOCK(td->td_proc);
460 psignal(td->td_proc, SIGPIPE);
461 PROC_UNLOCK(td->td_proc);
462 }
463 }
464 cnt -= auio.uio_resid;
465#ifdef KTRACE
466 if (didktr && error == 0) {
467 ktruio.uio_iov = &ktriov;
468 ktruio.uio_resid = cnt;
469 ktrgenio(td->td_proc->p_tracep, fd, UIO_WRITE, &ktruio, error);
470 }
471#endif
472 td->td_retval[0] = cnt;
473 return (error);
474}
475
476/*
477 * Gather write system call
478 */
479#ifndef _SYS_SYSPROTO_H_
480struct writev_args {
481 int fd;
482 struct iovec *iovp;
483 u_int iovcnt;
484};
485#endif
486/*
487 * MPSAFE
488 */
489int
490writev(td, uap)
491 struct thread *td;
492 register struct writev_args *uap;
493{
494 struct file *fp;
495 struct uio auio;
496 register struct iovec *iov;
497 struct iovec *needfree;
498 struct iovec aiov[UIO_SMALLIOV];
499 long i, cnt, error = 0;
500 u_int iovlen;
501#ifdef KTRACE
502 struct iovec *ktriov = NULL;
503 struct uio ktruio;
504#endif
505
506 mtx_lock(&Giant);
507 if ((error = fget_write(td, uap->fd, &fp)) != 0) {
508 error = EBADF;
509 goto done2;
510 }
511 /* note: can't use iovlen until iovcnt is validated */
512 iovlen = uap->iovcnt * sizeof (struct iovec);
513 if (uap->iovcnt > UIO_SMALLIOV) {
514 if (uap->iovcnt > UIO_MAXIOV) {
515 needfree = NULL;
516 error = EINVAL;
517 goto done;
518 }
519 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
520 needfree = iov;
521 } else {
522 iov = aiov;
523 needfree = NULL;
524 }
525 auio.uio_iov = iov;
526 auio.uio_iovcnt = uap->iovcnt;
527 auio.uio_rw = UIO_WRITE;
528 auio.uio_segflg = UIO_USERSPACE;
529 auio.uio_td = td;
530 auio.uio_offset = -1;
531 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
532 goto done;
533 auio.uio_resid = 0;
534 for (i = 0; i < uap->iovcnt; i++) {
535 if (iov->iov_len > INT_MAX - auio.uio_resid) {
536 error = EINVAL;
537 goto done;
538 }
539 auio.uio_resid += iov->iov_len;
540 iov++;
541 }
542#ifdef KTRACE
543 /*
544 * if tracing, save a copy of iovec and uio
545 */
546 if (KTRPOINT(td->td_proc, KTR_GENIO)) {
547 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
548 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
549 ktruio = auio;
550 }
551#endif
552 cnt = auio.uio_resid;
553 if (fp->f_type == DTYPE_VNODE)
554 bwillwrite();
555 if ((error = fo_write(fp, &auio, fp->f_cred, 0, td))) {
556 if (auio.uio_resid != cnt && (error == ERESTART ||
557 error == EINTR || error == EWOULDBLOCK))
558 error = 0;
559 if (error == EPIPE) {
560 PROC_LOCK(td->td_proc);
561 psignal(td->td_proc, SIGPIPE);
562 PROC_UNLOCK(td->td_proc);
563 }
564 }
565 cnt -= auio.uio_resid;
566#ifdef KTRACE
567 if (ktriov != NULL) {
568 if (error == 0) {
569 ktruio.uio_iov = ktriov;
570 ktruio.uio_resid = cnt;
571 ktrgenio(td->td_proc->p_tracep, uap->fd, UIO_WRITE, &ktruio,
572 error);
573 }
574 FREE(ktriov, M_TEMP);
575 }
576#endif
577 td->td_retval[0] = cnt;
578done:
579 fdrop(fp, td);
580 if (needfree)
581 FREE(needfree, M_IOV);
582done2:
583 mtx_unlock(&Giant);
584 return (error);
585}
586
587/*
588 * Ioctl system call
589 */
590#ifndef _SYS_SYSPROTO_H_
591struct ioctl_args {
592 int fd;
593 u_long com;
594 caddr_t data;
595};
596#endif
597/*
598 * MPSAFE
599 */
600/* ARGSUSED */
601int
602ioctl(td, uap)
603 struct thread *td;
604 register struct ioctl_args *uap;
605{
40 */
41
42#include "opt_ktrace.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/sysproto.h>
47#include <sys/filedesc.h>
48#include <sys/filio.h>
49#include <sys/fcntl.h>
50#include <sys/file.h>
51#include <sys/proc.h>
52#include <sys/signalvar.h>
53#include <sys/socketvar.h>
54#include <sys/uio.h>
55#include <sys/kernel.h>
56#include <sys/malloc.h>
57#include <sys/poll.h>
58#include <sys/resourcevar.h>
59#include <sys/selinfo.h>
60#include <sys/sysctl.h>
61#include <sys/sysent.h>
62#include <sys/bio.h>
63#include <sys/buf.h>
64#include <sys/condvar.h>
65#ifdef KTRACE
66#include <sys/ktrace.h>
67#endif
68#include <vm/vm.h>
69#include <vm/vm_page.h>
70
71#include <machine/limits.h>
72
73static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
74static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
75MALLOC_DEFINE(M_IOV, "iov", "large iov's");
76
77static int pollscan __P((struct thread *, struct pollfd *, u_int));
78static int pollholddrop __P((struct thread *, struct pollfd *, u_int, int));
79static int selscan __P((struct thread *, fd_mask **, fd_mask **, int));
80static int selholddrop __P((struct thread *, fd_mask *, fd_mask *, int, int));
81static int dofileread __P((struct thread *, struct file *, int, void *,
82 size_t, off_t, int));
83static int dofilewrite __P((struct thread *, struct file *, int,
84 const void *, size_t, off_t, int));
85
86struct file*
87holdfp(fdp, fd, flag)
88 struct filedesc* fdp;
89 int fd, flag;
90{
91 struct file* fp;
92
93 FILEDESC_LOCK(fdp);
94 if (((u_int)fd) >= fdp->fd_nfiles ||
95 (fp = fdp->fd_ofiles[fd]) == NULL) {
96 FILEDESC_UNLOCK(fdp);
97 return (NULL);
98 }
99 FILE_LOCK(fp);
100 FILEDESC_UNLOCK(fdp);
101 if ((fp->f_flag & flag) == 0) {
102 FILE_UNLOCK(fp);
103 return (NULL);
104 }
105 fp->f_count++;
106 FILE_UNLOCK(fp);
107 return (fp);
108}
109
110/*
111 * Read system call.
112 */
113#ifndef _SYS_SYSPROTO_H_
114struct read_args {
115 int fd;
116 void *buf;
117 size_t nbyte;
118};
119#endif
120/*
121 * MPSAFE
122 */
123int
124read(td, uap)
125 struct thread *td;
126 struct read_args *uap;
127{
128 struct file *fp;
129 int error;
130
131 mtx_lock(&Giant);
132 if ((error = fget_read(td, uap->fd, &fp)) == 0) {
133 error = dofileread(td, fp, uap->fd, uap->buf,
134 uap->nbyte, (off_t)-1, 0);
135 fdrop(fp, td);
136 }
137 mtx_unlock(&Giant);
138 return(error);
139}
140
141/*
142 * Pread system call
143 */
144#ifndef _SYS_SYSPROTO_H_
145struct pread_args {
146 int fd;
147 void *buf;
148 size_t nbyte;
149 int pad;
150 off_t offset;
151};
152#endif
153/*
154 * MPSAFE
155 */
156int
157pread(td, uap)
158 struct thread *td;
159 struct pread_args *uap;
160{
161 struct file *fp;
162 int error;
163
164 fp = holdfp(td->td_proc->p_fd, uap->fd, FREAD);
165 if (fp == NULL)
166 return (EBADF);
167 if (fp->f_type != DTYPE_VNODE) {
168 error = ESPIPE;
169 } else {
170 mtx_lock(&Giant);
171 error = dofileread(td, fp, uap->fd, uap->buf, uap->nbyte,
172 uap->offset, FOF_OFFSET);
173 mtx_unlock(&Giant);
174 }
175 fdrop(fp, td);
176 return(error);
177}
178
179/*
180 * Code common for read and pread
181 */
182int
183dofileread(td, fp, fd, buf, nbyte, offset, flags)
184 struct thread *td;
185 struct file *fp;
186 int fd, flags;
187 void *buf;
188 size_t nbyte;
189 off_t offset;
190{
191 struct uio auio;
192 struct iovec aiov;
193 long cnt, error = 0;
194#ifdef KTRACE
195 struct iovec ktriov;
196 struct uio ktruio;
197 int didktr = 0;
198#endif
199
200 aiov.iov_base = (caddr_t)buf;
201 aiov.iov_len = nbyte;
202 auio.uio_iov = &aiov;
203 auio.uio_iovcnt = 1;
204 auio.uio_offset = offset;
205 if (nbyte > INT_MAX)
206 return (EINVAL);
207 auio.uio_resid = nbyte;
208 auio.uio_rw = UIO_READ;
209 auio.uio_segflg = UIO_USERSPACE;
210 auio.uio_td = td;
211#ifdef KTRACE
212 /*
213 * if tracing, save a copy of iovec
214 */
215 if (KTRPOINT(td->td_proc, KTR_GENIO)) {
216 ktriov = aiov;
217 ktruio = auio;
218 didktr = 1;
219 }
220#endif
221 cnt = nbyte;
222
223 if ((error = fo_read(fp, &auio, fp->f_cred, flags, td))) {
224 if (auio.uio_resid != cnt && (error == ERESTART ||
225 error == EINTR || error == EWOULDBLOCK))
226 error = 0;
227 }
228 cnt -= auio.uio_resid;
229#ifdef KTRACE
230 if (didktr && error == 0) {
231 ktruio.uio_iov = &ktriov;
232 ktruio.uio_resid = cnt;
233 ktrgenio(td->td_proc->p_tracep, fd, UIO_READ, &ktruio, error);
234 }
235#endif
236 td->td_retval[0] = cnt;
237 return (error);
238}
239
240/*
241 * Scatter read system call.
242 */
243#ifndef _SYS_SYSPROTO_H_
244struct readv_args {
245 int fd;
246 struct iovec *iovp;
247 u_int iovcnt;
248};
249#endif
250/*
251 * MPSAFE
252 */
253int
254readv(td, uap)
255 struct thread *td;
256 struct readv_args *uap;
257{
258 struct file *fp;
259 struct uio auio;
260 struct iovec *iov;
261 struct iovec *needfree;
262 struct iovec aiov[UIO_SMALLIOV];
263 long i, cnt, error = 0;
264 u_int iovlen;
265#ifdef KTRACE
266 struct iovec *ktriov = NULL;
267 struct uio ktruio;
268#endif
269 mtx_lock(&Giant);
270
271 if ((error = fget_read(td, uap->fd, &fp)) != 0)
272 goto done2;
273 /* note: can't use iovlen until iovcnt is validated */
274 iovlen = uap->iovcnt * sizeof (struct iovec);
275 if (uap->iovcnt > UIO_SMALLIOV) {
276 if (uap->iovcnt > UIO_MAXIOV) {
277 error = EINVAL;
278 goto done2;
279 }
280 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
281 needfree = iov;
282 } else {
283 iov = aiov;
284 needfree = NULL;
285 }
286 auio.uio_iov = iov;
287 auio.uio_iovcnt = uap->iovcnt;
288 auio.uio_rw = UIO_READ;
289 auio.uio_segflg = UIO_USERSPACE;
290 auio.uio_td = td;
291 auio.uio_offset = -1;
292 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
293 goto done;
294 auio.uio_resid = 0;
295 for (i = 0; i < uap->iovcnt; i++) {
296 if (iov->iov_len > INT_MAX - auio.uio_resid) {
297 error = EINVAL;
298 goto done;
299 }
300 auio.uio_resid += iov->iov_len;
301 iov++;
302 }
303#ifdef KTRACE
304 /*
305 * if tracing, save a copy of iovec
306 */
307 if (KTRPOINT(td->td_proc, KTR_GENIO)) {
308 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
309 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
310 ktruio = auio;
311 }
312#endif
313 cnt = auio.uio_resid;
314 if ((error = fo_read(fp, &auio, fp->f_cred, 0, td))) {
315 if (auio.uio_resid != cnt && (error == ERESTART ||
316 error == EINTR || error == EWOULDBLOCK))
317 error = 0;
318 }
319 cnt -= auio.uio_resid;
320#ifdef KTRACE
321 if (ktriov != NULL) {
322 if (error == 0) {
323 ktruio.uio_iov = ktriov;
324 ktruio.uio_resid = cnt;
325 ktrgenio(td->td_proc->p_tracep, uap->fd, UIO_READ, &ktruio,
326 error);
327 }
328 FREE(ktriov, M_TEMP);
329 }
330#endif
331 td->td_retval[0] = cnt;
332done:
333 fdrop(fp, td);
334 if (needfree)
335 FREE(needfree, M_IOV);
336done2:
337 mtx_unlock(&Giant);
338 return (error);
339}
340
341/*
342 * Write system call
343 */
344#ifndef _SYS_SYSPROTO_H_
345struct write_args {
346 int fd;
347 const void *buf;
348 size_t nbyte;
349};
350#endif
351/*
352 * MPSAFE
353 */
354int
355write(td, uap)
356 struct thread *td;
357 struct write_args *uap;
358{
359 struct file *fp;
360 int error;
361
362 mtx_lock(&Giant);
363 if ((error = fget_write(td, uap->fd, &fp)) == 0) {
364 error = dofilewrite(td, fp, uap->fd, uap->buf, uap->nbyte,
365 (off_t)-1, 0);
366 fdrop(fp, td);
367 } else {
368 error = EBADF; /* XXX this can't be right */
369 }
370 mtx_unlock(&Giant);
371 return(error);
372}
373
374/*
375 * Pwrite system call
376 */
377#ifndef _SYS_SYSPROTO_H_
378struct pwrite_args {
379 int fd;
380 const void *buf;
381 size_t nbyte;
382 int pad;
383 off_t offset;
384};
385#endif
386/*
387 * MPSAFE
388 */
389int
390pwrite(td, uap)
391 struct thread *td;
392 struct pwrite_args *uap;
393{
394 struct file *fp;
395 int error;
396
397 mtx_lock(&Giant);
398 if ((error = fget_write(td, uap->fd, &fp)) == 0) {
399 if (fp->f_type == DTYPE_VNODE) {
400 error = dofilewrite(td, fp, uap->fd, uap->buf,
401 uap->nbyte, uap->offset, FOF_OFFSET);
402 } else {
403 error = ESPIPE;
404 }
405 fdrop(fp, td);
406 } else {
407 error = EBADF; /* this can't be right */
408 }
409 return(error);
410}
411
412static int
413dofilewrite(td, fp, fd, buf, nbyte, offset, flags)
414 struct thread *td;
415 struct file *fp;
416 int fd, flags;
417 const void *buf;
418 size_t nbyte;
419 off_t offset;
420{
421 struct uio auio;
422 struct iovec aiov;
423 long cnt, error = 0;
424#ifdef KTRACE
425 struct iovec ktriov;
426 struct uio ktruio;
427 int didktr = 0;
428#endif
429
430 aiov.iov_base = (void *)(uintptr_t)buf;
431 aiov.iov_len = nbyte;
432 auio.uio_iov = &aiov;
433 auio.uio_iovcnt = 1;
434 auio.uio_offset = offset;
435 if (nbyte > INT_MAX)
436 return (EINVAL);
437 auio.uio_resid = nbyte;
438 auio.uio_rw = UIO_WRITE;
439 auio.uio_segflg = UIO_USERSPACE;
440 auio.uio_td = td;
441#ifdef KTRACE
442 /*
443 * if tracing, save a copy of iovec and uio
444 */
445 if (KTRPOINT(td->td_proc, KTR_GENIO)) {
446 ktriov = aiov;
447 ktruio = auio;
448 didktr = 1;
449 }
450#endif
451 cnt = nbyte;
452 if (fp->f_type == DTYPE_VNODE)
453 bwillwrite();
454 if ((error = fo_write(fp, &auio, fp->f_cred, flags, td))) {
455 if (auio.uio_resid != cnt && (error == ERESTART ||
456 error == EINTR || error == EWOULDBLOCK))
457 error = 0;
458 if (error == EPIPE) {
459 PROC_LOCK(td->td_proc);
460 psignal(td->td_proc, SIGPIPE);
461 PROC_UNLOCK(td->td_proc);
462 }
463 }
464 cnt -= auio.uio_resid;
465#ifdef KTRACE
466 if (didktr && error == 0) {
467 ktruio.uio_iov = &ktriov;
468 ktruio.uio_resid = cnt;
469 ktrgenio(td->td_proc->p_tracep, fd, UIO_WRITE, &ktruio, error);
470 }
471#endif
472 td->td_retval[0] = cnt;
473 return (error);
474}
475
476/*
477 * Gather write system call
478 */
479#ifndef _SYS_SYSPROTO_H_
480struct writev_args {
481 int fd;
482 struct iovec *iovp;
483 u_int iovcnt;
484};
485#endif
486/*
487 * MPSAFE
488 */
489int
490writev(td, uap)
491 struct thread *td;
492 register struct writev_args *uap;
493{
494 struct file *fp;
495 struct uio auio;
496 register struct iovec *iov;
497 struct iovec *needfree;
498 struct iovec aiov[UIO_SMALLIOV];
499 long i, cnt, error = 0;
500 u_int iovlen;
501#ifdef KTRACE
502 struct iovec *ktriov = NULL;
503 struct uio ktruio;
504#endif
505
506 mtx_lock(&Giant);
507 if ((error = fget_write(td, uap->fd, &fp)) != 0) {
508 error = EBADF;
509 goto done2;
510 }
511 /* note: can't use iovlen until iovcnt is validated */
512 iovlen = uap->iovcnt * sizeof (struct iovec);
513 if (uap->iovcnt > UIO_SMALLIOV) {
514 if (uap->iovcnt > UIO_MAXIOV) {
515 needfree = NULL;
516 error = EINVAL;
517 goto done;
518 }
519 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
520 needfree = iov;
521 } else {
522 iov = aiov;
523 needfree = NULL;
524 }
525 auio.uio_iov = iov;
526 auio.uio_iovcnt = uap->iovcnt;
527 auio.uio_rw = UIO_WRITE;
528 auio.uio_segflg = UIO_USERSPACE;
529 auio.uio_td = td;
530 auio.uio_offset = -1;
531 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
532 goto done;
533 auio.uio_resid = 0;
534 for (i = 0; i < uap->iovcnt; i++) {
535 if (iov->iov_len > INT_MAX - auio.uio_resid) {
536 error = EINVAL;
537 goto done;
538 }
539 auio.uio_resid += iov->iov_len;
540 iov++;
541 }
542#ifdef KTRACE
543 /*
544 * if tracing, save a copy of iovec and uio
545 */
546 if (KTRPOINT(td->td_proc, KTR_GENIO)) {
547 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
548 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
549 ktruio = auio;
550 }
551#endif
552 cnt = auio.uio_resid;
553 if (fp->f_type == DTYPE_VNODE)
554 bwillwrite();
555 if ((error = fo_write(fp, &auio, fp->f_cred, 0, td))) {
556 if (auio.uio_resid != cnt && (error == ERESTART ||
557 error == EINTR || error == EWOULDBLOCK))
558 error = 0;
559 if (error == EPIPE) {
560 PROC_LOCK(td->td_proc);
561 psignal(td->td_proc, SIGPIPE);
562 PROC_UNLOCK(td->td_proc);
563 }
564 }
565 cnt -= auio.uio_resid;
566#ifdef KTRACE
567 if (ktriov != NULL) {
568 if (error == 0) {
569 ktruio.uio_iov = ktriov;
570 ktruio.uio_resid = cnt;
571 ktrgenio(td->td_proc->p_tracep, uap->fd, UIO_WRITE, &ktruio,
572 error);
573 }
574 FREE(ktriov, M_TEMP);
575 }
576#endif
577 td->td_retval[0] = cnt;
578done:
579 fdrop(fp, td);
580 if (needfree)
581 FREE(needfree, M_IOV);
582done2:
583 mtx_unlock(&Giant);
584 return (error);
585}
586
587/*
588 * Ioctl system call
589 */
590#ifndef _SYS_SYSPROTO_H_
591struct ioctl_args {
592 int fd;
593 u_long com;
594 caddr_t data;
595};
596#endif
597/*
598 * MPSAFE
599 */
600/* ARGSUSED */
601int
602ioctl(td, uap)
603 struct thread *td;
604 register struct ioctl_args *uap;
605{
606 register struct file *fp;
606 struct file *fp;
607 register struct filedesc *fdp;
608 register u_long com;
609 int error = 0;
610 register u_int size;
611 caddr_t data, memp;
612 int tmp;
613#define STK_PARAMS 128
614 union {
615 char stkbuf[STK_PARAMS];
616 long align;
617 } ubuf;
618
607 register struct filedesc *fdp;
608 register u_long com;
609 int error = 0;
610 register u_int size;
611 caddr_t data, memp;
612 int tmp;
613#define STK_PARAMS 128
614 union {
615 char stkbuf[STK_PARAMS];
616 long align;
617 } ubuf;
618
619 fp = ffind_hold(td, uap->fd);
620 if (fp == NULL)
621 return (EBADF);
619 if ((error = fget(td, uap->fd, &fp)) != 0)
620 return (error);
622 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
623 fdrop(fp, td);
624 return (EBADF);
625 }
626 fdp = td->td_proc->p_fd;
627 switch (com = uap->com) {
628 case FIONCLEX:
629 FILEDESC_LOCK(fdp);
630 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
631 FILEDESC_UNLOCK(fdp);
632 fdrop(fp, td);
633 return (0);
634 case FIOCLEX:
635 FILEDESC_LOCK(fdp);
636 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
637 FILEDESC_UNLOCK(fdp);
638 fdrop(fp, td);
639 return (0);
640 }
641
642 /*
643 * Interpret high order word to find amount of data to be
644 * copied to/from the user's address space.
645 */
646 size = IOCPARM_LEN(com);
647 if (size > IOCPARM_MAX) {
648 fdrop(fp, td);
649 return (ENOTTY);
650 }
651
652 mtx_lock(&Giant);
653 memp = NULL;
654 if (size > sizeof (ubuf.stkbuf)) {
655 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
656 data = memp;
657 } else {
658 data = ubuf.stkbuf;
659 }
660 if (com&IOC_IN) {
661 if (size) {
662 error = copyin(uap->data, data, (u_int)size);
663 if (error) {
664 if (memp)
665 free(memp, M_IOCTLOPS);
666 fdrop(fp, td);
667 goto done;
668 }
669 } else {
670 *(caddr_t *)data = uap->data;
671 }
672 } else if ((com&IOC_OUT) && size) {
673 /*
674 * Zero the buffer so the user always
675 * gets back something deterministic.
676 */
677 bzero(data, size);
678 } else if (com&IOC_VOID) {
679 *(caddr_t *)data = uap->data;
680 }
681
682 switch (com) {
683
684 case FIONBIO:
685 FILE_LOCK(fp);
686 if ((tmp = *(int *)data))
687 fp->f_flag |= FNONBLOCK;
688 else
689 fp->f_flag &= ~FNONBLOCK;
690 FILE_UNLOCK(fp);
691 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
692 break;
693
694 case FIOASYNC:
695 FILE_LOCK(fp);
696 if ((tmp = *(int *)data))
697 fp->f_flag |= FASYNC;
698 else
699 fp->f_flag &= ~FASYNC;
700 FILE_UNLOCK(fp);
701 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
702 break;
703
704 default:
705 error = fo_ioctl(fp, com, data, td);
706 /*
707 * Copy any data to user, size was
708 * already set and checked above.
709 */
710 if (error == 0 && (com&IOC_OUT) && size)
711 error = copyout(data, uap->data, (u_int)size);
712 break;
713 }
714 if (memp)
715 free(memp, M_IOCTLOPS);
716 fdrop(fp, td);
717done:
718 mtx_unlock(&Giant);
719 return (error);
720}
721
722static int nselcoll; /* Select collisions since boot */
723struct cv selwait;
724SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
725
726/*
727 * Select system call.
728 */
729#ifndef _SYS_SYSPROTO_H_
730struct select_args {
731 int nd;
732 fd_set *in, *ou, *ex;
733 struct timeval *tv;
734};
735#endif
736/*
737 * MPSAFE
738 */
739int
740select(td, uap)
741 register struct thread *td;
742 register struct select_args *uap;
743{
744 struct filedesc *fdp;
745 /*
746 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
747 * infds with the new FD_SETSIZE of 1024, and more than enough for
748 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
749 * of 256.
750 */
751 fd_mask s_selbits[howmany(2048, NFDBITS)];
752 fd_mask s_heldbits[howmany(2048, NFDBITS)];
753 fd_mask *ibits[3], *obits[3], *selbits, *sbp, *heldbits, *hibits, *hobits;
754 struct timeval atv, rtv, ttv;
755 int ncoll, error, timo, i;
756 u_int nbufbytes, ncpbytes, nfdbits;
757
758 if (uap->nd < 0)
759 return (EINVAL);
760 fdp = td->td_proc->p_fd;
761 mtx_lock(&Giant);
762 FILEDESC_LOCK(fdp);
763
764 if (uap->nd > td->td_proc->p_fd->fd_nfiles)
765 uap->nd = td->td_proc->p_fd->fd_nfiles; /* forgiving; slightly wrong */
766 FILEDESC_UNLOCK(fdp);
767
768 /*
769 * Allocate just enough bits for the non-null fd_sets. Use the
770 * preallocated auto buffer if possible.
771 */
772 nfdbits = roundup(uap->nd, NFDBITS);
773 ncpbytes = nfdbits / NBBY;
774 nbufbytes = 0;
775 if (uap->in != NULL)
776 nbufbytes += 2 * ncpbytes;
777 if (uap->ou != NULL)
778 nbufbytes += 2 * ncpbytes;
779 if (uap->ex != NULL)
780 nbufbytes += 2 * ncpbytes;
781 if (nbufbytes <= sizeof s_selbits)
782 selbits = &s_selbits[0];
783 else
784 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
785 if (2 * ncpbytes <= sizeof s_heldbits) {
786 bzero(s_heldbits, sizeof(s_heldbits));
787 heldbits = &s_heldbits[0];
788 } else
789 heldbits = malloc(2 * ncpbytes, M_SELECT, M_WAITOK | M_ZERO);
790
791 /*
792 * Assign pointers into the bit buffers and fetch the input bits.
793 * Put the output buffers together so that they can be bzeroed
794 * together.
795 */
796 sbp = selbits;
797 hibits = heldbits + ncpbytes / sizeof *heldbits;
798 hobits = heldbits;
799#define getbits(name, x) \
800 do { \
801 if (uap->name == NULL) \
802 ibits[x] = NULL; \
803 else { \
804 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \
805 obits[x] = sbp; \
806 sbp += ncpbytes / sizeof *sbp; \
807 error = copyin(uap->name, ibits[x], ncpbytes); \
808 if (error != 0) \
809 goto done_noproclock; \
810 for (i = 0; \
811 i < ncpbytes / sizeof ibits[i][0]; \
812 i++) \
813 hibits[i] |= ibits[x][i]; \
814 } \
815 } while (0)
816 getbits(in, 0);
817 getbits(ou, 1);
818 getbits(ex, 2);
819#undef getbits
820 if (nbufbytes != 0)
821 bzero(selbits, nbufbytes / 2);
822
823 if (uap->tv) {
824 error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
825 sizeof (atv));
826 if (error)
827 goto done_noproclock;
828 if (itimerfix(&atv)) {
829 error = EINVAL;
830 goto done_noproclock;
831 }
832 getmicrouptime(&rtv);
833 timevaladd(&atv, &rtv);
834 } else {
835 atv.tv_sec = 0;
836 atv.tv_usec = 0;
837 }
838 selholddrop(td, hibits, hobits, uap->nd, 1);
839 timo = 0;
840 PROC_LOCK(td->td_proc);
841retry:
842 ncoll = nselcoll;
843 mtx_lock_spin(&sched_lock);
844 td->td_flags |= TDF_SELECT;
845 mtx_unlock_spin(&sched_lock);
846 PROC_UNLOCK(td->td_proc);
847 error = selscan(td, ibits, obits, uap->nd);
848 PROC_LOCK(td->td_proc);
849 if (error || td->td_retval[0])
850 goto done;
851 if (atv.tv_sec || atv.tv_usec) {
852 getmicrouptime(&rtv);
853 if (timevalcmp(&rtv, &atv, >=)) {
854 /*
855 * An event of our interest may occur during locking a process.
856 * In order to avoid missing the event that occured during locking
857 * the process, test TDF_SELECT and rescan file descriptors if
858 * necessary.
859 */
860 mtx_lock_spin(&sched_lock);
861 if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
862 ncoll = nselcoll;
863 td->td_flags |= TDF_SELECT;
864 mtx_unlock_spin(&sched_lock);
865 PROC_UNLOCK(td->td_proc);
866 error = selscan(td, ibits, obits, uap->nd);
867 PROC_LOCK(td->td_proc);
868 } else
869 mtx_unlock_spin(&sched_lock);
870 goto done;
871 }
872 ttv = atv;
873 timevalsub(&ttv, &rtv);
874 timo = ttv.tv_sec > 24 * 60 * 60 ?
875 24 * 60 * 60 * hz : tvtohz(&ttv);
876 }
877 mtx_lock_spin(&sched_lock);
878 td->td_flags &= ~TDF_SELECT;
879 mtx_unlock_spin(&sched_lock);
880
881 if (timo > 0)
882 error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo);
883 else
884 error = cv_wait_sig(&selwait, &td->td_proc->p_mtx);
885
886 if (error == 0)
887 goto retry;
888
889done:
890 mtx_lock_spin(&sched_lock);
891 td->td_flags &= ~TDF_SELECT;
892 mtx_unlock_spin(&sched_lock);
893 PROC_UNLOCK(td->td_proc);
894 selholddrop(td, hibits, hobits, uap->nd, 0);
895done_noproclock:
896 /* select is not restarted after signals... */
897 if (error == ERESTART)
898 error = EINTR;
899 if (error == EWOULDBLOCK)
900 error = 0;
901#define putbits(name, x) \
902 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
903 error = error2;
904 if (error == 0) {
905 int error2;
906
907 putbits(in, 0);
908 putbits(ou, 1);
909 putbits(ex, 2);
910#undef putbits
911 }
912 if (selbits != &s_selbits[0])
913 free(selbits, M_SELECT);
914 if (heldbits != &s_heldbits[0])
915 free(heldbits, M_SELECT);
916
917 mtx_unlock(&Giant);
918 return (error);
919}
920
921/*
922 * Used to hold then release a group of fds for select(2).
923 * Hold (hold == 1) or release (hold == 0) a group of filedescriptors.
924 * if holding then use ibits setting the bits in obits, otherwise use obits.
925 */
926static int
927selholddrop(td, ibits, obits, nfd, hold)
928 struct thread *td;
929 fd_mask *ibits, *obits;
930 int nfd, hold;
931{
932 struct filedesc *fdp = td->td_proc->p_fd;
933 int i, fd;
934 fd_mask bits;
935 struct file *fp;
936
937 FILEDESC_LOCK(fdp);
938 for (i = 0; i < nfd; i += NFDBITS) {
939 if (hold)
940 bits = ibits[i/NFDBITS];
941 else
942 bits = obits[i/NFDBITS];
943 /* ffs(int mask) not portable, fd_mask is long */
944 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
945 if (!(bits & 1))
946 continue;
947 fp = fdp->fd_ofiles[fd];
948 if (fp == NULL) {
949 FILEDESC_UNLOCK(fdp);
950 return (EBADF);
951 }
952 if (hold) {
953 fhold(fp);
954 obits[(fd)/NFDBITS] |=
955 ((fd_mask)1 << ((fd) % NFDBITS));
956 } else {
957 /* XXX: optimize by making a special
958 * version of fdrop that only unlocks
959 * the filedesc if needed? This would
960 * redcuce the number of lock/unlock
961 * pairs by quite a bit.
962 */
963 FILEDESC_UNLOCK(fdp);
964 fdrop(fp, td);
965 FILEDESC_LOCK(fdp);
966 }
967 }
968 }
969 FILEDESC_UNLOCK(fdp);
970 return (0);
971}
972
973static int
974selscan(td, ibits, obits, nfd)
975 struct thread *td;
976 fd_mask **ibits, **obits;
977 int nfd;
978{
979 int msk, i, fd;
980 fd_mask bits;
981 struct file *fp;
982 int n = 0;
983 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */
984 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
985
986 for (msk = 0; msk < 3; msk++) {
987 if (ibits[msk] == NULL)
988 continue;
989 for (i = 0; i < nfd; i += NFDBITS) {
990 bits = ibits[msk][i/NFDBITS];
991 /* ffs(int mask) not portable, fd_mask is long */
992 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
993 if (!(bits & 1))
994 continue;
621 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
622 fdrop(fp, td);
623 return (EBADF);
624 }
625 fdp = td->td_proc->p_fd;
626 switch (com = uap->com) {
627 case FIONCLEX:
628 FILEDESC_LOCK(fdp);
629 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
630 FILEDESC_UNLOCK(fdp);
631 fdrop(fp, td);
632 return (0);
633 case FIOCLEX:
634 FILEDESC_LOCK(fdp);
635 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
636 FILEDESC_UNLOCK(fdp);
637 fdrop(fp, td);
638 return (0);
639 }
640
641 /*
642 * Interpret high order word to find amount of data to be
643 * copied to/from the user's address space.
644 */
645 size = IOCPARM_LEN(com);
646 if (size > IOCPARM_MAX) {
647 fdrop(fp, td);
648 return (ENOTTY);
649 }
650
651 mtx_lock(&Giant);
652 memp = NULL;
653 if (size > sizeof (ubuf.stkbuf)) {
654 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
655 data = memp;
656 } else {
657 data = ubuf.stkbuf;
658 }
659 if (com&IOC_IN) {
660 if (size) {
661 error = copyin(uap->data, data, (u_int)size);
662 if (error) {
663 if (memp)
664 free(memp, M_IOCTLOPS);
665 fdrop(fp, td);
666 goto done;
667 }
668 } else {
669 *(caddr_t *)data = uap->data;
670 }
671 } else if ((com&IOC_OUT) && size) {
672 /*
673 * Zero the buffer so the user always
674 * gets back something deterministic.
675 */
676 bzero(data, size);
677 } else if (com&IOC_VOID) {
678 *(caddr_t *)data = uap->data;
679 }
680
681 switch (com) {
682
683 case FIONBIO:
684 FILE_LOCK(fp);
685 if ((tmp = *(int *)data))
686 fp->f_flag |= FNONBLOCK;
687 else
688 fp->f_flag &= ~FNONBLOCK;
689 FILE_UNLOCK(fp);
690 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
691 break;
692
693 case FIOASYNC:
694 FILE_LOCK(fp);
695 if ((tmp = *(int *)data))
696 fp->f_flag |= FASYNC;
697 else
698 fp->f_flag &= ~FASYNC;
699 FILE_UNLOCK(fp);
700 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
701 break;
702
703 default:
704 error = fo_ioctl(fp, com, data, td);
705 /*
706 * Copy any data to user, size was
707 * already set and checked above.
708 */
709 if (error == 0 && (com&IOC_OUT) && size)
710 error = copyout(data, uap->data, (u_int)size);
711 break;
712 }
713 if (memp)
714 free(memp, M_IOCTLOPS);
715 fdrop(fp, td);
716done:
717 mtx_unlock(&Giant);
718 return (error);
719}
720
721static int nselcoll; /* Select collisions since boot */
722struct cv selwait;
723SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
724
725/*
726 * Select system call.
727 */
728#ifndef _SYS_SYSPROTO_H_
729struct select_args {
730 int nd;
731 fd_set *in, *ou, *ex;
732 struct timeval *tv;
733};
734#endif
735/*
736 * MPSAFE
737 */
738int
739select(td, uap)
740 register struct thread *td;
741 register struct select_args *uap;
742{
743 struct filedesc *fdp;
744 /*
745 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
746 * infds with the new FD_SETSIZE of 1024, and more than enough for
747 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
748 * of 256.
749 */
750 fd_mask s_selbits[howmany(2048, NFDBITS)];
751 fd_mask s_heldbits[howmany(2048, NFDBITS)];
752 fd_mask *ibits[3], *obits[3], *selbits, *sbp, *heldbits, *hibits, *hobits;
753 struct timeval atv, rtv, ttv;
754 int ncoll, error, timo, i;
755 u_int nbufbytes, ncpbytes, nfdbits;
756
757 if (uap->nd < 0)
758 return (EINVAL);
759 fdp = td->td_proc->p_fd;
760 mtx_lock(&Giant);
761 FILEDESC_LOCK(fdp);
762
763 if (uap->nd > td->td_proc->p_fd->fd_nfiles)
764 uap->nd = td->td_proc->p_fd->fd_nfiles; /* forgiving; slightly wrong */
765 FILEDESC_UNLOCK(fdp);
766
767 /*
768 * Allocate just enough bits for the non-null fd_sets. Use the
769 * preallocated auto buffer if possible.
770 */
771 nfdbits = roundup(uap->nd, NFDBITS);
772 ncpbytes = nfdbits / NBBY;
773 nbufbytes = 0;
774 if (uap->in != NULL)
775 nbufbytes += 2 * ncpbytes;
776 if (uap->ou != NULL)
777 nbufbytes += 2 * ncpbytes;
778 if (uap->ex != NULL)
779 nbufbytes += 2 * ncpbytes;
780 if (nbufbytes <= sizeof s_selbits)
781 selbits = &s_selbits[0];
782 else
783 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
784 if (2 * ncpbytes <= sizeof s_heldbits) {
785 bzero(s_heldbits, sizeof(s_heldbits));
786 heldbits = &s_heldbits[0];
787 } else
788 heldbits = malloc(2 * ncpbytes, M_SELECT, M_WAITOK | M_ZERO);
789
790 /*
791 * Assign pointers into the bit buffers and fetch the input bits.
792 * Put the output buffers together so that they can be bzeroed
793 * together.
794 */
795 sbp = selbits;
796 hibits = heldbits + ncpbytes / sizeof *heldbits;
797 hobits = heldbits;
798#define getbits(name, x) \
799 do { \
800 if (uap->name == NULL) \
801 ibits[x] = NULL; \
802 else { \
803 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \
804 obits[x] = sbp; \
805 sbp += ncpbytes / sizeof *sbp; \
806 error = copyin(uap->name, ibits[x], ncpbytes); \
807 if (error != 0) \
808 goto done_noproclock; \
809 for (i = 0; \
810 i < ncpbytes / sizeof ibits[i][0]; \
811 i++) \
812 hibits[i] |= ibits[x][i]; \
813 } \
814 } while (0)
815 getbits(in, 0);
816 getbits(ou, 1);
817 getbits(ex, 2);
818#undef getbits
819 if (nbufbytes != 0)
820 bzero(selbits, nbufbytes / 2);
821
822 if (uap->tv) {
823 error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
824 sizeof (atv));
825 if (error)
826 goto done_noproclock;
827 if (itimerfix(&atv)) {
828 error = EINVAL;
829 goto done_noproclock;
830 }
831 getmicrouptime(&rtv);
832 timevaladd(&atv, &rtv);
833 } else {
834 atv.tv_sec = 0;
835 atv.tv_usec = 0;
836 }
837 selholddrop(td, hibits, hobits, uap->nd, 1);
838 timo = 0;
839 PROC_LOCK(td->td_proc);
840retry:
841 ncoll = nselcoll;
842 mtx_lock_spin(&sched_lock);
843 td->td_flags |= TDF_SELECT;
844 mtx_unlock_spin(&sched_lock);
845 PROC_UNLOCK(td->td_proc);
846 error = selscan(td, ibits, obits, uap->nd);
847 PROC_LOCK(td->td_proc);
848 if (error || td->td_retval[0])
849 goto done;
850 if (atv.tv_sec || atv.tv_usec) {
851 getmicrouptime(&rtv);
852 if (timevalcmp(&rtv, &atv, >=)) {
853 /*
854 * An event of our interest may occur during locking a process.
855 * In order to avoid missing the event that occured during locking
856 * the process, test TDF_SELECT and rescan file descriptors if
857 * necessary.
858 */
859 mtx_lock_spin(&sched_lock);
860 if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
861 ncoll = nselcoll;
862 td->td_flags |= TDF_SELECT;
863 mtx_unlock_spin(&sched_lock);
864 PROC_UNLOCK(td->td_proc);
865 error = selscan(td, ibits, obits, uap->nd);
866 PROC_LOCK(td->td_proc);
867 } else
868 mtx_unlock_spin(&sched_lock);
869 goto done;
870 }
871 ttv = atv;
872 timevalsub(&ttv, &rtv);
873 timo = ttv.tv_sec > 24 * 60 * 60 ?
874 24 * 60 * 60 * hz : tvtohz(&ttv);
875 }
876 mtx_lock_spin(&sched_lock);
877 td->td_flags &= ~TDF_SELECT;
878 mtx_unlock_spin(&sched_lock);
879
880 if (timo > 0)
881 error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo);
882 else
883 error = cv_wait_sig(&selwait, &td->td_proc->p_mtx);
884
885 if (error == 0)
886 goto retry;
887
888done:
889 mtx_lock_spin(&sched_lock);
890 td->td_flags &= ~TDF_SELECT;
891 mtx_unlock_spin(&sched_lock);
892 PROC_UNLOCK(td->td_proc);
893 selholddrop(td, hibits, hobits, uap->nd, 0);
894done_noproclock:
895 /* select is not restarted after signals... */
896 if (error == ERESTART)
897 error = EINTR;
898 if (error == EWOULDBLOCK)
899 error = 0;
900#define putbits(name, x) \
901 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
902 error = error2;
903 if (error == 0) {
904 int error2;
905
906 putbits(in, 0);
907 putbits(ou, 1);
908 putbits(ex, 2);
909#undef putbits
910 }
911 if (selbits != &s_selbits[0])
912 free(selbits, M_SELECT);
913 if (heldbits != &s_heldbits[0])
914 free(heldbits, M_SELECT);
915
916 mtx_unlock(&Giant);
917 return (error);
918}
919
920/*
921 * Used to hold then release a group of fds for select(2).
922 * Hold (hold == 1) or release (hold == 0) a group of filedescriptors.
923 * if holding then use ibits setting the bits in obits, otherwise use obits.
924 */
925static int
926selholddrop(td, ibits, obits, nfd, hold)
927 struct thread *td;
928 fd_mask *ibits, *obits;
929 int nfd, hold;
930{
931 struct filedesc *fdp = td->td_proc->p_fd;
932 int i, fd;
933 fd_mask bits;
934 struct file *fp;
935
936 FILEDESC_LOCK(fdp);
937 for (i = 0; i < nfd; i += NFDBITS) {
938 if (hold)
939 bits = ibits[i/NFDBITS];
940 else
941 bits = obits[i/NFDBITS];
942 /* ffs(int mask) not portable, fd_mask is long */
943 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
944 if (!(bits & 1))
945 continue;
946 fp = fdp->fd_ofiles[fd];
947 if (fp == NULL) {
948 FILEDESC_UNLOCK(fdp);
949 return (EBADF);
950 }
951 if (hold) {
952 fhold(fp);
953 obits[(fd)/NFDBITS] |=
954 ((fd_mask)1 << ((fd) % NFDBITS));
955 } else {
956 /* XXX: optimize by making a special
957 * version of fdrop that only unlocks
958 * the filedesc if needed? This would
959 * redcuce the number of lock/unlock
960 * pairs by quite a bit.
961 */
962 FILEDESC_UNLOCK(fdp);
963 fdrop(fp, td);
964 FILEDESC_LOCK(fdp);
965 }
966 }
967 }
968 FILEDESC_UNLOCK(fdp);
969 return (0);
970}
971
972static int
973selscan(td, ibits, obits, nfd)
974 struct thread *td;
975 fd_mask **ibits, **obits;
976 int nfd;
977{
978 int msk, i, fd;
979 fd_mask bits;
980 struct file *fp;
981 int n = 0;
982 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */
983 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
984
985 for (msk = 0; msk < 3; msk++) {
986 if (ibits[msk] == NULL)
987 continue;
988 for (i = 0; i < nfd; i += NFDBITS) {
989 bits = ibits[msk][i/NFDBITS];
990 /* ffs(int mask) not portable, fd_mask is long */
991 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
992 if (!(bits & 1))
993 continue;
995 fp = ffind_hold(td, fd);
996 if (fp == NULL)
994 if (fget(td, fd, &fp))
997 return (EBADF);
998 if (fo_poll(fp, flag[msk], fp->f_cred, td)) {
999 obits[msk][(fd)/NFDBITS] |=
1000 ((fd_mask)1 << ((fd) % NFDBITS));
1001 n++;
1002 }
1003 fdrop(fp, td);
1004 }
1005 }
1006 }
1007 td->td_retval[0] = n;
1008 return (0);
1009}
1010
1011/*
1012 * Poll system call.
1013 */
1014#ifndef _SYS_SYSPROTO_H_
1015struct poll_args {
1016 struct pollfd *fds;
1017 u_int nfds;
1018 int timeout;
1019};
1020#endif
1021/*
1022 * MPSAFE
1023 */
1024int
1025poll(td, uap)
1026 struct thread *td;
1027 struct poll_args *uap;
1028{
1029 caddr_t bits;
1030 char smallbits[32 * sizeof(struct pollfd)];
1031 struct timeval atv, rtv, ttv;
1032 int ncoll, error = 0, timo;
1033 u_int nfds;
1034 size_t ni;
1035 struct pollfd p_heldbits[32];
1036 struct pollfd *heldbits;
1037
1038 nfds = SCARG(uap, nfds);
1039
1040 mtx_lock(&Giant);
1041 /*
1042 * This is kinda bogus. We have fd limits, but that is not
1043 * really related to the size of the pollfd array. Make sure
1044 * we let the process use at least FD_SETSIZE entries and at
1045 * least enough for the current limits. We want to be reasonably
1046 * safe, but not overly restrictive.
1047 */
1048 if ((nfds > td->td_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur) &&
1049 (nfds > FD_SETSIZE)) {
1050 error = EINVAL;
1051 goto done2;
1052 }
1053 ni = nfds * sizeof(struct pollfd);
1054 if (ni > sizeof(smallbits))
1055 bits = malloc(ni, M_TEMP, M_WAITOK);
1056 else
1057 bits = smallbits;
1058 if (ni > sizeof(p_heldbits))
1059 heldbits = malloc(ni, M_TEMP, M_WAITOK);
1060 else {
1061 bzero(p_heldbits, sizeof(p_heldbits));
1062 heldbits = p_heldbits;
1063 }
1064 error = copyin(SCARG(uap, fds), bits, ni);
1065 if (error)
1066 goto done_noproclock;
1067 bcopy(bits, heldbits, ni);
1068 if (SCARG(uap, timeout) != INFTIM) {
1069 atv.tv_sec = SCARG(uap, timeout) / 1000;
1070 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
1071 if (itimerfix(&atv)) {
1072 error = EINVAL;
1073 goto done_noproclock;
1074 }
1075 getmicrouptime(&rtv);
1076 timevaladd(&atv, &rtv);
1077 } else {
1078 atv.tv_sec = 0;
1079 atv.tv_usec = 0;
1080 }
1081 pollholddrop(td, heldbits, nfds, 1);
1082 timo = 0;
1083 PROC_LOCK(td->td_proc);
1084retry:
1085 ncoll = nselcoll;
1086 mtx_lock_spin(&sched_lock);
1087 td->td_flags |= TDF_SELECT;
1088 mtx_unlock_spin(&sched_lock);
1089 PROC_UNLOCK(td->td_proc);
1090 error = pollscan(td, (struct pollfd *)bits, nfds);
1091 PROC_LOCK(td->td_proc);
1092 if (error || td->td_retval[0])
1093 goto done;
1094 if (atv.tv_sec || atv.tv_usec) {
1095 getmicrouptime(&rtv);
1096 if (timevalcmp(&rtv, &atv, >=)) {
1097 /*
1098 * An event of our interest may occur during locking a process.
1099 * In order to avoid missing the event that occured during locking
1100 * the process, test TDF_SELECT and rescan file descriptors if
1101 * necessary.
1102 */
1103 mtx_lock_spin(&sched_lock);
1104 if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
1105 ncoll = nselcoll;
1106 td->td_flags |= TDF_SELECT;
1107 mtx_unlock_spin(&sched_lock);
1108 PROC_UNLOCK(td->td_proc);
1109 error = pollscan(td, (struct pollfd *)bits, nfds);
1110 PROC_LOCK(td->td_proc);
1111 } else
1112 mtx_unlock_spin(&sched_lock);
1113 goto done;
1114 }
1115 ttv = atv;
1116 timevalsub(&ttv, &rtv);
1117 timo = ttv.tv_sec > 24 * 60 * 60 ?
1118 24 * 60 * 60 * hz : tvtohz(&ttv);
1119 }
1120 mtx_lock_spin(&sched_lock);
1121 td->td_flags &= ~TDF_SELECT;
1122 mtx_unlock_spin(&sched_lock);
1123 if (timo > 0)
1124 error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo);
1125 else
1126 error = cv_wait_sig(&selwait, &td->td_proc->p_mtx);
1127 if (error == 0)
1128 goto retry;
1129
1130done:
1131 mtx_lock_spin(&sched_lock);
1132 td->td_flags &= ~TDF_SELECT;
1133 mtx_unlock_spin(&sched_lock);
1134 PROC_UNLOCK(td->td_proc);
1135 pollholddrop(td, heldbits, nfds, 0);
1136done_noproclock:
1137 /* poll is not restarted after signals... */
1138 if (error == ERESTART)
1139 error = EINTR;
1140 if (error == EWOULDBLOCK)
1141 error = 0;
1142 if (error == 0) {
1143 error = copyout(bits, SCARG(uap, fds), ni);
1144 if (error)
1145 goto out;
1146 }
1147out:
1148 if (ni > sizeof(smallbits))
1149 free(bits, M_TEMP);
1150 if (ni > sizeof(p_heldbits))
1151 free(heldbits, M_TEMP);
1152done2:
1153 mtx_unlock(&Giant);
1154 return (error);
1155}
1156
1157static int
1158pollholddrop(td, fds, nfd, hold)
1159 struct thread *td;
1160 struct pollfd *fds;
1161 u_int nfd;
1162 int hold;
1163{
1164 register struct filedesc *fdp = td->td_proc->p_fd;
1165 int i;
1166 struct file *fp;
1167
1168 FILEDESC_LOCK(fdp);
1169 for (i = 0; i < nfd; i++, fds++) {
1170 if (0 <= fds->fd && fds->fd < fdp->fd_nfiles) {
1171 fp = fdp->fd_ofiles[fds->fd];
1172 if (hold) {
1173 if (fp != NULL) {
1174 fhold(fp);
1175 fds->revents = 1;
1176 } else
1177 fds->revents = 0;
1178 } else if(fp != NULL && fds->revents) {
1179 FILE_LOCK(fp);
1180 FILEDESC_UNLOCK(fdp);
1181 fdrop_locked(fp, td);
1182 FILEDESC_LOCK(fdp);
1183 }
1184 }
1185 }
1186 FILEDESC_UNLOCK(fdp);
1187 return (0);
1188}
1189
1190static int
1191pollscan(td, fds, nfd)
1192 struct thread *td;
1193 struct pollfd *fds;
1194 u_int nfd;
1195{
1196 register struct filedesc *fdp = td->td_proc->p_fd;
1197 int i;
1198 struct file *fp;
1199 int n = 0;
1200
1201 for (i = 0; i < nfd; i++, fds++) {
1202 FILEDESC_LOCK(fdp);
1203 if (fds->fd >= fdp->fd_nfiles) {
1204 fds->revents = POLLNVAL;
1205 n++;
1206 FILEDESC_UNLOCK(fdp);
1207 } else if (fds->fd < 0) {
1208 fds->revents = 0;
1209 FILEDESC_UNLOCK(fdp);
1210 } else {
1211 fp = fdp->fd_ofiles[fds->fd];
1212 FILEDESC_UNLOCK(fdp);
1213 if (fp == NULL) {
1214 fds->revents = POLLNVAL;
1215 n++;
1216 } else {
1217 /*
1218 * Note: backend also returns POLLHUP and
1219 * POLLERR if appropriate.
1220 */
1221 fds->revents = fo_poll(fp, fds->events,
1222 fp->f_cred, td);
1223 if (fds->revents != 0)
1224 n++;
1225 }
1226 }
1227 }
1228 td->td_retval[0] = n;
1229 return (0);
1230}
1231
1232/*
1233 * OpenBSD poll system call.
1234 * XXX this isn't quite a true representation.. OpenBSD uses select ops.
1235 */
1236#ifndef _SYS_SYSPROTO_H_
1237struct openbsd_poll_args {
1238 struct pollfd *fds;
1239 u_int nfds;
1240 int timeout;
1241};
1242#endif
1243/*
1244 * MPSAFE
1245 */
/*
 * OpenBSD-compatible poll entry point.  The argument structures are
 * layout-compatible, so simply forward to the native poll().
 */
int
openbsd_poll(td, uap)
	register struct thread *td;
	register struct openbsd_poll_args *uap;
{

	return (poll(td, (struct poll_args *)uap));
}
1253
1254/*ARGSUSED*/
1255int
1256seltrue(dev, events, td)
1257 dev_t dev;
1258 int events;
1259 struct thread *td;
1260{
1261
1262 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1263}
1264
1265static int
1266find_thread_in_proc(struct proc *p, struct thread *td)
1267{
1268 struct thread *td2;
1269 FOREACH_THREAD_IN_PROC(p, td2) {
1270 if (td2 == td) {
1271 return (1);
1272 }
1273 }
1274 return (0);
1275}
1276
1277/*
1278 * Record a select request.
1279 */
1280void
1281selrecord(selector, sip)
1282 struct thread *selector;
1283 struct selinfo *sip;
1284{
1285 struct proc *p;
1286 pid_t mypid;
1287
1288 mypid = selector->td_proc->p_pid;
1289 if ((sip->si_pid == mypid) &&
1290 (sip->si_thread == selector)) { /* XXXKSE should be an ID? */
1291 return;
1292 }
1293 if (sip->si_pid &&
1294 (p = pfind(sip->si_pid)) &&
1295 (find_thread_in_proc(p, sip->si_thread))) {
1296 mtx_lock_spin(&sched_lock);
1297 if (sip->si_thread->td_wchan == (caddr_t)&selwait) {
1298 mtx_unlock_spin(&sched_lock);
1299 PROC_UNLOCK(p);
1300 sip->si_flags |= SI_COLL;
1301 return;
1302 }
1303 mtx_unlock_spin(&sched_lock);
1304 PROC_UNLOCK(p);
1305 }
1306 sip->si_pid = mypid;
1307 sip->si_thread = selector;
1308}
1309
1310/*
1311 * Do a wakeup when a selectable event occurs.
1312 */
1313void
1314selwakeup(sip)
1315 register struct selinfo *sip;
1316{
1317 struct thread *td;
1318 register struct proc *p;
1319
1320 if (sip->si_pid == 0)
1321 return;
1322 if (sip->si_flags & SI_COLL) {
1323 nselcoll++;
1324 sip->si_flags &= ~SI_COLL;
1325 cv_broadcast(&selwait);
1326 }
1327 p = pfind(sip->si_pid);
1328 sip->si_pid = 0;
1329 td = sip->si_thread;
1330 if (p != NULL) {
1331 if (!find_thread_in_proc(p, td)) {
1332 PROC_UNLOCK(p); /* lock is in pfind() */;
1333 return;
1334 }
1335 mtx_lock_spin(&sched_lock);
1336 if (td->td_wchan == (caddr_t)&selwait) {
1337 if (td->td_proc->p_stat == SSLEEP)
1338 setrunnable(td);
1339 else
1340 cv_waitq_remove(td);
1341 } else
1342 td->td_flags &= ~TDF_SELECT;
1343 mtx_unlock_spin(&sched_lock);
1344 PROC_UNLOCK(p); /* Lock is in pfind() */
1345 }
1346}
1347
1348static void selectinit __P((void *));
1349SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL)
1350
1351/* ARGSUSED*/
1352static void
1353selectinit(dummy)
1354 void *dummy;
1355{
1356 cv_init(&selwait, "select");
1357}
995 return (EBADF);
996 if (fo_poll(fp, flag[msk], fp->f_cred, td)) {
997 obits[msk][(fd)/NFDBITS] |=
998 ((fd_mask)1 << ((fd) % NFDBITS));
999 n++;
1000 }
1001 fdrop(fp, td);
1002 }
1003 }
1004 }
1005 td->td_retval[0] = n;
1006 return (0);
1007}
1008
1009/*
1010 * Poll system call.
1011 */
1012#ifndef _SYS_SYSPROTO_H_
1013struct poll_args {
1014 struct pollfd *fds;
1015 u_int nfds;
1016 int timeout;
1017};
1018#endif
1019/*
1020 * MPSAFE
1021 */
1022int
1023poll(td, uap)
1024 struct thread *td;
1025 struct poll_args *uap;
1026{
1027 caddr_t bits;
1028 char smallbits[32 * sizeof(struct pollfd)];
1029 struct timeval atv, rtv, ttv;
1030 int ncoll, error = 0, timo;
1031 u_int nfds;
1032 size_t ni;
1033 struct pollfd p_heldbits[32];
1034 struct pollfd *heldbits;
1035
1036 nfds = SCARG(uap, nfds);
1037
1038 mtx_lock(&Giant);
1039 /*
1040 * This is kinda bogus. We have fd limits, but that is not
1041 * really related to the size of the pollfd array. Make sure
1042 * we let the process use at least FD_SETSIZE entries and at
1043 * least enough for the current limits. We want to be reasonably
1044 * safe, but not overly restrictive.
1045 */
1046 if ((nfds > td->td_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur) &&
1047 (nfds > FD_SETSIZE)) {
1048 error = EINVAL;
1049 goto done2;
1050 }
1051 ni = nfds * sizeof(struct pollfd);
1052 if (ni > sizeof(smallbits))
1053 bits = malloc(ni, M_TEMP, M_WAITOK);
1054 else
1055 bits = smallbits;
1056 if (ni > sizeof(p_heldbits))
1057 heldbits = malloc(ni, M_TEMP, M_WAITOK);
1058 else {
1059 bzero(p_heldbits, sizeof(p_heldbits));
1060 heldbits = p_heldbits;
1061 }
1062 error = copyin(SCARG(uap, fds), bits, ni);
1063 if (error)
1064 goto done_noproclock;
1065 bcopy(bits, heldbits, ni);
1066 if (SCARG(uap, timeout) != INFTIM) {
1067 atv.tv_sec = SCARG(uap, timeout) / 1000;
1068 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
1069 if (itimerfix(&atv)) {
1070 error = EINVAL;
1071 goto done_noproclock;
1072 }
1073 getmicrouptime(&rtv);
1074 timevaladd(&atv, &rtv);
1075 } else {
1076 atv.tv_sec = 0;
1077 atv.tv_usec = 0;
1078 }
1079 pollholddrop(td, heldbits, nfds, 1);
1080 timo = 0;
1081 PROC_LOCK(td->td_proc);
1082retry:
1083 ncoll = nselcoll;
1084 mtx_lock_spin(&sched_lock);
1085 td->td_flags |= TDF_SELECT;
1086 mtx_unlock_spin(&sched_lock);
1087 PROC_UNLOCK(td->td_proc);
1088 error = pollscan(td, (struct pollfd *)bits, nfds);
1089 PROC_LOCK(td->td_proc);
1090 if (error || td->td_retval[0])
1091 goto done;
1092 if (atv.tv_sec || atv.tv_usec) {
1093 getmicrouptime(&rtv);
1094 if (timevalcmp(&rtv, &atv, >=)) {
1095 /*
1096 * An event of our interest may occur during locking a process.
1097 * In order to avoid missing the event that occured during locking
1098 * the process, test TDF_SELECT and rescan file descriptors if
1099 * necessary.
1100 */
1101 mtx_lock_spin(&sched_lock);
1102 if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
1103 ncoll = nselcoll;
1104 td->td_flags |= TDF_SELECT;
1105 mtx_unlock_spin(&sched_lock);
1106 PROC_UNLOCK(td->td_proc);
1107 error = pollscan(td, (struct pollfd *)bits, nfds);
1108 PROC_LOCK(td->td_proc);
1109 } else
1110 mtx_unlock_spin(&sched_lock);
1111 goto done;
1112 }
1113 ttv = atv;
1114 timevalsub(&ttv, &rtv);
1115 timo = ttv.tv_sec > 24 * 60 * 60 ?
1116 24 * 60 * 60 * hz : tvtohz(&ttv);
1117 }
1118 mtx_lock_spin(&sched_lock);
1119 td->td_flags &= ~TDF_SELECT;
1120 mtx_unlock_spin(&sched_lock);
1121 if (timo > 0)
1122 error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo);
1123 else
1124 error = cv_wait_sig(&selwait, &td->td_proc->p_mtx);
1125 if (error == 0)
1126 goto retry;
1127
1128done:
1129 mtx_lock_spin(&sched_lock);
1130 td->td_flags &= ~TDF_SELECT;
1131 mtx_unlock_spin(&sched_lock);
1132 PROC_UNLOCK(td->td_proc);
1133 pollholddrop(td, heldbits, nfds, 0);
1134done_noproclock:
1135 /* poll is not restarted after signals... */
1136 if (error == ERESTART)
1137 error = EINTR;
1138 if (error == EWOULDBLOCK)
1139 error = 0;
1140 if (error == 0) {
1141 error = copyout(bits, SCARG(uap, fds), ni);
1142 if (error)
1143 goto out;
1144 }
1145out:
1146 if (ni > sizeof(smallbits))
1147 free(bits, M_TEMP);
1148 if (ni > sizeof(p_heldbits))
1149 free(heldbits, M_TEMP);
1150done2:
1151 mtx_unlock(&Giant);
1152 return (error);
1153}
1154
1155static int
1156pollholddrop(td, fds, nfd, hold)
1157 struct thread *td;
1158 struct pollfd *fds;
1159 u_int nfd;
1160 int hold;
1161{
1162 register struct filedesc *fdp = td->td_proc->p_fd;
1163 int i;
1164 struct file *fp;
1165
1166 FILEDESC_LOCK(fdp);
1167 for (i = 0; i < nfd; i++, fds++) {
1168 if (0 <= fds->fd && fds->fd < fdp->fd_nfiles) {
1169 fp = fdp->fd_ofiles[fds->fd];
1170 if (hold) {
1171 if (fp != NULL) {
1172 fhold(fp);
1173 fds->revents = 1;
1174 } else
1175 fds->revents = 0;
1176 } else if(fp != NULL && fds->revents) {
1177 FILE_LOCK(fp);
1178 FILEDESC_UNLOCK(fdp);
1179 fdrop_locked(fp, td);
1180 FILEDESC_LOCK(fdp);
1181 }
1182 }
1183 }
1184 FILEDESC_UNLOCK(fdp);
1185 return (0);
1186}
1187
/*
 * Scan a pollfd array once, invoking each file's poll method and storing
 * the resulting event mask in revents.  The count of entries with nonzero
 * revents is returned to the user via td->td_retval[0].
 *
 * Descriptors that are out of range or refer to no open file report
 * POLLNVAL (and count as ready); negative descriptors are ignored with
 * revents = 0, matching poll(2) semantics.
 *
 * fp is dereferenced after FILEDESC_UNLOCK; presumably safe because the
 * caller pinned the files via pollholddrop() — NOTE(review): verify.
 *
 * Always returns 0.
 */
static int
pollscan(td, fds, nfd)
	struct thread *td;
	struct pollfd *fds;
	u_int nfd;
{
	register struct filedesc *fdp = td->td_proc->p_fd;
	int i;
	struct file *fp;
	int n = 0;		/* number of descriptors reporting events */

	for (i = 0; i < nfd; i++, fds++) {
		FILEDESC_LOCK(fdp);
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
			FILEDESC_UNLOCK(fdp);
		} else if (fds->fd < 0) {
			fds->revents = 0;
			FILEDESC_UNLOCK(fdp);
		} else {
			fp = fdp->fd_ofiles[fds->fd];
			FILEDESC_UNLOCK(fdp);
			if (fp == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				/*
				 * Note: backend also returns POLLHUP and
				 * POLLERR if appropriate.
				 */
				fds->revents = fo_poll(fp, fds->events,
				    fp->f_cred, td);
				if (fds->revents != 0)
					n++;
			}
		}
	}
	td->td_retval[0] = n;
	return (0);
}
1229
1230/*
1231 * OpenBSD poll system call.
1232 * XXX this isn't quite a true representation.. OpenBSD uses select ops.
1233 */
#ifndef _SYS_SYSPROTO_H_
struct openbsd_poll_args {
	struct pollfd *fds;	/* user-space array of descriptors to poll */
	u_int	nfds;		/* number of entries in fds */
	int	timeout;	/* timeout in milliseconds */
};
#endif
1241/*
1242 * MPSAFE
1243 */
1244int
1245openbsd_poll(td, uap)
1246 register struct thread *td;
1247 register struct openbsd_poll_args *uap;
1248{
1249 return (poll(td, (struct poll_args *)uap));
1250}
1251
1252/*ARGSUSED*/
1253int
1254seltrue(dev, events, td)
1255 dev_t dev;
1256 int events;
1257 struct thread *td;
1258{
1259
1260 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1261}
1262
1263static int
1264find_thread_in_proc(struct proc *p, struct thread *td)
1265{
1266 struct thread *td2;
1267 FOREACH_THREAD_IN_PROC(p, td2) {
1268 if (td2 == td) {
1269 return (1);
1270 }
1271 }
1272 return (0);
1273}
1274
1275/*
1276 * Record a select request.
1277 */
1278void
1279selrecord(selector, sip)
1280 struct thread *selector;
1281 struct selinfo *sip;
1282{
1283 struct proc *p;
1284 pid_t mypid;
1285
1286 mypid = selector->td_proc->p_pid;
1287 if ((sip->si_pid == mypid) &&
1288 (sip->si_thread == selector)) { /* XXXKSE should be an ID? */
1289 return;
1290 }
1291 if (sip->si_pid &&
1292 (p = pfind(sip->si_pid)) &&
1293 (find_thread_in_proc(p, sip->si_thread))) {
1294 mtx_lock_spin(&sched_lock);
1295 if (sip->si_thread->td_wchan == (caddr_t)&selwait) {
1296 mtx_unlock_spin(&sched_lock);
1297 PROC_UNLOCK(p);
1298 sip->si_flags |= SI_COLL;
1299 return;
1300 }
1301 mtx_unlock_spin(&sched_lock);
1302 PROC_UNLOCK(p);
1303 }
1304 sip->si_pid = mypid;
1305 sip->si_thread = selector;
1306}
1307
1308/*
1309 * Do a wakeup when a selectable event occurs.
1310 */
1311void
1312selwakeup(sip)
1313 register struct selinfo *sip;
1314{
1315 struct thread *td;
1316 register struct proc *p;
1317
1318 if (sip->si_pid == 0)
1319 return;
1320 if (sip->si_flags & SI_COLL) {
1321 nselcoll++;
1322 sip->si_flags &= ~SI_COLL;
1323 cv_broadcast(&selwait);
1324 }
1325 p = pfind(sip->si_pid);
1326 sip->si_pid = 0;
1327 td = sip->si_thread;
1328 if (p != NULL) {
1329 if (!find_thread_in_proc(p, td)) {
1330 PROC_UNLOCK(p); /* lock is in pfind() */;
1331 return;
1332 }
1333 mtx_lock_spin(&sched_lock);
1334 if (td->td_wchan == (caddr_t)&selwait) {
1335 if (td->td_proc->p_stat == SSLEEP)
1336 setrunnable(td);
1337 else
1338 cv_waitq_remove(td);
1339 } else
1340 td->td_flags &= ~TDF_SELECT;
1341 mtx_unlock_spin(&sched_lock);
1342 PROC_UNLOCK(p); /* Lock is in pfind() */
1343 }
1344}
1345
static void selectinit __P((void *));
/* Initialize the select/poll wait channel early in boot (lock setup stage). */
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL)

/* ARGSUSED*/
/*
 * Create the condition variable that all select()/poll() sleepers in the
 * system wait on.
 */
static void
selectinit(dummy)
	void *dummy;
{
	cv_init(&selwait, "select");
}