/* sys_generic.c revision 50477 */
1/* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD: head/sys/kern/sys_generic.c 50477 1999-08-28 01:08:13Z peter $ 40 */ 41 42#include "opt_ktrace.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/sysproto.h> 47#include <sys/filedesc.h> 48#include <sys/filio.h> 49#include <sys/ttycom.h> 50#include <sys/fcntl.h> 51#include <sys/file.h> 52#include <sys/proc.h> 53#include <sys/signalvar.h> 54#include <sys/socketvar.h> 55#include <sys/uio.h> 56#include <sys/kernel.h> 57#include <sys/malloc.h> 58#include <sys/poll.h> 59#include <sys/sysent.h> 60#ifdef KTRACE 61#include <sys/ktrace.h> 62#endif 63 64#include <machine/limits.h> 65 66static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 67static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 68MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 69 70static int pollscan __P((struct proc *, struct pollfd *, int)); 71static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); 72static struct file* getfp __P((struct filedesc *, int, int)); 73static int dofileread __P((struct proc *, struct file *, int, void *, 74 size_t, off_t, int)); 75static int dofilewrite __P((struct proc *, struct file *, int, 76 const void *, size_t, off_t, int)); 77 78static struct file* 79getfp(fdp, fd, flag) 80 struct filedesc* fdp; 81 int fd, flag; 82{ 83 struct file* fp; 84 85 if (((u_int)fd) >= fdp->fd_nfiles || 86 (fp = fdp->fd_ofiles[fd]) == NULL || 87 
(fp->f_flag & flag) == 0) 88 return (NULL); 89 return (fp); 90} 91 92/* 93 * Read system call. 94 */ 95#ifndef _SYS_SYSPROTO_H_ 96struct read_args { 97 int fd; 98 void *buf; 99 size_t nbyte; 100}; 101#endif 102int 103read(p, uap) 104 struct proc *p; 105 register struct read_args *uap; 106{ 107 register struct file *fp; 108 109 if ((fp = getfp(p->p_fd, uap->fd, FREAD)) == NULL) 110 return (EBADF); 111 return (dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0)); 112} 113 114/* 115 * Pread system call 116 */ 117#ifndef _SYS_SYSPROTO_H_ 118struct pread_args { 119 int fd; 120 void *buf; 121 size_t nbyte; 122 int pad; 123 off_t offset; 124}; 125#endif 126int 127pread(p, uap) 128 struct proc *p; 129 register struct pread_args *uap; 130{ 131 register struct file *fp; 132 133 if ((fp = getfp(p->p_fd, uap->fd, FREAD)) == NULL) 134 return (EBADF); 135 if (fp->f_type != DTYPE_VNODE) 136 return (ESPIPE); 137 return (dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, uap->offset, 138 FOF_OFFSET)); 139} 140 141/* 142 * Code common for read and pread 143 */ 144int 145dofileread(p, fp, fd, buf, nbyte, offset, flags) 146 struct proc *p; 147 struct file *fp; 148 int fd, flags; 149 void *buf; 150 size_t nbyte; 151 off_t offset; 152{ 153 struct uio auio; 154 struct iovec aiov; 155 long cnt, error = 0; 156#ifdef KTRACE 157 struct iovec ktriov; 158#endif 159 160 aiov.iov_base = (caddr_t)buf; 161 aiov.iov_len = nbyte; 162 auio.uio_iov = &aiov; 163 auio.uio_iovcnt = 1; 164 auio.uio_offset = offset; 165 if (nbyte > INT_MAX) 166 return (EINVAL); 167 auio.uio_resid = nbyte; 168 auio.uio_rw = UIO_READ; 169 auio.uio_segflg = UIO_USERSPACE; 170 auio.uio_procp = p; 171#ifdef KTRACE 172 /* 173 * if tracing, save a copy of iovec 174 */ 175 if (KTRPOINT(p, KTR_GENIO)) 176 ktriov = aiov; 177#endif 178 cnt = nbyte; 179 if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred, flags))) 180 if (auio.uio_resid != cnt && (error == ERESTART || 181 error == EINTR || error == EWOULDBLOCK)) 182 
error = 0; 183 cnt -= auio.uio_resid; 184#ifdef KTRACE 185 if (KTRPOINT(p, KTR_GENIO) && error == 0) 186 ktrgenio(p->p_tracep, fd, UIO_READ, &ktriov, cnt, error); 187#endif 188 p->p_retval[0] = cnt; 189 return (error); 190} 191 192/* 193 * Scatter read system call. 194 */ 195#ifndef _SYS_SYSPROTO_H_ 196struct readv_args { 197 int fd; 198 struct iovec *iovp; 199 u_int iovcnt; 200}; 201#endif 202int 203readv(p, uap) 204 struct proc *p; 205 register struct readv_args *uap; 206{ 207 register struct file *fp; 208 register struct filedesc *fdp = p->p_fd; 209 struct uio auio; 210 register struct iovec *iov; 211 struct iovec *needfree; 212 struct iovec aiov[UIO_SMALLIOV]; 213 long i, cnt, error = 0; 214 u_int iovlen; 215#ifdef KTRACE 216 struct iovec *ktriov = NULL; 217#endif 218 219 if ((fp = getfp(fdp, uap->fd, FREAD)) == NULL) 220 return (EBADF); 221 /* note: can't use iovlen until iovcnt is validated */ 222 iovlen = uap->iovcnt * sizeof (struct iovec); 223 if (uap->iovcnt > UIO_SMALLIOV) { 224 if (uap->iovcnt > UIO_MAXIOV) 225 return (EINVAL); 226 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 227 needfree = iov; 228 } else { 229 iov = aiov; 230 needfree = NULL; 231 } 232 auio.uio_iov = iov; 233 auio.uio_iovcnt = uap->iovcnt; 234 auio.uio_rw = UIO_READ; 235 auio.uio_segflg = UIO_USERSPACE; 236 auio.uio_procp = p; 237 auio.uio_offset = -1; 238 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 239 goto done; 240 auio.uio_resid = 0; 241 for (i = 0; i < uap->iovcnt; i++) { 242 if (iov->iov_len > INT_MAX - auio.uio_resid) { 243 error = EINVAL; 244 goto done; 245 } 246 auio.uio_resid += iov->iov_len; 247 iov++; 248 } 249#ifdef KTRACE 250 /* 251 * if tracing, save a copy of iovec 252 */ 253 if (KTRPOINT(p, KTR_GENIO)) { 254 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 255 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 256 } 257#endif 258 cnt = auio.uio_resid; 259 if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred, 0))) 260 if 
(auio.uio_resid != cnt && (error == ERESTART || 261 error == EINTR || error == EWOULDBLOCK)) 262 error = 0; 263 cnt -= auio.uio_resid; 264#ifdef KTRACE 265 if (ktriov != NULL) { 266 if (error == 0) 267 ktrgenio(p->p_tracep, uap->fd, UIO_READ, ktriov, 268 cnt, error); 269 FREE(ktriov, M_TEMP); 270 } 271#endif 272 p->p_retval[0] = cnt; 273done: 274 if (needfree) 275 FREE(needfree, M_IOV); 276 return (error); 277} 278 279/* 280 * Write system call 281 */ 282#ifndef _SYS_SYSPROTO_H_ 283struct write_args { 284 int fd; 285 const void *buf; 286 size_t nbyte; 287}; 288#endif 289int 290write(p, uap) 291 struct proc *p; 292 register struct write_args *uap; 293{ 294 register struct file *fp; 295 296 if ((fp = getfp(p->p_fd, uap->fd, FWRITE)) == NULL) 297 return (EBADF); 298 return (dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0)); 299} 300 301/* 302 * Pwrite system call 303 */ 304#ifndef _SYS_SYSPROTO_H_ 305struct pwrite_args { 306 int fd; 307 const void *buf; 308 size_t nbyte; 309 int pad; 310 off_t offset; 311}; 312#endif 313int 314pwrite(p, uap) 315 struct proc *p; 316 register struct pwrite_args *uap; 317{ 318 register struct file *fp; 319 320 if ((fp = getfp(p->p_fd, uap->fd, FWRITE)) == NULL) 321 return (EBADF); 322 if (fp->f_type != DTYPE_VNODE) 323 return (ESPIPE); 324 return (dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, uap->offset, 325 FOF_OFFSET)); 326} 327 328static int 329dofilewrite(p, fp, fd, buf, nbyte, offset, flags) 330 struct proc *p; 331 struct file *fp; 332 int fd, flags; 333 const void *buf; 334 size_t nbyte; 335 off_t offset; 336{ 337 struct uio auio; 338 struct iovec aiov; 339 long cnt, error = 0; 340#ifdef KTRACE 341 struct iovec ktriov; 342#endif 343 344 aiov.iov_base = (void *)buf; 345 aiov.iov_len = nbyte; 346 auio.uio_iov = &aiov; 347 auio.uio_iovcnt = 1; 348 auio.uio_offset = offset; 349 if (nbyte > INT_MAX) 350 return (EINVAL); 351 auio.uio_resid = nbyte; 352 auio.uio_rw = UIO_WRITE; 353 auio.uio_segflg = UIO_USERSPACE; 354 
auio.uio_procp = p; 355#ifdef KTRACE 356 /* 357 * if tracing, save a copy of iovec 358 */ 359 if (KTRPOINT(p, KTR_GENIO)) 360 ktriov = aiov; 361#endif 362 cnt = nbyte; 363 if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred, flags))) { 364 if (auio.uio_resid != cnt && (error == ERESTART || 365 error == EINTR || error == EWOULDBLOCK)) 366 error = 0; 367 if (error == EPIPE) 368 psignal(p, SIGPIPE); 369 } 370 cnt -= auio.uio_resid; 371#ifdef KTRACE 372 if (KTRPOINT(p, KTR_GENIO) && error == 0) 373 ktrgenio(p->p_tracep, fd, UIO_WRITE, 374 &ktriov, cnt, error); 375#endif 376 p->p_retval[0] = cnt; 377 return (error); 378} 379 380/* 381 * Gather write system call 382 */ 383#ifndef _SYS_SYSPROTO_H_ 384struct writev_args { 385 int fd; 386 struct iovec *iovp; 387 u_int iovcnt; 388}; 389#endif 390int 391writev(p, uap) 392 struct proc *p; 393 register struct writev_args *uap; 394{ 395 register struct file *fp; 396 register struct filedesc *fdp = p->p_fd; 397 struct uio auio; 398 register struct iovec *iov; 399 struct iovec *needfree; 400 struct iovec aiov[UIO_SMALLIOV]; 401 long i, cnt, error = 0; 402 u_int iovlen; 403#ifdef KTRACE 404 struct iovec *ktriov = NULL; 405#endif 406 407 if ((fp = getfp(fdp, uap->fd, FWRITE)) == NULL) 408 return (EBADF); 409 /* note: can't use iovlen until iovcnt is validated */ 410 iovlen = uap->iovcnt * sizeof (struct iovec); 411 if (uap->iovcnt > UIO_SMALLIOV) { 412 if (uap->iovcnt > UIO_MAXIOV) 413 return (EINVAL); 414 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 415 needfree = iov; 416 } else { 417 iov = aiov; 418 needfree = NULL; 419 } 420 auio.uio_iov = iov; 421 auio.uio_iovcnt = uap->iovcnt; 422 auio.uio_rw = UIO_WRITE; 423 auio.uio_segflg = UIO_USERSPACE; 424 auio.uio_procp = p; 425 auio.uio_offset = -1; 426 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 427 goto done; 428 auio.uio_resid = 0; 429 for (i = 0; i < uap->iovcnt; i++) { 430 if (iov->iov_len > INT_MAX - auio.uio_resid) { 431 error = EINVAL; 432 
goto done; 433 } 434 auio.uio_resid += iov->iov_len; 435 iov++; 436 } 437#ifdef KTRACE 438 /* 439 * if tracing, save a copy of iovec 440 */ 441 if (KTRPOINT(p, KTR_GENIO)) { 442 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 443 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 444 } 445#endif 446 cnt = auio.uio_resid; 447 if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred, 0))) { 448 if (auio.uio_resid != cnt && (error == ERESTART || 449 error == EINTR || error == EWOULDBLOCK)) 450 error = 0; 451 if (error == EPIPE) 452 psignal(p, SIGPIPE); 453 } 454 cnt -= auio.uio_resid; 455#ifdef KTRACE 456 if (ktriov != NULL) { 457 if (error == 0) 458 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, 459 ktriov, cnt, error); 460 FREE(ktriov, M_TEMP); 461 } 462#endif 463 p->p_retval[0] = cnt; 464done: 465 if (needfree) 466 FREE(needfree, M_IOV); 467 return (error); 468} 469 470/* 471 * Ioctl system call 472 */ 473#ifndef _SYS_SYSPROTO_H_ 474struct ioctl_args { 475 int fd; 476 u_long com; 477 caddr_t data; 478}; 479#endif 480/* ARGSUSED */ 481int 482ioctl(p, uap) 483 struct proc *p; 484 register struct ioctl_args *uap; 485{ 486 register struct file *fp; 487 register struct filedesc *fdp; 488 register u_long com; 489 int error; 490 register u_int size; 491 caddr_t data, memp; 492 int tmp; 493#define STK_PARAMS 128 494 char stkbuf[STK_PARAMS]; 495 496 fdp = p->p_fd; 497 if ((u_int)uap->fd >= fdp->fd_nfiles || 498 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 499 return (EBADF); 500 501 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 502 return (EBADF); 503 504 switch (com = uap->com) { 505 case FIONCLEX: 506 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 507 return (0); 508 case FIOCLEX: 509 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 510 return (0); 511 } 512 513 /* 514 * Interpret high order word to find amount of data to be 515 * copied to/from the user's address space. 
516 */ 517 size = IOCPARM_LEN(com); 518 if (size > IOCPARM_MAX) 519 return (ENOTTY); 520 memp = NULL; 521 if (size > sizeof (stkbuf)) { 522 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 523 data = memp; 524 } else 525 data = stkbuf; 526 if (com&IOC_IN) { 527 if (size) { 528 error = copyin(uap->data, data, (u_int)size); 529 if (error) { 530 if (memp) 531 free(memp, M_IOCTLOPS); 532 return (error); 533 } 534 } else 535 *(caddr_t *)data = uap->data; 536 } else if ((com&IOC_OUT) && size) 537 /* 538 * Zero the buffer so the user always 539 * gets back something deterministic. 540 */ 541 bzero(data, size); 542 else if (com&IOC_VOID) 543 *(caddr_t *)data = uap->data; 544 545 switch (com) { 546 547 case FIONBIO: 548 if ((tmp = *(int *)data)) 549 fp->f_flag |= FNONBLOCK; 550 else 551 fp->f_flag &= ~FNONBLOCK; 552 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 553 break; 554 555 case FIOASYNC: 556 if ((tmp = *(int *)data)) 557 fp->f_flag |= FASYNC; 558 else 559 fp->f_flag &= ~FASYNC; 560 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 561 break; 562 563 default: 564 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 565 /* 566 * Copy any data to user, size was 567 * already set and checked above. 568 */ 569 if (error == 0 && (com&IOC_OUT) && size) 570 error = copyout(data, uap->data, (u_int)size); 571 break; 572 } 573 if (memp) 574 free(memp, M_IOCTLOPS); 575 return (error); 576} 577 578static int nselcoll; 579int selwait; 580 581/* 582 * Select system call. 583 */ 584#ifndef _SYS_SYSPROTO_H_ 585struct select_args { 586 int nd; 587 fd_set *in, *ou, *ex; 588 struct timeval *tv; 589}; 590#endif 591int 592select(p, uap) 593 register struct proc *p; 594 register struct select_args *uap; 595{ 596 /* 597 * The magic 2048 here is chosen to be just enough for FD_SETSIZE 598 * infds with the new FD_SETSIZE of 1024, and more than enough for 599 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 600 * of 256. 
601 */ 602 fd_mask s_selbits[howmany(2048, NFDBITS)]; 603 fd_mask *ibits[3], *obits[3], *selbits, *sbp; 604 struct timeval atv, rtv, ttv; 605 int s, ncoll, error, timo; 606 u_int nbufbytes, ncpbytes, nfdbits; 607 608 if (uap->nd < 0) 609 return (EINVAL); 610 if (uap->nd > p->p_fd->fd_nfiles) 611 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 612 613 /* 614 * Allocate just enough bits for the non-null fd_sets. Use the 615 * preallocated auto buffer if possible. 616 */ 617 nfdbits = roundup(uap->nd, NFDBITS); 618 ncpbytes = nfdbits / NBBY; 619 nbufbytes = 0; 620 if (uap->in != NULL) 621 nbufbytes += 2 * ncpbytes; 622 if (uap->ou != NULL) 623 nbufbytes += 2 * ncpbytes; 624 if (uap->ex != NULL) 625 nbufbytes += 2 * ncpbytes; 626 if (nbufbytes <= sizeof s_selbits) 627 selbits = &s_selbits[0]; 628 else 629 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 630 631 /* 632 * Assign pointers into the bit buffers and fetch the input bits. 633 * Put the output buffers together so that they can be bzeroed 634 * together. 
635 */ 636 sbp = selbits; 637#define getbits(name, x) \ 638 do { \ 639 if (uap->name == NULL) \ 640 ibits[x] = NULL; \ 641 else { \ 642 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 643 obits[x] = sbp; \ 644 sbp += ncpbytes / sizeof *sbp; \ 645 error = copyin(uap->name, ibits[x], ncpbytes); \ 646 if (error != 0) \ 647 goto done; \ 648 } \ 649 } while (0) 650 getbits(in, 0); 651 getbits(ou, 1); 652 getbits(ex, 2); 653#undef getbits 654 if (nbufbytes != 0) 655 bzero(selbits, nbufbytes / 2); 656 657 if (uap->tv) { 658 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 659 sizeof (atv)); 660 if (error) 661 goto done; 662 if (itimerfix(&atv)) { 663 error = EINVAL; 664 goto done; 665 } 666 getmicrouptime(&rtv); 667 timevaladd(&atv, &rtv); 668 } else 669 atv.tv_sec = 0; 670 timo = 0; 671retry: 672 ncoll = nselcoll; 673 p->p_flag |= P_SELECT; 674 error = selscan(p, ibits, obits, uap->nd); 675 if (error || p->p_retval[0]) 676 goto done; 677 if (atv.tv_sec) { 678 getmicrouptime(&rtv); 679 if (timevalcmp(&rtv, &atv, >=)) 680 goto done; 681 ttv = atv; 682 timevalsub(&ttv, &rtv); 683 timo = ttv.tv_sec > 24 * 60 * 60 ? 684 24 * 60 * 60 * hz : tvtohz(&ttv); 685 } 686 s = splhigh(); 687 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 688 splx(s); 689 goto retry; 690 } 691 p->p_flag &= ~P_SELECT; 692 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 693 splx(s); 694 if (error == 0) 695 goto retry; 696done: 697 p->p_flag &= ~P_SELECT; 698 /* select is not restarted after signals... 
*/ 699 if (error == ERESTART) 700 error = EINTR; 701 if (error == EWOULDBLOCK) 702 error = 0; 703#define putbits(name, x) \ 704 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 705 error = error2; 706 if (error == 0) { 707 int error2; 708 709 putbits(in, 0); 710 putbits(ou, 1); 711 putbits(ex, 2); 712#undef putbits 713 } 714 if (selbits != &s_selbits[0]) 715 free(selbits, M_SELECT); 716 return (error); 717} 718 719static int 720selscan(p, ibits, obits, nfd) 721 struct proc *p; 722 fd_mask **ibits, **obits; 723 int nfd; 724{ 725 register struct filedesc *fdp = p->p_fd; 726 register int msk, i, j, fd; 727 register fd_mask bits; 728 struct file *fp; 729 int n = 0; 730 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 731 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 732 733 for (msk = 0; msk < 3; msk++) { 734 if (ibits[msk] == NULL) 735 continue; 736 for (i = 0; i < nfd; i += NFDBITS) { 737 bits = ibits[msk][i/NFDBITS]; 738 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 739 bits &= ~(1 << j); 740 fp = fdp->fd_ofiles[fd]; 741 if (fp == NULL) 742 return (EBADF); 743 if ((*fp->f_ops->fo_poll)(fp, flag[msk], 744 fp->f_cred, p)) { 745 obits[msk][(fd)/NFDBITS] |= 746 (1 << ((fd) % NFDBITS)); 747 n++; 748 } 749 } 750 } 751 } 752 p->p_retval[0] = n; 753 return (0); 754} 755 756/* 757 * Poll system call. 
758 */ 759#ifndef _SYS_SYSPROTO_H_ 760struct poll_args { 761 struct pollfd *fds; 762 u_int nfds; 763 int timeout; 764}; 765#endif 766int 767poll(p, uap) 768 register struct proc *p; 769 register struct poll_args *uap; 770{ 771 caddr_t bits; 772 char smallbits[32 * sizeof(struct pollfd)]; 773 struct timeval atv, rtv, ttv; 774 int s, ncoll, error = 0, timo; 775 size_t ni; 776 777 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { 778 /* forgiving; slightly wrong */ 779 SCARG(uap, nfds) = p->p_fd->fd_nfiles; 780 } 781 ni = SCARG(uap, nfds) * sizeof(struct pollfd); 782 if (ni > sizeof(smallbits)) 783 bits = malloc(ni, M_TEMP, M_WAITOK); 784 else 785 bits = smallbits; 786 error = copyin(SCARG(uap, fds), bits, ni); 787 if (error) 788 goto done; 789 if (SCARG(uap, timeout) != INFTIM) { 790 atv.tv_sec = SCARG(uap, timeout) / 1000; 791 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 792 if (itimerfix(&atv)) { 793 error = EINVAL; 794 goto done; 795 } 796 getmicrouptime(&rtv); 797 timevaladd(&atv, &rtv); 798 } else 799 atv.tv_sec = 0; 800 timo = 0; 801retry: 802 ncoll = nselcoll; 803 p->p_flag |= P_SELECT; 804 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds)); 805 if (error || p->p_retval[0]) 806 goto done; 807 if (atv.tv_sec) { 808 getmicrouptime(&rtv); 809 if (timevalcmp(&rtv, &atv, >=)) 810 goto done; 811 ttv = atv; 812 timevalsub(&ttv, &rtv); 813 timo = ttv.tv_sec > 24 * 60 * 60 ? 814 24 * 60 * 60 * hz : tvtohz(&ttv); 815 } 816 s = splhigh(); 817 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 818 splx(s); 819 goto retry; 820 } 821 p->p_flag &= ~P_SELECT; 822 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 823 splx(s); 824 if (error == 0) 825 goto retry; 826done: 827 p->p_flag &= ~P_SELECT; 828 /* poll is not restarted after signals... 
*/ 829 if (error == ERESTART) 830 error = EINTR; 831 if (error == EWOULDBLOCK) 832 error = 0; 833 if (error == 0) { 834 error = copyout(bits, SCARG(uap, fds), ni); 835 if (error) 836 goto out; 837 } 838out: 839 if (ni > sizeof(smallbits)) 840 free(bits, M_TEMP); 841 return (error); 842} 843 844static int 845pollscan(p, fds, nfd) 846 struct proc *p; 847 struct pollfd *fds; 848 int nfd; 849{ 850 register struct filedesc *fdp = p->p_fd; 851 int i; 852 struct file *fp; 853 int n = 0; 854 855 for (i = 0; i < nfd; i++, fds++) { 856 if (fds->fd >= fdp->fd_nfiles) { 857 fds->revents = POLLNVAL; 858 n++; 859 } else if (fds->fd < 0) { 860 fds->revents = 0; 861 } else { 862 fp = fdp->fd_ofiles[fds->fd]; 863 if (fp == 0) { 864 fds->revents = POLLNVAL; 865 n++; 866 } else { 867 /* 868 * Note: backend also returns POLLHUP and 869 * POLLERR if appropriate. 870 */ 871 fds->revents = (*fp->f_ops->fo_poll)(fp, 872 fds->events, fp->f_cred, p); 873 if (fds->revents != 0) 874 n++; 875 } 876 } 877 } 878 p->p_retval[0] = n; 879 return (0); 880} 881 882/* 883 * OpenBSD poll system call. 884 * XXX this isn't quite a true representation.. OpenBSD uses select ops. 885 */ 886#ifndef _SYS_SYSPROTO_H_ 887struct openbsd_poll_args { 888 struct pollfd *fds; 889 u_int nfds; 890 int timeout; 891}; 892#endif 893int 894openbsd_poll(p, uap) 895 register struct proc *p; 896 register struct openbsd_poll_args *uap; 897{ 898 return (poll(p, (struct poll_args *)uap)); 899} 900 901/*ARGSUSED*/ 902int 903seltrue(dev, events, p) 904 dev_t dev; 905 int events; 906 struct proc *p; 907{ 908 909 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 910} 911 912/* 913 * Record a select request. 
914 */ 915void 916selrecord(selector, sip) 917 struct proc *selector; 918 struct selinfo *sip; 919{ 920 struct proc *p; 921 pid_t mypid; 922 923 mypid = selector->p_pid; 924 if (sip->si_pid == mypid) 925 return; 926 if (sip->si_pid && (p = pfind(sip->si_pid)) && 927 p->p_wchan == (caddr_t)&selwait) 928 sip->si_flags |= SI_COLL; 929 else 930 sip->si_pid = mypid; 931} 932 933/* 934 * Do a wakeup when a selectable event occurs. 935 */ 936void 937selwakeup(sip) 938 register struct selinfo *sip; 939{ 940 register struct proc *p; 941 int s; 942 943 if (sip->si_pid == 0) 944 return; 945 if (sip->si_flags & SI_COLL) { 946 nselcoll++; 947 sip->si_flags &= ~SI_COLL; 948 wakeup((caddr_t)&selwait); 949 } 950 p = pfind(sip->si_pid); 951 sip->si_pid = 0; 952 if (p != NULL) { 953 s = splhigh(); 954 if (p->p_wchan == (caddr_t)&selwait) { 955 if (p->p_stat == SSLEEP) 956 setrunnable(p); 957 else 958 unsleep(p); 959 } else if (p->p_flag & P_SELECT) 960 p->p_flag &= ~P_SELECT; 961 splx(s); 962 } 963} 964