sys_generic.c revision 72146
1/* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD: head/sys/kern/sys_generic.c 72146 2001-02-07 23:28:01Z peter $ 40 */ 41 42#include "opt_ktrace.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/sysproto.h> 47#include <sys/filedesc.h> 48#include <sys/filio.h> 49#include <sys/fcntl.h> 50#include <sys/file.h> 51#include <sys/proc.h> 52#include <sys/signalvar.h> 53#include <sys/socketvar.h> 54#include <sys/uio.h> 55#include <sys/kernel.h> 56#include <sys/malloc.h> 57#include <sys/poll.h> 58#include <sys/resourcevar.h> 59#include <sys/selinfo.h> 60#include <sys/sysctl.h> 61#include <sys/sysent.h> 62#include <sys/bio.h> 63#include <sys/buf.h> 64#ifdef KTRACE 65#include <sys/ktrace.h> 66#endif 67#include <vm/vm.h> 68#include <vm/vm_page.h> 69 70#include <machine/limits.h> 71 72static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 73static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 74MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 75 76static int pollscan __P((struct proc *, struct pollfd *, int)); 77static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); 78static int dofileread __P((struct proc *, struct file *, int, void *, 79 size_t, off_t, int)); 80static int dofilewrite __P((struct proc *, struct file *, int, 81 const void *, size_t, off_t, int)); 82 83struct file* 84holdfp(fdp, fd, flag) 85 struct filedesc* fdp; 86 int fd, flag; 87{ 88 struct file* fp; 89 90 if (((u_int)fd) >= fdp->fd_nfiles || 91 (fp = fdp->fd_ofiles[fd]) == NULL || 92 (fp->f_flag & flag) == 0) { 93 return (NULL); 94 } 95 fhold(fp); 96 return (fp); 97} 98 99/* 100 * Read system call. 101 */ 102#ifndef _SYS_SYSPROTO_H_ 103struct read_args { 104 int fd; 105 void *buf; 106 size_t nbyte; 107}; 108#endif 109int 110read(p, uap) 111 struct proc *p; 112 register struct read_args *uap; 113{ 114 register struct file *fp; 115 int error; 116 117 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 118 return (EBADF); 119 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 120 fdrop(fp, p); 121 return(error); 122} 123 124/* 125 * Pread system call 126 */ 127#ifndef _SYS_SYSPROTO_H_ 128struct pread_args { 129 int fd; 130 void *buf; 131 size_t nbyte; 132 int pad; 133 off_t offset; 134}; 135#endif 136int 137pread(p, uap) 138 struct proc *p; 139 register struct pread_args *uap; 140{ 141 register struct file *fp; 142 int error; 143 144 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 145 return (EBADF); 146 if (fp->f_type != DTYPE_VNODE) { 147 error = ESPIPE; 148 } else { 149 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, 150 uap->offset, FOF_OFFSET); 151 } 152 fdrop(fp, p); 153 return(error); 154} 155 156/* 157 * Code common for read and pread 158 */ 159int 160dofileread(p, fp, fd, buf, nbyte, offset, flags) 161 struct proc *p; 162 struct file *fp; 163 int fd, flags; 164 void *buf; 165 size_t nbyte; 166 off_t offset; 167{ 168 struct uio auio; 169 struct iovec aiov; 170 long cnt, error = 0; 171#ifdef KTRACE 172 struct iovec ktriov; 173 struct uio ktruio; 174 int didktr = 0; 175#endif 176 177 aiov.iov_base = (caddr_t)buf; 178 aiov.iov_len = nbyte; 179 auio.uio_iov = &aiov; 180 auio.uio_iovcnt = 1; 181 auio.uio_offset = offset; 182 if (nbyte > INT_MAX) 183 return (EINVAL); 184 auio.uio_resid = nbyte; 185 auio.uio_rw = UIO_READ; 186 auio.uio_segflg = UIO_USERSPACE; 187 auio.uio_procp = p; 188#ifdef KTRACE 189 /* 190 * if tracing, save a copy of iovec 191 */ 192 if (KTRPOINT(p, KTR_GENIO)) { 193 ktriov = aiov; 194 ktruio = auio; 195 didktr = 1; 196 } 197#endif 198 cnt = nbyte; 199 200 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) { 201 if (auio.uio_resid != cnt && (error == ERESTART || 202 error == EINTR || error == EWOULDBLOCK)) 203 error = 0; 204 } 205 cnt -= auio.uio_resid; 206#ifdef KTRACE 207 if (didktr && error == 0) { 208 ktruio.uio_iov = &ktriov; 209 ktruio.uio_resid = cnt; 210 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error); 211 } 212#endif 213 p->p_retval[0] = cnt; 214 return (error); 215} 216 217/* 218 * Scatter read system call. 219 */ 220#ifndef _SYS_SYSPROTO_H_ 221struct readv_args { 222 int fd; 223 struct iovec *iovp; 224 u_int iovcnt; 225}; 226#endif 227int 228readv(p, uap) 229 struct proc *p; 230 register struct readv_args *uap; 231{ 232 register struct file *fp; 233 register struct filedesc *fdp = p->p_fd; 234 struct uio auio; 235 register struct iovec *iov; 236 struct iovec *needfree; 237 struct iovec aiov[UIO_SMALLIOV]; 238 long i, cnt, error = 0; 239 u_int iovlen; 240#ifdef KTRACE 241 struct iovec *ktriov = NULL; 242 struct uio ktruio; 243#endif 244 245 if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL) 246 return (EBADF); 247 /* note: can't use iovlen until iovcnt is validated */ 248 iovlen = uap->iovcnt * sizeof (struct iovec); 249 if (uap->iovcnt > UIO_SMALLIOV) { 250 if (uap->iovcnt > UIO_MAXIOV) 251 return (EINVAL); 252 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 253 needfree = iov; 254 } else { 255 iov = aiov; 256 needfree = NULL; 257 } 258 auio.uio_iov = iov; 259 auio.uio_iovcnt = uap->iovcnt; 260 auio.uio_rw = UIO_READ; 261 auio.uio_segflg = UIO_USERSPACE; 262 auio.uio_procp = p; 263 auio.uio_offset = -1; 264 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 265 goto done; 266 auio.uio_resid = 0; 267 for (i = 0; i < uap->iovcnt; i++) { 268 if (iov->iov_len > INT_MAX - auio.uio_resid) { 269 error = EINVAL; 270 goto done; 271 } 272 auio.uio_resid += iov->iov_len; 273 iov++; 274 } 275#ifdef KTRACE 276 /* 277 * if tracing, save a copy of iovec 278 */ 279 if (KTRPOINT(p, KTR_GENIO)) { 280 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 281 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 282 ktruio = auio; 283 } 284#endif 285 cnt = auio.uio_resid; 286 if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) { 287 if (auio.uio_resid != cnt && (error == ERESTART || 288 error == EINTR || error == EWOULDBLOCK)) 289 error = 0; 290 } 291 cnt -= auio.uio_resid; 292#ifdef KTRACE 293 if (ktriov != NULL) { 294 if (error == 0) { 295 ktruio.uio_iov = ktriov; 296 ktruio.uio_resid = cnt; 297 ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio, 298 error); 299 } 300 FREE(ktriov, M_TEMP); 301 } 302#endif 303 p->p_retval[0] = cnt; 304done: 305 fdrop(fp, p); 306 if (needfree) 307 FREE(needfree, M_IOV); 308 return (error); 309} 310 311/* 312 * Write system call 313 */ 314#ifndef _SYS_SYSPROTO_H_ 315struct write_args { 316 int fd; 317 const void *buf; 318 size_t nbyte; 319}; 320#endif 321int 322write(p, uap) 323 struct proc *p; 324 register struct write_args *uap; 325{ 326 register struct file *fp; 327 int error; 328 329 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 330 return (EBADF); 331 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 332 fdrop(fp, p); 333 return(error); 334} 335 336/* 337 * Pwrite system call 338 */ 339#ifndef _SYS_SYSPROTO_H_ 340struct pwrite_args { 341 int fd; 342 const void *buf; 343 size_t nbyte; 344 int pad; 345 off_t offset; 346}; 347#endif 348int 349pwrite(p, uap) 350 struct proc *p; 351 register struct pwrite_args *uap; 352{ 353 register struct file *fp; 354 int error; 355 356 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 357 return (EBADF); 358 if (fp->f_type != DTYPE_VNODE) { 359 error = ESPIPE; 360 } else { 361 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, 362 uap->offset, FOF_OFFSET); 363 } 364 fdrop(fp, p); 365 return(error); 366} 367 368static int 369dofilewrite(p, fp, fd, buf, nbyte, offset, flags) 370 struct proc *p; 371 struct file *fp; 372 int fd, flags; 373 const void *buf; 374 size_t nbyte; 375 off_t offset; 376{ 377 struct uio auio; 378 struct iovec aiov; 379 long cnt, error = 0; 380#ifdef KTRACE 381 struct iovec ktriov; 382 struct uio ktruio; 383 int didktr = 0; 384#endif 385 386 aiov.iov_base = (void *)(uintptr_t)buf; 387 aiov.iov_len = nbyte; 388 auio.uio_iov = &aiov; 389 auio.uio_iovcnt = 1; 390 auio.uio_offset = offset; 391 if (nbyte > INT_MAX) 392 return (EINVAL); 393 auio.uio_resid = nbyte; 394 auio.uio_rw = UIO_WRITE; 395 auio.uio_segflg = UIO_USERSPACE; 396 auio.uio_procp = p; 397#ifdef KTRACE 398 /* 399 * if tracing, save a copy of iovec and uio 400 */ 401 if (KTRPOINT(p, KTR_GENIO)) { 402 ktriov = aiov; 403 ktruio = auio; 404 didktr = 1; 405 } 406#endif 407 cnt = nbyte; 408 if (fp->f_type == DTYPE_VNODE) 409 bwillwrite(); 410 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { 411 if (auio.uio_resid != cnt && (error == ERESTART || 412 error == EINTR || error == EWOULDBLOCK)) 413 error = 0; 414 if (error == EPIPE) 415 psignal(p, SIGPIPE); 416 } 417 cnt -= auio.uio_resid; 418#ifdef KTRACE 419 if (didktr && error == 0) { 420 ktruio.uio_iov = &ktriov; 421 ktruio.uio_resid = cnt; 422 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error); 423 } 424#endif 425 p->p_retval[0] = cnt; 426 return (error); 427} 428 429/* 430 * Gather write system call 431 */ 432#ifndef _SYS_SYSPROTO_H_ 433struct writev_args { 434 int fd; 435 struct iovec *iovp; 436 u_int iovcnt; 437}; 438#endif 439int 440writev(p, uap) 441 struct proc *p; 442 register struct writev_args *uap; 443{ 444 register struct file *fp; 445 register struct filedesc *fdp = p->p_fd; 446 struct uio auio; 447 register struct iovec *iov; 448 struct iovec *needfree; 449 struct iovec aiov[UIO_SMALLIOV]; 450 long i, cnt, error = 0; 451 u_int iovlen; 452#ifdef KTRACE 453 struct iovec *ktriov = NULL; 454 struct uio ktruio; 455#endif 456 457 if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL) 458 return (EBADF); 459 /* note: can't use iovlen until iovcnt is validated */ 460 iovlen = uap->iovcnt * sizeof (struct iovec); 461 if (uap->iovcnt > UIO_SMALLIOV) { 462 if (uap->iovcnt > UIO_MAXIOV) { 463 needfree = NULL; 464 error = EINVAL; 465 goto done; 466 } 467 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 468 needfree = iov; 469 } else { 470 iov = aiov; 471 needfree = NULL; 472 } 473 auio.uio_iov = iov; 474 auio.uio_iovcnt = uap->iovcnt; 475 auio.uio_rw = UIO_WRITE; 476 auio.uio_segflg = UIO_USERSPACE; 477 auio.uio_procp = p; 478 auio.uio_offset = -1; 479 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 480 goto done; 481 auio.uio_resid = 0; 482 for (i = 0; i < uap->iovcnt; i++) { 483 if (iov->iov_len > INT_MAX - auio.uio_resid) { 484 error = EINVAL; 485 goto done; 486 } 487 auio.uio_resid += iov->iov_len; 488 iov++; 489 } 490#ifdef KTRACE 491 /* 492 * if tracing, save a copy of iovec and uio 493 */ 494 if (KTRPOINT(p, KTR_GENIO)) { 495 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 496 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 497 ktruio = auio; 498 } 499#endif 500 cnt = auio.uio_resid; 501 if (fp->f_type == DTYPE_VNODE) 502 bwillwrite(); 503 if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) { 504 if (auio.uio_resid != cnt && (error == ERESTART || 505 error == EINTR || error == EWOULDBLOCK)) 506 error = 0; 507 if (error == EPIPE) 508 psignal(p, SIGPIPE); 509 } 510 cnt -= auio.uio_resid; 511#ifdef KTRACE 512 if (ktriov != NULL) { 513 if (error == 0) { 514 ktruio.uio_iov = ktriov; 515 ktruio.uio_resid = cnt; 516 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio, 517 error); 518 } 519 FREE(ktriov, M_TEMP); 520 } 521#endif 522 p->p_retval[0] = cnt; 523done: 524 fdrop(fp, p); 525 if (needfree) 526 FREE(needfree, M_IOV); 527 return (error); 528} 529 530/* 531 * Ioctl system call 532 */ 533#ifndef _SYS_SYSPROTO_H_ 534struct ioctl_args { 535 int fd; 536 u_long com; 537 caddr_t data; 538}; 539#endif 540/* ARGSUSED */ 541int 542ioctl(p, uap) 543 struct proc *p; 544 register struct ioctl_args *uap; 545{ 546 register struct file *fp; 547 register struct filedesc *fdp; 548 register u_long com; 549 int error; 550 register u_int size; 551 caddr_t data, memp; 552 int tmp; 553#define STK_PARAMS 128 554 union { 555 char stkbuf[STK_PARAMS]; 556 long align; 557 } ubuf; 558 559 fdp = p->p_fd; 560 if ((u_int)uap->fd >= fdp->fd_nfiles || 561 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 562 return (EBADF); 563 564 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 565 return (EBADF); 566 567 switch (com = uap->com) { 568 case FIONCLEX: 569 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 570 return (0); 571 case FIOCLEX: 572 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 573 return (0); 574 } 575 576 /* 577 * Interpret high order word to find amount of data to be 578 * copied to/from the user's address space. 579 */ 580 size = IOCPARM_LEN(com); 581 if (size > IOCPARM_MAX) 582 return (ENOTTY); 583 584 fhold(fp); 585 586 memp = NULL; 587 if (size > sizeof (ubuf.stkbuf)) { 588 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 589 data = memp; 590 } else { 591 data = ubuf.stkbuf; 592 } 593 if (com&IOC_IN) { 594 if (size) { 595 error = copyin(uap->data, data, (u_int)size); 596 if (error) { 597 if (memp) 598 free(memp, M_IOCTLOPS); 599 fdrop(fp, p); 600 return (error); 601 } 602 } else { 603 *(caddr_t *)data = uap->data; 604 } 605 } else if ((com&IOC_OUT) && size) { 606 /* 607 * Zero the buffer so the user always 608 * gets back something deterministic. 609 */ 610 bzero(data, size); 611 } else if (com&IOC_VOID) { 612 *(caddr_t *)data = uap->data; 613 } 614 615 switch (com) { 616 617 case FIONBIO: 618 if ((tmp = *(int *)data)) 619 fp->f_flag |= FNONBLOCK; 620 else 621 fp->f_flag &= ~FNONBLOCK; 622 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); 623 break; 624 625 case FIOASYNC: 626 if ((tmp = *(int *)data)) 627 fp->f_flag |= FASYNC; 628 else 629 fp->f_flag &= ~FASYNC; 630 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); 631 break; 632 633 default: 634 error = fo_ioctl(fp, com, data, p); 635 /* 636 * Copy any data to user, size was 637 * already set and checked above. 638 */ 639 if (error == 0 && (com&IOC_OUT) && size) 640 error = copyout(data, uap->data, (u_int)size); 641 break; 642 } 643 if (memp) 644 free(memp, M_IOCTLOPS); 645 fdrop(fp, p); 646 return (error); 647} 648 649static int nselcoll; /* Select collisions since boot */ 650int selwait; 651SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 652 653/* 654 * Select system call. 655 */ 656#ifndef _SYS_SYSPROTO_H_ 657struct select_args { 658 int nd; 659 fd_set *in, *ou, *ex; 660 struct timeval *tv; 661}; 662#endif 663int 664select(p, uap) 665 register struct proc *p; 666 register struct select_args *uap; 667{ 668 /* 669 * The magic 2048 here is chosen to be just enough for FD_SETSIZE 670 * infds with the new FD_SETSIZE of 1024, and more than enough for 671 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 672 * of 256. 673 */ 674 fd_mask s_selbits[howmany(2048, NFDBITS)]; 675 fd_mask *ibits[3], *obits[3], *selbits, *sbp; 676 struct timeval atv, rtv, ttv; 677 int s, ncoll, error, timo; 678 u_int nbufbytes, ncpbytes, nfdbits; 679 680 if (uap->nd < 0) 681 return (EINVAL); 682 if (uap->nd > p->p_fd->fd_nfiles) 683 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 684 685 /* 686 * Allocate just enough bits for the non-null fd_sets. Use the 687 * preallocated auto buffer if possible. 688 */ 689 nfdbits = roundup(uap->nd, NFDBITS); 690 ncpbytes = nfdbits / NBBY; 691 nbufbytes = 0; 692 if (uap->in != NULL) 693 nbufbytes += 2 * ncpbytes; 694 if (uap->ou != NULL) 695 nbufbytes += 2 * ncpbytes; 696 if (uap->ex != NULL) 697 nbufbytes += 2 * ncpbytes; 698 if (nbufbytes <= sizeof s_selbits) 699 selbits = &s_selbits[0]; 700 else 701 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 702 703 /* 704 * Assign pointers into the bit buffers and fetch the input bits. 705 * Put the output buffers together so that they can be bzeroed 706 * together. 707 */ 708 sbp = selbits; 709#define getbits(name, x) \ 710 do { \ 711 if (uap->name == NULL) \ 712 ibits[x] = NULL; \ 713 else { \ 714 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 715 obits[x] = sbp; \ 716 sbp += ncpbytes / sizeof *sbp; \ 717 error = copyin(uap->name, ibits[x], ncpbytes); \ 718 if (error != 0) { \ 719 PROC_LOCK(p); \ 720 goto done; \ 721 } \ 722 } \ 723 } while (0) 724 getbits(in, 0); 725 getbits(ou, 1); 726 getbits(ex, 2); 727#undef getbits 728 if (nbufbytes != 0) 729 bzero(selbits, nbufbytes / 2); 730 731 if (uap->tv) { 732 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 733 sizeof (atv)); 734 if (error) { 735 PROC_LOCK(p); 736 goto done; 737 } 738 if (itimerfix(&atv)) { 739 error = EINVAL; 740 PROC_LOCK(p); 741 goto done; 742 } 743 getmicrouptime(&rtv); 744 timevaladd(&atv, &rtv); 745 } else { 746 atv.tv_sec = 0; 747 atv.tv_usec = 0; 748 } 749 timo = 0; 750 PROC_LOCK(p); 751retry: 752 ncoll = nselcoll; 753 p->p_flag |= P_SELECT; 754 PROC_UNLOCK(p); 755 error = selscan(p, ibits, obits, uap->nd); 756 PROC_LOCK(p); 757 if (error || p->p_retval[0]) 758 goto done; 759 if (atv.tv_sec || atv.tv_usec) { 760 getmicrouptime(&rtv); 761 if (timevalcmp(&rtv, &atv, >=)) 762 goto done; 763 ttv = atv; 764 timevalsub(&ttv, &rtv); 765 timo = ttv.tv_sec > 24 * 60 * 60 ? 766 24 * 60 * 60 * hz : tvtohz(&ttv); 767 } 768 s = splhigh(); 769 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 770 splx(s); 771 goto retry; 772 } 773 p->p_flag &= ~P_SELECT; 774 775 error = msleep((caddr_t)&selwait, &p->p_mtx, PSOCK | PCATCH, "select", 776 timo); 777 778 splx(s); 779 if (error == 0) 780 goto retry; 781done: 782 p->p_flag &= ~P_SELECT; 783 PROC_UNLOCK(p); 784 /* select is not restarted after signals... */ 785 if (error == ERESTART) 786 error = EINTR; 787 if (error == EWOULDBLOCK) 788 error = 0; 789#define putbits(name, x) \ 790 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 791 error = error2; 792 if (error == 0) { 793 int error2; 794 795 putbits(in, 0); 796 putbits(ou, 1); 797 putbits(ex, 2); 798#undef putbits 799 } 800 if (selbits != &s_selbits[0]) 801 free(selbits, M_SELECT); 802 return (error); 803} 804 805static int 806selscan(p, ibits, obits, nfd) 807 struct proc *p; 808 fd_mask **ibits, **obits; 809 int nfd; 810{ 811 struct filedesc *fdp = p->p_fd; 812 int msk, i, fd; 813 fd_mask bits; 814 struct file *fp; 815 int n = 0; 816 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 817 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 818 819 for (msk = 0; msk < 3; msk++) { 820 if (ibits[msk] == NULL) 821 continue; 822 for (i = 0; i < nfd; i += NFDBITS) { 823 bits = ibits[msk][i/NFDBITS]; 824 /* ffs(int mask) not portable, fd_mask is long */ 825 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 826 if (!(bits & 1)) 827 continue; 828 fp = fdp->fd_ofiles[fd]; 829 if (fp == NULL) 830 return (EBADF); 831 if (fo_poll(fp, flag[msk], fp->f_cred, p)) { 832 obits[msk][(fd)/NFDBITS] |= 833 ((fd_mask)1 << ((fd) % NFDBITS)); 834 n++; 835 } 836 } 837 } 838 } 839 p->p_retval[0] = n; 840 return (0); 841} 842 843/* 844 * Poll system call. 845 */ 846#ifndef _SYS_SYSPROTO_H_ 847struct poll_args { 848 struct pollfd *fds; 849 u_int nfds; 850 int timeout; 851}; 852#endif 853int 854poll(p, uap) 855 register struct proc *p; 856 register struct poll_args *uap; 857{ 858 caddr_t bits; 859 char smallbits[32 * sizeof(struct pollfd)]; 860 struct timeval atv, rtv, ttv; 861 int s, ncoll, error = 0, timo, lim, nfds; 862 size_t ni; 863 864 nfds = SCARG(uap, nfds); 865 /* 866 * This is kinda bogus. We have fd limits, but that doesn't 867 * map too well to the size of the pfd[] array. Make sure 868 * we let the process use at least FD_SETSIZE entries. 869 * The specs say we only have to support OPEN_MAX entries (64). 870 */ 871 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); 872 lim = min(lim, FD_SETSIZE); 873 if (nfds > lim) 874 return (EINVAL); 875 ni = nfds * sizeof(struct pollfd); 876 if (ni > sizeof(smallbits)) 877 bits = malloc(ni, M_TEMP, M_WAITOK); 878 else 879 bits = smallbits; 880 error = copyin(SCARG(uap, fds), bits, ni); 881 PROC_LOCK(p); 882 if (error) 883 goto done; 884 if (SCARG(uap, timeout) != INFTIM) { 885 atv.tv_sec = SCARG(uap, timeout) / 1000; 886 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 887 if (itimerfix(&atv)) { 888 error = EINVAL; 889 goto done; 890 } 891 getmicrouptime(&rtv); 892 timevaladd(&atv, &rtv); 893 } else { 894 atv.tv_sec = 0; 895 atv.tv_usec = 0; 896 } 897 timo = 0; 898retry: 899 ncoll = nselcoll; 900 p->p_flag |= P_SELECT; 901 PROC_UNLOCK(p); 902 error = pollscan(p, (struct pollfd *)bits, nfds); 903 PROC_LOCK(p); 904 if (error || p->p_retval[0]) 905 goto done; 906 if (atv.tv_sec || atv.tv_usec) { 907 getmicrouptime(&rtv); 908 if (timevalcmp(&rtv, &atv, >=)) 909 goto done; 910 ttv = atv; 911 timevalsub(&ttv, &rtv); 912 timo = ttv.tv_sec > 24 * 60 * 60 ? 913 24 * 60 * 60 * hz : tvtohz(&ttv); 914 } 915 s = splhigh(); 916 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 917 splx(s); 918 goto retry; 919 } 920 p->p_flag &= ~P_SELECT; 921 error = msleep((caddr_t)&selwait, &p->p_mtx, PSOCK | PCATCH, "poll", 922 timo); 923 splx(s); 924 if (error == 0) 925 goto retry; 926done: 927 p->p_flag &= ~P_SELECT; 928 PROC_UNLOCK(p); 929 /* poll is not restarted after signals... */ 930 if (error == ERESTART) 931 error = EINTR; 932 if (error == EWOULDBLOCK) 933 error = 0; 934 if (error == 0) { 935 error = copyout(bits, SCARG(uap, fds), ni); 936 if (error) 937 goto out; 938 } 939out: 940 if (ni > sizeof(smallbits)) 941 free(bits, M_TEMP); 942 return (error); 943} 944 945static int 946pollscan(p, fds, nfd) 947 struct proc *p; 948 struct pollfd *fds; 949 int nfd; 950{ 951 register struct filedesc *fdp = p->p_fd; 952 int i; 953 struct file *fp; 954 int n = 0; 955 956 for (i = 0; i < nfd; i++, fds++) { 957 if (fds->fd >= fdp->fd_nfiles) { 958 fds->revents = POLLNVAL; 959 n++; 960 } else if (fds->fd < 0) { 961 fds->revents = 0; 962 } else { 963 fp = fdp->fd_ofiles[fds->fd]; 964 if (fp == NULL) { 965 fds->revents = POLLNVAL; 966 n++; 967 } else { 968 /* 969 * Note: backend also returns POLLHUP and 970 * POLLERR if appropriate. 971 */ 972 fds->revents = fo_poll(fp, fds->events, 973 fp->f_cred, p); 974 if (fds->revents != 0) 975 n++; 976 } 977 } 978 } 979 p->p_retval[0] = n; 980 return (0); 981} 982 983/* 984 * OpenBSD poll system call. 985 * XXX this isn't quite a true representation.. OpenBSD uses select ops. 986 */ 987#ifndef _SYS_SYSPROTO_H_ 988struct openbsd_poll_args { 989 struct pollfd *fds; 990 u_int nfds; 991 int timeout; 992}; 993#endif 994int 995openbsd_poll(p, uap) 996 register struct proc *p; 997 register struct openbsd_poll_args *uap; 998{ 999 return (poll(p, (struct poll_args *)uap)); 1000} 1001 1002/*ARGSUSED*/ 1003int 1004seltrue(dev, events, p) 1005 dev_t dev; 1006 int events; 1007 struct proc *p; 1008{ 1009 1010 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 1011} 1012 1013/* 1014 * Record a select request. 1015 */ 1016void 1017selrecord(selector, sip) 1018 struct proc *selector; 1019 struct selinfo *sip; 1020{ 1021 struct proc *p; 1022 pid_t mypid; 1023 1024 mypid = selector->p_pid; 1025 if (sip->si_pid == mypid) 1026 return; 1027 if (sip->si_pid && (p = pfind(sip->si_pid))) { 1028 mtx_enter(&sched_lock, MTX_SPIN); 1029 if (p->p_wchan == (caddr_t)&selwait) { 1030 mtx_exit(&sched_lock, MTX_SPIN); 1031 sip->si_flags |= SI_COLL; 1032 return; 1033 } 1034 mtx_exit(&sched_lock, MTX_SPIN); 1035 } 1036 sip->si_pid = mypid; 1037} 1038 1039/* 1040 * Do a wakeup when a selectable event occurs. 1041 */ 1042void 1043selwakeup(sip) 1044 register struct selinfo *sip; 1045{ 1046 register struct proc *p; 1047 1048 if (sip->si_pid == 0) 1049 return; 1050 if (sip->si_flags & SI_COLL) { 1051 nselcoll++; 1052 sip->si_flags &= ~SI_COLL; 1053 wakeup((caddr_t)&selwait); 1054 } 1055 p = pfind(sip->si_pid); 1056 sip->si_pid = 0; 1057 if (p != NULL) { 1058 mtx_enter(&sched_lock, MTX_SPIN); 1059 if (p->p_wchan == (caddr_t)&selwait) { 1060 if (p->p_stat == SSLEEP) 1061 setrunnable(p); 1062 else 1063 unsleep(p); 1064 mtx_exit(&sched_lock, MTX_SPIN); 1065 } else { 1066 mtx_exit(&sched_lock, MTX_SPIN); 1067 PROC_LOCK(p); 1068 p->p_flag &= ~P_SELECT; 1069 PROC_UNLOCK(p); 1070 } 1071 } 1072} 1073