sys_generic.c revision 63057
1/* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD: head/sys/kern/sys_generic.c 63057 2000-07-13 02:12:25Z jhb $ 40 */ 41 42#include "opt_ktrace.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/sysproto.h> 47#include <sys/filedesc.h> 48#include <sys/filio.h> 49#include <sys/fcntl.h> 50#include <sys/file.h> 51#include <sys/proc.h> 52#include <sys/signalvar.h> 53#include <sys/socketvar.h> 54#include <sys/uio.h> 55#include <sys/kernel.h> 56#include <sys/malloc.h> 57#include <sys/poll.h> 58#include <sys/sysctl.h> 59#include <sys/sysent.h> 60#ifdef KTRACE 61#include <sys/ktrace.h> 62#endif 63 64#include <machine/limits.h> 65 66static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 67static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 68MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 69 70static int pollscan __P((struct proc *, struct pollfd *, int)); 71static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); 72static int dofileread __P((struct proc *, struct file *, int, void *, 73 size_t, off_t, int)); 74static int dofilewrite __P((struct proc *, struct file *, int, 75 const void *, size_t, off_t, int)); 76 77struct file* 78getfp(fdp, fd, flag) 79 struct filedesc* fdp; 80 int fd, flag; 81{ 82 struct file* fp; 83 84 if (((u_int)fd) >= fdp->fd_nfiles || 85 (fp = fdp->fd_ofiles[fd]) == NULL || 86 (fp->f_flag & flag) == 0) 87 return (NULL); 88 return (fp); 89} 90 91/* 92 * Read system call. 93 */ 94#ifndef _SYS_SYSPROTO_H_ 95struct read_args { 96 int fd; 97 void *buf; 98 size_t nbyte; 99}; 100#endif 101int 102read(p, uap) 103 struct proc *p; 104 register struct read_args *uap; 105{ 106 register struct file *fp; 107 108 if ((fp = getfp(p->p_fd, uap->fd, FREAD)) == NULL) 109 return (EBADF); 110 return (dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0)); 111} 112 113/* 114 * Pread system call 115 */ 116#ifndef _SYS_SYSPROTO_H_ 117struct pread_args { 118 int fd; 119 void *buf; 120 size_t nbyte; 121 int pad; 122 off_t offset; 123}; 124#endif 125int 126pread(p, uap) 127 struct proc *p; 128 register struct pread_args *uap; 129{ 130 register struct file *fp; 131 132 if ((fp = getfp(p->p_fd, uap->fd, FREAD)) == NULL) 133 return (EBADF); 134 if (fp->f_type != DTYPE_VNODE) 135 return (ESPIPE); 136 return (dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, uap->offset, 137 FOF_OFFSET)); 138} 139 140/* 141 * Code common for read and pread 142 */ 143int 144dofileread(p, fp, fd, buf, nbyte, offset, flags) 145 struct proc *p; 146 struct file *fp; 147 int fd, flags; 148 void *buf; 149 size_t nbyte; 150 off_t offset; 151{ 152 struct uio auio; 153 struct iovec aiov; 154 long cnt, error = 0; 155#ifdef KTRACE 156 struct iovec ktriov; 157 struct uio ktruio; 158#endif 159 160 aiov.iov_base = (caddr_t)buf; 161 aiov.iov_len = nbyte; 162 auio.uio_iov = &aiov; 163 auio.uio_iovcnt = 1; 164 auio.uio_offset = offset; 165 if (nbyte > INT_MAX) 166 return (EINVAL); 167 auio.uio_resid = nbyte; 168 auio.uio_rw = UIO_READ; 169 auio.uio_segflg = UIO_USERSPACE; 170 auio.uio_procp = p; 171#ifdef KTRACE 172 /* 173 * if tracing, save a copy of iovec 174 */ 175 if (KTRPOINT(p, KTR_GENIO)) { 176 ktriov = aiov; 177 ktruio = auio; 178 } 179#endif 180 cnt = nbyte; 181 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) 182 if (auio.uio_resid != cnt && (error == ERESTART || 183 error == EINTR || error == EWOULDBLOCK)) 184 error = 0; 185 cnt -= auio.uio_resid; 186#ifdef KTRACE 187 if (KTRPOINT(p, KTR_GENIO) && error == 0) { 188 ktruio.uio_iov = &ktriov; 189 ktruio.uio_resid = cnt; 190 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error); 191 } 192#endif 193 p->p_retval[0] = cnt; 194 return (error); 195} 196 197/* 198 * Scatter read system call. 199 */ 200#ifndef _SYS_SYSPROTO_H_ 201struct readv_args { 202 int fd; 203 struct iovec *iovp; 204 u_int iovcnt; 205}; 206#endif 207int 208readv(p, uap) 209 struct proc *p; 210 register struct readv_args *uap; 211{ 212 register struct file *fp; 213 register struct filedesc *fdp = p->p_fd; 214 struct uio auio; 215 register struct iovec *iov; 216 struct iovec *needfree; 217 struct iovec aiov[UIO_SMALLIOV]; 218 long i, cnt, error = 0; 219 u_int iovlen; 220#ifdef KTRACE 221 struct iovec *ktriov = NULL; 222 struct uio ktruio; 223#endif 224 225 if ((fp = getfp(fdp, uap->fd, FREAD)) == NULL) 226 return (EBADF); 227 /* note: can't use iovlen until iovcnt is validated */ 228 iovlen = uap->iovcnt * sizeof (struct iovec); 229 if (uap->iovcnt > UIO_SMALLIOV) { 230 if (uap->iovcnt > UIO_MAXIOV) 231 return (EINVAL); 232 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 233 needfree = iov; 234 } else { 235 iov = aiov; 236 needfree = NULL; 237 } 238 auio.uio_iov = iov; 239 auio.uio_iovcnt = uap->iovcnt; 240 auio.uio_rw = UIO_READ; 241 auio.uio_segflg = UIO_USERSPACE; 242 auio.uio_procp = p; 243 auio.uio_offset = -1; 244 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 245 goto done; 246 auio.uio_resid = 0; 247 for (i = 0; i < uap->iovcnt; i++) { 248 if (iov->iov_len > INT_MAX - auio.uio_resid) { 249 error = EINVAL; 250 goto done; 251 } 252 auio.uio_resid += iov->iov_len; 253 iov++; 254 } 255#ifdef KTRACE 256 /* 257 * if tracing, save a copy of iovec 258 */ 259 if (KTRPOINT(p, KTR_GENIO)) { 260 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 261 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 262 ktruio = auio; 263 } 264#endif 265 cnt = auio.uio_resid; 266 if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) 267 if (auio.uio_resid != cnt && (error == ERESTART || 268 error == EINTR || error == EWOULDBLOCK)) 269 error = 0; 270 cnt -= auio.uio_resid; 271#ifdef KTRACE 272 if (ktriov != NULL) { 273 if (error == 0) { 274 ktruio.uio_iov = ktriov; 275 ktruio.uio_resid = cnt; 276 ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio, 277 error); 278 } 279 FREE(ktriov, M_TEMP); 280 } 281#endif 282 p->p_retval[0] = cnt; 283done: 284 if (needfree) 285 FREE(needfree, M_IOV); 286 return (error); 287} 288 289/* 290 * Write system call 291 */ 292#ifndef _SYS_SYSPROTO_H_ 293struct write_args { 294 int fd; 295 const void *buf; 296 size_t nbyte; 297}; 298#endif 299int 300write(p, uap) 301 struct proc *p; 302 register struct write_args *uap; 303{ 304 register struct file *fp; 305 306 if ((fp = getfp(p->p_fd, uap->fd, FWRITE)) == NULL) 307 return (EBADF); 308 return (dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0)); 309} 310 311/* 312 * Pwrite system call 313 */ 314#ifndef _SYS_SYSPROTO_H_ 315struct pwrite_args { 316 int fd; 317 const void *buf; 318 size_t nbyte; 319 int pad; 320 off_t offset; 321}; 322#endif 323int 324pwrite(p, uap) 325 struct proc *p; 326 register struct pwrite_args *uap; 327{ 328 register struct file *fp; 329 330 if ((fp = getfp(p->p_fd, uap->fd, FWRITE)) == NULL) 331 return (EBADF); 332 if (fp->f_type != DTYPE_VNODE) 333 return (ESPIPE); 334 return (dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, uap->offset, 335 FOF_OFFSET)); 336} 337 338static int 339dofilewrite(p, fp, fd, buf, nbyte, offset, flags) 340 struct proc *p; 341 struct file *fp; 342 int fd, flags; 343 const void *buf; 344 size_t nbyte; 345 off_t offset; 346{ 347 struct uio auio; 348 struct iovec aiov; 349 long cnt, error = 0; 350#ifdef KTRACE 351 struct iovec ktriov; 352 struct uio ktruio; 353#endif 354 355 aiov.iov_base = (void *)buf; 356 aiov.iov_len = nbyte; 357 auio.uio_iov = &aiov; 358 auio.uio_iovcnt = 1; 359 auio.uio_offset = offset; 360 if (nbyte > INT_MAX) 361 return (EINVAL); 362 auio.uio_resid = nbyte; 363 auio.uio_rw = UIO_WRITE; 364 auio.uio_segflg = UIO_USERSPACE; 365 auio.uio_procp = p; 366#ifdef KTRACE 367 /* 368 * if tracing, save a copy of iovec and uio 369 */ 370 if (KTRPOINT(p, KTR_GENIO)) { 371 ktriov = aiov; 372 ktruio = auio; 373 } 374#endif 375 cnt = nbyte; 376 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { 377 if (auio.uio_resid != cnt && (error == ERESTART || 378 error == EINTR || error == EWOULDBLOCK)) 379 error = 0; 380 if (error == EPIPE) 381 psignal(p, SIGPIPE); 382 } 383 cnt -= auio.uio_resid; 384#ifdef KTRACE 385 if (KTRPOINT(p, KTR_GENIO) && error == 0) { 386 ktruio.uio_iov = &ktriov; 387 ktruio.uio_resid = cnt; 388 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error); 389 } 390#endif 391 p->p_retval[0] = cnt; 392 return (error); 393} 394 395/* 396 * Gather write system call 397 */ 398#ifndef _SYS_SYSPROTO_H_ 399struct writev_args { 400 int fd; 401 struct iovec *iovp; 402 u_int iovcnt; 403}; 404#endif 405int 406writev(p, uap) 407 struct proc *p; 408 register struct writev_args *uap; 409{ 410 register struct file *fp; 411 register struct filedesc *fdp = p->p_fd; 412 struct uio auio; 413 register struct iovec *iov; 414 struct iovec *needfree; 415 struct iovec aiov[UIO_SMALLIOV]; 416 long i, cnt, error = 0; 417 u_int iovlen; 418#ifdef KTRACE 419 struct iovec *ktriov = NULL; 420 struct uio ktruio; 421#endif 422 423 if ((fp = getfp(fdp, uap->fd, FWRITE)) == NULL) 424 return (EBADF); 425 fhold(fp); 426 /* note: can't use iovlen until iovcnt is validated */ 427 iovlen = uap->iovcnt * sizeof (struct iovec); 428 if (uap->iovcnt > UIO_SMALLIOV) { 429 if (uap->iovcnt > UIO_MAXIOV) { 430 needfree = NULL; 431 error = EINVAL; 432 goto done; 433 } 434 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 435 needfree = iov; 436 } else { 437 iov = aiov; 438 needfree = NULL; 439 } 440 auio.uio_iov = iov; 441 auio.uio_iovcnt = uap->iovcnt; 442 auio.uio_rw = UIO_WRITE; 443 auio.uio_segflg = UIO_USERSPACE; 444 auio.uio_procp = p; 445 auio.uio_offset = -1; 446 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 447 goto done; 448 auio.uio_resid = 0; 449 for (i = 0; i < uap->iovcnt; i++) { 450 if (iov->iov_len > INT_MAX - auio.uio_resid) { 451 error = EINVAL; 452 goto done; 453 } 454 auio.uio_resid += iov->iov_len; 455 iov++; 456 } 457#ifdef KTRACE 458 /* 459 * if tracing, save a copy of iovec and uio 460 */ 461 if (KTRPOINT(p, KTR_GENIO)) { 462 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 463 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 464 ktruio = auio; 465 } 466#endif 467 cnt = auio.uio_resid; 468 if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) { 469 if (auio.uio_resid != cnt && (error == ERESTART || 470 error == EINTR || error == EWOULDBLOCK)) 471 error = 0; 472 if (error == EPIPE) 473 psignal(p, SIGPIPE); 474 } 475 cnt -= auio.uio_resid; 476#ifdef KTRACE 477 if (ktriov != NULL) { 478 if (error == 0) { 479 ktruio.uio_iov = ktriov; 480 ktruio.uio_resid = cnt; 481 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio, 482 error); 483 } 484 FREE(ktriov, M_TEMP); 485 } 486#endif 487 p->p_retval[0] = cnt; 488done: 489 fdrop(fp, p); 490 if (needfree) 491 FREE(needfree, M_IOV); 492 return (error); 493} 494 495/* 496 * Ioctl system call 497 */ 498#ifndef _SYS_SYSPROTO_H_ 499struct ioctl_args { 500 int fd; 501 u_long com; 502 caddr_t data; 503}; 504#endif 505/* ARGSUSED */ 506int 507ioctl(p, uap) 508 struct proc *p; 509 register struct ioctl_args *uap; 510{ 511 register struct file *fp; 512 register struct filedesc *fdp; 513 register u_long com; 514 int error; 515 register u_int size; 516 caddr_t data, memp; 517 int tmp; 518#define STK_PARAMS 128 519 union { 520 char stkbuf[STK_PARAMS]; 521 long align; 522 } ubuf; 523 524 fdp = p->p_fd; 525 if ((u_int)uap->fd >= fdp->fd_nfiles || 526 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 527 return (EBADF); 528 529 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 530 return (EBADF); 531 532 switch (com = uap->com) { 533 case FIONCLEX: 534 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 535 return (0); 536 case FIOCLEX: 537 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 538 return (0); 539 } 540 541 /* 542 * Interpret high order word to find amount of data to be 543 * copied to/from the user's address space. 544 */ 545 size = IOCPARM_LEN(com); 546 if (size > IOCPARM_MAX) 547 return (ENOTTY); 548 memp = NULL; 549 if (size > sizeof (ubuf.stkbuf)) { 550 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 551 data = memp; 552 } else 553 data = ubuf.stkbuf; 554 if (com&IOC_IN) { 555 if (size) { 556 error = copyin(uap->data, data, (u_int)size); 557 if (error) { 558 if (memp) 559 free(memp, M_IOCTLOPS); 560 return (error); 561 } 562 } else 563 *(caddr_t *)data = uap->data; 564 } else if ((com&IOC_OUT) && size) 565 /* 566 * Zero the buffer so the user always 567 * gets back something deterministic. 568 */ 569 bzero(data, size); 570 else if (com&IOC_VOID) 571 *(caddr_t *)data = uap->data; 572 573 switch (com) { 574 575 case FIONBIO: 576 if ((tmp = *(int *)data)) 577 fp->f_flag |= FNONBLOCK; 578 else 579 fp->f_flag &= ~FNONBLOCK; 580 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); 581 break; 582 583 case FIOASYNC: 584 if ((tmp = *(int *)data)) 585 fp->f_flag |= FASYNC; 586 else 587 fp->f_flag &= ~FASYNC; 588 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); 589 break; 590 591 default: 592 error = fo_ioctl(fp, com, data, p); 593 /* 594 * Copy any data to user, size was 595 * already set and checked above. 596 */ 597 if (error == 0 && (com&IOC_OUT) && size) 598 error = copyout(data, uap->data, (u_int)size); 599 break; 600 } 601 if (memp) 602 free(memp, M_IOCTLOPS); 603 return (error); 604} 605 606static int nselcoll; /* Select collisions since boot */ 607int selwait; 608SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 609 610/* 611 * Select system call. 612 */ 613#ifndef _SYS_SYSPROTO_H_ 614struct select_args { 615 int nd; 616 fd_set *in, *ou, *ex; 617 struct timeval *tv; 618}; 619#endif 620int 621select(p, uap) 622 register struct proc *p; 623 register struct select_args *uap; 624{ 625 /* 626 * The magic 2048 here is chosen to be just enough for FD_SETSIZE 627 * infds with the new FD_SETSIZE of 1024, and more than enough for 628 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 629 * of 256. 630 */ 631 fd_mask s_selbits[howmany(2048, NFDBITS)]; 632 fd_mask *ibits[3], *obits[3], *selbits, *sbp; 633 struct timeval atv, rtv, ttv; 634 int s, ncoll, error, timo; 635 u_int nbufbytes, ncpbytes, nfdbits; 636 637 if (uap->nd < 0) 638 return (EINVAL); 639 if (uap->nd > p->p_fd->fd_nfiles) 640 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 641 642 /* 643 * Allocate just enough bits for the non-null fd_sets. Use the 644 * preallocated auto buffer if possible. 645 */ 646 nfdbits = roundup(uap->nd, NFDBITS); 647 ncpbytes = nfdbits / NBBY; 648 nbufbytes = 0; 649 if (uap->in != NULL) 650 nbufbytes += 2 * ncpbytes; 651 if (uap->ou != NULL) 652 nbufbytes += 2 * ncpbytes; 653 if (uap->ex != NULL) 654 nbufbytes += 2 * ncpbytes; 655 if (nbufbytes <= sizeof s_selbits) 656 selbits = &s_selbits[0]; 657 else 658 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 659 660 /* 661 * Assign pointers into the bit buffers and fetch the input bits. 662 * Put the output buffers together so that they can be bzeroed 663 * together. 664 */ 665 sbp = selbits; 666#define getbits(name, x) \ 667 do { \ 668 if (uap->name == NULL) \ 669 ibits[x] = NULL; \ 670 else { \ 671 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 672 obits[x] = sbp; \ 673 sbp += ncpbytes / sizeof *sbp; \ 674 error = copyin(uap->name, ibits[x], ncpbytes); \ 675 if (error != 0) \ 676 goto done; \ 677 } \ 678 } while (0) 679 getbits(in, 0); 680 getbits(ou, 1); 681 getbits(ex, 2); 682#undef getbits 683 if (nbufbytes != 0) 684 bzero(selbits, nbufbytes / 2); 685 686 if (uap->tv) { 687 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 688 sizeof (atv)); 689 if (error) 690 goto done; 691 if (itimerfix(&atv)) { 692 error = EINVAL; 693 goto done; 694 } 695 getmicrouptime(&rtv); 696 timevaladd(&atv, &rtv); 697 } else { 698 atv.tv_sec = 0; 699 atv.tv_usec = 0; 700 } 701 timo = 0; 702retry: 703 ncoll = nselcoll; 704 p->p_flag |= P_SELECT; 705 error = selscan(p, ibits, obits, uap->nd); 706 if (error || p->p_retval[0]) 707 goto done; 708 if (atv.tv_sec || atv.tv_usec) { 709 getmicrouptime(&rtv); 710 if (timevalcmp(&rtv, &atv, >=)) 711 goto done; 712 ttv = atv; 713 timevalsub(&ttv, &rtv); 714 timo = ttv.tv_sec > 24 * 60 * 60 ? 715 24 * 60 * 60 * hz : tvtohz(&ttv); 716 } 717 s = splhigh(); 718 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 719 splx(s); 720 goto retry; 721 } 722 p->p_flag &= ~P_SELECT; 723 724 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 725 726 splx(s); 727 if (error == 0) 728 goto retry; 729done: 730 p->p_flag &= ~P_SELECT; 731 /* select is not restarted after signals... */ 732 if (error == ERESTART) 733 error = EINTR; 734 if (error == EWOULDBLOCK) 735 error = 0; 736#define putbits(name, x) \ 737 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 738 error = error2; 739 if (error == 0) { 740 int error2; 741 742 putbits(in, 0); 743 putbits(ou, 1); 744 putbits(ex, 2); 745#undef putbits 746 } 747 if (selbits != &s_selbits[0]) 748 free(selbits, M_SELECT); 749 return (error); 750} 751 752static int 753selscan(p, ibits, obits, nfd) 754 struct proc *p; 755 fd_mask **ibits, **obits; 756 int nfd; 757{ 758 struct filedesc *fdp = p->p_fd; 759 int msk, i, fd; 760 fd_mask bits; 761 struct file *fp; 762 int n = 0; 763 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 764 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 765 766 for (msk = 0; msk < 3; msk++) { 767 if (ibits[msk] == NULL) 768 continue; 769 for (i = 0; i < nfd; i += NFDBITS) { 770 bits = ibits[msk][i/NFDBITS]; 771 /* ffs(int mask) not portable, fd_mask is long */ 772 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 773 if (!(bits & 1)) 774 continue; 775 fp = fdp->fd_ofiles[fd]; 776 if (fp == NULL) 777 return (EBADF); 778 if (fo_poll(fp, flag[msk], fp->f_cred, p)) { 779 obits[msk][(fd)/NFDBITS] |= 780 ((fd_mask)1 << ((fd) % NFDBITS)); 781 n++; 782 } 783 } 784 } 785 } 786 p->p_retval[0] = n; 787 return (0); 788} 789 790/* 791 * Poll system call. 792 */ 793#ifndef _SYS_SYSPROTO_H_ 794struct poll_args { 795 struct pollfd *fds; 796 u_int nfds; 797 int timeout; 798}; 799#endif 800int 801poll(p, uap) 802 register struct proc *p; 803 register struct poll_args *uap; 804{ 805 caddr_t bits; 806 char smallbits[32 * sizeof(struct pollfd)]; 807 struct timeval atv, rtv, ttv; 808 int s, ncoll, error = 0, timo; 809 size_t ni; 810 811 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { 812 /* forgiving; slightly wrong */ 813 SCARG(uap, nfds) = p->p_fd->fd_nfiles; 814 } 815 ni = SCARG(uap, nfds) * sizeof(struct pollfd); 816 if (ni > sizeof(smallbits)) 817 bits = malloc(ni, M_TEMP, M_WAITOK); 818 else 819 bits = smallbits; 820 error = copyin(SCARG(uap, fds), bits, ni); 821 if (error) 822 goto done; 823 if (SCARG(uap, timeout) != INFTIM) { 824 atv.tv_sec = SCARG(uap, timeout) / 1000; 825 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 826 if (itimerfix(&atv)) { 827 error = EINVAL; 828 goto done; 829 } 830 getmicrouptime(&rtv); 831 timevaladd(&atv, &rtv); 832 } else { 833 atv.tv_sec = 0; 834 atv.tv_usec = 0; 835 } 836 timo = 0; 837retry: 838 ncoll = nselcoll; 839 p->p_flag |= P_SELECT; 840 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds)); 841 if (error || p->p_retval[0]) 842 goto done; 843 if (atv.tv_sec || atv.tv_usec) { 844 getmicrouptime(&rtv); 845 if (timevalcmp(&rtv, &atv, >=)) 846 goto done; 847 ttv = atv; 848 timevalsub(&ttv, &rtv); 849 timo = ttv.tv_sec > 24 * 60 * 60 ? 850 24 * 60 * 60 * hz : tvtohz(&ttv); 851 } 852 s = splhigh(); 853 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 854 splx(s); 855 goto retry; 856 } 857 p->p_flag &= ~P_SELECT; 858 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 859 splx(s); 860 if (error == 0) 861 goto retry; 862done: 863 p->p_flag &= ~P_SELECT; 864 /* poll is not restarted after signals... */ 865 if (error == ERESTART) 866 error = EINTR; 867 if (error == EWOULDBLOCK) 868 error = 0; 869 if (error == 0) { 870 error = copyout(bits, SCARG(uap, fds), ni); 871 if (error) 872 goto out; 873 } 874out: 875 if (ni > sizeof(smallbits)) 876 free(bits, M_TEMP); 877 return (error); 878} 879 880static int 881pollscan(p, fds, nfd) 882 struct proc *p; 883 struct pollfd *fds; 884 int nfd; 885{ 886 register struct filedesc *fdp = p->p_fd; 887 int i; 888 struct file *fp; 889 int n = 0; 890 891 for (i = 0; i < nfd; i++, fds++) { 892 if (fds->fd >= fdp->fd_nfiles) { 893 fds->revents = POLLNVAL; 894 n++; 895 } else if (fds->fd < 0) { 896 fds->revents = 0; 897 } else { 898 fp = fdp->fd_ofiles[fds->fd]; 899 if (fp == 0) { 900 fds->revents = POLLNVAL; 901 n++; 902 } else { 903 /* 904 * Note: backend also returns POLLHUP and 905 * POLLERR if appropriate. 906 */ 907 fds->revents = fo_poll(fp, fds->events, 908 fp->f_cred, p); 909 if (fds->revents != 0) 910 n++; 911 } 912 } 913 } 914 p->p_retval[0] = n; 915 return (0); 916} 917 918/* 919 * OpenBSD poll system call. 920 * XXX this isn't quite a true representation.. OpenBSD uses select ops. 921 */ 922#ifndef _SYS_SYSPROTO_H_ 923struct openbsd_poll_args { 924 struct pollfd *fds; 925 u_int nfds; 926 int timeout; 927}; 928#endif 929int 930openbsd_poll(p, uap) 931 register struct proc *p; 932 register struct openbsd_poll_args *uap; 933{ 934 return (poll(p, (struct poll_args *)uap)); 935} 936 937/*ARGSUSED*/ 938int 939seltrue(dev, events, p) 940 dev_t dev; 941 int events; 942 struct proc *p; 943{ 944 945 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 946} 947 948/* 949 * Record a select request. 950 */ 951void 952selrecord(selector, sip) 953 struct proc *selector; 954 struct selinfo *sip; 955{ 956 struct proc *p; 957 pid_t mypid; 958 959 mypid = selector->p_pid; 960 if (sip->si_pid == mypid) 961 return; 962 if (sip->si_pid && (p = pfind(sip->si_pid)) && 963 p->p_wchan == (caddr_t)&selwait) 964 sip->si_flags |= SI_COLL; 965 else 966 sip->si_pid = mypid; 967} 968 969/* 970 * Do a wakeup when a selectable event occurs. 971 */ 972void 973selwakeup(sip) 974 register struct selinfo *sip; 975{ 976 register struct proc *p; 977 int s; 978 979 if (sip->si_pid == 0) 980 return; 981 if (sip->si_flags & SI_COLL) { 982 nselcoll++; 983 sip->si_flags &= ~SI_COLL; 984 wakeup((caddr_t)&selwait); 985 } 986 p = pfind(sip->si_pid); 987 sip->si_pid = 0; 988 if (p != NULL) { 989 s = splhigh(); 990 if (p->p_wchan == (caddr_t)&selwait) { 991 if (p->p_stat == SSLEEP) 992 setrunnable(p); 993 else 994 unsleep(p); 995 } else if (p->p_flag & P_SELECT) 996 p->p_flag &= ~P_SELECT; 997 splx(s); 998 } 999} 1000