sys_generic.c revision 36846
1/* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $Id: sys_generic.c,v 1.38 1998/05/17 11:52:51 phk Exp $ 40 */ 41 42#include "opt_ktrace.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/sysproto.h> 47#include <sys/filedesc.h> 48#include <sys/filio.h> 49#include <sys/ttycom.h> 50#include <sys/fcntl.h> 51#include <sys/file.h> 52#include <sys/proc.h> 53#include <sys/signalvar.h> 54#include <sys/socketvar.h> 55#include <sys/uio.h> 56#include <sys/kernel.h> 57#include <sys/malloc.h> 58#include <sys/poll.h> 59#include <sys/sysent.h> 60#ifdef KTRACE 61#include <sys/ktrace.h> 62#endif 63 64static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 65static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 66MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 67 68static int pollscan __P((struct proc *, struct pollfd *, int)); 69static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); 70 71/* 72 * Read system call. 73 */ 74#ifndef _SYS_SYSPROTO_H_ 75struct read_args { 76 int fd; 77 char *buf; 78 u_int nbyte; 79}; 80#endif 81/* ARGSUSED */ 82int 83read(p, uap) 84 struct proc *p; 85 register struct read_args *uap; 86{ 87 register struct file *fp; 88 register struct filedesc *fdp = p->p_fd; 89 struct uio auio; 90 struct iovec aiov; 91 long cnt, error = 0; 92#ifdef KTRACE 93 struct iovec ktriov; 94#endif 95 96 if (((u_int)uap->fd) >= fdp->fd_nfiles || 97 (fp = fdp->fd_ofiles[uap->fd]) == NULL || 98 (fp->f_flag & FREAD) == 0) 99 return (EBADF); 100 aiov.iov_base = (caddr_t)uap->buf; 101 aiov.iov_len = uap->nbyte; 102 auio.uio_iov = &aiov; 103 auio.uio_iovcnt = 1; 104 auio.uio_offset = -1; 105 106 auio.uio_resid = uap->nbyte; 107 if (auio.uio_resid < 0) 108 return (EINVAL); 109 110 auio.uio_rw = UIO_READ; 111 auio.uio_segflg = UIO_USERSPACE; 112 auio.uio_procp = p; 113#ifdef KTRACE 114 /* 115 * if tracing, save a copy of iovec 116 */ 117 if (KTRPOINT(p, KTR_GENIO)) 118 ktriov = aiov; 119#endif 120 cnt = uap->nbyte; 121 if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))) 122 if (auio.uio_resid != cnt && (error == ERESTART || 123 error == EINTR || error == EWOULDBLOCK)) 124 error = 0; 125 cnt -= auio.uio_resid; 126#ifdef KTRACE 127 if (KTRPOINT(p, KTR_GENIO) && error == 0) 128 ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktriov, cnt, error); 129#endif 130 p->p_retval[0] = cnt; 131 return (error); 132} 133 134/* 135 * Scatter read system call. 136 */ 137#ifndef _SYS_SYSPROTO_H_ 138struct readv_args { 139 int fd; 140 struct iovec *iovp; 141 u_int iovcnt; 142}; 143#endif 144int 145readv(p, uap) 146 struct proc *p; 147 register struct readv_args *uap; 148{ 149 register struct file *fp; 150 register struct filedesc *fdp = p->p_fd; 151 struct uio auio; 152 register struct iovec *iov; 153 struct iovec *needfree; 154 struct iovec aiov[UIO_SMALLIOV]; 155 long i, cnt, error = 0; 156 u_int iovlen; 157#ifdef KTRACE 158 struct iovec *ktriov = NULL; 159#endif 160 161 if (((u_int)uap->fd) >= fdp->fd_nfiles || 162 (fp = fdp->fd_ofiles[uap->fd]) == NULL || 163 (fp->f_flag & FREAD) == 0) 164 return (EBADF); 165 /* note: can't use iovlen until iovcnt is validated */ 166 iovlen = uap->iovcnt * sizeof (struct iovec); 167 if (uap->iovcnt > UIO_SMALLIOV) { 168 if (uap->iovcnt > UIO_MAXIOV) 169 return (EINVAL); 170 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 171 needfree = iov; 172 } else { 173 iov = aiov; 174 needfree = NULL; 175 } 176 auio.uio_iov = iov; 177 auio.uio_iovcnt = uap->iovcnt; 178 auio.uio_rw = UIO_READ; 179 auio.uio_segflg = UIO_USERSPACE; 180 auio.uio_procp = p; 181 auio.uio_offset = -1; 182 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 183 goto done; 184 auio.uio_resid = 0; 185 for (i = 0; i < uap->iovcnt; i++) { 186 auio.uio_resid += iov->iov_len; 187 if (auio.uio_resid < 0) { 188 error = EINVAL; 189 goto done; 190 } 191 iov++; 192 } 193#ifdef KTRACE 194 /* 195 * if tracing, save a copy of iovec 196 */ 197 if (KTRPOINT(p, KTR_GENIO)) { 198 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 199 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 200 } 201#endif 202 cnt = auio.uio_resid; 203 if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))) 204 if (auio.uio_resid != cnt && (error == ERESTART || 205 error == EINTR || error == EWOULDBLOCK)) 206 error = 0; 207 cnt -= auio.uio_resid; 208#ifdef KTRACE 209 if (ktriov != NULL) { 210 if (error == 0) 211 ktrgenio(p->p_tracep, uap->fd, UIO_READ, ktriov, 212 cnt, error); 213 FREE(ktriov, M_TEMP); 214 } 215#endif 216 p->p_retval[0] = cnt; 217done: 218 if (needfree) 219 FREE(needfree, M_IOV); 220 return (error); 221} 222 223/* 224 * Write system call 225 */ 226#ifndef _SYS_SYSPROTO_H_ 227struct write_args { 228 int fd; 229 char *buf; 230 u_int nbyte; 231}; 232#endif 233int 234write(p, uap) 235 struct proc *p; 236 register struct write_args *uap; 237{ 238 register struct file *fp; 239 register struct filedesc *fdp = p->p_fd; 240 struct uio auio; 241 struct iovec aiov; 242 long cnt, error = 0; 243#ifdef KTRACE 244 struct iovec ktriov; 245#endif 246 247 if (((u_int)uap->fd) >= fdp->fd_nfiles || 248 (fp = fdp->fd_ofiles[uap->fd]) == NULL || 249 (fp->f_flag & FWRITE) == 0) 250 return (EBADF); 251 aiov.iov_base = (caddr_t)uap->buf; 252 aiov.iov_len = uap->nbyte; 253 auio.uio_iov = &aiov; 254 auio.uio_iovcnt = 1; 255 auio.uio_offset = -1; 256 auio.uio_resid = uap->nbyte; 257 auio.uio_rw = UIO_WRITE; 258 auio.uio_segflg = UIO_USERSPACE; 259 auio.uio_procp = p; 260#ifdef KTRACE 261 /* 262 * if tracing, save a copy of iovec 263 */ 264 if (KTRPOINT(p, KTR_GENIO)) 265 ktriov = aiov; 266#endif 267 cnt = uap->nbyte; 268 if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) { 269 if (auio.uio_resid != cnt && (error == ERESTART || 270 error == EINTR || error == EWOULDBLOCK)) 271 error = 0; 272 if (error == EPIPE) 273 psignal(p, SIGPIPE); 274 } 275 cnt -= auio.uio_resid; 276#ifdef KTRACE 277 if (KTRPOINT(p, KTR_GENIO) && error == 0) 278 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, 279 &ktriov, cnt, error); 280#endif 281 p->p_retval[0] = cnt; 282 return (error); 283} 284 285/* 286 * Gather write system call 287 */ 288#ifndef _SYS_SYSPROTO_H_ 289struct writev_args { 290 int fd; 291 struct iovec *iovp; 292 u_int iovcnt; 293}; 294#endif 295int 296writev(p, uap) 297 struct proc *p; 298 register struct writev_args *uap; 299{ 300 register struct file *fp; 301 register struct filedesc *fdp = p->p_fd; 302 struct uio auio; 303 register struct iovec *iov; 304 struct iovec *needfree; 305 struct iovec aiov[UIO_SMALLIOV]; 306 long i, cnt, error = 0; 307 u_int iovlen; 308#ifdef KTRACE 309 struct iovec *ktriov = NULL; 310#endif 311 312 if (((u_int)uap->fd) >= fdp->fd_nfiles || 313 (fp = fdp->fd_ofiles[uap->fd]) == NULL || 314 (fp->f_flag & FWRITE) == 0) 315 return (EBADF); 316 /* note: can't use iovlen until iovcnt is validated */ 317 iovlen = uap->iovcnt * sizeof (struct iovec); 318 if (uap->iovcnt > UIO_SMALLIOV) { 319 if (uap->iovcnt > UIO_MAXIOV) 320 return (EINVAL); 321 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 322 needfree = iov; 323 } else { 324 iov = aiov; 325 needfree = NULL; 326 } 327 auio.uio_iov = iov; 328 auio.uio_iovcnt = uap->iovcnt; 329 auio.uio_rw = UIO_WRITE; 330 auio.uio_segflg = UIO_USERSPACE; 331 auio.uio_procp = p; 332 auio.uio_offset = -1; 333 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 334 goto done; 335 auio.uio_resid = 0; 336 for (i = 0; i < uap->iovcnt; i++) { 337 auio.uio_resid += iov->iov_len; 338 if (auio.uio_resid < 0) { 339 error = EINVAL; 340 goto done; 341 } 342 iov++; 343 } 344#ifdef KTRACE 345 /* 346 * if tracing, save a copy of iovec 347 */ 348 if (KTRPOINT(p, KTR_GENIO)) { 349 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 350 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 351 } 352#endif 353 cnt = auio.uio_resid; 354 if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) { 355 if (auio.uio_resid != cnt && (error == ERESTART || 356 error == EINTR || error == EWOULDBLOCK)) 357 error = 0; 358 if (error == EPIPE) 359 psignal(p, SIGPIPE); 360 } 361 cnt -= auio.uio_resid; 362#ifdef KTRACE 363 if (ktriov != NULL) { 364 if (error == 0) 365 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, 366 ktriov, cnt, error); 367 FREE(ktriov, M_TEMP); 368 } 369#endif 370 p->p_retval[0] = cnt; 371done: 372 if (needfree) 373 FREE(needfree, M_IOV); 374 return (error); 375} 376 377/* 378 * Ioctl system call 379 */ 380#ifndef _SYS_SYSPROTO_H_ 381struct ioctl_args { 382 int fd; 383 int com; 384 caddr_t data; 385}; 386#endif 387/* ARGSUSED */ 388int 389ioctl(p, uap) 390 struct proc *p; 391 register struct ioctl_args *uap; 392{ 393 register struct file *fp; 394 register struct filedesc *fdp; 395 register u_long com; 396 int error; 397 register u_int size; 398 caddr_t data, memp; 399 int tmp; 400#define STK_PARAMS 128 401 char stkbuf[STK_PARAMS]; 402 403 fdp = p->p_fd; 404 if ((u_int)uap->fd >= fdp->fd_nfiles || 405 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 406 return (EBADF); 407 408 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 409 return (EBADF); 410 411 switch (com = uap->com) { 412 case FIONCLEX: 413 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 414 return (0); 415 case FIOCLEX: 416 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 417 return (0); 418 } 419 420 /* 421 * Interpret high order word to find amount of data to be 422 * copied to/from the user's address space. 423 */ 424 size = IOCPARM_LEN(com); 425 if (size > IOCPARM_MAX) 426 return (ENOTTY); 427 memp = NULL; 428 if (size > sizeof (stkbuf)) { 429 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 430 data = memp; 431 } else 432 data = stkbuf; 433 if (com&IOC_IN) { 434 if (size) { 435 error = copyin(uap->data, data, (u_int)size); 436 if (error) { 437 if (memp) 438 free(memp, M_IOCTLOPS); 439 return (error); 440 } 441 } else 442 *(caddr_t *)data = uap->data; 443 } else if ((com&IOC_OUT) && size) 444 /* 445 * Zero the buffer so the user always 446 * gets back something deterministic. 447 */ 448 bzero(data, size); 449 else if (com&IOC_VOID) 450 *(caddr_t *)data = uap->data; 451 452 switch (com) { 453 454 case FIONBIO: 455 if ((tmp = *(int *)data)) 456 fp->f_flag |= FNONBLOCK; 457 else 458 fp->f_flag &= ~FNONBLOCK; 459 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 460 break; 461 462 case FIOASYNC: 463 if ((tmp = *(int *)data)) 464 fp->f_flag |= FASYNC; 465 else 466 fp->f_flag &= ~FASYNC; 467 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 468 break; 469 470 case FIOSETOWN: 471 tmp = *(int *)data; 472 if (fp->f_type == DTYPE_SOCKET) { 473 ((struct socket *)fp->f_data)->so_pgid = tmp; 474 error = 0; 475 break; 476 } 477 if (tmp <= 0) { 478 tmp = -tmp; 479 } else { 480 struct proc *p1 = pfind(tmp); 481 if (p1 == 0) { 482 error = ESRCH; 483 break; 484 } 485 tmp = p1->p_pgrp->pg_id; 486 } 487 error = (*fp->f_ops->fo_ioctl) 488 (fp, (int)TIOCSPGRP, (caddr_t)&tmp, p); 489 break; 490 491 case FIOGETOWN: 492 if (fp->f_type == DTYPE_SOCKET) { 493 error = 0; 494 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 495 break; 496 } 497 error = (*fp->f_ops->fo_ioctl)(fp, (int)TIOCGPGRP, data, p); 498 *(int *)data = -*(int *)data; 499 break; 500 501 default: 502 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 503 /* 504 * Copy any data to user, size was 505 * already set and checked above. 506 */ 507 if (error == 0 && (com&IOC_OUT) && size) 508 error = copyout(data, uap->data, (u_int)size); 509 break; 510 } 511 if (memp) 512 free(memp, M_IOCTLOPS); 513 return (error); 514} 515 516static int nselcoll; 517int selwait; 518 519/* 520 * Select system call. 521 */ 522#ifndef _SYS_SYSPROTO_H_ 523struct select_args { 524 int nd; 525 fd_set *in, *ou, *ex; 526 struct timeval *tv; 527}; 528#endif 529int 530select(p, uap) 531 register struct proc *p; 532 register struct select_args *uap; 533{ 534 /* 535 * The magic 2048 here is chosen to be just enough for FD_SETSIZE 536 * infds with the new FD_SETSIZE of 1024, and more than enough for 537 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 538 * of 256. 539 */ 540 fd_mask s_selbits[howmany(2048, NFDBITS)]; 541 fd_mask *ibits[3], *obits[3], *selbits, *sbp; 542 struct timeval atv, rtv, ttv; 543 int s, ncoll, error, timo; 544 u_int nbufbytes, ncpbytes, nfdbits; 545 546 if (uap->nd < 0) 547 return (EINVAL); 548 if (uap->nd > p->p_fd->fd_nfiles) 549 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 550 551 /* 552 * Allocate just enough bits for the non-null fd_sets. Use the 553 * preallocated auto buffer if possible. 554 */ 555 nfdbits = roundup(uap->nd, NFDBITS); 556 ncpbytes = nfdbits / NBBY; 557 nbufbytes = 0; 558 if (uap->in != NULL) 559 nbufbytes += 2 * ncpbytes; 560 if (uap->ou != NULL) 561 nbufbytes += 2 * ncpbytes; 562 if (uap->ex != NULL) 563 nbufbytes += 2 * ncpbytes; 564 if (nbufbytes <= sizeof s_selbits) 565 selbits = &s_selbits[0]; 566 else 567 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 568 569 /* 570 * Assign pointers into the bit buffers and fetch the input bits. 571 * Put the output buffers together so that they can be bzeroed 572 * together. 573 */ 574 sbp = selbits; 575#define getbits(name, x) \ 576 do { \ 577 if (uap->name == NULL) \ 578 ibits[x] = NULL; \ 579 else { \ 580 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 581 obits[x] = sbp; \ 582 sbp += ncpbytes / sizeof *sbp; \ 583 error = copyin(uap->name, ibits[x], ncpbytes); \ 584 if (error != 0) \ 585 goto done; \ 586 } \ 587 } while (0) 588 getbits(in, 0); 589 getbits(ou, 1); 590 getbits(ex, 2); 591#undef getbits 592 if (nbufbytes != 0) 593 bzero(selbits, nbufbytes / 2); 594 595 if (uap->tv) { 596 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 597 sizeof (atv)); 598 if (error) 599 goto done; 600 if (itimerfix(&atv)) { 601 error = EINVAL; 602 goto done; 603 } 604 getmicrouptime(&rtv); 605 timevaladd(&atv, &rtv); 606 } else 607 atv.tv_sec = 0; 608 timo = 0; 609retry: 610 ncoll = nselcoll; 611 p->p_flag |= P_SELECT; 612 error = selscan(p, ibits, obits, uap->nd); 613 if (error || p->p_retval[0]) 614 goto done; 615 if (atv.tv_sec) { 616 getmicrouptime(&rtv); 617 if (timevalcmp(&rtv, &atv, >=)) 618 goto done; 619 ttv = atv; 620 timevalsub(&ttv, &rtv); 621 timo = ttv.tv_sec > 24 * 60 * 60 ? 622 24 * 60 * 60 * hz : tvtohz(&ttv); 623 } 624 s = splhigh(); 625 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 626 splx(s); 627 goto retry; 628 } 629 p->p_flag &= ~P_SELECT; 630 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 631 splx(s); 632 if (error == 0) 633 goto retry; 634done: 635 p->p_flag &= ~P_SELECT; 636 /* select is not restarted after signals... */ 637 if (error == ERESTART) 638 error = EINTR; 639 if (error == EWOULDBLOCK) 640 error = 0; 641#define putbits(name, x) \ 642 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 643 error = error2; 644 if (error == 0) { 645 int error2; 646 647 putbits(in, 0); 648 putbits(ou, 1); 649 putbits(ex, 2); 650#undef putbits 651 } 652 if (selbits != &s_selbits[0]) 653 free(selbits, M_SELECT); 654 return (error); 655} 656 657static int 658selscan(p, ibits, obits, nfd) 659 struct proc *p; 660 fd_mask **ibits, **obits; 661 int nfd; 662{ 663 register struct filedesc *fdp = p->p_fd; 664 register int msk, i, j, fd; 665 register fd_mask bits; 666 struct file *fp; 667 int n = 0; 668 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 669 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 670 671 for (msk = 0; msk < 3; msk++) { 672 if (ibits[msk] == NULL) 673 continue; 674 for (i = 0; i < nfd; i += NFDBITS) { 675 bits = ibits[msk][i/NFDBITS]; 676 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 677 bits &= ~(1 << j); 678 fp = fdp->fd_ofiles[fd]; 679 if (fp == NULL) 680 return (EBADF); 681 if ((*fp->f_ops->fo_poll)(fp, flag[msk], 682 fp->f_cred, p)) { 683 obits[msk][(fd)/NFDBITS] |= 684 (1 << ((fd) % NFDBITS)); 685 n++; 686 } 687 } 688 } 689 } 690 p->p_retval[0] = n; 691 return (0); 692} 693 694/* 695 * Poll system call. 696 */ 697#ifndef _SYS_SYSPROTO_H_ 698struct poll_args { 699 struct pollfd *fds; 700 u_int nfds; 701 int timeout; 702}; 703#endif 704int 705poll(p, uap) 706 register struct proc *p; 707 register struct poll_args *uap; 708{ 709 caddr_t bits; 710 char smallbits[32 * sizeof(struct pollfd)]; 711 struct timeval atv, rtv, ttv; 712 int s, ncoll, error = 0, timo; 713 size_t ni; 714 715 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { 716 /* forgiving; slightly wrong */ 717 SCARG(uap, nfds) = p->p_fd->fd_nfiles; 718 } 719 ni = SCARG(uap, nfds) * sizeof(struct pollfd); 720 if (ni > sizeof(smallbits)) 721 bits = malloc(ni, M_TEMP, M_WAITOK); 722 else 723 bits = smallbits; 724 error = copyin(SCARG(uap, fds), bits, ni); 725 if (error) 726 goto done; 727 if (SCARG(uap, timeout) != INFTIM) { 728 atv.tv_sec = SCARG(uap, timeout) / 1000; 729 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 730 if (itimerfix(&atv)) { 731 error = EINVAL; 732 goto done; 733 } 734 getmicrouptime(&rtv); 735 timevaladd(&atv, &rtv); 736 } else 737 atv.tv_sec = 0; 738 timo = 0; 739retry: 740 ncoll = nselcoll; 741 p->p_flag |= P_SELECT; 742 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds)); 743 if (error || p->p_retval[0]) 744 goto done; 745 if (atv.tv_sec) { 746 getmicrouptime(&rtv); 747 if (timevalcmp(&rtv, &atv, >=)) 748 goto done; 749 ttv = atv; 750 timevalsub(&ttv, &rtv); 751 timo = ttv.tv_sec > 24 * 60 * 60 ? 752 24 * 60 * 60 * hz : tvtohz(&ttv); 753 } 754 s = splhigh(); 755 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 756 splx(s); 757 goto retry; 758 } 759 p->p_flag &= ~P_SELECT; 760 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 761 splx(s); 762 if (error == 0) 763 goto retry; 764done: 765 p->p_flag &= ~P_SELECT; 766 /* poll is not restarted after signals... */ 767 if (error == ERESTART) 768 error = EINTR; 769 if (error == EWOULDBLOCK) 770 error = 0; 771 if (error == 0) { 772 error = copyout(bits, SCARG(uap, fds), ni); 773 if (error) 774 goto out; 775 } 776out: 777 if (ni > sizeof(smallbits)) 778 free(bits, M_TEMP); 779 return (error); 780} 781 782static int 783pollscan(p, fds, nfd) 784 struct proc *p; 785 struct pollfd *fds; 786 int nfd; 787{ 788 register struct filedesc *fdp = p->p_fd; 789 int i; 790 struct file *fp; 791 int n = 0; 792 793 for (i = 0; i < nfd; i++, fds++) { 794 if ((u_int)fds->fd >= fdp->fd_nfiles) { 795 fds->revents = POLLNVAL; 796 n++; 797 } else { 798 fp = fdp->fd_ofiles[fds->fd]; 799 if (fp == 0) { 800 fds->revents = POLLNVAL; 801 n++; 802 } else { 803 /* 804 * Note: backend also returns POLLHUP and 805 * POLLERR if appropriate. 806 */ 807 fds->revents = (*fp->f_ops->fo_poll)(fp, 808 fds->events, fp->f_cred, p); 809 if (fds->revents != 0) 810 n++; 811 } 812 } 813 } 814 p->p_retval[0] = n; 815 return (0); 816} 817 818/* 819 * OpenBSD poll system call. 820 * XXX this isn't quite a true representation.. OpenBSD uses select ops. 821 */ 822#ifndef _SYS_SYSPROTO_H_ 823struct openbsd_poll_args { 824 struct pollfd *fds; 825 u_int nfds; 826 int timeout; 827}; 828#endif 829int 830openbsd_poll(p, uap) 831 register struct proc *p; 832 register struct openbsd_poll_args *uap; 833{ 834 return (poll(p, (struct poll_args *)uap)); 835} 836 837/*ARGSUSED*/ 838int 839seltrue(dev, events, p) 840 dev_t dev; 841 int events; 842 struct proc *p; 843{ 844 845 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 846} 847 848/* 849 * Record a select request. 850 */ 851void 852selrecord(selector, sip) 853 struct proc *selector; 854 struct selinfo *sip; 855{ 856 struct proc *p; 857 pid_t mypid; 858 859 mypid = selector->p_pid; 860 if (sip->si_pid == mypid) 861 return; 862 if (sip->si_pid && (p = pfind(sip->si_pid)) && 863 p->p_wchan == (caddr_t)&selwait) 864 sip->si_flags |= SI_COLL; 865 else 866 sip->si_pid = mypid; 867} 868 869/* 870 * Do a wakeup when a selectable event occurs. 871 */ 872void 873selwakeup(sip) 874 register struct selinfo *sip; 875{ 876 register struct proc *p; 877 int s; 878 879 if (sip->si_pid == 0) 880 return; 881 if (sip->si_flags & SI_COLL) { 882 nselcoll++; 883 sip->si_flags &= ~SI_COLL; 884 wakeup((caddr_t)&selwait); 885 } 886 p = pfind(sip->si_pid); 887 sip->si_pid = 0; 888 if (p != NULL) { 889 s = splhigh(); 890 if (p->p_wchan == (caddr_t)&selwait) { 891 if (p->p_stat == SSLEEP) 892 setrunnable(p); 893 else 894 unsleep(p); 895 } else if (p->p_flag & P_SELECT) 896 p->p_flag &= ~P_SELECT; 897 splx(s); 898 } 899} 900