38 */ 39 40#include "opt_compat.h" 41#include "opt_ktrace.h" 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/kernel.h> 46#include <sys/sysproto.h> 47#include <sys/malloc.h> 48#include <sys/filedesc.h> 49#include <sys/event.h> 50#include <sys/proc.h> 51#include <sys/fcntl.h> 52#include <sys/file.h> 53#include <sys/mbuf.h> 54#include <sys/protosw.h> 55#include <sys/socket.h> 56#include <sys/socketvar.h> 57#include <sys/signalvar.h> 58#include <sys/uio.h> 59#include <sys/vnode.h> 60#include <sys/lock.h> 61#include <sys/mount.h> 62#ifdef KTRACE 63#include <sys/ktrace.h> 64#endif 65#include <vm/vm.h> 66#include <vm/vm_object.h> 67#include <vm/vm_page.h> 68#include <vm/vm_pageout.h> 69#include <vm/vm_kern.h> 70#include <vm/vm_extern.h> 71 72static void sf_buf_init(void *arg); 73SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 74static struct sf_buf *sf_buf_alloc(void); 75static void sf_buf_free(caddr_t addr, void *args); 76 77static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags)); 78static int recvit __P((struct proc *p, int s, struct msghdr *mp, 79 caddr_t namelenp)); 80 81static int accept1 __P((struct proc *p, struct accept_args *uap, int compat)); 82static int getsockname1 __P((struct proc *p, struct getsockname_args *uap, 83 int compat)); 84static int getpeername1 __P((struct proc *p, struct getpeername_args *uap, 85 int compat)); 86 87/* 88 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 89 * sf_freelist head with the sf_lock mutex. 90 */ 91static struct { 92 SLIST_HEAD(, sf_buf) sf_head; 93 struct mtx sf_lock; 94} sf_freelist; 95 96static vm_offset_t sf_base; 97static struct sf_buf *sf_bufs; 98static u_int sf_buf_alloc_want; 99 100/* 101 * System call interface to the socket abstraction. 102 */ 103#if defined(COMPAT_43) || defined(COMPAT_SUNOS) 104#define COMPAT_OLDSOCK 105#endif 106 107extern struct fileops socketops; 108 109int 110socket(p, uap) 111 struct proc *p; 112 register struct socket_args /* { 113 int domain; 114 int type; 115 int protocol; 116 } */ *uap; 117{ 118 struct filedesc *fdp = p->p_fd; 119 struct socket *so; 120 struct file *fp; 121 int fd, error; 122 123 error = falloc(p, &fp, &fd); 124 if (error) 125 return (error); 126 fhold(fp); 127 error = socreate(uap->domain, &so, uap->type, uap->protocol, p); 128 if (error) { 129 if (fdp->fd_ofiles[fd] == fp) { 130 fdp->fd_ofiles[fd] = NULL; 131 fdrop(fp, p); 132 } 133 } else { 134 fp->f_data = (caddr_t)so; 135 fp->f_flag = FREAD|FWRITE; 136 fp->f_ops = &socketops; 137 fp->f_type = DTYPE_SOCKET; 138 p->p_retval[0] = fd; 139 } 140 fdrop(fp, p); 141 return (error); 142} 143 144/* ARGSUSED */ 145int 146bind(p, uap) 147 struct proc *p; 148 register struct bind_args /* { 149 int s; 150 caddr_t name; 151 int namelen; 152 } */ *uap; 153{ 154 struct file *fp; 155 struct sockaddr *sa; 156 int error; 157 158 error = holdsock(p->p_fd, uap->s, &fp); 159 if (error) 160 return (error); 161 error = getsockaddr(&sa, uap->name, uap->namelen); 162 if (error) { 163 fdrop(fp, p); 164 return (error); 165 } 166 error = sobind((struct socket *)fp->f_data, sa, p); 167 FREE(sa, M_SONAME); 168 fdrop(fp, p); 169 return (error); 170} 171 172/* ARGSUSED */ 173int 174listen(p, uap) 175 struct proc *p; 176 register struct listen_args /* { 177 int s; 178 int backlog; 179 } */ *uap; 180{ 181 struct file *fp; 182 int error; 183 184 error = holdsock(p->p_fd, uap->s, &fp); 185 if (error) 186 return (error); 187 error = solisten((struct socket *)fp->f_data, uap->backlog, p); 188 fdrop(fp, p); 189 return(error); 190} 191 192static int 193accept1(p, uap, compat) 194 struct proc *p; 195 register struct accept_args /* { 196 int s; 197 caddr_t name; 198 int *anamelen; 199 } */ *uap; 200 int compat; 201{ 202 struct filedesc *fdp = p->p_fd; 203 struct file *lfp = NULL; 204 struct file *nfp = NULL; 205 struct sockaddr *sa; 206 int namelen, error, s; 207 struct socket *head, *so; 208 int fd; 209 short fflag; /* type must match fp->f_flag */ 210 211 if (uap->name) { 212 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen, 213 sizeof (namelen)); 214 if(error) 215 return (error); 216 } 217 error = holdsock(fdp, uap->s, &lfp); 218 if (error) 219 return (error); 220 s = splnet(); 221 head = (struct socket *)lfp->f_data; 222 if ((head->so_options & SO_ACCEPTCONN) == 0) { 223 splx(s); 224 error = EINVAL; 225 goto done; 226 } 227 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 228 splx(s); 229 error = EWOULDBLOCK; 230 goto done; 231 } 232 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 233 if (head->so_state & SS_CANTRCVMORE) { 234 head->so_error = ECONNABORTED; 235 break; 236 } 237 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH, 238 "accept", 0); 239 if (error) { 240 splx(s); 241 goto done; 242 } 243 } 244 if (head->so_error) { 245 error = head->so_error; 246 head->so_error = 0; 247 splx(s); 248 goto done; 249 } 250 251 /* 252 * At this point we know that there is at least one connection 253 * ready to be accepted. Remove it from the queue prior to 254 * allocating the file descriptor for it since falloc() may 255 * block allowing another process to accept the connection 256 * instead. 257 */ 258 so = TAILQ_FIRST(&head->so_comp); 259 TAILQ_REMOVE(&head->so_comp, so, so_list); 260 head->so_qlen--; 261 262 fflag = lfp->f_flag; 263 error = falloc(p, &nfp, &fd); 264 if (error) { 265 /* 266 * Probably ran out of file descriptors. Put the 267 * unaccepted connection back onto the queue and 268 * do another wakeup so some other process might 269 * have a chance at it. 270 */ 271 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 272 head->so_qlen++; 273 wakeup_one(&head->so_timeo); 274 splx(s); 275 goto done; 276 } 277 fhold(nfp); 278 p->p_retval[0] = fd; 279 280 /* connection has been removed from the listen queue */ 281 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 282 283 so->so_state &= ~SS_COMP; 284 so->so_head = NULL; 285 if (head->so_sigio != NULL) 286 fsetown(fgetown(head->so_sigio), &so->so_sigio); 287 288 nfp->f_data = (caddr_t)so; 289 nfp->f_flag = fflag; 290 nfp->f_ops = &socketops; 291 nfp->f_type = DTYPE_SOCKET; 292 sa = 0; 293 (void) soaccept(so, &sa); 294 if (sa == NULL) { 295 namelen = 0; 296 if (uap->name) 297 goto gotnoname; 298 splx(s); 299 error = 0; 300 goto done; 301 } 302 if (uap->name) { 303 /* check sa_len before it is destroyed */ 304 if (namelen > sa->sa_len) 305 namelen = sa->sa_len; 306#ifdef COMPAT_OLDSOCK 307 if (compat) 308 ((struct osockaddr *)sa)->sa_family = 309 sa->sa_family; 310#endif 311 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen); 312 if (!error) 313gotnoname: 314 error = copyout((caddr_t)&namelen, 315 (caddr_t)uap->anamelen, sizeof (*uap->anamelen)); 316 } 317 if (sa) 318 FREE(sa, M_SONAME); 319 320 /* 321 * close the new descriptor, assuming someone hasn't ripped it 322 * out from under us. 323 */ 324 if (error) { 325 if (fdp->fd_ofiles[fd] == nfp) { 326 fdp->fd_ofiles[fd] = NULL; 327 fdrop(nfp, p); 328 } 329 } 330 splx(s); 331 332 /* 333 * Release explicitly held references before returning. 334 */ 335done: 336 if (nfp != NULL) 337 fdrop(nfp, p); 338 fdrop(lfp, p); 339 return (error); 340} 341 342int 343accept(p, uap) 344 struct proc *p; 345 struct accept_args *uap; 346{ 347 348 return (accept1(p, uap, 0)); 349} 350 351#ifdef COMPAT_OLDSOCK 352int 353oaccept(p, uap) 354 struct proc *p; 355 struct accept_args *uap; 356{ 357 358 return (accept1(p, uap, 1)); 359} 360#endif /* COMPAT_OLDSOCK */ 361 362/* ARGSUSED */ 363int 364connect(p, uap) 365 struct proc *p; 366 register struct connect_args /* { 367 int s; 368 caddr_t name; 369 int namelen; 370 } */ *uap; 371{ 372 struct file *fp; 373 register struct socket *so; 374 struct sockaddr *sa; 375 int error, s; 376 377 error = holdsock(p->p_fd, uap->s, &fp); 378 if (error) 379 return (error); 380 so = (struct socket *)fp->f_data; 381 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 382 error = EALREADY; 383 goto done; 384 } 385 error = getsockaddr(&sa, uap->name, uap->namelen); 386 if (error) 387 goto done; 388 error = soconnect(so, sa, p); 389 if (error) 390 goto bad; 391 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 392 FREE(sa, M_SONAME); 393 error = EINPROGRESS; 394 goto done; 395 } 396 s = splnet(); 397 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 398 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, 399 "connec", 0); 400 if (error) 401 break; 402 } 403 if (error == 0) { 404 error = so->so_error; 405 so->so_error = 0; 406 } 407 splx(s); 408bad: 409 so->so_state &= ~SS_ISCONNECTING; 410 FREE(sa, M_SONAME); 411 if (error == ERESTART) 412 error = EINTR; 413done: 414 fdrop(fp, p); 415 return (error); 416} 417 418int 419socketpair(p, uap) 420 struct proc *p; 421 register struct socketpair_args /* { 422 int domain; 423 int type; 424 int protocol; 425 int *rsv; 426 } */ *uap; 427{ 428 register struct filedesc *fdp = p->p_fd; 429 struct file *fp1, *fp2; 430 struct socket *so1, *so2; 431 int fd, error, sv[2]; 432 433 error = socreate(uap->domain, &so1, uap->type, uap->protocol, p); 434 if (error) 435 return (error); 436 error = socreate(uap->domain, &so2, uap->type, uap->protocol, p); 437 if (error) 438 goto free1; 439 error = falloc(p, &fp1, &fd); 440 if (error) 441 goto free2; 442 fhold(fp1); 443 sv[0] = fd; 444 fp1->f_data = (caddr_t)so1; 445 error = falloc(p, &fp2, &fd); 446 if (error) 447 goto free3; 448 fhold(fp2); 449 fp2->f_data = (caddr_t)so2; 450 sv[1] = fd; 451 error = soconnect2(so1, so2); 452 if (error) 453 goto free4; 454 if (uap->type == SOCK_DGRAM) { 455 /* 456 * Datagram socket connection is asymmetric. 457 */ 458 error = soconnect2(so2, so1); 459 if (error) 460 goto free4; 461 } 462 fp1->f_flag = fp2->f_flag = FREAD|FWRITE; 463 fp1->f_ops = fp2->f_ops = &socketops; 464 fp1->f_type = fp2->f_type = DTYPE_SOCKET; 465 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); 466 fdrop(fp1, p); 467 fdrop(fp2, p); 468 return (error); 469free4: 470 if (fdp->fd_ofiles[sv[1]] == fp2) { 471 fdp->fd_ofiles[sv[1]] = NULL; 472 fdrop(fp2, p); 473 } 474 fdrop(fp2, p); 475free3: 476 if (fdp->fd_ofiles[sv[0]] == fp1) { 477 fdp->fd_ofiles[sv[0]] = NULL; 478 fdrop(fp1, p); 479 } 480 fdrop(fp1, p); 481free2: 482 (void)soclose(so2); 483free1: 484 (void)soclose(so1); 485 return (error); 486} 487 488static int 489sendit(p, s, mp, flags) 490 register struct proc *p; 491 int s; 492 register struct msghdr *mp; 493 int flags; 494{ 495 struct file *fp; 496 struct uio auio; 497 register struct iovec *iov; 498 register int i; 499 struct mbuf *control; 500 struct sockaddr *to; 501 int len, error; 502 struct socket *so; 503#ifdef KTRACE 504 struct iovec *ktriov = NULL; 505 struct uio ktruio; 506#endif 507 508 error = holdsock(p->p_fd, s, &fp); 509 if (error) 510 return (error); 511 auio.uio_iov = mp->msg_iov; 512 auio.uio_iovcnt = mp->msg_iovlen; 513 auio.uio_segflg = UIO_USERSPACE; 514 auio.uio_rw = UIO_WRITE; 515 auio.uio_procp = p; 516 auio.uio_offset = 0; /* XXX */ 517 auio.uio_resid = 0; 518 iov = mp->msg_iov; 519 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 520 if ((auio.uio_resid += iov->iov_len) < 0) { 521 fdrop(fp, p); 522 return (EINVAL); 523 } 524 } 525 if (mp->msg_name) { 526 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 527 if (error) { 528 fdrop(fp, p); 529 return (error); 530 } 531 } else { 532 to = 0; 533 } 534 if (mp->msg_control) { 535 if (mp->msg_controllen < sizeof(struct cmsghdr) 536#ifdef COMPAT_OLDSOCK 537 && mp->msg_flags != MSG_COMPAT 538#endif 539 ) { 540 error = EINVAL; 541 goto bad; 542 } 543 error = sockargs(&control, mp->msg_control, 544 mp->msg_controllen, MT_CONTROL); 545 if (error) 546 goto bad; 547#ifdef COMPAT_OLDSOCK 548 if (mp->msg_flags == MSG_COMPAT) { 549 register struct cmsghdr *cm; 550 551 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 552 if (control == 0) { 553 error = ENOBUFS; 554 goto bad; 555 } else { 556 cm = mtod(control, struct cmsghdr *); 557 cm->cmsg_len = control->m_len; 558 cm->cmsg_level = SOL_SOCKET; 559 cm->cmsg_type = SCM_RIGHTS; 560 } 561 } 562#endif 563 } else { 564 control = 0; 565 } 566#ifdef KTRACE 567 if (KTRPOINT(p, KTR_GENIO)) { 568 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 569 570 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 571 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 572 ktruio = auio; 573 } 574#endif 575 len = auio.uio_resid; 576 so = (struct socket *)fp->f_data; 577 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, 578 flags, p); 579 if (error) { 580 if (auio.uio_resid != len && (error == ERESTART || 581 error == EINTR || error == EWOULDBLOCK)) 582 error = 0; 583 if (error == EPIPE) 584 psignal(p, SIGPIPE); 585 } 586 if (error == 0) 587 p->p_retval[0] = len - auio.uio_resid; 588#ifdef KTRACE 589 if (ktriov != NULL) { 590 if (error == 0) { 591 ktruio.uio_iov = ktriov; 592 ktruio.uio_resid = p->p_retval[0]; 593 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error); 594 } 595 FREE(ktriov, M_TEMP); 596 } 597#endif 598bad: 599 fdrop(fp, p); 600 if (to) 601 FREE(to, M_SONAME); 602 return (error); 603} 604 605int 606sendto(p, uap) 607 struct proc *p; 608 register struct sendto_args /* { 609 int s; 610 caddr_t buf; 611 size_t len; 612 int flags; 613 caddr_t to; 614 int tolen; 615 } */ *uap; 616{ 617 struct msghdr msg; 618 struct iovec aiov; 619 620 msg.msg_name = uap->to; 621 msg.msg_namelen = uap->tolen; 622 msg.msg_iov = &aiov; 623 msg.msg_iovlen = 1; 624 msg.msg_control = 0; 625#ifdef COMPAT_OLDSOCK 626 msg.msg_flags = 0; 627#endif 628 aiov.iov_base = uap->buf; 629 aiov.iov_len = uap->len; 630 return (sendit(p, uap->s, &msg, uap->flags)); 631} 632 633#ifdef COMPAT_OLDSOCK 634int 635osend(p, uap) 636 struct proc *p; 637 register struct osend_args /* { 638 int s; 639 caddr_t buf; 640 int len; 641 int flags; 642 } */ *uap; 643{ 644 struct msghdr msg; 645 struct iovec aiov; 646 647 msg.msg_name = 0; 648 msg.msg_namelen = 0; 649 msg.msg_iov = &aiov; 650 msg.msg_iovlen = 1; 651 aiov.iov_base = uap->buf; 652 aiov.iov_len = uap->len; 653 msg.msg_control = 0; 654 msg.msg_flags = 0; 655 return (sendit(p, uap->s, &msg, uap->flags)); 656} 657 658int 659osendmsg(p, uap) 660 struct proc *p; 661 register struct osendmsg_args /* { 662 int s; 663 caddr_t msg; 664 int flags; 665 } */ *uap; 666{ 667 struct msghdr msg; 668 struct iovec aiov[UIO_SMALLIOV], *iov; 669 int error; 670 671 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); 672 if (error) 673 return (error); 674 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 675 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 676 return (EMSGSIZE); 677 MALLOC(iov, struct iovec *, 678 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 679 M_WAITOK); 680 } else 681 iov = aiov; 682 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 683 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 684 if (error) 685 goto done; 686 msg.msg_flags = MSG_COMPAT; 687 msg.msg_iov = iov; 688 error = sendit(p, uap->s, &msg, uap->flags); 689done: 690 if (iov != aiov) 691 FREE(iov, M_IOV); 692 return (error); 693} 694#endif 695 696int 697sendmsg(p, uap) 698 struct proc *p; 699 register struct sendmsg_args /* { 700 int s; 701 caddr_t msg; 702 int flags; 703 } */ *uap; 704{ 705 struct msghdr msg; 706 struct iovec aiov[UIO_SMALLIOV], *iov; 707 int error; 708 709 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)); 710 if (error) 711 return (error); 712 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 713 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 714 return (EMSGSIZE); 715 MALLOC(iov, struct iovec *, 716 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 717 M_WAITOK); 718 } else 719 iov = aiov; 720 if (msg.msg_iovlen && 721 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 722 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 723 goto done; 724 msg.msg_iov = iov; 725#ifdef COMPAT_OLDSOCK 726 msg.msg_flags = 0; 727#endif 728 error = sendit(p, uap->s, &msg, uap->flags); 729done: 730 if (iov != aiov) 731 FREE(iov, M_IOV); 732 return (error); 733} 734 735static int 736recvit(p, s, mp, namelenp) 737 register struct proc *p; 738 int s; 739 register struct msghdr *mp; 740 caddr_t namelenp; 741{ 742 struct file *fp; 743 struct uio auio; 744 register struct iovec *iov; 745 register int i; 746 int len, error; 747 struct mbuf *m, *control = 0; 748 caddr_t ctlbuf; 749 struct socket *so; 750 struct sockaddr *fromsa = 0; 751#ifdef KTRACE 752 struct iovec *ktriov = NULL; 753 struct uio ktruio; 754#endif 755 756 error = holdsock(p->p_fd, s, &fp); 757 if (error) 758 return (error); 759 auio.uio_iov = mp->msg_iov; 760 auio.uio_iovcnt = mp->msg_iovlen; 761 auio.uio_segflg = UIO_USERSPACE; 762 auio.uio_rw = UIO_READ; 763 auio.uio_procp = p; 764 auio.uio_offset = 0; /* XXX */ 765 auio.uio_resid = 0; 766 iov = mp->msg_iov; 767 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 768 if ((auio.uio_resid += iov->iov_len) < 0) { 769 fdrop(fp, p); 770 return (EINVAL); 771 } 772 } 773#ifdef KTRACE 774 if (KTRPOINT(p, KTR_GENIO)) { 775 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 776 777 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 778 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 779 ktruio = auio; 780 } 781#endif 782 len = auio.uio_resid; 783 so = (struct socket *)fp->f_data; 784 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 785 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 786 &mp->msg_flags); 787 if (error) { 788 if (auio.uio_resid != len && (error == ERESTART || 789 error == EINTR || error == EWOULDBLOCK)) 790 error = 0; 791 } 792#ifdef KTRACE 793 if (ktriov != NULL) { 794 if (error == 0) { 795 ktruio.uio_iov = ktriov; 796 ktruio.uio_resid = len - auio.uio_resid; 797 ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error); 798 } 799 FREE(ktriov, M_TEMP); 800 } 801#endif 802 if (error) 803 goto out; 804 p->p_retval[0] = len - auio.uio_resid; 805 if (mp->msg_name) { 806 len = mp->msg_namelen; 807 if (len <= 0 || fromsa == 0) 808 len = 0; 809 else { 810#ifndef MIN 811#define MIN(a,b) ((a)>(b)?(b):(a)) 812#endif 813 /* save sa_len before it is destroyed by MSG_COMPAT */ 814 len = MIN(len, fromsa->sa_len); 815#ifdef COMPAT_OLDSOCK 816 if (mp->msg_flags & MSG_COMPAT) 817 ((struct osockaddr *)fromsa)->sa_family = 818 fromsa->sa_family; 819#endif 820 error = copyout(fromsa, 821 (caddr_t)mp->msg_name, (unsigned)len); 822 if (error) 823 goto out; 824 } 825 mp->msg_namelen = len; 826 if (namelenp && 827 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { 828#ifdef COMPAT_OLDSOCK 829 if (mp->msg_flags & MSG_COMPAT) 830 error = 0; /* old recvfrom didn't check */ 831 else 832#endif 833 goto out; 834 } 835 } 836 if (mp->msg_control) { 837#ifdef COMPAT_OLDSOCK 838 /* 839 * We assume that old recvmsg calls won't receive access 840 * rights and other control info, esp. as control info 841 * is always optional and those options didn't exist in 4.3. 842 * If we receive rights, trim the cmsghdr; anything else 843 * is tossed. 844 */ 845 if (control && mp->msg_flags & MSG_COMPAT) { 846 if (mtod(control, struct cmsghdr *)->cmsg_level != 847 SOL_SOCKET || 848 mtod(control, struct cmsghdr *)->cmsg_type != 849 SCM_RIGHTS) { 850 mp->msg_controllen = 0; 851 goto out; 852 } 853 control->m_len -= sizeof (struct cmsghdr); 854 control->m_data += sizeof (struct cmsghdr); 855 } 856#endif 857 len = mp->msg_controllen; 858 m = control; 859 mp->msg_controllen = 0; 860 ctlbuf = (caddr_t) mp->msg_control; 861 862 while (m && len > 0) { 863 unsigned int tocopy; 864 865 if (len >= m->m_len) 866 tocopy = m->m_len; 867 else { 868 mp->msg_flags |= MSG_CTRUNC; 869 tocopy = len; 870 } 871 872 if ((error = copyout((caddr_t)mtod(m, caddr_t), 873 ctlbuf, tocopy)) != 0) 874 goto out; 875 876 ctlbuf += tocopy; 877 len -= tocopy; 878 m = m->m_next; 879 } 880 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 881 } 882out: 883 fdrop(fp, p); 884 if (fromsa) 885 FREE(fromsa, M_SONAME); 886 if (control) 887 m_freem(control); 888 return (error); 889} 890 891int 892recvfrom(p, uap) 893 struct proc *p; 894 register struct recvfrom_args /* { 895 int s; 896 caddr_t buf; 897 size_t len; 898 int flags; 899 caddr_t from; 900 int *fromlenaddr; 901 } */ *uap; 902{ 903 struct msghdr msg; 904 struct iovec aiov; 905 int error; 906 907 if (uap->fromlenaddr) { 908 error = copyin((caddr_t)uap->fromlenaddr, 909 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)); 910 if (error) 911 return (error); 912 } else 913 msg.msg_namelen = 0; 914 msg.msg_name = uap->from; 915 msg.msg_iov = &aiov; 916 msg.msg_iovlen = 1; 917 aiov.iov_base = uap->buf; 918 aiov.iov_len = uap->len; 919 msg.msg_control = 0; 920 msg.msg_flags = uap->flags; 921 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr)); 922} 923 924#ifdef COMPAT_OLDSOCK 925int 926orecvfrom(p, uap) 927 struct proc *p; 928 struct recvfrom_args *uap; 929{ 930 931 uap->flags |= MSG_COMPAT; 932 return (recvfrom(p, uap)); 933} 934#endif 935 936 937#ifdef COMPAT_OLDSOCK 938int 939orecv(p, uap) 940 struct proc *p; 941 register struct orecv_args /* { 942 int s; 943 caddr_t buf; 944 int len; 945 int flags; 946 } */ *uap; 947{ 948 struct msghdr msg; 949 struct iovec aiov; 950 951 msg.msg_name = 0; 952 msg.msg_namelen = 0; 953 msg.msg_iov = &aiov; 954 msg.msg_iovlen = 1; 955 aiov.iov_base = uap->buf; 956 aiov.iov_len = uap->len; 957 msg.msg_control = 0; 958 msg.msg_flags = uap->flags; 959 return (recvit(p, uap->s, &msg, (caddr_t)0)); 960} 961 962/* 963 * Old recvmsg. This code takes advantage of the fact that the old msghdr 964 * overlays the new one, missing only the flags, and with the (old) access 965 * rights where the control fields are now. 966 */ 967int 968orecvmsg(p, uap) 969 struct proc *p; 970 register struct orecvmsg_args /* { 971 int s; 972 struct omsghdr *msg; 973 int flags; 974 } */ *uap; 975{ 976 struct msghdr msg; 977 struct iovec aiov[UIO_SMALLIOV], *iov; 978 int error; 979 980 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, 981 sizeof (struct omsghdr)); 982 if (error) 983 return (error); 984 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 985 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 986 return (EMSGSIZE); 987 MALLOC(iov, struct iovec *, 988 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 989 M_WAITOK); 990 } else 991 iov = aiov; 992 msg.msg_flags = uap->flags | MSG_COMPAT; 993 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 994 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 995 if (error) 996 goto done; 997 msg.msg_iov = iov; 998 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen); 999 1000 if (msg.msg_controllen && error == 0) 1001 error = copyout((caddr_t)&msg.msg_controllen, 1002 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int)); 1003done: 1004 if (iov != aiov) 1005 FREE(iov, M_IOV); 1006 return (error); 1007} 1008#endif 1009 1010int 1011recvmsg(p, uap) 1012 struct proc *p; 1013 register struct recvmsg_args /* { 1014 int s; 1015 struct msghdr *msg; 1016 int flags; 1017 } */ *uap; 1018{ 1019 struct msghdr msg; 1020 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1021 register int error; 1022 1023 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg)); 1024 if (error) 1025 return (error); 1026 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1027 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 1028 return (EMSGSIZE); 1029 MALLOC(iov, struct iovec *, 1030 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1031 M_WAITOK); 1032 } else 1033 iov = aiov; 1034#ifdef COMPAT_OLDSOCK 1035 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1036#else 1037 msg.msg_flags = uap->flags; 1038#endif 1039 uiov = msg.msg_iov; 1040 msg.msg_iov = iov; 1041 error = copyin((caddr_t)uiov, (caddr_t)iov, 1042 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1043 if (error) 1044 goto done; 1045 error = recvit(p, uap->s, &msg, (caddr_t)0); 1046 if (!error) { 1047 msg.msg_iov = uiov; 1048 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg)); 1049 } 1050done: 1051 if (iov != aiov) 1052 FREE(iov, M_IOV); 1053 return (error); 1054} 1055 1056/* ARGSUSED */ 1057int 1058shutdown(p, uap) 1059 struct proc *p; 1060 register struct shutdown_args /* { 1061 int s; 1062 int how; 1063 } */ *uap; 1064{ 1065 struct file *fp; 1066 int error; 1067 1068 error = holdsock(p->p_fd, uap->s, &fp); 1069 if (error) 1070 return (error); 1071 error = soshutdown((struct socket *)fp->f_data, uap->how); 1072 fdrop(fp, p); 1073 return(error); 1074} 1075 1076/* ARGSUSED */ 1077int 1078setsockopt(p, uap) 1079 struct proc *p; 1080 register struct setsockopt_args /* { 1081 int s; 1082 int level; 1083 int name; 1084 caddr_t val; 1085 int valsize; 1086 } */ *uap; 1087{ 1088 struct file *fp; 1089 struct sockopt sopt; 1090 int error; 1091 1092 if (uap->val == 0 && uap->valsize != 0) 1093 return (EFAULT); 1094 if (uap->valsize < 0) 1095 return (EINVAL); 1096 1097 error = holdsock(p->p_fd, uap->s, &fp); 1098 if (error) 1099 return (error); 1100 1101 sopt.sopt_dir = SOPT_SET; 1102 sopt.sopt_level = uap->level; 1103 sopt.sopt_name = uap->name; 1104 sopt.sopt_val = uap->val; 1105 sopt.sopt_valsize = uap->valsize; 1106 sopt.sopt_p = p; 1107 error = sosetopt((struct socket *)fp->f_data, &sopt); 1108 fdrop(fp, p); 1109 return(error); 1110} 1111 1112/* ARGSUSED */ 1113int 1114getsockopt(p, uap) 1115 struct proc *p; 1116 register struct getsockopt_args /* { 1117 int s; 1118 int level; 1119 int name; 1120 caddr_t val; 1121 int *avalsize; 1122 } */ *uap; 1123{ 1124 int valsize, error; 1125 struct file *fp; 1126 struct sockopt sopt; 1127 1128 error = holdsock(p->p_fd, uap->s, &fp); 1129 if (error) 1130 return (error); 1131 if (uap->val) { 1132 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, 1133 sizeof (valsize)); 1134 if (error) { 1135 fdrop(fp, p); 1136 return (error); 1137 } 1138 if (valsize < 0) { 1139 fdrop(fp, p); 1140 return (EINVAL); 1141 } 1142 } else { 1143 valsize = 0; 1144 } 1145 1146 sopt.sopt_dir = SOPT_GET; 1147 sopt.sopt_level = uap->level; 1148 sopt.sopt_name = uap->name; 1149 sopt.sopt_val = uap->val; 1150 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1151 sopt.sopt_p = p; 1152 1153 error = sogetopt((struct socket *)fp->f_data, &sopt); 1154 if (error == 0) { 1155 valsize = sopt.sopt_valsize; 1156 error = copyout((caddr_t)&valsize, 1157 (caddr_t)uap->avalsize, sizeof (valsize)); 1158 } 1159 fdrop(fp, p); 1160 return (error); 1161} 1162 1163/* 1164 * Get socket name. 1165 */ 1166/* ARGSUSED */ 1167static int 1168getsockname1(p, uap, compat) 1169 struct proc *p; 1170 register struct getsockname_args /* { 1171 int fdes; 1172 caddr_t asa; 1173 int *alen; 1174 } */ *uap; 1175 int compat; 1176{ 1177 struct file *fp; 1178 register struct socket *so; 1179 struct sockaddr *sa; 1180 int len, error; 1181 1182 error = holdsock(p->p_fd, uap->fdes, &fp); 1183 if (error) 1184 return (error); 1185 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1186 if (error) { 1187 fdrop(fp, p); 1188 return (error); 1189 } 1190 so = (struct socket *)fp->f_data; 1191 sa = 0; 1192 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1193 if (error) 1194 goto bad; 1195 if (sa == 0) { 1196 len = 0; 1197 goto gotnothing; 1198 } 1199 1200 len = MIN(len, sa->sa_len); 1201#ifdef COMPAT_OLDSOCK 1202 if (compat) 1203 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1204#endif 1205 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1206 if (error == 0) 1207gotnothing: 1208 error = copyout((caddr_t)&len, (caddr_t)uap->alen, 1209 sizeof (len)); 1210bad: 1211 if (sa) 1212 FREE(sa, M_SONAME); 1213 fdrop(fp, p); 1214 return (error); 1215} 1216 1217int 1218getsockname(p, uap) 1219 struct proc *p; 1220 struct getsockname_args *uap; 1221{ 1222 1223 return (getsockname1(p, uap, 0)); 1224} 1225 1226#ifdef COMPAT_OLDSOCK 1227int 1228ogetsockname(p, uap) 1229 struct proc *p; 1230 struct getsockname_args *uap; 1231{ 1232 1233 return (getsockname1(p, uap, 1)); 1234} 1235#endif /* COMPAT_OLDSOCK */ 1236 1237/* 1238 * Get name of peer for connected socket. 1239 */ 1240/* ARGSUSED */ 1241static int 1242getpeername1(p, uap, compat) 1243 struct proc *p; 1244 register struct getpeername_args /* { 1245 int fdes; 1246 caddr_t asa; 1247 int *alen; 1248 } */ *uap; 1249 int compat; 1250{ 1251 struct file *fp; 1252 register struct socket *so; 1253 struct sockaddr *sa; 1254 int len, error; 1255 1256 error = holdsock(p->p_fd, uap->fdes, &fp); 1257 if (error) 1258 return (error); 1259 so = (struct socket *)fp->f_data; 1260 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1261 fdrop(fp, p); 1262 return (ENOTCONN); 1263 } 1264 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1265 if (error) { 1266 fdrop(fp, p); 1267 return (error); 1268 } 1269 sa = 0; 1270 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1271 if (error) 1272 goto bad; 1273 if (sa == 0) { 1274 len = 0; 1275 goto gotnothing; 1276 } 1277 len = MIN(len, sa->sa_len); 1278#ifdef COMPAT_OLDSOCK 1279 if (compat) 1280 ((struct osockaddr *)sa)->sa_family = 1281 sa->sa_family; 1282#endif 1283 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1284 if (error) 1285 goto bad; 1286gotnothing: 1287 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); 1288bad: 1289 if (sa) 1290 FREE(sa, M_SONAME); 1291 fdrop(fp, p); 1292 return (error); 1293} 1294 1295int 1296getpeername(p, uap) 1297 struct proc *p; 1298 struct getpeername_args *uap; 1299{ 1300 1301 return (getpeername1(p, uap, 0)); 1302} 1303 1304#ifdef COMPAT_OLDSOCK 1305int 1306ogetpeername(p, uap) 1307 struct proc *p; 1308 struct ogetpeername_args *uap; 1309{ 1310 1311 /* XXX uap should have type `getpeername_args *' to begin with. */ 1312 return (getpeername1(p, (struct getpeername_args *)uap, 1)); 1313} 1314#endif /* COMPAT_OLDSOCK */ 1315 1316int 1317sockargs(mp, buf, buflen, type) 1318 struct mbuf **mp; 1319 caddr_t buf; 1320 int buflen, type; 1321{ 1322 register struct sockaddr *sa; 1323 register struct mbuf *m; 1324 int error; 1325 1326 if ((u_int)buflen > MLEN) { 1327#ifdef COMPAT_OLDSOCK 1328 if (type == MT_SONAME && (u_int)buflen <= 112) 1329 buflen = MLEN; /* unix domain compat. hack */ 1330 else 1331#endif 1332 return (EINVAL); 1333 } 1334 m = m_get(M_TRYWAIT, type); 1335 if (m == NULL) 1336 return (ENOBUFS); 1337 m->m_len = buflen; 1338 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1339 if (error) 1340 (void) m_free(m); 1341 else { 1342 *mp = m; 1343 if (type == MT_SONAME) { 1344 sa = mtod(m, struct sockaddr *); 1345 1346#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1347 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1348 sa->sa_family = sa->sa_len; 1349#endif 1350 sa->sa_len = buflen; 1351 } 1352 } 1353 return (error); 1354} 1355 1356int 1357getsockaddr(namp, uaddr, len) 1358 struct sockaddr **namp; 1359 caddr_t uaddr; 1360 size_t len; 1361{ 1362 struct sockaddr *sa; 1363 int error; 1364 1365 if (len > SOCK_MAXADDRLEN) 1366 return ENAMETOOLONG; 1367 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1368 error = copyin(uaddr, sa, len); 1369 if (error) { 1370 FREE(sa, M_SONAME); 1371 } else { 1372#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1373 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1374 sa->sa_family = sa->sa_len; 1375#endif 1376 sa->sa_len = len; 1377 *namp = sa; 1378 } 1379 return error; 1380} 1381 1382/* 1383 * holdsock() - load the struct file pointer associated 1384 * with a socket into *fpp. If an error occurs, non-zero 1385 * will be returned and *fpp will be set to NULL. 1386 */ 1387int 1388holdsock(fdp, fdes, fpp) 1389 struct filedesc *fdp; 1390 int fdes; 1391 struct file **fpp; 1392{ 1393 register struct file *fp = NULL; 1394 int error = 0; 1395 1396 if ((unsigned)fdes >= fdp->fd_nfiles || 1397 (fp = fdp->fd_ofiles[fdes]) == NULL) { 1398 error = EBADF; 1399 } else if (fp->f_type != DTYPE_SOCKET) { 1400 error = ENOTSOCK; 1401 fp = NULL; 1402 } else { 1403 fhold(fp); 1404 } 1405 *fpp = fp; 1406 return(error); 1407} 1408 1409/* 1410 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1411 * XXX - The sf_buf functions are currently private to sendfile(2), so have 1412 * been made static, but may be useful in the future for doing zero-copy in 1413 * other parts of the networking code. 1414 */ 1415static void 1416sf_buf_init(void *arg) 1417{ 1418 int i; 1419 1420 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", MTX_DEF);
| 38 */ 39 40#include "opt_compat.h" 41#include "opt_ktrace.h" 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/kernel.h> 46#include <sys/sysproto.h> 47#include <sys/malloc.h> 48#include <sys/filedesc.h> 49#include <sys/event.h> 50#include <sys/proc.h> 51#include <sys/fcntl.h> 52#include <sys/file.h> 53#include <sys/mbuf.h> 54#include <sys/protosw.h> 55#include <sys/socket.h> 56#include <sys/socketvar.h> 57#include <sys/signalvar.h> 58#include <sys/uio.h> 59#include <sys/vnode.h> 60#include <sys/lock.h> 61#include <sys/mount.h> 62#ifdef KTRACE 63#include <sys/ktrace.h> 64#endif 65#include <vm/vm.h> 66#include <vm/vm_object.h> 67#include <vm/vm_page.h> 68#include <vm/vm_pageout.h> 69#include <vm/vm_kern.h> 70#include <vm/vm_extern.h> 71 72static void sf_buf_init(void *arg); 73SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 74static struct sf_buf *sf_buf_alloc(void); 75static void sf_buf_free(caddr_t addr, void *args); 76 77static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags)); 78static int recvit __P((struct proc *p, int s, struct msghdr *mp, 79 caddr_t namelenp)); 80 81static int accept1 __P((struct proc *p, struct accept_args *uap, int compat)); 82static int getsockname1 __P((struct proc *p, struct getsockname_args *uap, 83 int compat)); 84static int getpeername1 __P((struct proc *p, struct getpeername_args *uap, 85 int compat)); 86 87/* 88 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 89 * sf_freelist head with the sf_lock mutex. 90 */ 91static struct { 92 SLIST_HEAD(, sf_buf) sf_head; 93 struct mtx sf_lock; 94} sf_freelist; 95 96static vm_offset_t sf_base; 97static struct sf_buf *sf_bufs; 98static u_int sf_buf_alloc_want; 99 100/* 101 * System call interface to the socket abstraction. 102 */ 103#if defined(COMPAT_43) || defined(COMPAT_SUNOS) 104#define COMPAT_OLDSOCK 105#endif 106 107extern struct fileops socketops; 108 109int 110socket(p, uap) 111 struct proc *p; 112 register struct socket_args /* { 113 int domain; 114 int type; 115 int protocol; 116 } */ *uap; 117{ 118 struct filedesc *fdp = p->p_fd; 119 struct socket *so; 120 struct file *fp; 121 int fd, error; 122 123 error = falloc(p, &fp, &fd); 124 if (error) 125 return (error); 126 fhold(fp); 127 error = socreate(uap->domain, &so, uap->type, uap->protocol, p); 128 if (error) { 129 if (fdp->fd_ofiles[fd] == fp) { 130 fdp->fd_ofiles[fd] = NULL; 131 fdrop(fp, p); 132 } 133 } else { 134 fp->f_data = (caddr_t)so; 135 fp->f_flag = FREAD|FWRITE; 136 fp->f_ops = &socketops; 137 fp->f_type = DTYPE_SOCKET; 138 p->p_retval[0] = fd; 139 } 140 fdrop(fp, p); 141 return (error); 142} 143 144/* ARGSUSED */ 145int 146bind(p, uap) 147 struct proc *p; 148 register struct bind_args /* { 149 int s; 150 caddr_t name; 151 int namelen; 152 } */ *uap; 153{ 154 struct file *fp; 155 struct sockaddr *sa; 156 int error; 157 158 error = holdsock(p->p_fd, uap->s, &fp); 159 if (error) 160 return (error); 161 error = getsockaddr(&sa, uap->name, uap->namelen); 162 if (error) { 163 fdrop(fp, p); 164 return (error); 165 } 166 error = sobind((struct socket *)fp->f_data, sa, p); 167 FREE(sa, M_SONAME); 168 fdrop(fp, p); 169 return (error); 170} 171 172/* ARGSUSED */ 173int 174listen(p, uap) 175 struct proc *p; 176 register struct listen_args /* { 177 int s; 178 int backlog; 179 } */ *uap; 180{ 181 struct file *fp; 182 int error; 183 184 error = holdsock(p->p_fd, uap->s, &fp); 185 if (error) 186 return (error); 187 error = solisten((struct socket *)fp->f_data, uap->backlog, p); 188 fdrop(fp, p); 189 return(error); 190} 191 192static int 193accept1(p, uap, compat) 194 struct proc *p; 195 register struct accept_args /* { 196 int s; 197 caddr_t name; 198 int *anamelen; 199 } */ *uap; 200 int compat; 201{ 202 struct filedesc *fdp = p->p_fd; 203 struct file *lfp = NULL; 204 struct file *nfp = NULL; 205 struct sockaddr *sa; 206 int namelen, error, s; 207 struct socket *head, *so; 208 int fd; 209 short fflag; /* type must match fp->f_flag */ 210 211 if (uap->name) { 212 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen, 213 sizeof (namelen)); 214 if(error) 215 return (error); 216 } 217 error = holdsock(fdp, uap->s, &lfp); 218 if (error) 219 return (error); 220 s = splnet(); 221 head = (struct socket *)lfp->f_data; 222 if ((head->so_options & SO_ACCEPTCONN) == 0) { 223 splx(s); 224 error = EINVAL; 225 goto done; 226 } 227 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 228 splx(s); 229 error = EWOULDBLOCK; 230 goto done; 231 } 232 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 233 if (head->so_state & SS_CANTRCVMORE) { 234 head->so_error = ECONNABORTED; 235 break; 236 } 237 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH, 238 "accept", 0); 239 if (error) { 240 splx(s); 241 goto done; 242 } 243 } 244 if (head->so_error) { 245 error = head->so_error; 246 head->so_error = 0; 247 splx(s); 248 goto done; 249 } 250 251 /* 252 * At this point we know that there is at least one connection 253 * ready to be accepted. Remove it from the queue prior to 254 * allocating the file descriptor for it since falloc() may 255 * block allowing another process to accept the connection 256 * instead. 257 */ 258 so = TAILQ_FIRST(&head->so_comp); 259 TAILQ_REMOVE(&head->so_comp, so, so_list); 260 head->so_qlen--; 261 262 fflag = lfp->f_flag; 263 error = falloc(p, &nfp, &fd); 264 if (error) { 265 /* 266 * Probably ran out of file descriptors. Put the 267 * unaccepted connection back onto the queue and 268 * do another wakeup so some other process might 269 * have a chance at it. 270 */ 271 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 272 head->so_qlen++; 273 wakeup_one(&head->so_timeo); 274 splx(s); 275 goto done; 276 } 277 fhold(nfp); 278 p->p_retval[0] = fd; 279 280 /* connection has been removed from the listen queue */ 281 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 282 283 so->so_state &= ~SS_COMP; 284 so->so_head = NULL; 285 if (head->so_sigio != NULL) 286 fsetown(fgetown(head->so_sigio), &so->so_sigio); 287 288 nfp->f_data = (caddr_t)so; 289 nfp->f_flag = fflag; 290 nfp->f_ops = &socketops; 291 nfp->f_type = DTYPE_SOCKET; 292 sa = 0; 293 (void) soaccept(so, &sa); 294 if (sa == NULL) { 295 namelen = 0; 296 if (uap->name) 297 goto gotnoname; 298 splx(s); 299 error = 0; 300 goto done; 301 } 302 if (uap->name) { 303 /* check sa_len before it is destroyed */ 304 if (namelen > sa->sa_len) 305 namelen = sa->sa_len; 306#ifdef COMPAT_OLDSOCK 307 if (compat) 308 ((struct osockaddr *)sa)->sa_family = 309 sa->sa_family; 310#endif 311 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen); 312 if (!error) 313gotnoname: 314 error = copyout((caddr_t)&namelen, 315 (caddr_t)uap->anamelen, sizeof (*uap->anamelen)); 316 } 317 if (sa) 318 FREE(sa, M_SONAME); 319 320 /* 321 * close the new descriptor, assuming someone hasn't ripped it 322 * out from under us. 323 */ 324 if (error) { 325 if (fdp->fd_ofiles[fd] == nfp) { 326 fdp->fd_ofiles[fd] = NULL; 327 fdrop(nfp, p); 328 } 329 } 330 splx(s); 331 332 /* 333 * Release explicitly held references before returning. 334 */ 335done: 336 if (nfp != NULL) 337 fdrop(nfp, p); 338 fdrop(lfp, p); 339 return (error); 340} 341 342int 343accept(p, uap) 344 struct proc *p; 345 struct accept_args *uap; 346{ 347 348 return (accept1(p, uap, 0)); 349} 350 351#ifdef COMPAT_OLDSOCK 352int 353oaccept(p, uap) 354 struct proc *p; 355 struct accept_args *uap; 356{ 357 358 return (accept1(p, uap, 1)); 359} 360#endif /* COMPAT_OLDSOCK */ 361 362/* ARGSUSED */ 363int 364connect(p, uap) 365 struct proc *p; 366 register struct connect_args /* { 367 int s; 368 caddr_t name; 369 int namelen; 370 } */ *uap; 371{ 372 struct file *fp; 373 register struct socket *so; 374 struct sockaddr *sa; 375 int error, s; 376 377 error = holdsock(p->p_fd, uap->s, &fp); 378 if (error) 379 return (error); 380 so = (struct socket *)fp->f_data; 381 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 382 error = EALREADY; 383 goto done; 384 } 385 error = getsockaddr(&sa, uap->name, uap->namelen); 386 if (error) 387 goto done; 388 error = soconnect(so, sa, p); 389 if (error) 390 goto bad; 391 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 392 FREE(sa, M_SONAME); 393 error = EINPROGRESS; 394 goto done; 395 } 396 s = splnet(); 397 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 398 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, 399 "connec", 0); 400 if (error) 401 break; 402 } 403 if (error == 0) { 404 error = so->so_error; 405 so->so_error = 0; 406 } 407 splx(s); 408bad: 409 so->so_state &= ~SS_ISCONNECTING; 410 FREE(sa, M_SONAME); 411 if (error == ERESTART) 412 error = EINTR; 413done: 414 fdrop(fp, p); 415 return (error); 416} 417 418int 419socketpair(p, uap) 420 struct proc *p; 421 register struct socketpair_args /* { 422 int domain; 423 int type; 424 int protocol; 425 int *rsv; 426 } */ *uap; 427{ 428 register struct filedesc *fdp = p->p_fd; 429 struct file *fp1, *fp2; 430 struct socket *so1, *so2; 431 int fd, error, sv[2]; 432 433 error = socreate(uap->domain, &so1, uap->type, uap->protocol, p); 434 if (error) 435 return (error); 436 error = socreate(uap->domain, &so2, uap->type, uap->protocol, p); 437 if (error) 438 goto free1; 439 error = falloc(p, &fp1, &fd); 440 if (error) 441 goto free2; 442 fhold(fp1); 443 sv[0] = fd; 444 fp1->f_data = (caddr_t)so1; 445 error = falloc(p, &fp2, &fd); 446 if (error) 447 goto free3; 448 fhold(fp2); 449 fp2->f_data = (caddr_t)so2; 450 sv[1] = fd; 451 error = soconnect2(so1, so2); 452 if (error) 453 goto free4; 454 if (uap->type == SOCK_DGRAM) { 455 /* 456 * Datagram socket connection is asymmetric. 457 */ 458 error = soconnect2(so2, so1); 459 if (error) 460 goto free4; 461 } 462 fp1->f_flag = fp2->f_flag = FREAD|FWRITE; 463 fp1->f_ops = fp2->f_ops = &socketops; 464 fp1->f_type = fp2->f_type = DTYPE_SOCKET; 465 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); 466 fdrop(fp1, p); 467 fdrop(fp2, p); 468 return (error); 469free4: 470 if (fdp->fd_ofiles[sv[1]] == fp2) { 471 fdp->fd_ofiles[sv[1]] = NULL; 472 fdrop(fp2, p); 473 } 474 fdrop(fp2, p); 475free3: 476 if (fdp->fd_ofiles[sv[0]] == fp1) { 477 fdp->fd_ofiles[sv[0]] = NULL; 478 fdrop(fp1, p); 479 } 480 fdrop(fp1, p); 481free2: 482 (void)soclose(so2); 483free1: 484 (void)soclose(so1); 485 return (error); 486} 487 488static int 489sendit(p, s, mp, flags) 490 register struct proc *p; 491 int s; 492 register struct msghdr *mp; 493 int flags; 494{ 495 struct file *fp; 496 struct uio auio; 497 register struct iovec *iov; 498 register int i; 499 struct mbuf *control; 500 struct sockaddr *to; 501 int len, error; 502 struct socket *so; 503#ifdef KTRACE 504 struct iovec *ktriov = NULL; 505 struct uio ktruio; 506#endif 507 508 error = holdsock(p->p_fd, s, &fp); 509 if (error) 510 return (error); 511 auio.uio_iov = mp->msg_iov; 512 auio.uio_iovcnt = mp->msg_iovlen; 513 auio.uio_segflg = UIO_USERSPACE; 514 auio.uio_rw = UIO_WRITE; 515 auio.uio_procp = p; 516 auio.uio_offset = 0; /* XXX */ 517 auio.uio_resid = 0; 518 iov = mp->msg_iov; 519 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 520 if ((auio.uio_resid += iov->iov_len) < 0) { 521 fdrop(fp, p); 522 return (EINVAL); 523 } 524 } 525 if (mp->msg_name) { 526 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 527 if (error) { 528 fdrop(fp, p); 529 return (error); 530 } 531 } else { 532 to = 0; 533 } 534 if (mp->msg_control) { 535 if (mp->msg_controllen < sizeof(struct cmsghdr) 536#ifdef COMPAT_OLDSOCK 537 && mp->msg_flags != MSG_COMPAT 538#endif 539 ) { 540 error = EINVAL; 541 goto bad; 542 } 543 error = sockargs(&control, mp->msg_control, 544 mp->msg_controllen, MT_CONTROL); 545 if (error) 546 goto bad; 547#ifdef COMPAT_OLDSOCK 548 if (mp->msg_flags == MSG_COMPAT) { 549 register struct cmsghdr *cm; 550 551 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 552 if (control == 0) { 553 error = ENOBUFS; 554 goto bad; 555 } else { 556 cm = mtod(control, struct cmsghdr *); 557 cm->cmsg_len = control->m_len; 558 cm->cmsg_level = SOL_SOCKET; 559 cm->cmsg_type = SCM_RIGHTS; 560 } 561 } 562#endif 563 } else { 564 control = 0; 565 } 566#ifdef KTRACE 567 if (KTRPOINT(p, KTR_GENIO)) { 568 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 569 570 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 571 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 572 ktruio = auio; 573 } 574#endif 575 len = auio.uio_resid; 576 so = (struct socket *)fp->f_data; 577 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, 578 flags, p); 579 if (error) { 580 if (auio.uio_resid != len && (error == ERESTART || 581 error == EINTR || error == EWOULDBLOCK)) 582 error = 0; 583 if (error == EPIPE) 584 psignal(p, SIGPIPE); 585 } 586 if (error == 0) 587 p->p_retval[0] = len - auio.uio_resid; 588#ifdef KTRACE 589 if (ktriov != NULL) { 590 if (error == 0) { 591 ktruio.uio_iov = ktriov; 592 ktruio.uio_resid = p->p_retval[0]; 593 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error); 594 } 595 FREE(ktriov, M_TEMP); 596 } 597#endif 598bad: 599 fdrop(fp, p); 600 if (to) 601 FREE(to, M_SONAME); 602 return (error); 603} 604 605int 606sendto(p, uap) 607 struct proc *p; 608 register struct sendto_args /* { 609 int s; 610 caddr_t buf; 611 size_t len; 612 int flags; 613 caddr_t to; 614 int tolen; 615 } */ *uap; 616{ 617 struct msghdr msg; 618 struct iovec aiov; 619 620 msg.msg_name = uap->to; 621 msg.msg_namelen = uap->tolen; 622 msg.msg_iov = &aiov; 623 msg.msg_iovlen = 1; 624 msg.msg_control = 0; 625#ifdef COMPAT_OLDSOCK 626 msg.msg_flags = 0; 627#endif 628 aiov.iov_base = uap->buf; 629 aiov.iov_len = uap->len; 630 return (sendit(p, uap->s, &msg, uap->flags)); 631} 632 633#ifdef COMPAT_OLDSOCK 634int 635osend(p, uap) 636 struct proc *p; 637 register struct osend_args /* { 638 int s; 639 caddr_t buf; 640 int len; 641 int flags; 642 } */ *uap; 643{ 644 struct msghdr msg; 645 struct iovec aiov; 646 647 msg.msg_name = 0; 648 msg.msg_namelen = 0; 649 msg.msg_iov = &aiov; 650 msg.msg_iovlen = 1; 651 aiov.iov_base = uap->buf; 652 aiov.iov_len = uap->len; 653 msg.msg_control = 0; 654 msg.msg_flags = 0; 655 return (sendit(p, uap->s, &msg, uap->flags)); 656} 657 658int 659osendmsg(p, uap) 660 struct proc *p; 661 register struct osendmsg_args /* { 662 int s; 663 caddr_t msg; 664 int flags; 665 } */ *uap; 666{ 667 struct msghdr msg; 668 struct iovec aiov[UIO_SMALLIOV], *iov; 669 int error; 670 671 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); 672 if (error) 673 return (error); 674 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 675 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 676 return (EMSGSIZE); 677 MALLOC(iov, struct iovec *, 678 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 679 M_WAITOK); 680 } else 681 iov = aiov; 682 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 683 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 684 if (error) 685 goto done; 686 msg.msg_flags = MSG_COMPAT; 687 msg.msg_iov = iov; 688 error = sendit(p, uap->s, &msg, uap->flags); 689done: 690 if (iov != aiov) 691 FREE(iov, M_IOV); 692 return (error); 693} 694#endif 695 696int 697sendmsg(p, uap) 698 struct proc *p; 699 register struct sendmsg_args /* { 700 int s; 701 caddr_t msg; 702 int flags; 703 } */ *uap; 704{ 705 struct msghdr msg; 706 struct iovec aiov[UIO_SMALLIOV], *iov; 707 int error; 708 709 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)); 710 if (error) 711 return (error); 712 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 713 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 714 return (EMSGSIZE); 715 MALLOC(iov, struct iovec *, 716 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 717 M_WAITOK); 718 } else 719 iov = aiov; 720 if (msg.msg_iovlen && 721 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 722 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 723 goto done; 724 msg.msg_iov = iov; 725#ifdef COMPAT_OLDSOCK 726 msg.msg_flags = 0; 727#endif 728 error = sendit(p, uap->s, &msg, uap->flags); 729done: 730 if (iov != aiov) 731 FREE(iov, M_IOV); 732 return (error); 733} 734 735static int 736recvit(p, s, mp, namelenp) 737 register struct proc *p; 738 int s; 739 register struct msghdr *mp; 740 caddr_t namelenp; 741{ 742 struct file *fp; 743 struct uio auio; 744 register struct iovec *iov; 745 register int i; 746 int len, error; 747 struct mbuf *m, *control = 0; 748 caddr_t ctlbuf; 749 struct socket *so; 750 struct sockaddr *fromsa = 0; 751#ifdef KTRACE 752 struct iovec *ktriov = NULL; 753 struct uio ktruio; 754#endif 755 756 error = holdsock(p->p_fd, s, &fp); 757 if (error) 758 return (error); 759 auio.uio_iov = mp->msg_iov; 760 auio.uio_iovcnt = mp->msg_iovlen; 761 auio.uio_segflg = UIO_USERSPACE; 762 auio.uio_rw = UIO_READ; 763 auio.uio_procp = p; 764 auio.uio_offset = 0; /* XXX */ 765 auio.uio_resid = 0; 766 iov = mp->msg_iov; 767 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 768 if ((auio.uio_resid += iov->iov_len) < 0) { 769 fdrop(fp, p); 770 return (EINVAL); 771 } 772 } 773#ifdef KTRACE 774 if (KTRPOINT(p, KTR_GENIO)) { 775 int iovlen = auio.uio_iovcnt * sizeof (struct iovec); 776 777 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 778 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 779 ktruio = auio; 780 } 781#endif 782 len = auio.uio_resid; 783 so = (struct socket *)fp->f_data; 784 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 785 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 786 &mp->msg_flags); 787 if (error) { 788 if (auio.uio_resid != len && (error == ERESTART || 789 error == EINTR || error == EWOULDBLOCK)) 790 error = 0; 791 } 792#ifdef KTRACE 793 if (ktriov != NULL) { 794 if (error == 0) { 795 ktruio.uio_iov = ktriov; 796 ktruio.uio_resid = len - auio.uio_resid; 797 ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error); 798 } 799 FREE(ktriov, M_TEMP); 800 } 801#endif 802 if (error) 803 goto out; 804 p->p_retval[0] = len - auio.uio_resid; 805 if (mp->msg_name) { 806 len = mp->msg_namelen; 807 if (len <= 0 || fromsa == 0) 808 len = 0; 809 else { 810#ifndef MIN 811#define MIN(a,b) ((a)>(b)?(b):(a)) 812#endif 813 /* save sa_len before it is destroyed by MSG_COMPAT */ 814 len = MIN(len, fromsa->sa_len); 815#ifdef COMPAT_OLDSOCK 816 if (mp->msg_flags & MSG_COMPAT) 817 ((struct osockaddr *)fromsa)->sa_family = 818 fromsa->sa_family; 819#endif 820 error = copyout(fromsa, 821 (caddr_t)mp->msg_name, (unsigned)len); 822 if (error) 823 goto out; 824 } 825 mp->msg_namelen = len; 826 if (namelenp && 827 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { 828#ifdef COMPAT_OLDSOCK 829 if (mp->msg_flags & MSG_COMPAT) 830 error = 0; /* old recvfrom didn't check */ 831 else 832#endif 833 goto out; 834 } 835 } 836 if (mp->msg_control) { 837#ifdef COMPAT_OLDSOCK 838 /* 839 * We assume that old recvmsg calls won't receive access 840 * rights and other control info, esp. as control info 841 * is always optional and those options didn't exist in 4.3. 842 * If we receive rights, trim the cmsghdr; anything else 843 * is tossed. 844 */ 845 if (control && mp->msg_flags & MSG_COMPAT) { 846 if (mtod(control, struct cmsghdr *)->cmsg_level != 847 SOL_SOCKET || 848 mtod(control, struct cmsghdr *)->cmsg_type != 849 SCM_RIGHTS) { 850 mp->msg_controllen = 0; 851 goto out; 852 } 853 control->m_len -= sizeof (struct cmsghdr); 854 control->m_data += sizeof (struct cmsghdr); 855 } 856#endif 857 len = mp->msg_controllen; 858 m = control; 859 mp->msg_controllen = 0; 860 ctlbuf = (caddr_t) mp->msg_control; 861 862 while (m && len > 0) { 863 unsigned int tocopy; 864 865 if (len >= m->m_len) 866 tocopy = m->m_len; 867 else { 868 mp->msg_flags |= MSG_CTRUNC; 869 tocopy = len; 870 } 871 872 if ((error = copyout((caddr_t)mtod(m, caddr_t), 873 ctlbuf, tocopy)) != 0) 874 goto out; 875 876 ctlbuf += tocopy; 877 len -= tocopy; 878 m = m->m_next; 879 } 880 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 881 } 882out: 883 fdrop(fp, p); 884 if (fromsa) 885 FREE(fromsa, M_SONAME); 886 if (control) 887 m_freem(control); 888 return (error); 889} 890 891int 892recvfrom(p, uap) 893 struct proc *p; 894 register struct recvfrom_args /* { 895 int s; 896 caddr_t buf; 897 size_t len; 898 int flags; 899 caddr_t from; 900 int *fromlenaddr; 901 } */ *uap; 902{ 903 struct msghdr msg; 904 struct iovec aiov; 905 int error; 906 907 if (uap->fromlenaddr) { 908 error = copyin((caddr_t)uap->fromlenaddr, 909 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)); 910 if (error) 911 return (error); 912 } else 913 msg.msg_namelen = 0; 914 msg.msg_name = uap->from; 915 msg.msg_iov = &aiov; 916 msg.msg_iovlen = 1; 917 aiov.iov_base = uap->buf; 918 aiov.iov_len = uap->len; 919 msg.msg_control = 0; 920 msg.msg_flags = uap->flags; 921 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr)); 922} 923 924#ifdef COMPAT_OLDSOCK 925int 926orecvfrom(p, uap) 927 struct proc *p; 928 struct recvfrom_args *uap; 929{ 930 931 uap->flags |= MSG_COMPAT; 932 return (recvfrom(p, uap)); 933} 934#endif 935 936 937#ifdef COMPAT_OLDSOCK 938int 939orecv(p, uap) 940 struct proc *p; 941 register struct orecv_args /* { 942 int s; 943 caddr_t buf; 944 int len; 945 int flags; 946 } */ *uap; 947{ 948 struct msghdr msg; 949 struct iovec aiov; 950 951 msg.msg_name = 0; 952 msg.msg_namelen = 0; 953 msg.msg_iov = &aiov; 954 msg.msg_iovlen = 1; 955 aiov.iov_base = uap->buf; 956 aiov.iov_len = uap->len; 957 msg.msg_control = 0; 958 msg.msg_flags = uap->flags; 959 return (recvit(p, uap->s, &msg, (caddr_t)0)); 960} 961 962/* 963 * Old recvmsg. This code takes advantage of the fact that the old msghdr 964 * overlays the new one, missing only the flags, and with the (old) access 965 * rights where the control fields are now. 966 */ 967int 968orecvmsg(p, uap) 969 struct proc *p; 970 register struct orecvmsg_args /* { 971 int s; 972 struct omsghdr *msg; 973 int flags; 974 } */ *uap; 975{ 976 struct msghdr msg; 977 struct iovec aiov[UIO_SMALLIOV], *iov; 978 int error; 979 980 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, 981 sizeof (struct omsghdr)); 982 if (error) 983 return (error); 984 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 985 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 986 return (EMSGSIZE); 987 MALLOC(iov, struct iovec *, 988 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 989 M_WAITOK); 990 } else 991 iov = aiov; 992 msg.msg_flags = uap->flags | MSG_COMPAT; 993 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, 994 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 995 if (error) 996 goto done; 997 msg.msg_iov = iov; 998 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen); 999 1000 if (msg.msg_controllen && error == 0) 1001 error = copyout((caddr_t)&msg.msg_controllen, 1002 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int)); 1003done: 1004 if (iov != aiov) 1005 FREE(iov, M_IOV); 1006 return (error); 1007} 1008#endif 1009 1010int 1011recvmsg(p, uap) 1012 struct proc *p; 1013 register struct recvmsg_args /* { 1014 int s; 1015 struct msghdr *msg; 1016 int flags; 1017 } */ *uap; 1018{ 1019 struct msghdr msg; 1020 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1021 register int error; 1022 1023 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg)); 1024 if (error) 1025 return (error); 1026 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1027 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) 1028 return (EMSGSIZE); 1029 MALLOC(iov, struct iovec *, 1030 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1031 M_WAITOK); 1032 } else 1033 iov = aiov; 1034#ifdef COMPAT_OLDSOCK 1035 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1036#else 1037 msg.msg_flags = uap->flags; 1038#endif 1039 uiov = msg.msg_iov; 1040 msg.msg_iov = iov; 1041 error = copyin((caddr_t)uiov, (caddr_t)iov, 1042 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1043 if (error) 1044 goto done; 1045 error = recvit(p, uap->s, &msg, (caddr_t)0); 1046 if (!error) { 1047 msg.msg_iov = uiov; 1048 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg)); 1049 } 1050done: 1051 if (iov != aiov) 1052 FREE(iov, M_IOV); 1053 return (error); 1054} 1055 1056/* ARGSUSED */ 1057int 1058shutdown(p, uap) 1059 struct proc *p; 1060 register struct shutdown_args /* { 1061 int s; 1062 int how; 1063 } */ *uap; 1064{ 1065 struct file *fp; 1066 int error; 1067 1068 error = holdsock(p->p_fd, uap->s, &fp); 1069 if (error) 1070 return (error); 1071 error = soshutdown((struct socket *)fp->f_data, uap->how); 1072 fdrop(fp, p); 1073 return(error); 1074} 1075 1076/* ARGSUSED */ 1077int 1078setsockopt(p, uap) 1079 struct proc *p; 1080 register struct setsockopt_args /* { 1081 int s; 1082 int level; 1083 int name; 1084 caddr_t val; 1085 int valsize; 1086 } */ *uap; 1087{ 1088 struct file *fp; 1089 struct sockopt sopt; 1090 int error; 1091 1092 if (uap->val == 0 && uap->valsize != 0) 1093 return (EFAULT); 1094 if (uap->valsize < 0) 1095 return (EINVAL); 1096 1097 error = holdsock(p->p_fd, uap->s, &fp); 1098 if (error) 1099 return (error); 1100 1101 sopt.sopt_dir = SOPT_SET; 1102 sopt.sopt_level = uap->level; 1103 sopt.sopt_name = uap->name; 1104 sopt.sopt_val = uap->val; 1105 sopt.sopt_valsize = uap->valsize; 1106 sopt.sopt_p = p; 1107 error = sosetopt((struct socket *)fp->f_data, &sopt); 1108 fdrop(fp, p); 1109 return(error); 1110} 1111 1112/* ARGSUSED */ 1113int 1114getsockopt(p, uap) 1115 struct proc *p; 1116 register struct getsockopt_args /* { 1117 int s; 1118 int level; 1119 int name; 1120 caddr_t val; 1121 int *avalsize; 1122 } */ *uap; 1123{ 1124 int valsize, error; 1125 struct file *fp; 1126 struct sockopt sopt; 1127 1128 error = holdsock(p->p_fd, uap->s, &fp); 1129 if (error) 1130 return (error); 1131 if (uap->val) { 1132 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, 1133 sizeof (valsize)); 1134 if (error) { 1135 fdrop(fp, p); 1136 return (error); 1137 } 1138 if (valsize < 0) { 1139 fdrop(fp, p); 1140 return (EINVAL); 1141 } 1142 } else { 1143 valsize = 0; 1144 } 1145 1146 sopt.sopt_dir = SOPT_GET; 1147 sopt.sopt_level = uap->level; 1148 sopt.sopt_name = uap->name; 1149 sopt.sopt_val = uap->val; 1150 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1151 sopt.sopt_p = p; 1152 1153 error = sogetopt((struct socket *)fp->f_data, &sopt); 1154 if (error == 0) { 1155 valsize = sopt.sopt_valsize; 1156 error = copyout((caddr_t)&valsize, 1157 (caddr_t)uap->avalsize, sizeof (valsize)); 1158 } 1159 fdrop(fp, p); 1160 return (error); 1161} 1162 1163/* 1164 * Get socket name. 1165 */ 1166/* ARGSUSED */ 1167static int 1168getsockname1(p, uap, compat) 1169 struct proc *p; 1170 register struct getsockname_args /* { 1171 int fdes; 1172 caddr_t asa; 1173 int *alen; 1174 } */ *uap; 1175 int compat; 1176{ 1177 struct file *fp; 1178 register struct socket *so; 1179 struct sockaddr *sa; 1180 int len, error; 1181 1182 error = holdsock(p->p_fd, uap->fdes, &fp); 1183 if (error) 1184 return (error); 1185 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1186 if (error) { 1187 fdrop(fp, p); 1188 return (error); 1189 } 1190 so = (struct socket *)fp->f_data; 1191 sa = 0; 1192 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1193 if (error) 1194 goto bad; 1195 if (sa == 0) { 1196 len = 0; 1197 goto gotnothing; 1198 } 1199 1200 len = MIN(len, sa->sa_len); 1201#ifdef COMPAT_OLDSOCK 1202 if (compat) 1203 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1204#endif 1205 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1206 if (error == 0) 1207gotnothing: 1208 error = copyout((caddr_t)&len, (caddr_t)uap->alen, 1209 sizeof (len)); 1210bad: 1211 if (sa) 1212 FREE(sa, M_SONAME); 1213 fdrop(fp, p); 1214 return (error); 1215} 1216 1217int 1218getsockname(p, uap) 1219 struct proc *p; 1220 struct getsockname_args *uap; 1221{ 1222 1223 return (getsockname1(p, uap, 0)); 1224} 1225 1226#ifdef COMPAT_OLDSOCK 1227int 1228ogetsockname(p, uap) 1229 struct proc *p; 1230 struct getsockname_args *uap; 1231{ 1232 1233 return (getsockname1(p, uap, 1)); 1234} 1235#endif /* COMPAT_OLDSOCK */ 1236 1237/* 1238 * Get name of peer for connected socket. 1239 */ 1240/* ARGSUSED */ 1241static int 1242getpeername1(p, uap, compat) 1243 struct proc *p; 1244 register struct getpeername_args /* { 1245 int fdes; 1246 caddr_t asa; 1247 int *alen; 1248 } */ *uap; 1249 int compat; 1250{ 1251 struct file *fp; 1252 register struct socket *so; 1253 struct sockaddr *sa; 1254 int len, error; 1255 1256 error = holdsock(p->p_fd, uap->fdes, &fp); 1257 if (error) 1258 return (error); 1259 so = (struct socket *)fp->f_data; 1260 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1261 fdrop(fp, p); 1262 return (ENOTCONN); 1263 } 1264 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); 1265 if (error) { 1266 fdrop(fp, p); 1267 return (error); 1268 } 1269 sa = 0; 1270 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1271 if (error) 1272 goto bad; 1273 if (sa == 0) { 1274 len = 0; 1275 goto gotnothing; 1276 } 1277 len = MIN(len, sa->sa_len); 1278#ifdef COMPAT_OLDSOCK 1279 if (compat) 1280 ((struct osockaddr *)sa)->sa_family = 1281 sa->sa_family; 1282#endif 1283 error = copyout(sa, (caddr_t)uap->asa, (u_int)len); 1284 if (error) 1285 goto bad; 1286gotnothing: 1287 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); 1288bad: 1289 if (sa) 1290 FREE(sa, M_SONAME); 1291 fdrop(fp, p); 1292 return (error); 1293} 1294 1295int 1296getpeername(p, uap) 1297 struct proc *p; 1298 struct getpeername_args *uap; 1299{ 1300 1301 return (getpeername1(p, uap, 0)); 1302} 1303 1304#ifdef COMPAT_OLDSOCK 1305int 1306ogetpeername(p, uap) 1307 struct proc *p; 1308 struct ogetpeername_args *uap; 1309{ 1310 1311 /* XXX uap should have type `getpeername_args *' to begin with. */ 1312 return (getpeername1(p, (struct getpeername_args *)uap, 1)); 1313} 1314#endif /* COMPAT_OLDSOCK */ 1315 1316int 1317sockargs(mp, buf, buflen, type) 1318 struct mbuf **mp; 1319 caddr_t buf; 1320 int buflen, type; 1321{ 1322 register struct sockaddr *sa; 1323 register struct mbuf *m; 1324 int error; 1325 1326 if ((u_int)buflen > MLEN) { 1327#ifdef COMPAT_OLDSOCK 1328 if (type == MT_SONAME && (u_int)buflen <= 112) 1329 buflen = MLEN; /* unix domain compat. hack */ 1330 else 1331#endif 1332 return (EINVAL); 1333 } 1334 m = m_get(M_TRYWAIT, type); 1335 if (m == NULL) 1336 return (ENOBUFS); 1337 m->m_len = buflen; 1338 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1339 if (error) 1340 (void) m_free(m); 1341 else { 1342 *mp = m; 1343 if (type == MT_SONAME) { 1344 sa = mtod(m, struct sockaddr *); 1345 1346#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1347 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1348 sa->sa_family = sa->sa_len; 1349#endif 1350 sa->sa_len = buflen; 1351 } 1352 } 1353 return (error); 1354} 1355 1356int 1357getsockaddr(namp, uaddr, len) 1358 struct sockaddr **namp; 1359 caddr_t uaddr; 1360 size_t len; 1361{ 1362 struct sockaddr *sa; 1363 int error; 1364 1365 if (len > SOCK_MAXADDRLEN) 1366 return ENAMETOOLONG; 1367 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1368 error = copyin(uaddr, sa, len); 1369 if (error) { 1370 FREE(sa, M_SONAME); 1371 } else { 1372#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1373 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1374 sa->sa_family = sa->sa_len; 1375#endif 1376 sa->sa_len = len; 1377 *namp = sa; 1378 } 1379 return error; 1380} 1381 1382/* 1383 * holdsock() - load the struct file pointer associated 1384 * with a socket into *fpp. If an error occurs, non-zero 1385 * will be returned and *fpp will be set to NULL. 1386 */ 1387int 1388holdsock(fdp, fdes, fpp) 1389 struct filedesc *fdp; 1390 int fdes; 1391 struct file **fpp; 1392{ 1393 register struct file *fp = NULL; 1394 int error = 0; 1395 1396 if ((unsigned)fdes >= fdp->fd_nfiles || 1397 (fp = fdp->fd_ofiles[fdes]) == NULL) { 1398 error = EBADF; 1399 } else if (fp->f_type != DTYPE_SOCKET) { 1400 error = ENOTSOCK; 1401 fp = NULL; 1402 } else { 1403 fhold(fp); 1404 } 1405 *fpp = fp; 1406 return(error); 1407} 1408 1409/* 1410 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1411 * XXX - The sf_buf functions are currently private to sendfile(2), so have 1412 * been made static, but may be useful in the future for doing zero-copy in 1413 * other parts of the networking code. 1414 */ 1415static void 1416sf_buf_init(void *arg) 1417{ 1418 int i; 1419 1420 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", MTX_DEF);
|
1485} 1486 1487/* 1488 * sendfile(2) 1489 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1490 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1491 * 1492 * Send a file specified by 'fd' and starting at 'offset' to a socket 1493 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1494 * nbytes == 0. Optionally add a header and/or trailer to the socket 1495 * output. If specified, write the total number of bytes sent into *sbytes. 1496 */ 1497int 1498sendfile(struct proc *p, struct sendfile_args *uap) 1499{ 1500 struct file *fp; 1501 struct filedesc *fdp = p->p_fd; 1502 struct vnode *vp; 1503 struct vm_object *obj; 1504 struct socket *so; 1505 struct mbuf *m; 1506 struct sf_buf *sf; 1507 struct vm_page *pg; 1508 struct writev_args nuap; 1509 struct sf_hdtr hdtr; 1510 off_t off, xfsize, sbytes = 0; 1511 int error = 0, s; 1512 1513 vp = NULL; 1514 /* 1515 * Do argument checking. Must be a regular file in, stream 1516 * type and connected socket out, positive offset. 1517 */ 1518 fp = holdfp(fdp, uap->fd, FREAD); 1519 if (fp == NULL) { 1520 error = EBADF; 1521 goto done; 1522 } 1523 if (fp->f_type != DTYPE_VNODE) { 1524 error = EINVAL; 1525 goto done; 1526 } 1527 vp = (struct vnode *)fp->f_data; 1528 vref(vp); 1529 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1530 error = EINVAL; 1531 goto done; 1532 } 1533 fdrop(fp, p); 1534 error = holdsock(p->p_fd, uap->s, &fp); 1535 if (error) 1536 goto done; 1537 so = (struct socket *)fp->f_data; 1538 if (so->so_type != SOCK_STREAM) { 1539 error = EINVAL; 1540 goto done; 1541 } 1542 if ((so->so_state & SS_ISCONNECTED) == 0) { 1543 error = ENOTCONN; 1544 goto done; 1545 } 1546 if (uap->offset < 0) { 1547 error = EINVAL; 1548 goto done; 1549 } 1550 1551 /* 1552 * If specified, get the pointer to the sf_hdtr struct for 1553 * any headers/trailers. 1554 */ 1555 if (uap->hdtr != NULL) { 1556 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1557 if (error) 1558 goto done; 1559 /* 1560 * Send any headers. Wimp out and use writev(2). 1561 */ 1562 if (hdtr.headers != NULL) { 1563 nuap.fd = uap->s; 1564 nuap.iovp = hdtr.headers; 1565 nuap.iovcnt = hdtr.hdr_cnt; 1566 error = writev(p, &nuap); 1567 if (error) 1568 goto done; 1569 sbytes += p->p_retval[0]; 1570 } 1571 } 1572 1573 /* 1574 * Protect against multiple writers to the socket. 1575 */ 1576 (void) sblock(&so->so_snd, M_WAITOK); 1577 1578 /* 1579 * Loop through the pages in the file, starting with the requested 1580 * offset. Get a file page (do I/O if necessary), map the file page 1581 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1582 * it on the socket. 1583 */ 1584 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1585 vm_pindex_t pindex; 1586 vm_offset_t pgoff; 1587 1588 pindex = OFF_TO_IDX(off); 1589retry_lookup: 1590 /* 1591 * Calculate the amount to transfer. Not to exceed a page, 1592 * the EOF, or the passed in nbytes. 1593 */ 1594 xfsize = obj->un_pager.vnp.vnp_size - off; 1595 if (xfsize > PAGE_SIZE) 1596 xfsize = PAGE_SIZE; 1597 pgoff = (vm_offset_t)(off & PAGE_MASK); 1598 if (PAGE_SIZE - pgoff < xfsize) 1599 xfsize = PAGE_SIZE - pgoff; 1600 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1601 xfsize = uap->nbytes - sbytes; 1602 if (xfsize <= 0) 1603 break; 1604 /* 1605 * Optimize the non-blocking case by looking at the socket space 1606 * before going to the extra work of constituting the sf_buf. 1607 */ 1608 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1609 if (so->so_state & SS_CANTSENDMORE) 1610 error = EPIPE; 1611 else 1612 error = EAGAIN; 1613 sbunlock(&so->so_snd); 1614 goto done; 1615 } 1616 /* 1617 * Attempt to look up the page. 1618 * 1619 * Allocate if not found 1620 * 1621 * Wait and loop if busy. 1622 */ 1623 pg = vm_page_lookup(obj, pindex); 1624 1625 if (pg == NULL) { 1626 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1627 if (pg == NULL) { 1628 VM_WAIT; 1629 goto retry_lookup; 1630 } 1631 vm_page_wakeup(pg); 1632 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1633 goto retry_lookup; 1634 } 1635 1636 /* 1637 * Wire the page so it does not get ripped out from under 1638 * us. 1639 */ 1640 1641 vm_page_wire(pg); 1642 1643 /* 1644 * If page is not valid for what we need, initiate I/O 1645 */ 1646 1647 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1648 struct uio auio; 1649 struct iovec aiov; 1650 int bsize; 1651 1652 /* 1653 * Ensure that our page is still around when the I/O 1654 * completes. 1655 */ 1656 vm_page_io_start(pg); 1657 1658 /* 1659 * Get the page from backing store. 1660 */ 1661 bsize = vp->v_mount->mnt_stat.f_iosize; 1662 auio.uio_iov = &aiov; 1663 auio.uio_iovcnt = 1; 1664 aiov.iov_base = 0; 1665 aiov.iov_len = MAXBSIZE; 1666 auio.uio_resid = MAXBSIZE; 1667 auio.uio_offset = trunc_page(off); 1668 auio.uio_segflg = UIO_NOCOPY; 1669 auio.uio_rw = UIO_READ; 1670 auio.uio_procp = p; 1671 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p); 1672 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16), 1673 p->p_ucred); 1674 VOP_UNLOCK(vp, 0, p); 1675 vm_page_flag_clear(pg, PG_ZERO); 1676 vm_page_io_finish(pg); 1677 if (error) { 1678 vm_page_unwire(pg, 0); 1679 /* 1680 * See if anyone else might know about this page. 1681 * If not and it is not valid, then free it. 1682 */ 1683 if (pg->wire_count == 0 && pg->valid == 0 && 1684 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1685 pg->hold_count == 0) { 1686 vm_page_busy(pg); 1687 vm_page_free(pg); 1688 } 1689 sbunlock(&so->so_snd); 1690 goto done; 1691 } 1692 } 1693 1694 /* 1695 * Allocate a kernel virtual page and insert the physical page 1696 * into it. 1697 */ 1698 1699 sf = sf_buf_alloc(); 1700 sf->m = pg; 1701 pmap_qenter(sf->kva, &pg, 1); 1702 /* 1703 * Get an mbuf header and set it up as having external storage. 1704 */ 1705 MGETHDR(m, M_TRYWAIT, MT_DATA); 1706 if (m == NULL) { 1707 error = ENOBUFS; 1708 sf_buf_free((void *)sf->kva, NULL); 1709 goto done; 1710 } 1711 /* 1712 * Setup external storage for mbuf. 1713 */ 1714 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY, 1715 EXT_SFBUF); 1716 m->m_data = (char *) sf->kva + pgoff; 1717 m->m_pkthdr.len = m->m_len = xfsize; 1718 /* 1719 * Add the buffer to the socket buffer chain. 1720 */ 1721 s = splnet(); 1722retry_space: 1723 /* 1724 * Make sure that the socket is still able to take more data. 1725 * CANTSENDMORE being true usually means that the connection 1726 * was closed. so_error is true when an error was sensed after 1727 * a previous send. 1728 * The state is checked after the page mapping and buffer 1729 * allocation above since those operations may block and make 1730 * any socket checks stale. From this point forward, nothing 1731 * blocks before the pru_send (or more accurately, any blocking 1732 * results in a loop back to here to re-check). 1733 */ 1734 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1735 if (so->so_state & SS_CANTSENDMORE) { 1736 error = EPIPE; 1737 } else { 1738 error = so->so_error; 1739 so->so_error = 0; 1740 } 1741 m_freem(m); 1742 sbunlock(&so->so_snd); 1743 splx(s); 1744 goto done; 1745 } 1746 /* 1747 * Wait for socket space to become available. We do this just 1748 * after checking the connection state above in order to avoid 1749 * a race condition with sbwait(). 1750 */ 1751 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1752 if (so->so_state & SS_NBIO) { 1753 m_freem(m); 1754 sbunlock(&so->so_snd); 1755 splx(s); 1756 error = EAGAIN; 1757 goto done; 1758 } 1759 error = sbwait(&so->so_snd); 1760 /* 1761 * An error from sbwait usually indicates that we've 1762 * been interrupted by a signal. If we've sent anything 1763 * then return bytes sent, otherwise return the error. 1764 */ 1765 if (error) { 1766 m_freem(m); 1767 sbunlock(&so->so_snd); 1768 splx(s); 1769 goto done; 1770 } 1771 goto retry_space; 1772 } 1773 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p); 1774 splx(s); 1775 if (error) { 1776 sbunlock(&so->so_snd); 1777 goto done; 1778 } 1779 } 1780 sbunlock(&so->so_snd); 1781 1782 /* 1783 * Send trailers. Wimp out and use writev(2). 1784 */ 1785 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1786 nuap.fd = uap->s; 1787 nuap.iovp = hdtr.trailers; 1788 nuap.iovcnt = hdtr.trl_cnt; 1789 error = writev(p, &nuap); 1790 if (error) 1791 goto done; 1792 sbytes += p->p_retval[0]; 1793 } 1794 1795done: 1796 if (uap->sbytes != NULL) { 1797 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1798 } 1799 if (vp) 1800 vrele(vp); 1801 if (fp) 1802 fdrop(fp, p); 1803 return (error); 1804}
| 1485} 1486 1487/* 1488 * sendfile(2) 1489 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1490 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1491 * 1492 * Send a file specified by 'fd' and starting at 'offset' to a socket 1493 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1494 * nbytes == 0. Optionally add a header and/or trailer to the socket 1495 * output. If specified, write the total number of bytes sent into *sbytes. 1496 */ 1497int 1498sendfile(struct proc *p, struct sendfile_args *uap) 1499{ 1500 struct file *fp; 1501 struct filedesc *fdp = p->p_fd; 1502 struct vnode *vp; 1503 struct vm_object *obj; 1504 struct socket *so; 1505 struct mbuf *m; 1506 struct sf_buf *sf; 1507 struct vm_page *pg; 1508 struct writev_args nuap; 1509 struct sf_hdtr hdtr; 1510 off_t off, xfsize, sbytes = 0; 1511 int error = 0, s; 1512 1513 vp = NULL; 1514 /* 1515 * Do argument checking. Must be a regular file in, stream 1516 * type and connected socket out, positive offset. 1517 */ 1518 fp = holdfp(fdp, uap->fd, FREAD); 1519 if (fp == NULL) { 1520 error = EBADF; 1521 goto done; 1522 } 1523 if (fp->f_type != DTYPE_VNODE) { 1524 error = EINVAL; 1525 goto done; 1526 } 1527 vp = (struct vnode *)fp->f_data; 1528 vref(vp); 1529 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1530 error = EINVAL; 1531 goto done; 1532 } 1533 fdrop(fp, p); 1534 error = holdsock(p->p_fd, uap->s, &fp); 1535 if (error) 1536 goto done; 1537 so = (struct socket *)fp->f_data; 1538 if (so->so_type != SOCK_STREAM) { 1539 error = EINVAL; 1540 goto done; 1541 } 1542 if ((so->so_state & SS_ISCONNECTED) == 0) { 1543 error = ENOTCONN; 1544 goto done; 1545 } 1546 if (uap->offset < 0) { 1547 error = EINVAL; 1548 goto done; 1549 } 1550 1551 /* 1552 * If specified, get the pointer to the sf_hdtr struct for 1553 * any headers/trailers. 1554 */ 1555 if (uap->hdtr != NULL) { 1556 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1557 if (error) 1558 goto done; 1559 /* 1560 * Send any headers. Wimp out and use writev(2). 1561 */ 1562 if (hdtr.headers != NULL) { 1563 nuap.fd = uap->s; 1564 nuap.iovp = hdtr.headers; 1565 nuap.iovcnt = hdtr.hdr_cnt; 1566 error = writev(p, &nuap); 1567 if (error) 1568 goto done; 1569 sbytes += p->p_retval[0]; 1570 } 1571 } 1572 1573 /* 1574 * Protect against multiple writers to the socket. 1575 */ 1576 (void) sblock(&so->so_snd, M_WAITOK); 1577 1578 /* 1579 * Loop through the pages in the file, starting with the requested 1580 * offset. Get a file page (do I/O if necessary), map the file page 1581 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1582 * it on the socket. 1583 */ 1584 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1585 vm_pindex_t pindex; 1586 vm_offset_t pgoff; 1587 1588 pindex = OFF_TO_IDX(off); 1589retry_lookup: 1590 /* 1591 * Calculate the amount to transfer. Not to exceed a page, 1592 * the EOF, or the passed in nbytes. 1593 */ 1594 xfsize = obj->un_pager.vnp.vnp_size - off; 1595 if (xfsize > PAGE_SIZE) 1596 xfsize = PAGE_SIZE; 1597 pgoff = (vm_offset_t)(off & PAGE_MASK); 1598 if (PAGE_SIZE - pgoff < xfsize) 1599 xfsize = PAGE_SIZE - pgoff; 1600 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1601 xfsize = uap->nbytes - sbytes; 1602 if (xfsize <= 0) 1603 break; 1604 /* 1605 * Optimize the non-blocking case by looking at the socket space 1606 * before going to the extra work of constituting the sf_buf. 1607 */ 1608 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1609 if (so->so_state & SS_CANTSENDMORE) 1610 error = EPIPE; 1611 else 1612 error = EAGAIN; 1613 sbunlock(&so->so_snd); 1614 goto done; 1615 } 1616 /* 1617 * Attempt to look up the page. 1618 * 1619 * Allocate if not found 1620 * 1621 * Wait and loop if busy. 1622 */ 1623 pg = vm_page_lookup(obj, pindex); 1624 1625 if (pg == NULL) { 1626 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1627 if (pg == NULL) { 1628 VM_WAIT; 1629 goto retry_lookup; 1630 } 1631 vm_page_wakeup(pg); 1632 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1633 goto retry_lookup; 1634 } 1635 1636 /* 1637 * Wire the page so it does not get ripped out from under 1638 * us. 1639 */ 1640 1641 vm_page_wire(pg); 1642 1643 /* 1644 * If page is not valid for what we need, initiate I/O 1645 */ 1646 1647 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1648 struct uio auio; 1649 struct iovec aiov; 1650 int bsize; 1651 1652 /* 1653 * Ensure that our page is still around when the I/O 1654 * completes. 1655 */ 1656 vm_page_io_start(pg); 1657 1658 /* 1659 * Get the page from backing store. 1660 */ 1661 bsize = vp->v_mount->mnt_stat.f_iosize; 1662 auio.uio_iov = &aiov; 1663 auio.uio_iovcnt = 1; 1664 aiov.iov_base = 0; 1665 aiov.iov_len = MAXBSIZE; 1666 auio.uio_resid = MAXBSIZE; 1667 auio.uio_offset = trunc_page(off); 1668 auio.uio_segflg = UIO_NOCOPY; 1669 auio.uio_rw = UIO_READ; 1670 auio.uio_procp = p; 1671 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p); 1672 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16), 1673 p->p_ucred); 1674 VOP_UNLOCK(vp, 0, p); 1675 vm_page_flag_clear(pg, PG_ZERO); 1676 vm_page_io_finish(pg); 1677 if (error) { 1678 vm_page_unwire(pg, 0); 1679 /* 1680 * See if anyone else might know about this page. 1681 * If not and it is not valid, then free it. 1682 */ 1683 if (pg->wire_count == 0 && pg->valid == 0 && 1684 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1685 pg->hold_count == 0) { 1686 vm_page_busy(pg); 1687 vm_page_free(pg); 1688 } 1689 sbunlock(&so->so_snd); 1690 goto done; 1691 } 1692 } 1693 1694 /* 1695 * Allocate a kernel virtual page and insert the physical page 1696 * into it. 1697 */ 1698 1699 sf = sf_buf_alloc(); 1700 sf->m = pg; 1701 pmap_qenter(sf->kva, &pg, 1); 1702 /* 1703 * Get an mbuf header and set it up as having external storage. 1704 */ 1705 MGETHDR(m, M_TRYWAIT, MT_DATA); 1706 if (m == NULL) { 1707 error = ENOBUFS; 1708 sf_buf_free((void *)sf->kva, NULL); 1709 goto done; 1710 } 1711 /* 1712 * Setup external storage for mbuf. 1713 */ 1714 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY, 1715 EXT_SFBUF); 1716 m->m_data = (char *) sf->kva + pgoff; 1717 m->m_pkthdr.len = m->m_len = xfsize; 1718 /* 1719 * Add the buffer to the socket buffer chain. 1720 */ 1721 s = splnet(); 1722retry_space: 1723 /* 1724 * Make sure that the socket is still able to take more data. 1725 * CANTSENDMORE being true usually means that the connection 1726 * was closed. so_error is true when an error was sensed after 1727 * a previous send. 1728 * The state is checked after the page mapping and buffer 1729 * allocation above since those operations may block and make 1730 * any socket checks stale. From this point forward, nothing 1731 * blocks before the pru_send (or more accurately, any blocking 1732 * results in a loop back to here to re-check). 1733 */ 1734 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1735 if (so->so_state & SS_CANTSENDMORE) { 1736 error = EPIPE; 1737 } else { 1738 error = so->so_error; 1739 so->so_error = 0; 1740 } 1741 m_freem(m); 1742 sbunlock(&so->so_snd); 1743 splx(s); 1744 goto done; 1745 } 1746 /* 1747 * Wait for socket space to become available. We do this just 1748 * after checking the connection state above in order to avoid 1749 * a race condition with sbwait(). 1750 */ 1751 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1752 if (so->so_state & SS_NBIO) { 1753 m_freem(m); 1754 sbunlock(&so->so_snd); 1755 splx(s); 1756 error = EAGAIN; 1757 goto done; 1758 } 1759 error = sbwait(&so->so_snd); 1760 /* 1761 * An error from sbwait usually indicates that we've 1762 * been interrupted by a signal. If we've sent anything 1763 * then return bytes sent, otherwise return the error. 1764 */ 1765 if (error) { 1766 m_freem(m); 1767 sbunlock(&so->so_snd); 1768 splx(s); 1769 goto done; 1770 } 1771 goto retry_space; 1772 } 1773 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p); 1774 splx(s); 1775 if (error) { 1776 sbunlock(&so->so_snd); 1777 goto done; 1778 } 1779 } 1780 sbunlock(&so->so_snd); 1781 1782 /* 1783 * Send trailers. Wimp out and use writev(2). 1784 */ 1785 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1786 nuap.fd = uap->s; 1787 nuap.iovp = hdtr.trailers; 1788 nuap.iovcnt = hdtr.trl_cnt; 1789 error = writev(p, &nuap); 1790 if (error) 1791 goto done; 1792 sbytes += p->p_retval[0]; 1793 } 1794 1795done: 1796 if (uap->sbytes != NULL) { 1797 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1798 } 1799 if (vp) 1800 vrele(vp); 1801 if (fp) 1802 fdrop(fp, p); 1803 return (error); 1804}
|